summaryrefslogtreecommitdiffstats
path: root/src/arrow/java/vector
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/arrow/java/vector/pom.xml274
-rw-r--r--src/arrow/java/vector/src/main/codegen/config.fmpp24
-rw-r--r--src/arrow/java/vector/src/main/codegen/data/ArrowTypes.tdd124
-rw-r--r--src/arrow/java/vector/src/main/codegen/data/ValueVectorTypes.tdd206
-rw-r--r--src/arrow/java/vector/src/main/codegen/includes/license.ftl16
-rw-r--r--src/arrow/java/vector/src/main/codegen/includes/vv_imports.ftl61
-rw-r--r--src/arrow/java/vector/src/main/codegen/templates/AbstractFieldReader.java132
-rw-r--r--src/arrow/java/vector/src/main/codegen/templates/AbstractFieldWriter.java230
-rw-r--r--src/arrow/java/vector/src/main/codegen/templates/AbstractPromotableFieldWriter.java238
-rw-r--r--src/arrow/java/vector/src/main/codegen/templates/ArrowType.java375
-rw-r--r--src/arrow/java/vector/src/main/codegen/templates/BaseReader.java85
-rw-r--r--src/arrow/java/vector/src/main/codegen/templates/BaseWriter.java131
-rw-r--r--src/arrow/java/vector/src/main/codegen/templates/CaseSensitiveStructWriters.java53
-rw-r--r--src/arrow/java/vector/src/main/codegen/templates/ComplexCopier.java191
-rw-r--r--src/arrow/java/vector/src/main/codegen/templates/ComplexReaders.java147
-rw-r--r--src/arrow/java/vector/src/main/codegen/templates/ComplexWriters.java211
-rw-r--r--src/arrow/java/vector/src/main/codegen/templates/DenseUnionReader.java229
-rw-r--r--src/arrow/java/vector/src/main/codegen/templates/DenseUnionVector.java943
-rw-r--r--src/arrow/java/vector/src/main/codegen/templates/DenseUnionWriter.java302
-rw-r--r--src/arrow/java/vector/src/main/codegen/templates/HolderReaderImpl.java173
-rw-r--r--src/arrow/java/vector/src/main/codegen/templates/NullReader.java147
-rw-r--r--src/arrow/java/vector/src/main/codegen/templates/StructWriters.java326
-rw-r--r--src/arrow/java/vector/src/main/codegen/templates/UnionFixedSizeListWriter.java319
-rw-r--r--src/arrow/java/vector/src/main/codegen/templates/UnionListWriter.java326
-rw-r--r--src/arrow/java/vector/src/main/codegen/templates/UnionMapWriter.java222
-rw-r--r--src/arrow/java/vector/src/main/codegen/templates/UnionReader.java223
-rw-r--r--src/arrow/java/vector/src/main/codegen/templates/UnionVector.java854
-rw-r--r--src/arrow/java/vector/src/main/codegen/templates/UnionWriter.java364
-rw-r--r--src/arrow/java/vector/src/main/codegen/templates/ValueHolders.java81
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/AddOrGetResult.java46
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/AllocationHelper.java95
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/BaseFixedWidthVector.java930
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/BaseIntVector.java43
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/BaseLargeVariableWidthVector.java1370
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/BaseValueVector.java231
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java1410
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/BigIntVector.java358
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/BitVector.java599
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/BitVectorHelper.java449
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/BufferBacked.java31
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/BufferLayout.java153
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/DateDayVector.java347
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/DateMilliVector.java350
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/Decimal256Vector.java584
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/DecimalVector.java584
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/DensityAwareVector.java57
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/DurationVector.java406
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ElementAddressableVector.java42
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ExtensionTypeVector.java274
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/FieldVector.java93
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/FixedSizeBinaryVector.java386
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/FixedWidthVector.java36
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/Float4Vector.java361
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/Float8Vector.java362
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/FloatingPointVector.java46
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/GenerateSampleData.java337
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/IntVector.java362
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/IntervalDayVector.java433
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/IntervalMonthDayNanoVector.java442
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/IntervalYearVector.java382
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/LargeVarBinaryVector.java305
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/LargeVarCharVector.java331
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/NullCheckingForGet.java84
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/NullVector.java338
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/PeriodDuration.java64
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/SchemaChangeCallBack.java57
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/SmallIntVector.java389
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TimeMicroVector.java347
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TimeMilliVector.java351
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TimeNanoVector.java347
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TimeSecVector.java348
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TimeStampMicroTZVector.java239
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TimeStampMicroVector.java236
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TimeStampMilliTZVector.java238
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TimeStampMilliVector.java236
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TimeStampNanoTZVector.java241
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TimeStampNanoVector.java236
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TimeStampSecTZVector.java238
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TimeStampSecVector.java237
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TimeStampVector.java197
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TinyIntVector.java390
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TypeLayout.java448
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/UInt1Vector.java368
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/UInt2Vector.java346
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/UInt4Vector.java340
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/UInt8Vector.java336
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ValueVector.java285
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/VarBinaryVector.java306
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/VarCharVector.java331
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/VariableWidthVector.java53
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/VectorDefinitionSetter.java26
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java137
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/VectorSchemaRoot.java429
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/VectorUnloader.java107
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ZeroVector.java138
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/compare/ApproxEqualsVisitor.java147
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/compare/Range.java85
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/compare/RangeEqualsVisitor.java563
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/compare/TypeEqualsVisitor.java154
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/compare/VectorEqualsVisitor.java60
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/compare/VectorValueEqualizer.java44
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/compare/VectorVisitor.java61
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/compare/util/ValueEpsilonEqualizers.java149
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/AbstractContainerVector.java140
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/AbstractStructVector.java425
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseListVector.java36
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueVector.java367
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/EmptyValuePopulator.java51
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/FixedSizeListVector.java675
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java1036
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java879
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java122
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/NonNullableStructVector.java440
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/Positionable.java29
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/PromotableVector.java32
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/RepeatedFixedWidthVectorLike.java32
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/RepeatedValueVector.java49
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/RepeatedVariableWidthVectorLike.java40
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/StateTool.java44
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/StructVector.java608
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/VectorWithOrdinal.java34
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/AbstractBaseReader.java118
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/AbstractBaseWriter.java55
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/ComplexWriterImpl.java227
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/NullableStructReaderImpl.java59
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/NullableStructWriterFactory.java48
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/PromotableWriter.java398
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/SingleListReaderImpl.java91
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/SingleStructReaderImpl.java113
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/StructOrListWriterImpl.java137
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionFixedSizeListReader.java105
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionLargeListReader.java109
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionListReader.java107
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionMapReader.java77
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/reader/FieldReader.java35
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/writer/FieldWriter.java33
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/compression/AbstractCompressionCodec.java116
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/compression/CompressionCodec.java62
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/compression/CompressionUtil.java103
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/compression/NoCompressionCodec.java67
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/dictionary/Dictionary.java75
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/dictionary/DictionaryEncoder.java196
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/dictionary/DictionaryHashTable.java295
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/dictionary/DictionaryProvider.java62
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/dictionary/ListSubfieldEncoder.java137
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/dictionary/StructSubfieldEncoder.java196
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/holders/ComplexHolder.java28
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/holders/DenseUnionHolder.java38
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/holders/RepeatedListHolder.java26
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/holders/RepeatedStructHolder.java26
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/holders/UnionHolder.java37
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/holders/ValueHolder.java31
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowFileReader.java230
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowFileWriter.java119
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowMagic.java44
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowReader.java255
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowStreamReader.java229
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowStreamWriter.java86
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowWriter.java210
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/InvalidArrowFileException.java30
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileReader.java806
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileWriter.java417
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/ReadChannel.java102
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/SeekableReadChannel.java43
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/WriteChannel.java162
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowBlock.java95
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowBodyCompression.java55
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowBuffer.java90
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowDictionaryBatch.java94
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowFieldNode.java64
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowFooter.java226
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowMessage.java42
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowRecordBatch.java259
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/FBSerializable.java30
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/FBSerializables.java67
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/IpcOption.java44
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/MessageChannelReader.java91
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/MessageMetadataResult.java115
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/MessageResult.java61
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/MessageSerializer.java736
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/types/DateUnit.java50
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/types/FloatingPointPrecision.java55
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/types/IntervalUnit.java52
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/types/MetadataVersion.java65
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/types/TimeUnit.java50
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java1016
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/types/UnionMode.java57
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/DictionaryEncoding.java88
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/ExtensionTypeRegistry.java42
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/Field.java306
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/FieldType.java123
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/Schema.java247
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/ByteArrayReadableSeekableByteChannel.java86
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/CallBack.java25
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/DataSizeRoundingUtil.java99
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/DateUtility.java134
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/DecimalUtility.java188
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/DictionaryUtility.java145
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/ElementAddressableVectorIterator.java86
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/JsonStringArrayList.java55
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/JsonStringHashMap.java48
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/MapWithOrdinal.java67
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/MapWithOrdinalImpl.java248
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/MultiMapWithOrdinal.java230
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/OversizedAllocationException.java52
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/PromotableMultiMapWithOrdinal.java133
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/SchemaChangeRuntimeException.java48
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/SchemaUtility.java63
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/Text.java688
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/TransferPair.java33
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/Validator.java190
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/ValueVectorUtility.java187
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/VectorAppender.java542
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/VectorBatchAppender.java39
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/VectorSchemaRootAppender.java83
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/validate/MetadataV4UnionChecker.java82
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateUtil.java61
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorBufferVisitor.java246
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorDataVisitor.java180
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorTypeVisitor.java378
-rw-r--r--src/arrow/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorVisitor.java273
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/util/TestSchemaUtil.java51
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/DirtyRootAllocator.java52
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/ITTestLargeVector.java280
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestBitVector.java543
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestBitVectorHelper.java235
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestBufferOwnershipTransfer.java131
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestCopyFrom.java1104
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestDecimal256Vector.java357
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestDecimalVector.java365
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestDenseUnionVector.java639
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestDictionaryVector.java1032
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestDurationVector.java137
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestFixedSizeBinaryVector.java279
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestFixedSizeListVector.java507
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestIntervalMonthDayNanoVector.java99
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestIntervalYearVector.java58
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestLargeListVector.java982
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestLargeVarBinaryVector.java104
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestLargeVarCharVector.java816
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestListVector.java981
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestMapVector.java1113
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestNullCheckingForGet.java92
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestOutOfMemoryForValueVector.java73
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestOversizedAllocationForValueVector.java132
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestPeriodDuration.java46
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestSplitAndTransfer.java410
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestStructVector.java183
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestTypeLayout.java98
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestUnionVector.java520
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestUtils.java45
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java3061
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestVarCharListVector.java77
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestVectorAlloc.java169
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReAlloc.java474
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReset.java168
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestVectorSchemaRoot.java318
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestVectorUnloadLoad.java332
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/compare/TestRangeEqualsVisitor.java740
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/compare/TestTypeEqualsVisitor.java185
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestComplexCopier.java763
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestPromotableWriter.java167
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java1335
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/ipc/BaseFileTest.java849
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/ipc/ITTestIPCWithLargeArrowBuffers.java187
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/ipc/MessageSerializerTest.java247
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowFile.java134
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowFooter.java68
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowReaderWriter.java882
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowStream.java147
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowStreamPipe.java161
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestJSONFile.java458
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestRoundTrip.java628
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestUIntDictionaryRoundTrip.java246
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/ipc/message/TestMessageMetadataResult.java36
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/pojo/TestConvert.java169
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/testing/RandomDataGenerator.java44
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/testing/TestValueVectorPopulator.java604
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/testing/ValueVectorDataPopulator.java708
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestExtensionType.java420
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestField.java63
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestSchema.java254
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/util/DecimalUtilityTest.java127
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/util/TestDataSizeRoundingUtil.java76
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/util/TestElementAddressableVectorIterator.java134
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/util/TestMultiMapWithOrdinal.java60
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/util/TestValidator.java56
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/util/TestVectorAppender.java794
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/util/TestVectorBatchAppender.java72
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/util/TestVectorSchemaRootAppender.java161
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/validate/TestValidateVector.java260
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/validate/TestValidateVectorFull.java234
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/validate/TestValidateVectorSchemaRoot.java101
-rw-r--r--src/arrow/java/vector/src/test/java/org/apache/arrow/vector/validate/TestValidateVectorTypeVisitor.java301
-rw-r--r--src/arrow/java/vector/src/test/resources/logback.xml28
295 files changed, 76818 insertions, 0 deletions
diff --git a/src/arrow/java/vector/pom.xml b/src/arrow/java/vector/pom.xml
new file mode 100644
index 000000000..e37e931ef
--- /dev/null
+++ b/src/arrow/java/vector/pom.xml
@@ -0,0 +1,274 @@
+<?xml version="1.0"?>
+<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor
+ license agreements. See the NOTICE file distributed with this work for additional
+ information regarding copyright ownership. The ASF licenses this file to
+ You under the Apache License, Version 2.0 (the "License"); you may not use
+ this file except in compliance with the License. You may obtain a copy of
+ the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required
+ by applicable law or agreed to in writing, software distributed under the
+ License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+ OF ANY KIND, either express or implied. See the License for the specific
+ language governing permissions and limitations under the License. -->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+ <parent>
+ <groupId>org.apache.arrow</groupId>
+ <artifactId>arrow-java-root</artifactId>
+ <version>6.0.1</version>
+ </parent>
+ <artifactId>arrow-vector</artifactId>
+ <name>Arrow Vectors</name>
+ <description>An off-heap reference implementation for Arrow columnar data format.</description>
+
+ <dependencies>
+
+ <dependency>
+ <groupId>org.apache.arrow</groupId>
+ <artifactId>arrow-format</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.arrow</groupId>
+ <artifactId>arrow-memory-core</artifactId>
+ <version>${project.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>com.fasterxml.jackson.core</groupId>
+ <artifactId>jackson-core</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>com.fasterxml.jackson.core</groupId>
+ <artifactId>jackson-annotations</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>com.fasterxml.jackson.core</groupId>
+ <artifactId>jackson-databind</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>commons-codec</groupId>
+ <artifactId>commons-codec</artifactId>
+ <version>1.10</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.arrow</groupId>
+ <artifactId>arrow-memory-netty</artifactId>
+ <version>${project.version}</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.arrow</groupId>
+ <artifactId>arrow-memory-unsafe</artifactId>
+ <version>${project.version}</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>io.netty</groupId>
+ <artifactId>netty-common</artifactId>
+ </dependency>
+ <dependency>
+ <groupId>com.google.flatbuffers</groupId>
+ <artifactId>flatbuffers-java</artifactId>
+ <version>${dep.fbs.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-api</artifactId>
+ </dependency>
+ </dependencies>
+
+ <pluginRepositories>
+ <pluginRepository>
+ <id>apache</id>
+ <name>apache</name>
+ <url>https://repo.maven.apache.org/maven2/</url>
+ <releases>
+ <enabled>true</enabled>
+ </releases>
+ <snapshots>
+ <enabled>false</enabled>
+ </snapshots>
+ </pluginRepository>
+ </pluginRepositories>
+
+ <build>
+
+ <resources>
+ <resource>
+ <!-- Copy freemarker template and fmpp configuration files of Vector's
+ to allow clients to leverage definitions. -->
+ <directory>${basedir}/src/main/codegen</directory>
+ <targetPath>codegen</targetPath>
+ </resource>
+ </resources>
+
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-surefire-plugin</artifactId>
+ <executions>
+ <execution>
+ <id>default-test</id>
+ <phase>test</phase>
+ <configuration>
+ <classpathDependencyExcludes>
+ <classpathDependencyExclude>org.apache.arrow:arrow-memory-unsafe</classpathDependencyExclude>
+ </classpathDependencyExcludes>
+ </configuration>
+ </execution>
+ <execution>
+ <id>run-unsafe</id>
+ <phase>test</phase>
+ <goals>
+ <goal>test</goal>
+ </goals>
+ <configuration>
+ <classpathDependencyExcludes>
+ <classpathDependencyExclude>org.apache.arrow:arrow-memory-netty</classpathDependencyExclude>
+ </classpathDependencyExcludes>
+ <reportNameSuffix>netty</reportNameSuffix>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+ <plugin>
+ <artifactId>maven-resources-plugin</artifactId>
+ <executions>
+ <execution> <!-- copy all templates in the same location to compile them at once -->
+ <id>copy-fmpp-resources</id>
+ <phase>initialize</phase>
+ <goals>
+ <goal>copy-resources</goal>
+ </goals>
+ <configuration>
+ <outputDirectory>${project.build.directory}/codegen</outputDirectory>
+ <resources>
+ <resource>
+ <directory>src/main/codegen</directory>
+ <filtering>false</filtering>
+ </resource>
+ </resources>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+ <plugin> <!-- generate sources from fmpp -->
+ <groupId>org.apache.drill.tools</groupId>
+ <artifactId>drill-fmpp-maven-plugin</artifactId>
+ <version>1.5.0</version>
+ <executions>
+ <execution>
+ <id>generate-fmpp</id>
+ <phase>generate-sources</phase>
+ <goals>
+ <goal>generate</goal>
+ </goals>
+ <configuration>
+ <config>src/main/codegen/config.fmpp</config>
+ <output>${project.build.directory}/generated-sources</output>
+ <templates>${project.build.directory}/codegen/templates</templates>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-shade-plugin</artifactId>
+ <version>3.1.1</version>
+ <executions>
+ <execution>
+ <phase>package</phase>
+ <goals>
+ <goal>shade</goal>
+ </goals>
+ <configuration>
+ <artifactSet>
+ <includes>
+ <include>org.apache.arrow:arrow-format</include>
+ <include>com.google.flatbuffers:*</include>
+ </includes>
+ </artifactSet>
+ <shadedArtifactAttached>true</shadedArtifactAttached>
+ <shadedClassifierName>shade-format-flatbuffers</shadedClassifierName>
+ <keepDependenciesWithProvidedScope>true</keepDependenciesWithProvidedScope>
+ <promoteTransitiveDependencies>true</promoteTransitiveDependencies>
+ <relocations>
+ <relocation>
+ <pattern>com.google.flatbuffers</pattern>
+ <shadedPattern>arrow.vector.com.google.flatbuffers</shadedPattern>
+ </relocation>
+ </relocations>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+ </plugins>
+ <pluginManagement>
+ <plugins>
+ <!--This plugin's configuration is used to store Eclipse m2e settings
+ only. It has no influence on the Maven build itself. -->
+ <plugin>
+ <groupId>org.eclipse.m2e</groupId>
+ <artifactId>lifecycle-mapping</artifactId>
+ <version>1.0.0</version>
+ <configuration>
+ <lifecycleMappingMetadata>
+ <pluginExecutions>
+ <pluginExecution>
+ <pluginExecutionFilter>
+ <groupId>org.apache.drill.tools</groupId>
+ <artifactId>drill-fmpp-maven-plugin</artifactId>
+ <versionRange>[1.0,)</versionRange>
+ <goals>
+ <goal>generate</goal>
+ </goals>
+ </pluginExecutionFilter>
+ <action>
+ <execute>
+ <runOnIncremental>false</runOnIncremental>
+ <runOnConfiguration>true</runOnConfiguration>
+ </execute>
+ </action>
+ </pluginExecution>
+ </pluginExecutions>
+ </lifecycleMappingMetadata>
+ </configuration>
+ </plugin>
+ </plugins>
+ </pluginManagement>
+
+
+ </build>
+
+
+ <profiles>
+ <profile>
+ <!-- This profile turns on integration testing. It activates the failsafe plugin and will run any tests
+ with the 'IT' prefix. This should be run in a separate CI build or on developers machines as it potentially
+ uses quite a bit of memory. Activate the tests by adding -Pintegration-tests to your maven command line -->
+ <id>integration-tests</id>
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-failsafe-plugin</artifactId>
+ <configuration>
+ <forkedProcessTimeoutInSeconds>3600</forkedProcessTimeoutInSeconds>
+ <systemPropertyVariables>
+ <arrow.memory.debug.allocator>false</arrow.memory.debug.allocator>
+ </systemPropertyVariables>
+ </configuration>
+ <executions>
+ <execution>
+ <goals>
+ <goal>integration-test</goal>
+ <goal>verify</goal>
+ </goals>
+ </execution>
+ </executions>
+ </plugin>
+ </plugins>
+ </build>
+ </profile>
+ </profiles>
+
+</project>
diff --git a/src/arrow/java/vector/src/main/codegen/config.fmpp b/src/arrow/java/vector/src/main/codegen/config.fmpp
new file mode 100644
index 000000000..ef5a5072a
--- /dev/null
+++ b/src/arrow/java/vector/src/main/codegen/config.fmpp
@@ -0,0 +1,24 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+data: {
+ # TODO: Rename to ~valueVectorModesAndTypes for clarity.
+ vv: tdd(../data/ValueVectorTypes.tdd),
+ arrowTypes: tdd(../data/ArrowTypes.tdd)
+
+}
+freemarkerLinks: {
+ includes: includes/
+}
diff --git a/src/arrow/java/vector/src/main/codegen/data/ArrowTypes.tdd b/src/arrow/java/vector/src/main/codegen/data/ArrowTypes.tdd
new file mode 100644
index 000000000..3cf9a9687
--- /dev/null
+++ b/src/arrow/java/vector/src/main/codegen/data/ArrowTypes.tdd
@@ -0,0 +1,124 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+{
+ types: [
+ {
+ name: "Null",
+ fields: [],
+ complex: false
+ },
+ {
+ name: "Struct_",
+ fields: [],
+ complex: true
+ },
+ {
+ name: "List",
+ fields: [],
+ complex: true
+ },
+ {
+ name: "LargeList",
+ fields: [],
+ complex: true
+ },
+ {
+ name: "FixedSizeList",
+ fields: [{name: "listSize", type: int}],
+ complex: true
+ },
+ {
+ name: "Union",
+ fields: [{name: "mode", type: short, valueType: UnionMode}, {name: "typeIds", type: "int[]"}],
+ complex: true
+ },
+ {
+ name: "Map",
+ fields: [{name: "keysSorted", type: boolean}],
+ complex: true
+ },
+ {
+ name: "Int",
+ fields: [{name: "bitWidth", type: int}, {name: "isSigned", type: boolean}],
+ complex: false
+ },
+ {
+ name: "FloatingPoint",
+ fields: [{name: precision, type: short, valueType: FloatingPointPrecision}],
+ complex: false
+ },
+ {
+ name: "Utf8",
+ fields: [],
+ complex: false
+ },
+ {
+ name: "LargeUtf8",
+ fields: [],
+ complex: false
+ },
+ {
+ name: "Binary",
+ fields: [],
+ complex: false
+ },
+ {
+ name: "LargeBinary",
+ fields: [],
+ complex: false
+ },
+ {
+ name: "FixedSizeBinary",
+ fields: [{name: "byteWidth", type: int}],
+ complex: false
+ }
+ {
+ name: "Bool",
+ fields: [],
+ complex: false
+ },
+ {
+ name: "Decimal",
+ fields: [{name: "precision", type: int}, {name: "scale", type: int}, {name: "bitWidth", type: int}],
+ complex: false
+ },
+ {
+ name: "Date",
+ fields: [{name: "unit", type: short, valueType: DateUnit}]
+ complex: false
+ },
+ {
+ name: "Time",
+ fields: [{name: "unit", type: short, valueType: TimeUnit}, {name: "bitWidth", type: int}],
+ complex: false
+ },
+ {
+ name: "Timestamp",
+ fields: [{name: "unit", type: short, valueType: TimeUnit}, {name: "timezone", type: String}]
+ complex: false
+ },
+ {
+ name: "Interval",
+ fields: [{name: "unit", type: short, valueType: IntervalUnit}],
+ complex: false
+ },
+ {
+ name: "Duration",
+ fields: [{name: "unit", type: short, valueType: TimeUnit}],
+ complex: false
+ }
+ ]
+}
diff --git a/src/arrow/java/vector/src/main/codegen/data/ValueVectorTypes.tdd b/src/arrow/java/vector/src/main/codegen/data/ValueVectorTypes.tdd
new file mode 100644
index 000000000..2a9218042
--- /dev/null
+++ b/src/arrow/java/vector/src/main/codegen/data/ValueVectorTypes.tdd
@@ -0,0 +1,206 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+{
+ modes: [
+ {name: "Optional", prefix: "Nullable"},
+ {name: "Required", prefix: ""}
+ ],
+ types: [
+ {
+ major: "Fixed",
+ width: 1,
+ javaType: "byte",
+ boxedType: "Byte",
+ fields: [{name: "value", type: "byte"}],
+ minor: [
+ { class: "TinyInt", valueHolder: "IntHolder" },
+ { class: "UInt1", valueHolder: "UInt1Holder" }
+ ]
+ },
+ {
+ major: "Fixed",
+ width: 2,
+ javaType: "char",
+ boxedType: "Character",
+ fields: [{name: "value", type: "char"}],
+ minor: [
+ { class: "UInt2", valueHolder: "UInt2Holder"}
+ ]
+ }, {
+ major: "Fixed",
+ width: 2,
+ javaType: "short",
+ boxedType: "Short",
+ fields: [{name: "value", type: "short"}],
+ minor: [
+ { class: "SmallInt", valueHolder: "Int2Holder"},
+ ]
+ },
+ {
+ major: "Fixed",
+ width: 4,
+ javaType: "int",
+ boxedType: "Integer",
+ fields: [{name: "value", type: "int"}],
+ minor: [
+ { class: "Int", valueHolder: "IntHolder"},
+ { class: "UInt4", valueHolder: "UInt4Holder" },
+ { class: "Float4", javaType: "float" , boxedType: "Float", fields: [{name: "value", type: "float"}]},
+ { class: "DateDay" },
+ { class: "IntervalYear", javaType: "int", friendlyType: "Period" },
+ { class: "TimeSec" },
+ { class: "TimeMilli", javaType: "int", friendlyType: "LocalDateTime" }
+ ]
+ },
+ {
+ major: "Fixed",
+ width: 8,
+ javaType: "long",
+ boxedType: "Long",
+ fields: [{name: "value", type: "long"}],
+ minor: [
+ { class: "BigInt"},
+ { class: "UInt8" },
+ { class: "Float8", javaType: "double", boxedType: "Double", fields: [{name: "value", type: "double"}] },
+ { class: "DateMilli", javaType: "long", friendlyType: "LocalDateTime" },
+ { class: "Duration", javaType: "long", friendlyType: "Duration",
+ arrowType: "org.apache.arrow.vector.types.pojo.ArrowType.Duration",
+ typeParams: [ {name: "unit", type: "org.apache.arrow.vector.types.TimeUnit"} ],
+ arrowTypeConstructorParams: ["unit"]}
+ { class: "TimeStampSec", javaType: "long", boxedType: "Long", friendlyType: "LocalDateTime" },
+ { class: "TimeStampMilli", javaType: "long", boxedType: "Long", friendlyType: "LocalDateTime" },
+ { class: "TimeStampMicro", javaType: "long", boxedType: "Long", friendlyType: "LocalDateTime" },
+ { class: "TimeStampNano", javaType: "long", boxedType: "Long", friendlyType: "LocalDateTime" },
+ { class: "TimeStampSecTZ", javaType: "long", boxedType: "Long",
+ typeParams: [ {name: "timezone", type: "String"} ],
+ arrowType: "org.apache.arrow.vector.types.pojo.ArrowType.Timestamp",
+ arrowTypeConstructorParams: ["org.apache.arrow.vector.types.TimeUnit.SECOND", "timezone"] },
+ { class: "TimeStampMilliTZ", javaType: "long", boxedType: "Long",
+ typeParams: [ {name: "timezone", type: "String"} ],
+ arrowType: "org.apache.arrow.vector.types.pojo.ArrowType.Timestamp",
+ arrowTypeConstructorParams: ["org.apache.arrow.vector.types.TimeUnit.MILLISECOND", "timezone"] },
+ { class: "TimeStampMicroTZ", javaType: "long", boxedType: "Long",
+ typeParams: [ {name: "timezone", type: "String"} ],
+ arrowType: "org.apache.arrow.vector.types.pojo.ArrowType.Timestamp",
+ arrowTypeConstructorParams: ["org.apache.arrow.vector.types.TimeUnit.MICROSECOND", "timezone"] },
+ { class: "TimeStampNanoTZ", javaType: "long", boxedType: "Long",
+ typeParams: [ {name: "timezone", type: "String"} ],
+ arrowType: "org.apache.arrow.vector.types.pojo.ArrowType.Timestamp",
+ arrowTypeConstructorParams: ["org.apache.arrow.vector.types.TimeUnit.NANOSECOND", "timezone"] },
+ { class: "TimeMicro" },
+ { class: "TimeNano" }
+ ]
+ },
+ {
+ major: "Fixed",
+ width: 8,
+ javaType: "ArrowBuf",
+ boxedType: "ArrowBuf",
+ minor: [
+ { class: "IntervalDay", millisecondsOffset: 4, friendlyType: "Duration", fields: [ {name: "days", type:"int"}, {name: "milliseconds", type:"int"}] }
+ ]
+ },
+ {
+ major: "Fixed",
+ width: 16,
+ javaType: "ArrowBuf",
+ boxedType: "ArrowBuf",
+ minor: [
+ { class: "IntervalMonthDayNano", daysOffset: 4, nanosecondsOffset: 8, friendlyType: "PeriodDuration", fields: [ {name: "months", type:"int"}, {name: "days", type:"int"}, {name: "nanoseconds", type:"long"}] }
+ ]
+ },
+
+ {
+ major: "Fixed",
+ width: 32,
+ javaType: "ArrowBuf",
+ boxedType: "ArrowBuf",
+
+ minor: [
+ {
+ class: "Decimal256",
+ maxPrecisionDigits: 76, nDecimalDigits: 4, friendlyType: "BigDecimal",
+ typeParams: [ {name: "scale", type: "int"}, { name: "precision", type: "int"}],
+ arrowType: "org.apache.arrow.vector.types.pojo.ArrowType.Decimal",
+ fields: [{name: "start", type: "long"}, {name: "buffer", type: "ArrowBuf"}]
+ }
+ ]
+ },
+ {
+ major: "Fixed",
+ width: 16,
+ javaType: "ArrowBuf",
+ boxedType: "ArrowBuf",
+
+ minor: [
+ {
+ class: "Decimal",
+ maxPrecisionDigits: 38, nDecimalDigits: 4, friendlyType: "BigDecimal",
+ typeParams: [ {name: "scale", type: "int"}, { name: "precision", type: "int"}],
+ arrowType: "org.apache.arrow.vector.types.pojo.ArrowType.Decimal",
+ fields: [{name: "start", type: "long"}, {name: "buffer", type: "ArrowBuf"}]
+ }
+ ]
+ },
+
+ {
+ major: "Fixed",
+ width: -1,
+ javaType: "byte[]",
+ boxedType: "ArrowBuf",
+ minor: [
+ {
+ class: "FixedSizeBinary",
+ typeParams: [ {name: "byteWidth", type: "int"} ],
+ arrowType: "org.apache.arrow.vector.types.pojo.ArrowType.FixedSizeBinary",
+ friendlyType: "byte[]",
+ fields: [{name: "buffer", type: "ArrowBuf"}],
+ }
+ ]
+ },
+ {
+ major: "VarLen",
+ width: 4,
+ javaType: "int",
+ boxedType: "ArrowBuf",
+ fields: [{name: "start", type: "int"}, {name: "end", type: "int"}, {name: "buffer", type: "ArrowBuf"}],
+ minor: [
+ { class: "VarBinary" , friendlyType: "byte[]" },
+ { class: "VarChar" , friendlyType: "Text" }
+ ]
+ },
+ {
+ major: "VarLen",
+ width: 8,
+ javaType: "long",
+ boxedType: "ArrowBuf",
+ fields: [{name: "start", type: "long"}, {name: "end", type: "long"}, {name: "buffer", type: "ArrowBuf"}],
+ minor: [
+ { class: "LargeVarChar" , friendlyType: "Text" }
+ { class: "LargeVarBinary" , friendlyType: "byte[]" }
+ ]
+ },
+ {
+ major: "Bit",
+ width: 1,
+ javaType: "int",
+ boxedType: "Integer",
+ minor: [
+ { class: "Bit" , friendlyType: "Boolean", fields: [{name: "value", type: "int"}] }
+ ]
+ }
+ ]
+}
diff --git a/src/arrow/java/vector/src/main/codegen/includes/license.ftl b/src/arrow/java/vector/src/main/codegen/includes/license.ftl
new file mode 100644
index 000000000..c6a5afeef
--- /dev/null
+++ b/src/arrow/java/vector/src/main/codegen/includes/license.ftl
@@ -0,0 +1,16 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ \ No newline at end of file
diff --git a/src/arrow/java/vector/src/main/codegen/includes/vv_imports.ftl b/src/arrow/java/vector/src/main/codegen/includes/vv_imports.ftl
new file mode 100644
index 000000000..c9a8820b2
--- /dev/null
+++ b/src/arrow/java/vector/src/main/codegen/includes/vv_imports.ftl
@@ -0,0 +1,61 @@
+<#--
+ ~ Licensed to the Apache Software Foundation (ASF) under one or more
+ ~ contributor license agreements. See the NOTICE file distributed with
+ ~ this work for additional information regarding copyright ownership.
+ ~ The ASF licenses this file to You under the Apache License, Version 2.0
+ ~ (the "License"); you may not use this file except in compliance with
+ ~ the License. You may obtain a copy of the License at
+ ~
+ ~ http://www.apache.org/licenses/LICENSE-2.0
+ ~
+ ~ Unless required by applicable law or agreed to in writing, software
+ ~ distributed under the License is distributed on an "AS IS" BASIS,
+ ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ~ See the License for the specific language governing permissions and
+ ~ limitations under the License.
+ -->
+
+import static org.apache.arrow.util.Preconditions.checkArgument;
+import static org.apache.arrow.util.Preconditions.checkState;
+
+import com.google.flatbuffers.FlatBufferBuilder;
+
+import org.apache.arrow.memory.*;
+import org.apache.arrow.util.Preconditions;
+import org.apache.arrow.vector.types.Types;
+import org.apache.arrow.vector.types.Types.*;
+import org.apache.arrow.vector.types.pojo.*;
+import org.apache.arrow.vector.types.pojo.ArrowType.*;
+import org.apache.arrow.vector.types.*;
+import org.apache.arrow.vector.*;
+import org.apache.arrow.vector.holders.*;
+import org.apache.arrow.vector.util.*;
+import org.apache.arrow.vector.complex.*;
+import org.apache.arrow.vector.complex.reader.*;
+import org.apache.arrow.vector.complex.impl.*;
+import org.apache.arrow.vector.complex.writer.*;
+import org.apache.arrow.vector.complex.writer.BaseWriter.StructWriter;
+import org.apache.arrow.vector.complex.writer.BaseWriter.ListWriter;
+import org.apache.arrow.vector.complex.writer.BaseWriter.MapWriter;
+import org.apache.arrow.vector.util.JsonStringArrayList;
+
+import java.util.Arrays;
+import java.util.Random;
+import java.util.List;
+
+import java.io.Closeable;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.nio.ByteBuffer;
+
+import java.sql.Date;
+import java.sql.Time;
+import java.sql.Timestamp;
+import java.math.BigDecimal;
+import java.math.BigInteger;
+import java.time.Duration;
+import java.time.LocalDateTime;
+import java.time.Period;
+import java.time.ZonedDateTime;
+
+
diff --git a/src/arrow/java/vector/src/main/codegen/templates/AbstractFieldReader.java b/src/arrow/java/vector/src/main/codegen/templates/AbstractFieldReader.java
new file mode 100644
index 000000000..e3c872946
--- /dev/null
+++ b/src/arrow/java/vector/src/main/codegen/templates/AbstractFieldReader.java
@@ -0,0 +1,132 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+<@pp.dropOutputFile />
+<@pp.changeOutputFile name="/org/apache/arrow/vector/complex/impl/AbstractFieldReader.java" />
+
+
+<#include "/@includes/license.ftl" />
+
+package org.apache.arrow.vector.complex.impl;
+
+<#include "/@includes/vv_imports.ftl" />
+
+/**
+ * Source code generated using FreeMarker template ${.template_name}
+ */
+@SuppressWarnings("unused")
+abstract class AbstractFieldReader extends AbstractBaseReader implements FieldReader{
+
+ AbstractFieldReader(){
+ super();
+ }
+
+ /**
+ * Returns true if the current value of the reader is not null
+ * @return whether the current value is set
+ */
+ public boolean isSet() {
+ return true;
+ }
+
+ @Override
+ public Field getField() {
+ fail("getField");
+ return null;
+ }
+
+ <#list ["Object", "BigDecimal", "Short", "Integer", "Long", "Boolean",
+ "LocalDateTime", "Duration", "Period", "Double", "Float",
+ "Character", "Text", "String", "Byte", "byte[]", "PeriodDuration"] as friendlyType>
+ <#assign safeType=friendlyType />
+ <#if safeType=="byte[]"><#assign safeType="ByteArray" /></#if>
+ public ${friendlyType} read${safeType}(int arrayIndex) {
+ fail("read${safeType}(int arrayIndex)");
+ return null;
+ }
+
+ public ${friendlyType} read${safeType}() {
+ fail("read${safeType}()");
+ return null;
+ }
+
+ </#list>
+ public void copyAsValue(StructWriter writer) {
+ fail("CopyAsValue StructWriter");
+ }
+
+ public void copyAsField(String name, StructWriter writer) {
+ fail("CopyAsField StructWriter");
+ }
+
+ public void copyAsField(String name, ListWriter writer) {
+ fail("CopyAsFieldList");
+ }
+
+ public void copyAsField(String name, MapWriter writer) {
+ fail("CopyAsFieldMap");
+ }
+
+ <#list vv.types as type><#list type.minor as minor><#assign name = minor.class?cap_first />
+ <#assign boxedType = (minor.boxedType!type.boxedType) />
+ public void read(${name}Holder holder) {
+ fail("${name}");
+ }
+
+ public void read(Nullable${name}Holder holder) {
+ fail("${name}");
+ }
+
+ public void read(int arrayIndex, ${name}Holder holder) {
+ fail("Repeated${name}");
+ }
+
+ public void read(int arrayIndex, Nullable${name}Holder holder) {
+ fail("Repeated${name}");
+ }
+
+ public void copyAsValue(${name}Writer writer) {
+ fail("CopyAsValue${name}");
+ }
+
+ public void copyAsField(String name, ${name}Writer writer) {
+ fail("CopyAsField${name}");
+ }
+
+ </#list></#list>
+ public FieldReader reader(String name) {
+ fail("reader(String name)");
+ return null;
+ }
+
+ public FieldReader reader() {
+ fail("reader()");
+ return null;
+ }
+
+ public int size() {
+ fail("size()");
+ return -1;
+ }
+
+ private void fail(String name) {
+ throw new IllegalArgumentException(String.format("You tried to read a [%s] type when you are using a field reader of type [%s].", name, this.getClass().getSimpleName()));
+ }
+}
+
+
+
diff --git a/src/arrow/java/vector/src/main/codegen/templates/AbstractFieldWriter.java b/src/arrow/java/vector/src/main/codegen/templates/AbstractFieldWriter.java
new file mode 100644
index 000000000..1f80f2526
--- /dev/null
+++ b/src/arrow/java/vector/src/main/codegen/templates/AbstractFieldWriter.java
@@ -0,0 +1,230 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+<@pp.dropOutputFile />
+<@pp.changeOutputFile name="/org/apache/arrow/vector/complex/impl/AbstractFieldWriter.java" />
+
+
+<#include "/@includes/license.ftl" />
+
+package org.apache.arrow.vector.complex.impl;
+
+<#include "/@includes/vv_imports.ftl" />
+
+/*
+ * This class is generated using freemarker and the ${.template_name} template.
+ */
+@SuppressWarnings("unused")
+abstract class AbstractFieldWriter extends AbstractBaseWriter implements FieldWriter {
+
+ protected boolean addVectorAsNullable = true;
+
+ /**
+ * Set flag to control the FieldType.nullable property when a writer creates a new vector.
+ * If true then vectors created will be nullable, this is the default behavior. If false then
+ * vectors created will be non-nullable.
+ *
+ * @param nullable Whether or not to create nullable vectors (default behavior is true)
+ */
+ public void setAddVectorAsNullable(boolean nullable) {
+ addVectorAsNullable = nullable;
+ }
+
+ @Override
+ public void start() {
+ throw new IllegalStateException(String.format("You tried to start when you are using a ValueWriter of type %s.", this.getClass().getSimpleName()));
+ }
+
+ @Override
+ public void end() {
+ throw new IllegalStateException(String.format("You tried to end when you are using a ValueWriter of type %s.", this.getClass().getSimpleName()));
+ }
+
+ @Override
+ public void startList() {
+ throw new IllegalStateException(String.format("You tried to start a list when you are using a ValueWriter of type %s.", this.getClass().getSimpleName()));
+ }
+
+ @Override
+ public void endList() {
+ throw new IllegalStateException(String.format("You tried to end a list when you are using a ValueWriter of type %s.", this.getClass().getSimpleName()));
+ }
+
+ @Override
+ public void startMap() {
+ throw new IllegalStateException(String.format("You tried to start a map when you are using a ValueWriter of type %s.", this.getClass().getSimpleName()));
+ }
+
+ @Override
+ public void endMap() {
+ throw new IllegalStateException(String.format("You tried to end a map when you are using a ValueWriter of type %s.", this.getClass().getSimpleName()));
+ }
+
+ @Override
+ public void startEntry() {
+ throw new IllegalStateException(String.format("You tried to start a map entry when you are using a ValueWriter of type %s.", this.getClass().getSimpleName()));
+ }
+
+ @Override
+ public MapWriter key() {
+ throw new IllegalStateException(String.format("You tried to start a map key when you are using a ValueWriter of type %s.", this.getClass().getSimpleName()));
+ }
+
+ @Override
+ public MapWriter value() {
+ throw new IllegalStateException(String.format("You tried to start a map value when you are using a ValueWriter of type %s.", this.getClass().getSimpleName()));
+ }
+
+ @Override
+ public void endEntry() {
+ throw new IllegalStateException(String.format("You tried to end a map entry when you are using a ValueWriter of type %s.", this.getClass().getSimpleName()));
+ }
+
+ <#list vv.types as type><#list type.minor as minor><#assign name = minor.class?cap_first />
+ <#assign fields = minor.fields!type.fields />
+ <#assign friendlyType = (minor.friendlyType!minor.boxedType!type.boxedType) />
+ @Override
+ public void write(${name}Holder holder) {
+ fail("${name}");
+ }
+
+ public void write${minor.class}(<#list fields as field>${field.type} ${field.name}<#if field_has_next>, </#if></#list>) {
+ fail("${name}");
+ }
+
+ <#if minor.class?starts_with("Decimal")>
+ public void write${minor.class}(${friendlyType} value) {
+ fail("${name}");
+ }
+
+ public void write${minor.class}(<#list fields as field>${field.type} ${field.name}<#if field_has_next>, </#if></#list>, ArrowType arrowType) {
+ fail("${name}");
+ }
+
+ public void writeBigEndianBytesTo${minor.class}(byte[] value) {
+ fail("${name}");
+ }
+
+ public void writeBigEndianBytesTo${minor.class}(byte[] value, ArrowType arrowType) {
+ fail("${name}");
+ }
+ </#if>
+
+ </#list></#list>
+
+ public void writeNull() {
+ fail("${name}");
+ }
+
+ /**
+ * This implementation returns {@code false}.
+ * <p>
+ * Must be overridden by struct writers.
+ * </p>
+ */
+ @Override
+ public boolean isEmptyStruct() {
+ return false;
+ }
+
+ @Override
+ public StructWriter struct() {
+ fail("Struct");
+ return null;
+ }
+
+ @Override
+ public ListWriter list() {
+ fail("List");
+ return null;
+ }
+
+ @Override
+ public MapWriter map() {
+ fail("Map");
+ return null;
+ }
+
+ @Override
+ public StructWriter struct(String name) {
+ fail("Struct");
+ return null;
+ }
+
+ @Override
+ public ListWriter list(String name) {
+ fail("List");
+ return null;
+ }
+
+ @Override
+ public MapWriter map(String name) {
+ fail("Map");
+ return null;
+ }
+
+ @Override
+ public MapWriter map(boolean keysSorted) {
+ fail("Map");
+ return null;
+ }
+
+ @Override
+ public MapWriter map(String name, boolean keysSorted) {
+ fail("Map");
+ return null;
+ }
+ <#list vv.types as type><#list type.minor as minor>
+ <#assign lowerName = minor.class?uncap_first />
+ <#if lowerName == "int" ><#assign lowerName = "integer" /></#if>
+ <#assign upperName = minor.class?upper_case />
+ <#assign capName = minor.class?cap_first />
+ <#if minor.typeParams?? >
+
+ @Override
+ public ${capName}Writer ${lowerName}(String name<#list minor.typeParams as typeParam>, ${typeParam.type} ${typeParam.name}</#list>) {
+ fail("${capName}(" + <#list minor.typeParams as typeParam>"${typeParam.name}: " + ${typeParam.name} + ", " + </#list>")");
+ return null;
+ }
+ </#if>
+
+ @Override
+ public ${capName}Writer ${lowerName}(String name) {
+ fail("${capName}");
+ return null;
+ }
+
+ @Override
+ public ${capName}Writer ${lowerName}() {
+ fail("${capName}");
+ return null;
+ }
+
+ </#list></#list>
+
+ public void copyReader(FieldReader reader) {
+ fail("Copy FieldReader");
+ }
+
+ public void copyReaderToField(String name, FieldReader reader) {
+ fail("Copy FieldReader to STring");
+ }
+
+ private void fail(String name) {
+ throw new IllegalArgumentException(String.format("You tried to write a %s type when you are using a ValueWriter of type %s.", name, this.getClass().getSimpleName()));
+ }
+}
diff --git a/src/arrow/java/vector/src/main/codegen/templates/AbstractPromotableFieldWriter.java b/src/arrow/java/vector/src/main/codegen/templates/AbstractPromotableFieldWriter.java
new file mode 100644
index 000000000..264e85021
--- /dev/null
+++ b/src/arrow/java/vector/src/main/codegen/templates/AbstractPromotableFieldWriter.java
@@ -0,0 +1,238 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+<@pp.dropOutputFile />
+<@pp.changeOutputFile name="/org/apache/arrow/vector/complex/impl/AbstractPromotableFieldWriter.java" />
+
+
+<#include "/@includes/license.ftl" />
+
+package org.apache.arrow.vector.complex.impl;
+
+<#include "/@includes/vv_imports.ftl" />
+
+/*
+ * A FieldWriter which delegates calls to another FieldWriter. The delegate FieldWriter can be promoted to a new type
+ * when necessary. Classes that extend this class are responsible for handling promotion.
+ *
+ * This class is generated using freemarker and the ${.template_name} template.
+ *
+ */
+@SuppressWarnings("unused")
+abstract class AbstractPromotableFieldWriter extends AbstractFieldWriter {
+ /**
+ * Retrieve the FieldWriter, promoting if it is not a FieldWriter of the specified type
+ * @param type the type of the values we want to write
+ * @return the corresponding field writer
+ */
+ protected FieldWriter getWriter(MinorType type) {
+ return getWriter(type, null);
+ }
+
+ abstract protected FieldWriter getWriter(MinorType type, ArrowType arrowType);
+
+ /**
+ * @return the current FieldWriter
+ */
+ abstract protected FieldWriter getWriter();
+
+ @Override
+ public void start() {
+ getWriter(MinorType.STRUCT).start();
+ }
+
+ @Override
+ public void end() {
+ getWriter(MinorType.STRUCT).end();
+ setPosition(idx() + 1);
+ }
+
+ @Override
+ public void startList() {
+ getWriter(MinorType.LIST).startList();
+ }
+
+ @Override
+ public void endList() {
+ getWriter(MinorType.LIST).endList();
+ setPosition(idx() + 1);
+ }
+
+ @Override
+ public void startMap() {
+ getWriter(MinorType.MAP).startMap();
+ }
+
+ @Override
+ public void endMap() {
+ getWriter(MinorType.MAP).endMap();
+ setPosition(idx() + 1);
+ }
+
+ @Override
+ public void startEntry() {
+ getWriter(MinorType.MAP).startEntry();
+ }
+
+ @Override
+ public MapWriter key() {
+ return getWriter(MinorType.MAP).key();
+ }
+
+ @Override
+ public MapWriter value() {
+ return getWriter(MinorType.MAP).value();
+ }
+
+ @Override
+ public void endEntry() {
+ getWriter(MinorType.MAP).endEntry();
+ }
+
+ <#list vv.types as type><#list type.minor as minor><#assign name = minor.class?cap_first />
+ <#assign fields = minor.fields!type.fields />
+ <#if minor.class != "Decimal" && minor.class != "Decimal256">
+ @Override
+ public void write(${name}Holder holder) {
+ getWriter(MinorType.${name?upper_case}).write(holder);
+ }
+
+ public void write${minor.class}(<#list fields as field>${field.type} ${field.name}<#if field_has_next>, </#if></#list>) {
+ getWriter(MinorType.${name?upper_case}).write${minor.class}(<#list fields as field>${field.name}<#if field_has_next>, </#if></#list>);
+ }
+
+ <#elseif minor.class == "Decimal">
+ @Override
+ public void write(DecimalHolder holder) {
+ getWriter(MinorType.DECIMAL).write(holder);
+ }
+
+ public void writeDecimal(int start, ArrowBuf buffer, ArrowType arrowType) {
+ getWriter(MinorType.DECIMAL).writeDecimal(start, buffer, arrowType);
+ }
+
+ public void writeDecimal(int start, ArrowBuf buffer) {
+ getWriter(MinorType.DECIMAL).writeDecimal(start, buffer);
+ }
+
+ public void writeBigEndianBytesToDecimal(byte[] value, ArrowType arrowType) {
+ getWriter(MinorType.DECIMAL).writeBigEndianBytesToDecimal(value, arrowType);
+ }
+
+ public void writeBigEndianBytesToDecimal(byte[] value) {
+ getWriter(MinorType.DECIMAL).writeBigEndianBytesToDecimal(value);
+ }
+ <#elseif minor.class == "Decimal256">
+ @Override
+ public void write(Decimal256Holder holder) {
+ getWriter(MinorType.DECIMAL256).write(holder);
+ }
+
+ public void writeDecimal256(long start, ArrowBuf buffer, ArrowType arrowType) {
+ getWriter(MinorType.DECIMAL256).writeDecimal256(start, buffer, arrowType);
+ }
+
+ public void writeDecimal256(long start, ArrowBuf buffer) {
+ getWriter(MinorType.DECIMAL256).writeDecimal256(start, buffer);
+ }
+ public void writeBigEndianBytesToDecimal256(byte[] value, ArrowType arrowType) {
+ getWriter(MinorType.DECIMAL256).writeBigEndianBytesToDecimal256(value, arrowType);
+ }
+
+ public void writeBigEndianBytesToDecimal256(byte[] value) {
+ getWriter(MinorType.DECIMAL256).writeBigEndianBytesToDecimal256(value);
+ }
+
+
+ </#if>
+
+ </#list></#list>
+ public void writeNull() {
+ }
+
+ @Override
+ public StructWriter struct() {
+ return getWriter(MinorType.LIST).struct();
+ }
+
+ @Override
+ public ListWriter list() {
+ return getWriter(MinorType.LIST).list();
+ }
+
+ @Override
+ public MapWriter map() {
+ return getWriter(MinorType.LIST).map();
+ }
+
+ @Override
+ public MapWriter map(boolean keysSorted) {
+ return getWriter(MinorType.MAP, new ArrowType.Map(keysSorted));
+ }
+
+ @Override
+ public StructWriter struct(String name) {
+ return getWriter(MinorType.STRUCT).struct(name);
+ }
+
+ @Override
+ public ListWriter list(String name) {
+ return getWriter(MinorType.STRUCT).list(name);
+ }
+
+ @Override
+ public MapWriter map(String name) {
+ return getWriter(MinorType.STRUCT).map(name);
+ }
+
+ @Override
+ public MapWriter map(String name, boolean keysSorted) {
+ return getWriter(MinorType.STRUCT).map(name, keysSorted);
+ }
+ <#list vv.types as type><#list type.minor as minor>
+ <#assign lowerName = minor.class?uncap_first />
+ <#if lowerName == "int" ><#assign lowerName = "integer" /></#if>
+ <#assign upperName = minor.class?upper_case />
+ <#assign capName = minor.class?cap_first />
+
+ <#if minor.typeParams?? >
+ @Override
+ public ${capName}Writer ${lowerName}(String name<#list minor.typeParams as typeParam>, ${typeParam.type} ${typeParam.name}</#list>) {
+ return getWriter(MinorType.STRUCT).${lowerName}(name<#list minor.typeParams as typeParam>, ${typeParam.name}</#list>);
+ }
+
+ </#if>
+ @Override
+ public ${capName}Writer ${lowerName}(String name) {
+ return getWriter(MinorType.STRUCT).${lowerName}(name);
+ }
+
+ @Override
+ public ${capName}Writer ${lowerName}() {
+ return getWriter(MinorType.LIST).${lowerName}();
+ }
+
+ </#list></#list>
+
+ public void copyReader(FieldReader reader) {
+ getWriter().copyReader(reader);
+ }
+
+ public void copyReaderToField(String name, FieldReader reader) {
+ getWriter().copyReaderToField(name, reader);
+ }
+}
diff --git a/src/arrow/java/vector/src/main/codegen/templates/ArrowType.java b/src/arrow/java/vector/src/main/codegen/templates/ArrowType.java
new file mode 100644
index 000000000..b08d4ad0a
--- /dev/null
+++ b/src/arrow/java/vector/src/main/codegen/templates/ArrowType.java
@@ -0,0 +1,375 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+<@pp.dropOutputFile />
+<@pp.changeOutputFile name="/org/apache/arrow/vector/types/pojo/ArrowType.java" />
+<#include "/@includes/license.ftl" />
+
+package org.apache.arrow.vector.types.pojo;
+
+import com.google.flatbuffers.FlatBufferBuilder;
+
+import java.util.Objects;
+
+import org.apache.arrow.flatbuf.Type;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.types.*;
+import org.apache.arrow.vector.FieldVector;
+
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonIgnore;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.fasterxml.jackson.annotation.JsonSubTypes;
+import com.fasterxml.jackson.annotation.JsonTypeInfo;
+
+/**
+ * Arrow types
+ * Source code generated using FreeMarker template ${.template_name}
+ **/
+@JsonTypeInfo(
+ use = JsonTypeInfo.Id.NAME,
+ include = JsonTypeInfo.As.PROPERTY,
+ property = "name")
+@JsonSubTypes({
+<#list arrowTypes.types as type>
+ @JsonSubTypes.Type(value = ArrowType.${type.name?remove_ending("_")}.class, name = "${type.name?remove_ending("_")?lower_case}"),
+</#list>
+})
+public abstract class ArrowType {
+
+ public static abstract class PrimitiveType extends ArrowType {
+
+ private PrimitiveType() {
+ }
+
+ @Override
+ public boolean isComplex() {
+ return false;
+ }
+ }
+
+ public static abstract class ComplexType extends ArrowType {
+
+ private ComplexType() {
+ }
+
+ @Override
+ public boolean isComplex() {
+ return true;
+ }
+ }
+
+ public static enum ArrowTypeID {
+ <#list arrowTypes.types as type>
+ <#assign name = type.name>
+ ${name?remove_ending("_")}(Type.${name}),
+ </#list>
+ NONE(Type.NONE);
+
+ private final byte flatbufType;
+
+ public byte getFlatbufID() {
+ return this.flatbufType;
+ }
+
+ private ArrowTypeID(byte flatbufType) {
+ this.flatbufType = flatbufType;
+ }
+ }
+
+ @JsonIgnore
+ public abstract ArrowTypeID getTypeID();
+ @JsonIgnore
+ public abstract boolean isComplex();
+ public abstract int getType(FlatBufferBuilder builder);
+ public abstract <T> T accept(ArrowTypeVisitor<T> visitor);
+
+ /**
+ * to visit the ArrowTypes
+ * <code>
+ * type.accept(new ArrowTypeVisitor&lt;Type&gt;() {
+ * ...
+ * });
+ * </code>
+ */
+ public static interface ArrowTypeVisitor<T> {
+ <#list arrowTypes.types as type>
+ T visit(${type.name?remove_ending("_")} type);
+ </#list>
+ default T visit(ExtensionType type) {
+ return type.storageType().accept(this);
+ }
+ }
+
+ /**
+ * to visit the Complex ArrowTypes and bundle Primitive ones in one case
+ */
+ public static abstract class ComplexTypeVisitor<T> implements ArrowTypeVisitor<T> {
+
+ public T visit(PrimitiveType type) {
+ throw new UnsupportedOperationException("Unexpected Primitive type: " + type);
+ }
+
+ <#list arrowTypes.types as type>
+ <#if !type.complex>
+ public final T visit(${type.name?remove_ending("_")} type) {
+ return visit((PrimitiveType) type);
+ }
+ </#if>
+ </#list>
+ }
+
+ /**
+ * to visit the Primitive ArrowTypes and bundle Complex ones under one case
+ */
+ public static abstract class PrimitiveTypeVisitor<T> implements ArrowTypeVisitor<T> {
+
+ public T visit(ComplexType type) {
+ throw new UnsupportedOperationException("Unexpected Complex type: " + type);
+ }
+
+ <#list arrowTypes.types as type>
+ <#if type.complex>
+ public final T visit(${type.name?remove_ending("_")} type) {
+ return visit((ComplexType) type);
+ }
+ </#if>
+ </#list>
+ }
+
+ <#list arrowTypes.types as type>
+ <#assign name = type.name?remove_ending("_")>
+ <#assign fields = type.fields>
+ public static class ${name} extends <#if type.complex>ComplexType<#else>PrimitiveType</#if> {
+ public static final ArrowTypeID TYPE_TYPE = ArrowTypeID.${name};
+ <#if type.fields?size == 0>
+ public static final ${name} INSTANCE = new ${name}();
+ <#else>
+
+ <#list fields as field>
+ <#assign fieldType = field.valueType!field.type>
+ ${fieldType} ${field.name};
+ </#list>
+
+
+ <#if type.name == "Decimal">
+ // Needed to support golden file integration tests.
+ @JsonCreator
+ public static Decimal createDecimal(
+ @JsonProperty("precision") int precision,
+ @JsonProperty("scale") int scale,
+ @JsonProperty("bitWidth") Integer bitWidth) {
+
+ return new Decimal(precision, scale, bitWidth == null ? 128 : bitWidth);
+ }
+
+ /**
+ * Construct Decimal with 128 bits.
+ *
+ * This is kept mainly for the sake of backward compatibility.
+ * Please use {@link org.apache.arrow.vector.types.pojo.ArrowType.Decimal#Decimal(int, int, int)} instead.
+ *
+ * @deprecated This API will be removed in a future release.
+ */
+ @Deprecated
+ public Decimal(int precision, int scale) {
+ this(precision, scale, 128);
+ }
+
+ <#else>
+ @JsonCreator
+ </#if>
+ public ${type.name}(
+ <#list type.fields as field>
+ <#assign fieldType = field.valueType!field.type>
+ @JsonProperty("${field.name}") ${fieldType} ${field.name}<#if field_has_next>, </#if>
+ </#list>
+ ) {
+ <#list type.fields as field>
+ this.${field.name} = ${field.name};
+ </#list>
+ }
+
+ <#list fields as field>
+ <#assign fieldType = field.valueType!field.type>
+ public ${fieldType} get${field.name?cap_first}() {
+ return ${field.name};
+ }
+ </#list>
+ </#if>
+
+ @Override
+ public ArrowTypeID getTypeID() {
+ return TYPE_TYPE;
+ }
+
+ @Override
+ public int getType(FlatBufferBuilder builder) {
+ <#list type.fields as field>
+ <#if field.type == "String">
+ int ${field.name} = this.${field.name} == null ? -1 : builder.createString(this.${field.name});
+ </#if>
+ <#if field.type == "int[]">
+ int ${field.name} = this.${field.name} == null ? -1 : org.apache.arrow.flatbuf.${type.name}.create${field.name?cap_first}Vector(builder, this.${field.name});
+ </#if>
+ </#list>
+ org.apache.arrow.flatbuf.${type.name}.start${type.name}(builder);
+ <#list type.fields as field>
+ <#if field.type == "String" || field.type == "int[]">
+ if (this.${field.name} != null) {
+ org.apache.arrow.flatbuf.${type.name}.add${field.name?cap_first}(builder, ${field.name});
+ }
+ <#else>
+ org.apache.arrow.flatbuf.${type.name}.add${field.name?cap_first}(builder, this.${field.name}<#if field.valueType??>.getFlatbufID()</#if>);
+ </#if>
+ </#list>
+ return org.apache.arrow.flatbuf.${type.name}.end${type.name}(builder);
+ }
+
+ public String toString() {
+ return "${name}"
+ <#if fields?size != 0>
+ + "("
+ <#list fields as field>
+ + <#if field.type == "int[]">java.util.Arrays.toString(${field.name})<#else>${field.name}</#if><#if field_has_next> + ", " </#if>
+ </#list>
+ + ")"
+ </#if>
+ ;
+ }
+
+ @Override
+ public int hashCode() {
+ return java.util.Arrays.deepHashCode(new Object[] {<#list type.fields as field>${field.name}<#if field_has_next>, </#if></#list>});
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ if (!(obj instanceof ${name})) {
+ return false;
+ }
+ <#if type.fields?size == 0>
+ return true;
+ <#else>
+ ${type.name} that = (${type.name}) obj;
+ return <#list type.fields as field>Objects.deepEquals(this.${field.name}, that.${field.name}) <#if field_has_next>&&<#else>;</#if>
+ </#list>
+ </#if>
+ }
+
+ @Override
+ public <T> T accept(ArrowTypeVisitor<T> visitor) {
+ return visitor.visit(this);
+ }
+ }
+ </#list>
+
+ /**
+ * A user-defined data type that wraps an underlying storage type.
+ */
+ public abstract static class ExtensionType extends ComplexType {
+ /** The on-wire type for this user-defined type. */
+ public abstract ArrowType storageType();
+ /** The name of this user-defined type. Used to identify the type during serialization. */
+ public abstract String extensionName();
+ /** Check equality of this type to another user-defined type. */
+ public abstract boolean extensionEquals(ExtensionType other);
+ /** Save any metadata for this type. */
+ public abstract String serialize();
+ /** Given saved metadata and the underlying storage type, construct a new instance of the user type. */
+ public abstract ArrowType deserialize(ArrowType storageType, String serializedData);
+ /** Construct a vector for the user type. */
+ public abstract FieldVector getNewVector(String name, FieldType fieldType, BufferAllocator allocator);
+
+ /** The field metadata key storing the name of the extension type. */
+ public static final String EXTENSION_METADATA_KEY_NAME = "ARROW:extension:name";
+ /** The field metadata key storing metadata for the extension type. */
+ public static final String EXTENSION_METADATA_KEY_METADATA = "ARROW:extension:metadata";
+
+ @Override
+ public ArrowTypeID getTypeID() {
+ return storageType().getTypeID();
+ }
+
+ @Override
+ public int getType(FlatBufferBuilder builder) {
+ return storageType().getType(builder);
+ }
+
+ public String toString() {
+ return "ExtensionType(" + extensionName() + ", " + storageType().toString() + ")";
+ }
+
+ @Override
+ public int hashCode() {
+ return java.util.Arrays.deepHashCode(new Object[] {storageType(), extensionName()});
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ if (!(obj instanceof ExtensionType)) {
+ return false;
+ }
+ return this.extensionEquals((ExtensionType) obj);
+ }
+
+ @Override
+ public <T> T accept(ArrowTypeVisitor<T> visitor) {
+ return visitor.visit(this);
+ }
+ }
+
+ private static final int defaultDecimalBitWidth = 128;
+
+ public static org.apache.arrow.vector.types.pojo.ArrowType getTypeForField(org.apache.arrow.flatbuf.Field field) {
+ switch(field.typeType()) {
+ <#list arrowTypes.types as type>
+ <#assign name = type.name?remove_ending("_")>
+ <#assign nameLower = type.name?lower_case>
+ <#assign fields = type.fields>
+ case Type.${type.name}: {
+ org.apache.arrow.flatbuf.${type.name} ${nameLower}Type = (org.apache.arrow.flatbuf.${type.name}) field.type(new org.apache.arrow.flatbuf.${type.name}());
+ <#list type.fields as field>
+ <#if field.type == "int[]">
+ ${field.type} ${field.name} = new int[${nameLower}Type.${field.name}Length()];
+ for (int i = 0; i< ${field.name}.length; ++i) {
+ ${field.name}[i] = ${nameLower}Type.${field.name}(i);
+ }
+ <#else>
+ ${field.type} ${field.name} = ${nameLower}Type.${field.name}();
+ </#if>
+ </#list>
+ <#if type.name == "Decimal">
+ if (bitWidth != defaultDecimalBitWidth && bitWidth != 256) {
+ throw new IllegalArgumentException("Library only supports 128-bit and 256-bit decimal values");
+ }
+ </#if>
+ return new ${name}(<#list type.fields as field><#if field.valueType??>${field.valueType}.fromFlatbufID(${field.name})<#else>${field.name}</#if><#if field_has_next>, </#if></#list>);
+ }
+ </#list>
+ default:
+ throw new UnsupportedOperationException("Unsupported type: " + field.typeType());
+ }
+ }
+
+ public static Int getInt(org.apache.arrow.flatbuf.Field field) {
+ org.apache.arrow.flatbuf.Int intType = (org.apache.arrow.flatbuf.Int) field.type(new org.apache.arrow.flatbuf.Int());
+ return new Int(intType.bitWidth(), intType.isSigned());
+ }
+}
+
+
diff --git a/src/arrow/java/vector/src/main/codegen/templates/BaseReader.java b/src/arrow/java/vector/src/main/codegen/templates/BaseReader.java
new file mode 100644
index 000000000..85d582a53
--- /dev/null
+++ b/src/arrow/java/vector/src/main/codegen/templates/BaseReader.java
@@ -0,0 +1,85 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+<@pp.dropOutputFile />
+<@pp.changeOutputFile name="/org/apache/arrow/vector/complex/reader/BaseReader.java" />
+
+
+<#include "/@includes/license.ftl" />
+
+package org.apache.arrow.vector.complex.reader;
+
+<#include "/@includes/vv_imports.ftl" />
+
+/**
+ * Source code generated using FreeMarker template ${.template_name}
+ */
+@SuppressWarnings("unused")
+public interface BaseReader extends Positionable{
+ Field getField();
+ MinorType getMinorType();
+ void reset();
+ void read(UnionHolder holder);
+ void read(int index, UnionHolder holder);
+ void copyAsValue(UnionWriter writer);
+ void read(DenseUnionHolder holder);
+ void read(int index, DenseUnionHolder holder);
+ void copyAsValue(DenseUnionWriter writer);
+ boolean isSet();
+
+ public interface StructReader extends BaseReader, Iterable<String>{
+ FieldReader reader(String name);
+ }
+
+ public interface RepeatedStructReader extends StructReader{
+ boolean next();
+ int size();
+ void copyAsValue(StructWriter writer);
+ }
+
+ public interface ListReader extends BaseReader{
+ FieldReader reader();
+ }
+
+ public interface RepeatedListReader extends ListReader{
+ boolean next();
+ int size();
+ void copyAsValue(ListWriter writer);
+ }
+
+ public interface MapReader extends BaseReader{
+ FieldReader reader();
+ }
+
+ public interface RepeatedMapReader extends MapReader{
+ boolean next();
+ int size();
+ void copyAsValue(MapWriter writer);
+ }
+
+ public interface ScalarReader extends
+ <#list vv.types as type><#list type.minor as minor><#assign name = minor.class?cap_first /> ${name}Reader, </#list></#list>
+ BaseReader {}
+
+ interface ComplexReader{
+ StructReader rootAsStruct();
+ ListReader rootAsList();
+ boolean rootIsStruct();
+ boolean ok();
+ }
+}
+
diff --git a/src/arrow/java/vector/src/main/codegen/templates/BaseWriter.java b/src/arrow/java/vector/src/main/codegen/templates/BaseWriter.java
new file mode 100644
index 000000000..4d63fb73e
--- /dev/null
+++ b/src/arrow/java/vector/src/main/codegen/templates/BaseWriter.java
@@ -0,0 +1,131 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+<@pp.dropOutputFile />
+<@pp.changeOutputFile name="/org/apache/arrow/vector/complex/writer/BaseWriter.java" />
+
+
+<#include "/@includes/license.ftl" />
+
+package org.apache.arrow.vector.complex.writer;
+
+<#include "/@includes/vv_imports.ftl" />
+
+/*
+ * File generated from ${.template_name} using FreeMarker.
+ */
+@SuppressWarnings("unused")
+public interface BaseWriter extends AutoCloseable, Positionable {
+ int getValueCapacity();
+ void writeNull();
+
+ public interface StructWriter extends BaseWriter {
+
+ Field getField();
+
+ /**
+ * Whether this writer is a struct writer and is empty (has no children).
+ *
+ * <p>
+ * Intended only for use in determining whether to add dummy vector to
+ * avoid empty (zero-column) schema, as in JsonReader.
+ * </p>
+ * @return whether the struct is empty
+ */
+ boolean isEmptyStruct();
+
+ <#list vv.types as type><#list type.minor as minor>
+ <#assign lowerName = minor.class?uncap_first />
+ <#if lowerName == "int" ><#assign lowerName = "integer" /></#if>
+ <#assign upperName = minor.class?upper_case />
+ <#assign capName = minor.class?cap_first />
+ <#if minor.typeParams?? >
+ ${capName}Writer ${lowerName}(String name<#list minor.typeParams as typeParam>, ${typeParam.type} ${typeParam.name}</#list>);
+ </#if>
+ ${capName}Writer ${lowerName}(String name);
+ </#list></#list>
+
+ void copyReaderToField(String name, FieldReader reader);
+ StructWriter struct(String name);
+ ListWriter list(String name);
+ MapWriter map(String name);
+ MapWriter map(String name, boolean keysSorted);
+ void start();
+ void end();
+ }
+
+ public interface ListWriter extends BaseWriter {
+ void startList();
+ void endList();
+ StructWriter struct();
+ ListWriter list();
+ MapWriter map();
+ MapWriter map(boolean keysSorted);
+ void copyReader(FieldReader reader);
+
+ <#list vv.types as type><#list type.minor as minor>
+ <#assign lowerName = minor.class?uncap_first />
+ <#if lowerName == "int" ><#assign lowerName = "integer" /></#if>
+ <#assign upperName = minor.class?upper_case />
+ <#assign capName = minor.class?cap_first />
+ ${capName}Writer ${lowerName}();
+ </#list></#list>
+ }
+
+ public interface MapWriter extends ListWriter {
+ void startMap();
+ void endMap();
+
+ void startEntry();
+ void endEntry();
+
+ MapWriter key();
+ MapWriter value();
+ }
+
+ public interface ScalarWriter extends
+ <#list vv.types as type><#list type.minor as minor><#assign name = minor.class?cap_first /> ${name}Writer, </#list></#list> BaseWriter {}
+
+ public interface ComplexWriter {
+ void allocate();
+ void clear();
+ void copyReader(FieldReader reader);
+ StructWriter rootAsStruct();
+ ListWriter rootAsList();
+
+ void setPosition(int index);
+ void setValueCount(int count);
+ void reset();
+ }
+
+ public interface StructOrListWriter {
+ void start();
+ void end();
+ StructOrListWriter struct(String name);
+ StructOrListWriter listoftstruct(String name);
+ StructOrListWriter list(String name);
+ boolean isStructWriter();
+ boolean isListWriter();
+ VarCharWriter varChar(String name);
+ IntWriter integer(String name);
+ BigIntWriter bigInt(String name);
+ Float4Writer float4(String name);
+ Float8Writer float8(String name);
+ BitWriter bit(String name);
+ VarBinaryWriter binary(String name);
+ }
+}
diff --git a/src/arrow/java/vector/src/main/codegen/templates/CaseSensitiveStructWriters.java b/src/arrow/java/vector/src/main/codegen/templates/CaseSensitiveStructWriters.java
new file mode 100644
index 000000000..cc0dd7b33
--- /dev/null
+++ b/src/arrow/java/vector/src/main/codegen/templates/CaseSensitiveStructWriters.java
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+<@pp.dropOutputFile />
+<#list ["Nullable", "Single"] as mode>
+<@pp.changeOutputFile name="/org/apache/arrow/vector/complex/impl/${mode}CaseSensitiveStructWriter.java" />
+<#assign index = "idx()">
+<#if mode == "Single">
+<#assign containerClass = "NonNullableStructVector" />
+<#else>
+<#assign containerClass = "StructVector" />
+</#if>
+
+<#include "/@includes/license.ftl" />
+
+package org.apache.arrow.vector.complex.impl;
+
+<#include "/@includes/vv_imports.ftl" />
+/*
+ * This class is generated using FreeMarker and the ${.template_name} template.
+ */
+@SuppressWarnings("unused")
+public class ${mode}CaseSensitiveStructWriter extends ${mode}StructWriter {
+ public ${mode}CaseSensitiveStructWriter(${containerClass} container) {
+ super(container);
+ }
+
+ @Override
+ protected String handleCase(final String input){
+ return input;
+ }
+
+ @Override
+ protected NullableStructWriterFactory getNullableStructWriterFactory() {
+ return NullableStructWriterFactory.getNullableCaseSensitiveStructWriterFactoryInstance();
+ }
+
+}
+</#list>
diff --git a/src/arrow/java/vector/src/main/codegen/templates/ComplexCopier.java b/src/arrow/java/vector/src/main/codegen/templates/ComplexCopier.java
new file mode 100644
index 000000000..39a84041e
--- /dev/null
+++ b/src/arrow/java/vector/src/main/codegen/templates/ComplexCopier.java
@@ -0,0 +1,191 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.arrow.vector.complex.MapVector;
+import org.apache.arrow.vector.complex.impl.UnionMapReader;
+import org.apache.arrow.vector.complex.impl.UnionMapWriter;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.complex.writer.FieldWriter;
+import org.apache.arrow.vector.types.Types;
+
+<@pp.dropOutputFile />
+<@pp.changeOutputFile name="/org/apache/arrow/vector/complex/impl/ComplexCopier.java" />
+
+
+<#include "/@includes/license.ftl" />
+
+package org.apache.arrow.vector.complex.impl;
+
+<#include "/@includes/vv_imports.ftl" />
+
+/*
+ * This class is generated using freemarker and the ${.template_name} template.
+ */
+@SuppressWarnings("unused")
+public class ComplexCopier {
+
+ /**
+ * Do a deep copy of the value in input into output
+ * @param input field to read from
+ * @param output field to write to
+ */
+ public static void copy(FieldReader input, FieldWriter output) {
+ writeValue(input, output);
+ }
+
+ private static void writeValue(FieldReader reader, FieldWriter writer) {
+ final MinorType mt = reader.getMinorType();
+
+ switch (mt) {
+
+ case LIST:
+ case LARGELIST:
+ case FIXED_SIZE_LIST:
+ if (reader.isSet()) {
+ writer.startList();
+ while (reader.next()) {
+ FieldReader childReader = reader.reader();
+ FieldWriter childWriter = getListWriterForReader(childReader, writer);
+ if (childReader.isSet()) {
+ writeValue(childReader, childWriter);
+ } else {
+ childWriter.writeNull();
+ }
+ }
+ writer.endList();
+ } else {
+ writer.writeNull();
+ }
+ break;
+ case MAP:
+ if (reader.isSet()) {
+ UnionMapWriter mapWriter = (UnionMapWriter) writer;
+ UnionMapReader mapReader = (UnionMapReader) reader;
+
+ mapWriter.startMap();
+ while (mapReader.next()) {
+ FieldReader structReader = reader.reader();
+ UnionMapWriter structWriter = (UnionMapWriter) writer.struct();
+ if (structReader.isSet()) {
+ mapWriter.startEntry();
+ writeValue(mapReader.key(), getStructWriterForReader(mapReader.key(), structWriter.key(), MapVector.KEY_NAME));
+ writeValue(mapReader.value(), getStructWriterForReader(mapReader.value(), structWriter.value(), MapVector.VALUE_NAME));
+ mapWriter.endEntry();
+ } else {
+ structWriter.writeNull();
+ }
+ }
+ mapWriter.endMap();
+ } else {
+ writer.writeNull();
+ }
+ break;
+ case STRUCT:
+ if (reader.isSet()) {
+ writer.start();
+ for(String name : reader){
+ FieldReader childReader = reader.reader(name);
+ if (childReader.getMinorType() != Types.MinorType.NULL) {
+ FieldWriter childWriter = getStructWriterForReader(childReader, writer, name);
+ if (childReader.isSet()) {
+ writeValue(childReader, childWriter);
+ } else {
+ childWriter.writeNull();
+ }
+ }
+ }
+ writer.end();
+ } else {
+ writer.writeNull();
+ }
+ break;
+ <#list vv.types as type><#list type.minor as minor><#assign name = minor.class?cap_first />
+ <#assign fields = minor.fields!type.fields />
+ <#assign uncappedName = name?uncap_first/>
+
+ <#if !minor.typeParams?? || minor.class?starts_with("Decimal") >
+
+ case ${name?upper_case}:
+ if (reader.isSet()) {
+ Nullable${name}Holder ${uncappedName}Holder = new Nullable${name}Holder();
+ reader.read(${uncappedName}Holder);
+ if (${uncappedName}Holder.isSet == 1) {
+ writer.write${name}(<#list fields as field>${uncappedName}Holder.${field.name}<#if field_has_next>, </#if></#list><#if minor.class?starts_with("Decimal")>, new ArrowType.Decimal(${uncappedName}Holder.precision, ${uncappedName}Holder.scale, ${name}Holder.WIDTH * 8)</#if>);
+ }
+ } else {
+ writer.writeNull();
+ }
+ break;
+
+ </#if>
+ </#list></#list>
+ }
+ }
+
+ private static FieldWriter getStructWriterForReader(FieldReader reader, StructWriter writer, String name) {
+ switch (reader.getMinorType()) {
+ <#list vv.types as type><#list type.minor as minor><#assign name = minor.class?cap_first />
+ <#assign fields = minor.fields!type.fields />
+ <#assign uncappedName = name?uncap_first/>
+ <#if !minor.typeParams??>
+ case ${name?upper_case}:
+ return (FieldWriter) writer.<#if name == "Int">integer<#else>${uncappedName}</#if>(name);
+ </#if>
+ <#if minor.class?starts_with("Decimal")>
+ case ${name?upper_case}:
+ if (reader.getField().getType() instanceof ArrowType.Decimal) {
+ ArrowType.Decimal type = (ArrowType.Decimal) reader.getField().getType();
+ return (FieldWriter) writer.${uncappedName}(name, type.getScale(), type.getPrecision());
+ } else {
+ return (FieldWriter) writer.${uncappedName}(name);
+ }
+ </#if>
+
+ </#list></#list>
+ case STRUCT:
+ return (FieldWriter) writer.struct(name);
+ case FIXED_SIZE_LIST:
+ case LIST:
+ case MAP:
+ return (FieldWriter) writer.list(name);
+ default:
+ throw new UnsupportedOperationException(reader.getMinorType().toString());
+ }
+ }
+
+ private static FieldWriter getListWriterForReader(FieldReader reader, ListWriter writer) {
+ switch (reader.getMinorType()) {
+ <#list vv.types as type><#list type.minor as minor><#assign name = minor.class?cap_first />
+ <#assign fields = minor.fields!type.fields />
+ <#assign uncappedName = name?uncap_first/>
+ <#if !minor.typeParams?? || minor.class?starts_with("Decimal") >
+ case ${name?upper_case}:
+ return (FieldWriter) writer.<#if name == "Int">integer<#else>${uncappedName}</#if>();
+ </#if>
+ </#list></#list>
+ case STRUCT:
+ return (FieldWriter) writer.struct();
+ case FIXED_SIZE_LIST:
+ case LIST:
+ case MAP:
+ case NULL:
+ return (FieldWriter) writer.list();
+ default:
+ throw new UnsupportedOperationException(reader.getMinorType().toString());
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/main/codegen/templates/ComplexReaders.java b/src/arrow/java/vector/src/main/codegen/templates/ComplexReaders.java
new file mode 100644
index 000000000..48fb6603a
--- /dev/null
+++ b/src/arrow/java/vector/src/main/codegen/templates/ComplexReaders.java
@@ -0,0 +1,147 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.Override;
+import java.util.List;
+
+import org.apache.arrow.record.TransferPair;
+import org.apache.arrow.vector.complex.IndexHolder;
+import org.apache.arrow.vector.complex.writer.IntervalWriter;
+import org.apache.arrow.vector.complex.writer.BaseWriter.StructWriter;
+
+<@pp.dropOutputFile />
+<#list vv.types as type>
+<#list type.minor as minor>
+<#list [""] as mode>
+<#assign lowerName = minor.class?uncap_first />
+<#if lowerName == "int" ><#assign lowerName = "integer" /></#if>
+<#assign name = minor.class?cap_first />
+<#assign javaType = (minor.javaType!type.javaType) />
+<#assign friendlyType = (minor.friendlyType!minor.boxedType!type.boxedType) />
+<#assign safeType=friendlyType />
+<#if safeType=="byte[]"><#assign safeType="ByteArray" /></#if>
+
+<#assign hasFriendly = minor.friendlyType!"no" == "no" />
+
+<#list ["Nullable"] as nullMode>
+<#if mode == "" >
+<@pp.changeOutputFile name="/org/apache/arrow/vector/complex/impl/${name}ReaderImpl.java" />
+<#include "/@includes/license.ftl" />
+
+package org.apache.arrow.vector.complex.impl;
+
+<#include "/@includes/vv_imports.ftl" />
+
+/**
+ * Source code generated using FreeMarker template ${.template_name}
+ */
+@SuppressWarnings("unused")
+public class ${name}ReaderImpl extends AbstractFieldReader {
+
+ private final ${name}Vector vector;
+
+ public ${name}ReaderImpl(${name}Vector vector){
+ super();
+ this.vector = vector;
+ }
+
+ public MinorType getMinorType(){
+ return vector.getMinorType();
+ }
+
+ public Field getField(){
+ return vector.getField();
+ }
+
+ public boolean isSet(){
+ return !vector.isNull(idx());
+ }
+
+ public void copyAsValue(${minor.class?cap_first}Writer writer){
+ ${minor.class?cap_first}WriterImpl impl = (${minor.class?cap_first}WriterImpl) writer;
+ impl.vector.copyFromSafe(idx(), impl.idx(), vector);
+ }
+
+ public void copyAsField(String name, StructWriter writer){
+ ${minor.class?cap_first}WriterImpl impl = (${minor.class?cap_first}WriterImpl) writer.${lowerName}(name);
+ impl.vector.copyFromSafe(idx(), impl.idx(), vector);
+ }
+
+ <#if nullMode != "Nullable">
+ public void read(${minor.class?cap_first}Holder h){
+ vector.get(idx(), h);
+ }
+ </#if>
+
+ public void read(Nullable${minor.class?cap_first}Holder h){
+ vector.get(idx(), h);
+ }
+
+ public ${friendlyType} read${safeType}(){
+ return vector.getObject(idx());
+ }
+
+ <#if minor.class == "TimeStampSec" ||
+ minor.class == "TimeStampMilli" ||
+ minor.class == "TimeStampMicro" ||
+ minor.class == "TimeStampNano">
+ @Override
+ public ${minor.boxedType} read${minor.boxedType}(){
+ return vector.get(idx());
+ }
+ </#if>
+
+ public void copyValue(FieldWriter w){
+
+ }
+
+ public Object readObject(){
+ return (Object)vector.getObject(idx());
+ }
+}
+</#if>
+</#list>
+<@pp.changeOutputFile name="/org/apache/arrow/vector/complex/reader/${name}Reader.java" />
+<#include "/@includes/license.ftl" />
+
+package org.apache.arrow.vector.complex.reader;
+
+<#include "/@includes/vv_imports.ftl" />
+/**
+ * Source code generated using FreeMarker template ${.template_name}
+ */
+@SuppressWarnings("unused")
+public interface ${name}Reader extends BaseReader{
+
+ public void read(${minor.class?cap_first}Holder h);
+ public void read(Nullable${minor.class?cap_first}Holder h);
+ public Object readObject();
+ // read friendly type
+ public ${friendlyType} read${safeType}();
+ public boolean isSet();
+ public void copyAsValue(${minor.class}Writer writer);
+ public void copyAsField(String name, ${minor.class}Writer writer);
+
+}
+
+
+
+</#list>
+</#list>
+</#list>
+
+
diff --git a/src/arrow/java/vector/src/main/codegen/templates/ComplexWriters.java b/src/arrow/java/vector/src/main/codegen/templates/ComplexWriters.java
new file mode 100644
index 000000000..0381e5559
--- /dev/null
+++ b/src/arrow/java/vector/src/main/codegen/templates/ComplexWriters.java
@@ -0,0 +1,211 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+<@pp.dropOutputFile />
+<#list vv.types as type>
+<#list type.minor as minor>
+<#list ["Nullable"] as mode>
+<#assign name = minor.class?cap_first />
+<#assign eName = name />
+<#assign javaType = (minor.javaType!type.javaType) />
+<#assign fields = minor.fields!type.fields />
+<#assign friendlyType = (minor.friendlyType!minor.boxedType!type.boxedType) />
+
+<@pp.changeOutputFile name="/org/apache/arrow/vector/complex/impl/${eName}WriterImpl.java" />
+<#include "/@includes/license.ftl" />
+
+package org.apache.arrow.vector.complex.impl;
+
+<#include "/@includes/vv_imports.ftl" />
+
+/*
+ * This class is generated using FreeMarker on the ${.template_name} template.
+ */
+@SuppressWarnings("unused")
+public class ${eName}WriterImpl extends AbstractFieldWriter {
+
+ final ${name}Vector vector;
+
+ public ${eName}WriterImpl(${name}Vector vector) {
+ this.vector = vector;
+ }
+
+ @Override
+ public Field getField() {
+ return vector.getField();
+ }
+
+ @Override
+ public int getValueCapacity() {
+ return vector.getValueCapacity();
+ }
+
+ @Override
+ public void allocate() {
+ vector.allocateNew();
+ }
+
+ @Override
+ public void close() {
+ vector.close();
+ }
+
+ @Override
+ public void clear() {
+ vector.clear();
+ }
+
+ @Override
+ protected int idx() {
+ return super.idx();
+ }
+
+ <#if mode == "Repeated">
+
+ public void write(${minor.class?cap_first}Holder h) {
+ mutator.addSafe(idx(), h);
+ vector.setValueCount(idx()+1);
+ }
+
+ public void write(${minor.class?cap_first}Holder h) {
+ mutator.addSafe(idx(), h);
+ vector.setValueCount(idx()+1);
+ }
+
+ public void write${minor.class}(<#list fields as field>${field.type} ${field.name}<#if field_has_next>, </#if></#list>) {
+ mutator.addSafe(idx(), <#list fields as field>${field.name}<#if field_has_next>, </#if></#list>);
+ vector.setValueCount(idx()+1);
+ }
+
+ public void setPosition(int idx) {
+ super.setPosition(idx);
+ mutator.startNewValue(idx);
+ }
+
+
+ <#else>
+
+ <#if !minor.class?starts_with("Decimal")>
+ public void write(${minor.class}Holder h) {
+ vector.setSafe(idx(), h);
+ vector.setValueCount(idx()+1);
+ }
+
+ public void write(Nullable${minor.class}Holder h) {
+ vector.setSafe(idx(), h);
+ vector.setValueCount(idx()+1);
+ }
+
+ public void write${minor.class}(<#list fields as field>${field.type} ${field.name}<#if field_has_next>, </#if></#list>) {
+ vector.setSafe(idx(), 1<#list fields as field><#if field.include!true >, ${field.name}</#if></#list>);
+ vector.setValueCount(idx()+1);
+ }
+ </#if>
+
+ <#if minor.class == "VarChar">
+ public void write${minor.class}(${friendlyType} value) {
+ vector.setSafe(idx(), value);
+ vector.setValueCount(idx()+1);
+ }
+ </#if>
+
+ <#if minor.class?starts_with("Decimal")>
+
+ public void write(${minor.class}Holder h){
+ DecimalUtility.checkPrecisionAndScale(h.precision, h.scale, vector.getPrecision(), vector.getScale());
+ vector.setSafe(idx(), h);
+ vector.setValueCount(idx() + 1);
+ }
+
+ public void write(Nullable${minor.class}Holder h){
+ if (h.isSet == 1) {
+ DecimalUtility.checkPrecisionAndScale(h.precision, h.scale, vector.getPrecision(), vector.getScale());
+ }
+ vector.setSafe(idx(), h);
+ vector.setValueCount(idx() + 1);
+ }
+
+ public void write${minor.class}(long start, ArrowBuf buffer){
+ vector.setSafe(idx(), 1, start, buffer);
+ vector.setValueCount(idx() + 1);
+ }
+
+ public void write${minor.class}(long start, ArrowBuf buffer, ArrowType arrowType){
+ DecimalUtility.checkPrecisionAndScale(((ArrowType.Decimal) arrowType).getPrecision(),
+ ((ArrowType.Decimal) arrowType).getScale(), vector.getPrecision(), vector.getScale());
+ vector.setSafe(idx(), 1, start, buffer);
+ vector.setValueCount(idx() + 1);
+ }
+
+ public void write${minor.class}(BigDecimal value){
+ // vector.setSafe already does precision and scale checking
+ vector.setSafe(idx(), value);
+ vector.setValueCount(idx() + 1);
+ }
+
+ public void writeBigEndianBytesTo${minor.class}(byte[] value, ArrowType arrowType){
+ DecimalUtility.checkPrecisionAndScale(((ArrowType.Decimal) arrowType).getPrecision(),
+ ((ArrowType.Decimal) arrowType).getScale(), vector.getPrecision(), vector.getScale());
+ vector.setBigEndianSafe(idx(), value);
+ vector.setValueCount(idx() + 1);
+ }
+
+ public void writeBigEndianBytesTo${minor.class}(byte[] value){
+ vector.setBigEndianSafe(idx(), value);
+ vector.setValueCount(idx() + 1);
+ }
+ </#if>
+
+
+ public void writeNull() {
+ vector.setNull(idx());
+ vector.setValueCount(idx()+1);
+ }
+ </#if>
+}
+
+<@pp.changeOutputFile name="/org/apache/arrow/vector/complex/writer/${eName}Writer.java" />
+<#include "/@includes/license.ftl" />
+
+package org.apache.arrow.vector.complex.writer;
+
+<#include "/@includes/vv_imports.ftl" />
+/*
+ * This class is generated using FreeMarker on the ${.template_name} template.
+ */
+@SuppressWarnings("unused")
+public interface ${eName}Writer extends BaseWriter {
+ public void write(${minor.class}Holder h);
+
+ <#if minor.class?starts_with("Decimal")>@Deprecated</#if>
+ public void write${minor.class}(<#list fields as field>${field.type} ${field.name}<#if field_has_next>, </#if></#list>);
+<#if minor.class?starts_with("Decimal")>
+
+ public void write${minor.class}(<#list fields as field>${field.type} ${field.name}<#if field_has_next>, </#if></#list>, ArrowType arrowType);
+
+ public void write${minor.class}(${friendlyType} value);
+
+ public void writeBigEndianBytesTo${minor.class}(byte[] value, ArrowType arrowType);
+
+ @Deprecated
+ public void writeBigEndianBytesTo${minor.class}(byte[] value);
+</#if>
+}
+
+</#list>
+</#list>
+</#list>
diff --git a/src/arrow/java/vector/src/main/codegen/templates/DenseUnionReader.java b/src/arrow/java/vector/src/main/codegen/templates/DenseUnionReader.java
new file mode 100644
index 000000000..a085e03ea
--- /dev/null
+++ b/src/arrow/java/vector/src/main/codegen/templates/DenseUnionReader.java
@@ -0,0 +1,229 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+import org.apache.arrow.vector.complex.impl.UnionListReader;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.Field;
+
+<@pp.dropOutputFile />
+<@pp.changeOutputFile name="/org/apache/arrow/vector/complex/impl/DenseUnionReader.java" />
+
+
+<#include "/@includes/license.ftl" />
+
+ package org.apache.arrow.vector.complex.impl;
+
+<#include "/@includes/vv_imports.ftl" />
+/**
+ * Source code generated using FreeMarker template ${.template_name}
+ */
+@SuppressWarnings("unused")
+public class DenseUnionReader extends AbstractFieldReader {
+
+ private BaseReader[] readers = new BaseReader[Byte.MAX_VALUE + 1];
+ public DenseUnionVector data;
+
+ public DenseUnionReader(DenseUnionVector data) {
+ this.data = data;
+ }
+
+ public MinorType getMinorType() {
+ byte typeId = data.getTypeId(idx());
+ return data.getVectorByType(typeId).getMinorType();
+ }
+
+ public byte getTypeId() {
+ return data.getTypeId(idx());
+ }
+
+ @Override
+ public Field getField() {
+ return data.getField();
+ }
+
+ public boolean isSet(){
+ return !data.isNull(idx());
+ }
+
+ public void read(DenseUnionHolder holder) {
+ holder.reader = this;
+ holder.isSet = this.isSet() ? 1 : 0;
+ holder.typeId = getTypeId();
+ }
+
+ public void read(int index, UnionHolder holder) {
+ byte typeId = data.getTypeId(index);
+ getList(typeId).read(index, holder);
+ }
+
+ private FieldReader getReaderForIndex(int index) {
+ byte typeId = data.getTypeId(index);
+ MinorType minorType = data.getVectorByType(typeId).getMinorType();
+ FieldReader reader = (FieldReader) readers[typeId];
+ if (reader != null) {
+ return reader;
+ }
+ switch (minorType) {
+ case NULL:
+ reader = NullReader.INSTANCE;
+ break;
+ case STRUCT:
+ reader = (FieldReader) getStruct(typeId);
+ break;
+ case LIST:
+ reader = (FieldReader) getList(typeId);
+ break;
+ <#list vv.types as type>
+ <#list type.minor as minor>
+ <#assign name = minor.class?cap_first />
+ <#assign uncappedName = name?uncap_first/>
+ <#if !minor.typeParams?? || minor.class?starts_with("Decimal")>
+ case ${name?upper_case}:
+ reader = (FieldReader) get${name}(typeId);
+ break;
+ </#if>
+ </#list>
+ </#list>
+ default:
+ throw new UnsupportedOperationException("Unsupported type: " + MinorType.values()[typeId]);
+ }
+ return reader;
+ }
+
+ private SingleStructReaderImpl structReader;
+
+ private StructReader getStruct(byte typeId) {
+ StructReader structReader = (StructReader) readers[typeId];
+ if (structReader == null) {
+ structReader = (SingleStructReaderImpl) data.getVectorByType(typeId).getReader();
+ structReader.setPosition(idx());
+ readers[typeId] = structReader;
+ }
+ return structReader;
+ }
+
+ private UnionListReader listReader;
+
+ private FieldReader getList(byte typeId) {
+ UnionListReader listReader = (UnionListReader) readers[typeId];
+ if (listReader == null) {
+ listReader = new UnionListReader((ListVector) data.getVectorByType(typeId));
+ listReader.setPosition(idx());
+ readers[typeId] = listReader;
+ }
+ return listReader;
+ }
+
+ private UnionMapReader mapReader;
+
+ private FieldReader getMap(byte typeId) {
+ UnionMapReader mapReader = (UnionMapReader) readers[typeId];
+ if (mapReader == null) {
+ mapReader = new UnionMapReader((MapVector) data.getVectorByType(typeId));
+ mapReader.setPosition(idx());
+ readers[typeId] = mapReader;
+ }
+ return mapReader;
+ }
+
+ @Override
+ public java.util.Iterator<String> iterator() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public void copyAsValue(UnionWriter writer) {
+ writer.data.copyFrom(idx(), writer.idx(), data);
+ }
+
+ <#list ["Object", "BigDecimal", "Short", "Integer", "Long", "Boolean",
+ "LocalDateTime", "Duration", "Period", "Double", "Float",
+ "Character", "Text", "Byte", "byte[]", "PeriodDuration"] as friendlyType>
+ <#assign safeType=friendlyType />
+ <#if safeType=="byte[]"><#assign safeType="ByteArray" /></#if>
+
+ @Override
+ public ${friendlyType} read${safeType}() {
+ return getReaderForIndex(idx()).read${safeType}();
+ }
+
+ </#list>
+
+ public int size() {
+ return getReaderForIndex(idx()).size();
+ }
+
+ <#list vv.types as type>
+ <#list type.minor as minor>
+ <#assign name = minor.class?cap_first />
+ <#assign uncappedName = name?uncap_first/>
+ <#assign boxedType = (minor.boxedType!type.boxedType) />
+ <#assign javaType = (minor.javaType!type.javaType) />
+ <#assign friendlyType = (minor.friendlyType!minor.boxedType!type.boxedType) />
+ <#assign safeType=friendlyType />
+ <#if safeType=="byte[]"><#assign safeType="ByteArray" /></#if>
+ <#if !minor.typeParams?? || minor.class?starts_with("Decimal")>
+
+ private ${name}ReaderImpl get${name}(byte typeId) {
+ ${name}ReaderImpl reader = (${name}ReaderImpl) readers[typeId];
+ if (reader == null) {
+ reader = new ${name}ReaderImpl((${name}Vector) data.getVectorByType(typeId));
+ reader.setPosition(idx());
+ readers[typeId] = reader;
+ }
+ return reader;
+ }
+
+ public void read(Nullable${name}Holder holder){
+ getReaderForIndex(idx()).read(holder);
+ }
+
+ public void copyAsValue(${name}Writer writer){
+ getReaderForIndex(idx()).copyAsValue(writer);
+ }
+ </#if>
+ </#list>
+ </#list>
+
+ @Override
+ public void copyAsValue(ListWriter writer) {
+ ComplexCopier.copy(this, (FieldWriter) writer);
+ }
+
+ @Override
+ public void setPosition(int index) {
+ super.setPosition(index);
+ byte typeId = data.getTypeId(index);
+ if (readers[typeId] != null) {
+ int offset = data.getOffset(index);
+ readers[typeId].setPosition(offset);
+ }
+ }
+
+ public FieldReader reader(byte typeId, String name){
+ return getStruct(typeId).reader(name);
+ }
+
+ public FieldReader reader(byte typeId) {
+ return getList(typeId).reader();
+ }
+
+ public boolean next() {
+ return getReaderForIndex(idx()).next();
+ }
+}
diff --git a/src/arrow/java/vector/src/main/codegen/templates/DenseUnionVector.java b/src/arrow/java/vector/src/main/codegen/templates/DenseUnionVector.java
new file mode 100644
index 000000000..63f4f5876
--- /dev/null
+++ b/src/arrow/java/vector/src/main/codegen/templates/DenseUnionVector.java
@@ -0,0 +1,943 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.ReferenceManager;
+import org.apache.arrow.memory.util.CommonUtil;
+import org.apache.arrow.util.Preconditions;
+import org.apache.arrow.vector.BaseValueVector;
+import org.apache.arrow.vector.BitVectorHelper;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.complex.AbstractStructVector;
+import org.apache.arrow.vector.complex.ListVector;
+import org.apache.arrow.vector.complex.NonNullableStructVector;
+import org.apache.arrow.vector.complex.StructVector;
+import org.apache.arrow.vector.compare.VectorVisitor;
+import org.apache.arrow.vector.types.Types;
+import org.apache.arrow.vector.types.UnionMode;
+import org.apache.arrow.vector.compare.RangeEqualsVisitor;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.CallBack;
+import org.apache.arrow.vector.util.DataSizeRoundingUtil;
+import org.apache.arrow.vector.util.TransferPair;
+
+import java.util.Arrays;
+import java.util.stream.Collectors;
+
+<@pp.dropOutputFile />
+<@pp.changeOutputFile name="/org/apache/arrow/vector/complex/DenseUnionVector.java" />
+
+
+<#include "/@includes/license.ftl" />
+
+package org.apache.arrow.vector.complex;
+
+<#include "/@includes/vv_imports.ftl" />
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Iterator;
+import org.apache.arrow.memory.util.CommonUtil;
+import org.apache.arrow.memory.util.hash.ArrowBufHasher;
+import org.apache.arrow.memory.util.hash.SimpleHasher;
+import org.apache.arrow.vector.compare.VectorVisitor;
+import org.apache.arrow.vector.complex.impl.ComplexCopier;
+import org.apache.arrow.vector.util.CallBack;
+import org.apache.arrow.vector.ipc.message.ArrowFieldNode;
+import org.apache.arrow.vector.BaseValueVector;
+import org.apache.arrow.vector.util.OversizedAllocationException;
+import org.apache.arrow.util.Preconditions;
+
+import static org.apache.arrow.vector.types.UnionMode.Dense;
+
+
+
+/*
+ * This class is generated using freemarker and the ${.template_name} template.
+ */
+@SuppressWarnings("unused")
+
+
+/**
+ * A vector which can hold values of different types. It does so by using a StructVector which contains a vector for each
+ * primitive type that is stored. StructVector is used in order to take advantage of its serialization/deserialization methods,
+ * as well as the addOrGet method.
+ *
+ * For performance reasons, DenseUnionVector stores a cached reference to each subtype vector, to avoid having to do the struct lookup
+ * each time the vector is accessed.
+ * Source code generated using FreeMarker template ${.template_name}
+ */
+public class DenseUnionVector extends AbstractContainerVector implements FieldVector {
+ int valueCount;
+
+ NonNullableStructVector internalStruct;
+ private ArrowBuf typeBuffer;
+ private ArrowBuf offsetBuffer;
+
+ /**
+ * The key is type Id, and the value is vector.
+ */
+ private ValueVector[] childVectors = new ValueVector[Byte.MAX_VALUE + 1];
+
+ /**
+ * The index is the type id, and the value is the type field.
+ */
+ private Field[] typeFields = new Field[Byte.MAX_VALUE + 1];
+ /**
+ * The index is the index into the typeFields array, and the value is the logical field id.
+ */
+ private byte[] typeMapFields = new byte[Byte.MAX_VALUE + 1];
+
+ /**
+ * The next type id to allocate.
+ */
+ private byte nextTypeId = 0;
+
+ private FieldReader reader;
+
+ private long typeBufferAllocationSizeInBytes;
+ private long offsetBufferAllocationSizeInBytes;
+
+ private final FieldType fieldType;
+
+ public static final byte TYPE_WIDTH = 1;
+ public static final byte OFFSET_WIDTH = 4;
+
+ private static final FieldType INTERNAL_STRUCT_TYPE = new FieldType(/*nullable*/ false,
+ ArrowType.Struct.INSTANCE, /*dictionary*/ null, /*metadata*/ null);
+
+ public static DenseUnionVector empty(String name, BufferAllocator allocator) {
+ FieldType fieldType = FieldType.nullable(new ArrowType.Union(
+ UnionMode.Dense, null));
+ return new DenseUnionVector(name, allocator, fieldType, null);
+ }
+
+ public DenseUnionVector(String name, BufferAllocator allocator, FieldType fieldType, CallBack callBack) {
+ super(name, allocator, callBack);
+ this.fieldType = fieldType;
+ this.internalStruct = new NonNullableStructVector(
+ "internal",
+ allocator,
+ INTERNAL_STRUCT_TYPE,
+ callBack,
+ AbstractStructVector.ConflictPolicy.CONFLICT_REPLACE,
+ false);
+ this.typeBuffer = allocator.getEmpty();
+ this.typeBufferAllocationSizeInBytes = BaseValueVector.INITIAL_VALUE_ALLOCATION * TYPE_WIDTH;
+ this.offsetBuffer = allocator.getEmpty();
+ this.offsetBufferAllocationSizeInBytes = BaseValueVector.INITIAL_VALUE_ALLOCATION * OFFSET_WIDTH;
+ }
+
+ public BufferAllocator getAllocator() {
+ return allocator;
+ }
+
+ @Override
+ public MinorType getMinorType() {
+ return MinorType.DENSEUNION;
+ }
+
+ @Override
+ public void initializeChildrenFromFields(List<Field> children) {
+ for (Field field : children) {
+ byte typeId = registerNewTypeId(field);
+ FieldVector vector = (FieldVector) internalStruct.add(field.getName(), field.getFieldType());
+ vector.initializeChildrenFromFields(field.getChildren());
+ childVectors[typeId] = vector;
+ }
+ }
+
+ @Override
+ public List<FieldVector> getChildrenFromFields() {
+ return internalStruct.getChildrenFromFields();
+ }
+
+ @Override
+ public void loadFieldBuffers(ArrowFieldNode fieldNode, List<ArrowBuf> ownBuffers) {
+ if (ownBuffers.size() != 2) {
+ throw new IllegalArgumentException("Illegal buffer count for dense union with type " + getField().getFieldType() +
+ ", expected " + 2 + ", got: " + ownBuffers.size());
+ }
+
+ ArrowBuf buffer = ownBuffers.get(0);
+ typeBuffer.getReferenceManager().release();
+ typeBuffer = buffer.getReferenceManager().retain(buffer, allocator);
+ typeBufferAllocationSizeInBytes = typeBuffer.capacity();
+
+ buffer = ownBuffers.get(1);
+ offsetBuffer.getReferenceManager().release();
+ offsetBuffer = buffer.getReferenceManager().retain(buffer, allocator);
+ offsetBufferAllocationSizeInBytes = offsetBuffer.capacity();
+
+ this.valueCount = fieldNode.getLength();
+ }
+
+ @Override
+ public List<ArrowBuf> getFieldBuffers() {
+ List<ArrowBuf> result = new ArrayList<>(2);
+ setReaderAndWriterIndex();
+ result.add(typeBuffer);
+ result.add(offsetBuffer);
+
+ return result;
+ }
+
+ private void setReaderAndWriterIndex() {
+ typeBuffer.readerIndex(0);
+ typeBuffer.writerIndex(valueCount * TYPE_WIDTH);
+
+ offsetBuffer.readerIndex(0);
+ offsetBuffer.writerIndex((long) valueCount * OFFSET_WIDTH);
+ }
+
+ /**
+ * Get the inner vectors.
+ *
+ * @deprecated This API will be removed as the current implementations no longer support inner vectors.
+ *
+ * @return the inner vectors for this field as defined by the TypeLayout
+ */
+ @Override
+ @Deprecated
+ public List<BufferBacked> getFieldInnerVectors() {
+ throw new UnsupportedOperationException("There are no inner vectors. Use geFieldBuffers");
+ }
+
+ private String fieldName(byte typeId, MinorType type) {
+ return type.name().toLowerCase() + typeId;
+ }
+
+ private FieldType fieldType(MinorType type) {
+ return FieldType.nullable(type.getType());
+ }
+
+ public synchronized byte registerNewTypeId(Field field) {
+ if (nextTypeId == typeFields.length) {
+ throw new IllegalStateException("Dense union vector support at most " +
+ typeFields.length + " relative types. Please use union of union instead");
+ }
+ byte typeId = nextTypeId;
+ if (fieldType != null) {
+ int[] typeIds = ((ArrowType.Union) fieldType.getType()).getTypeIds();
+ if (typeIds != null) {
+ int thisTypeId = typeIds[nextTypeId];
+ if (thisTypeId > Byte.MAX_VALUE) {
+ throw new IllegalStateException("Dense union vector types must be bytes. " + thisTypeId + " is too large");
+ }
+ typeId = (byte) thisTypeId;
+ }
+ }
+ typeFields[typeId] = field;
+ typeMapFields[nextTypeId] = typeId;
+ this.nextTypeId += 1;
+ return typeId;
+ }
+
+ private <T extends FieldVector> T addOrGet(byte typeId, MinorType minorType, Class<T> c) {
+ return internalStruct.addOrGet(fieldName(typeId, minorType), fieldType(minorType), c);
+ }
+
+ private <T extends FieldVector> T addOrGet(byte typeId, MinorType minorType, ArrowType arrowType, Class<T> c) {
+ return internalStruct.addOrGet(fieldName(typeId, minorType), FieldType.nullable(arrowType), c);
+ }
+
+ @Override
+ public long getOffsetBufferAddress() {
+ return offsetBuffer.memoryAddress();
+ }
+
+ @Override
+ public long getDataBufferAddress() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public long getValidityBufferAddress() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public ArrowBuf getValidityBuffer() { throw new UnsupportedOperationException(); }
+
+ @Override
+ public ArrowBuf getOffsetBuffer() { return offsetBuffer; }
+
+ public ArrowBuf getTypeBuffer() { return typeBuffer; }
+
+ @Override
+ public ArrowBuf getDataBuffer() { throw new UnsupportedOperationException(); }
+
+ public StructVector getStruct(byte typeId) {
+ StructVector structVector = typeId < 0 ? null : (StructVector) childVectors[typeId];
+ if (structVector == null) {
+ int vectorCount = internalStruct.size();
+ structVector = addOrGet(typeId, MinorType.STRUCT, StructVector.class);
+ if (internalStruct.size() > vectorCount) {
+ structVector.allocateNew();
+ childVectors[typeId] = structVector;
+ if (callBack != null) {
+ callBack.doWork();
+ }
+ }
+ }
+ return structVector;
+ }
+
+ <#list vv.types as type>
+ <#list type.minor as minor>
+ <#assign name = minor.class?cap_first />
+ <#assign fields = minor.fields!type.fields />
+ <#assign uncappedName = name?uncap_first/>
+ <#assign lowerCaseName = name?lower_case/>
+ <#if !minor.typeParams?? || minor.class?starts_with("Decimal")>
+
+ public ${name}Vector get${name}Vector(byte typeId<#if minor.class?starts_with("Decimal")>, ArrowType arrowType</#if>) {
+ ValueVector vector = typeId < 0 ? null : childVectors[typeId];
+ if (vector == null) {
+ int vectorCount = internalStruct.size();
+ vector = addOrGet(typeId, MinorType.${name?upper_case}<#if minor.class?starts_with("Decimal")>, arrowType</#if>, ${name}Vector.class);
+ childVectors[typeId] = vector;
+ if (internalStruct.size() > vectorCount) {
+ vector.allocateNew();
+ if (callBack != null) {
+ callBack.doWork();
+ }
+ }
+ }
+ return (${name}Vector) vector;
+ }
+ </#if>
+ </#list>
+ </#list>
+
+ public ListVector getList(byte typeId) {
+ ListVector listVector = typeId < 0 ? null : (ListVector) childVectors[typeId];
+ if (listVector == null) {
+ int vectorCount = internalStruct.size();
+ listVector = addOrGet(typeId, MinorType.LIST, ListVector.class);
+ if (internalStruct.size() > vectorCount) {
+ listVector.allocateNew();
+ childVectors[typeId] = listVector;
+ if (callBack != null) {
+ callBack.doWork();
+ }
+ }
+ }
+ return listVector;
+ }
+
+ public MapVector getMap(byte typeId) {
+ MapVector mapVector = typeId < 0 ? null : (MapVector) childVectors[typeId];
+ if (mapVector == null) {
+ int vectorCount = internalStruct.size();
+ mapVector = addOrGet(typeId, MinorType.MAP, MapVector.class);
+ if (internalStruct.size() > vectorCount) {
+ mapVector.allocateNew();
+ childVectors[typeId] = mapVector;
+ if (callBack != null) {
+ callBack.doWork();
+ }
+ }
+ }
+ return mapVector;
+ }
+
+ public byte getTypeId(int index) {
+ return typeBuffer.getByte(index * TYPE_WIDTH);
+ }
+
+ public ValueVector getVectorByType(byte typeId) {
+ return typeId < 0 ? null : childVectors[typeId];
+ }
+
+ @Override
+ public void allocateNew() throws OutOfMemoryException {
+ /* new allocation -- clear the current buffers */
+ clear();
+ internalStruct.allocateNew();
+ try {
+ allocateTypeBuffer();
+ allocateOffsetBuffer();
+ } catch (Exception e) {
+ clear();
+ throw e;
+ }
+ }
+
+ @Override
+ public boolean allocateNewSafe() {
+ /* new allocation -- clear the current buffers */
+ clear();
+ boolean safe = internalStruct.allocateNewSafe();
+ if (!safe) { return false; }
+ try {
+ allocateTypeBuffer();
+ allocateOffsetBuffer();
+ } catch (Exception e) {
+ clear();
+ return false;
+ }
+
+ return true;
+ }
+
+ private void allocateTypeBuffer() {
+ typeBuffer = allocator.buffer(typeBufferAllocationSizeInBytes);
+ typeBuffer.readerIndex(0);
+ setNegative(0, typeBuffer.capacity());
+ }
+
+ private void allocateOffsetBuffer() {
+ offsetBuffer = allocator.buffer(offsetBufferAllocationSizeInBytes);
+ offsetBuffer.readerIndex(0);
+ offsetBuffer.setZero(0, offsetBuffer.capacity());
+ }
+
+
+ @Override
+ public void reAlloc() {
+ internalStruct.reAlloc();
+ reallocTypeBuffer();
+ reallocOffsetBuffer();
+ }
+
+ public int getOffset(int index) {
+ return offsetBuffer.getInt((long) index * OFFSET_WIDTH);
+ }
+
+ private void reallocTypeBuffer() {
+ final long currentBufferCapacity = typeBuffer.capacity();
+ long newAllocationSize = currentBufferCapacity * 2;
+ if (newAllocationSize == 0) {
+ if (typeBufferAllocationSizeInBytes > 0) {
+ newAllocationSize = typeBufferAllocationSizeInBytes;
+ } else {
+ newAllocationSize = BaseValueVector.INITIAL_VALUE_ALLOCATION * TYPE_WIDTH * 2;
+ }
+ }
+
+ newAllocationSize = CommonUtil.nextPowerOfTwo(newAllocationSize);
+ assert newAllocationSize >= 1;
+
+ if (newAllocationSize > BaseValueVector.MAX_ALLOCATION_SIZE) {
+ throw new OversizedAllocationException("Unable to expand the buffer");
+ }
+
+ final ArrowBuf newBuf = allocator.buffer((int)newAllocationSize);
+ newBuf.setBytes(0, typeBuffer, 0, currentBufferCapacity);
+ typeBuffer.getReferenceManager().release(1);
+ typeBuffer = newBuf;
+ typeBufferAllocationSizeInBytes = (int)newAllocationSize;
+ setNegative(currentBufferCapacity, newBuf.capacity() - currentBufferCapacity);
+ }
+
+ private void reallocOffsetBuffer() {
+ final long currentBufferCapacity = offsetBuffer.capacity();
+ long newAllocationSize = currentBufferCapacity * 2;
+ if (newAllocationSize == 0) {
+ if (offsetBufferAllocationSizeInBytes > 0) {
+ newAllocationSize = offsetBufferAllocationSizeInBytes;
+ } else {
+ newAllocationSize = BaseValueVector.INITIAL_VALUE_ALLOCATION * OFFSET_WIDTH * 2;
+ }
+ }
+
+ newAllocationSize = CommonUtil.nextPowerOfTwo(newAllocationSize);
+ assert newAllocationSize >= 1;
+
+ if (newAllocationSize > BaseValueVector.MAX_ALLOCATION_SIZE) {
+ throw new OversizedAllocationException("Unable to expand the buffer");
+ }
+
+ final ArrowBuf newBuf = allocator.buffer((int) newAllocationSize);
+ newBuf.setBytes(0, offsetBuffer, 0, currentBufferCapacity);
+ newBuf.setZero(currentBufferCapacity, newBuf.capacity() - currentBufferCapacity);
+ offsetBuffer.getReferenceManager().release(1);
+ offsetBuffer = newBuf;
+ offsetBufferAllocationSizeInBytes = (int) newAllocationSize;
+ }
+
+ @Override
+ public void setInitialCapacity(int numRecords) { }
+
+ @Override
+ public int getValueCapacity() {
+ long capacity = getTypeBufferValueCapacity();
+ long offsetCapacity = getOffsetBufferValueCapacity();
+ if (offsetCapacity < capacity) {
+ capacity = offsetCapacity;
+ }
+ long structCapacity = internalStruct.getValueCapacity();
+ if (structCapacity < capacity) {
+ structCapacity = capacity;
+ }
+ return (int) capacity;
+ }
+
+ @Override
+ public void close() {
+ clear();
+ }
+
+ @Override
+ public void clear() {
+ valueCount = 0;
+ typeBuffer.getReferenceManager().release();
+ typeBuffer = allocator.getEmpty();
+ offsetBuffer.getReferenceManager().release();
+ offsetBuffer = allocator.getEmpty();
+ internalStruct.clear();
+ }
+
+ @Override
+ public void reset() {
+ valueCount = 0;
+ setNegative(0, typeBuffer.capacity());
+ offsetBuffer.setZero(0, offsetBuffer.capacity());
+ internalStruct.reset();
+ }
+
+ @Override
+ public Field getField() {
+ int childCount = (int) Arrays.stream(typeFields).filter(field -> field != null).count();
+ List<org.apache.arrow.vector.types.pojo.Field> childFields = new ArrayList<>(childCount);
+ int[] typeIds = new int[childCount];
+ for (int i = 0; i < typeFields.length; i++) {
+ if (typeFields[i] != null) {
+ int curIdx = childFields.size();
+ typeIds[curIdx] = i;
+ childFields.add(typeFields[i]);
+ }
+ }
+
+ FieldType fieldType;
+ if (this.fieldType == null) {
+ fieldType = FieldType.nullable(new ArrowType.Union(Dense, typeIds));
+ } else {
+ final UnionMode mode = UnionMode.Dense;
+ fieldType = new FieldType(this.fieldType.isNullable(), new ArrowType.Union(mode, typeIds),
+ this.fieldType.getDictionary(), this.fieldType.getMetadata());
+ }
+
+ return new Field(name, fieldType, childFields);
+ }
+
+ @Override
+ public TransferPair getTransferPair(BufferAllocator allocator) {
+ return getTransferPair(name, allocator);
+ }
+
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
+ return getTransferPair(ref, allocator, null);
+ }
+
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator, CallBack callBack) {
+ return new org.apache.arrow.vector.complex.DenseUnionVector.TransferImpl(ref, allocator, callBack);
+ }
+
+ @Override
+ public TransferPair makeTransferPair(ValueVector target) {
+ return new TransferImpl((DenseUnionVector) target);
+ }
+
+ @Override
+ public void copyFrom(int inIndex, int outIndex, ValueVector from) {
+ Preconditions.checkArgument(this.getMinorType() == from.getMinorType());
+ DenseUnionVector fromCast = (DenseUnionVector) from;
+ int inOffset = fromCast.offsetBuffer.getInt((long) inIndex * OFFSET_WIDTH);
+ fromCast.getReader().setPosition(inOffset);
+ int outOffset = offsetBuffer.getInt((long) outIndex * OFFSET_WIDTH);
+ getWriter().setPosition(outOffset);
+ ComplexCopier.copy(fromCast.reader, writer);
+ }
+
+ @Override
+ public void copyFromSafe(int inIndex, int outIndex, ValueVector from) {
+ copyFrom(inIndex, outIndex, from);
+ }
+
+ public FieldVector addVector(byte typeId, FieldVector v) {
+ final String name = v.getName().isEmpty() ? fieldName(typeId, v.getMinorType()) : v.getName();
+ Preconditions.checkState(internalStruct.getChild(name) == null, String.format("%s vector already exists", name));
+ final FieldVector newVector = internalStruct.addOrGet(name, v.getField().getFieldType(), v.getClass());
+ v.makeTransferPair(newVector).transfer();
+ internalStruct.putChild(name, newVector);
+ childVectors[typeId] = newVector;
+ if (callBack != null) {
+ callBack.doWork();
+ }
+ return newVector;
+ }
+
+ private class TransferImpl implements TransferPair {
+ private final TransferPair[] internalTransferPairs = new TransferPair[nextTypeId];
+ private final DenseUnionVector to;
+
+ public TransferImpl(String name, BufferAllocator allocator, CallBack callBack) {
+ to = new DenseUnionVector(name, allocator, null, callBack);
+ internalStruct.makeTransferPair(to.internalStruct);
+ createTransferPairs();
+ }
+
+ public TransferImpl(DenseUnionVector to) {
+ this.to = to;
+ internalStruct.makeTransferPair(to.internalStruct);
+ createTransferPairs();
+ }
+
+ private void createTransferPairs() {
+ for (int i = 0; i < nextTypeId; i++) {
+ ValueVector srcVec = internalStruct.getVectorById(i);
+ ValueVector dstVec = to.internalStruct.getVectorById(i);
+ to.typeFields[i] = typeFields[i];
+ to.typeMapFields[i] = typeMapFields[i];
+ to.childVectors[i] = dstVec;
+ internalTransferPairs[i] = srcVec.makeTransferPair(dstVec);
+ }
+ }
+
+ @Override
+ public void transfer() {
+ to.clear();
+
+ ReferenceManager refManager = typeBuffer.getReferenceManager();
+ to.typeBuffer = refManager.transferOwnership(typeBuffer, to.allocator).getTransferredBuffer();
+
+ refManager = offsetBuffer.getReferenceManager();
+ to.offsetBuffer = refManager.transferOwnership(offsetBuffer, to.allocator).getTransferredBuffer();
+
+ for (int i = 0; i < nextTypeId; i++) {
+ if (internalTransferPairs[i] != null) {
+ internalTransferPairs[i].transfer();
+ to.childVectors[i] = internalTransferPairs[i].getTo();
+ }
+ }
+ to.valueCount = valueCount;
+ clear();
+ }
+
+ @Override
+ public void splitAndTransfer(int startIndex, int length) {
+ to.clear();
+
+ // transfer type buffer
+ int startPoint = startIndex * TYPE_WIDTH;
+ int sliceLength = length * TYPE_WIDTH;
+ ArrowBuf slicedBuffer = typeBuffer.slice(startPoint, sliceLength);
+ ReferenceManager refManager = slicedBuffer.getReferenceManager();
+ to.typeBuffer = refManager.transferOwnership(slicedBuffer, to.allocator).getTransferredBuffer();
+
+ // transfer offset byffer
+ while (to.offsetBuffer.capacity() < (long) length * OFFSET_WIDTH) {
+ to.reallocOffsetBuffer();
+ }
+
+ int [] typeCounts = new int[nextTypeId];
+ int [] typeStarts = new int[nextTypeId];
+ for (int i = 0; i < typeCounts.length; i++) {
+ typeCounts[i] = 0;
+ typeStarts[i] = -1;
+ }
+
+ for (int i = startIndex; i < startIndex + length; i++) {
+ byte typeId = typeBuffer.getByte(i);
+ to.offsetBuffer.setInt((long) (i - startIndex) * OFFSET_WIDTH, typeCounts[typeId]);
+ typeCounts[typeId] += 1;
+ if (typeStarts[typeId] == -1) {
+ typeStarts[typeId] = offsetBuffer.getInt((long) i * OFFSET_WIDTH);
+ }
+ }
+
+ // transfer vector values
+ for (int i = 0; i < nextTypeId; i++) {
+ if (typeCounts[i] > 0 && typeStarts[i] != -1) {
+ internalTransferPairs[i].splitAndTransfer(typeStarts[i], typeCounts[i]);
+ to.childVectors[i] = internalTransferPairs[i].getTo();
+ }
+ }
+
+ to.setValueCount(length);
+ }
+
+ @Override
+ public ValueVector getTo() {
+ return to;
+ }
+
+ @Override
+ public void copyValueSafe(int from, int to) {
+ this.to.copyFrom(from, to, DenseUnionVector.this);
+ }
+ }
+
+ @Override
+ public FieldReader getReader() {
+ if (reader == null) {
+ reader = new DenseUnionReader(this);
+ }
+ return reader;
+ }
+
+ public FieldWriter getWriter() {
+ if (writer == null) {
+ writer = new DenseUnionWriter(this);
+ }
+ return writer;
+ }
+
+ @Override
+ public int getBufferSize() {
+ return this.getBufferSizeFor(this.valueCount);
+ }
+
+ @Override
+ public int getBufferSizeFor(final int count) {
+ if (count == 0) {
+ return 0;
+ }
+ return (int) (count * TYPE_WIDTH + (long) count * OFFSET_WIDTH
+ + DataSizeRoundingUtil.divideBy8Ceil(count) + internalStruct.getBufferSizeFor(count));
+ }
+
+ @Override
+ public ArrowBuf[] getBuffers(boolean clear) {
+ List<ArrowBuf> list = new java.util.ArrayList<>();
+ setReaderAndWriterIndex();
+ if (getBufferSize() != 0) {
+ list.add(typeBuffer);
+ list.add(offsetBuffer);
+ list.addAll(java.util.Arrays.asList(internalStruct.getBuffers(clear)));
+ }
+ if (clear) {
+ valueCount = 0;
+ typeBuffer.getReferenceManager().retain();
+ typeBuffer.close();
+ typeBuffer = allocator.getEmpty();
+ offsetBuffer.getReferenceManager().retain();
+ offsetBuffer.close();
+ offsetBuffer = allocator.getEmpty();
+ }
+ return list.toArray(new ArrowBuf[list.size()]);
+ }
+
+ @Override
+ public Iterator<ValueVector> iterator() {
+ return internalStruct.iterator();
+ }
+
+ private ValueVector getVector(int index) {
+ byte typeId = typeBuffer.getByte(index * TYPE_WIDTH);
+ return getVectorByType(typeId);
+ }
+
+ public Object getObject(int index) {
+ ValueVector vector = getVector(index);
+ if (vector != null) {
+ int offset = offsetBuffer.getInt((long) index * OFFSET_WIDTH);
+ return vector.isNull(offset) ? null : vector.getObject(offset);
+ }
+ return null;
+ }
+
+ public void get(int index, DenseUnionHolder holder) {
+ FieldReader reader = new DenseUnionReader(DenseUnionVector.this);
+ reader.setPosition(index);
+ holder.reader = reader;
+ }
+
+ public int getValueCount() {
+ return valueCount;
+ }
+
+ /**
+ * IMPORTANT: Union types always return non null as there is no validity buffer.
+ *
+ * To check validity correctly you must check the underlying vector.
+ */
+ public boolean isNull(int index) {
+ return false;
+ }
+
+ @Override
+ public int getNullCount() {
+ return 0;
+ }
+
+ public int isSet(int index) {
+ return isNull(index) ? 0 : 1;
+ }
+
+ DenseUnionWriter writer;
+
+ public void setValueCount(int valueCount) {
+ this.valueCount = valueCount;
+ while (valueCount > getTypeBufferValueCapacity()) {
+ reallocTypeBuffer();
+ reallocOffsetBuffer();
+ }
+ setChildVectorValueCounts();
+ }
+
+ private void setChildVectorValueCounts() {
+ int [] counts = new int[Byte.MAX_VALUE + 1];
+ for (int i = 0; i < this.valueCount; i++) {
+ byte typeId = getTypeId(i);
+ if (typeId != -1) {
+ counts[typeId] += 1;
+ }
+ }
+ for (int i = 0; i < nextTypeId; i++) {
+ childVectors[typeMapFields[i]].setValueCount(counts[typeMapFields[i]]);
+ }
+ }
+
+ public void setSafe(int index, DenseUnionHolder holder) {
+ FieldReader reader = holder.reader;
+ if (writer == null) {
+ writer = new DenseUnionWriter(DenseUnionVector.this);
+ }
+ int offset = offsetBuffer.getInt((long) index * OFFSET_WIDTH);
+ MinorType type = reader.getMinorType();
+ writer.setPosition(offset);
+ byte typeId = holder.typeId;
+ switch (type) {
+ <#list vv.types as type>
+ <#list type.minor as minor>
+ <#assign name = minor.class?cap_first />
+ <#assign fields = minor.fields!type.fields />
+ <#assign uncappedName = name?uncap_first/>
+ <#if !minor.typeParams?? || minor.class?starts_with("Decimal")>
+ case ${name?upper_case}:
+ Nullable${name}Holder ${uncappedName}Holder = new Nullable${name}Holder();
+ reader.read(${uncappedName}Holder);
+ setSafe(index, ${uncappedName}Holder);
+ break;
+ </#if>
+ </#list>
+ </#list>
+ case STRUCT:
+ case LIST: {
+ setTypeId(index, typeId);
+ ComplexCopier.copy(reader, writer);
+ break;
+ }
+ default:
+ throw new UnsupportedOperationException();
+ }
+ }
+ <#list vv.types as type>
+ <#list type.minor as minor>
+ <#assign name = minor.class?cap_first />
+ <#assign fields = minor.fields!type.fields />
+ <#assign uncappedName = name?uncap_first/>
+ <#if !minor.typeParams?? || minor.class?starts_with("Decimal")>
+ public void setSafe(int index, Nullable${name}Holder holder) {
+ while (index >= getOffsetBufferValueCapacity()) {
+ reallocOffsetBuffer();
+ }
+ byte typeId = getTypeId(index);
+ ${name}Vector vector = get${name}Vector(typeId<#if minor.class?starts_with("Decimal")>, new ArrowType.Decimal(holder.precision, holder.scale, holder.WIDTH * 8)</#if>);
+ int offset = vector.getValueCount();
+ vector.setValueCount(offset + 1);
+ vector.setSafe(offset, holder);
+ offsetBuffer.setInt((long) index * OFFSET_WIDTH, offset);
+ }
+ </#if>
+ </#list>
+ </#list>
+
+ public void setTypeId(int index, byte typeId) {
+ while (index >= getTypeBufferValueCapacity()) {
+ reallocTypeBuffer();
+ }
+ typeBuffer.setByte(index * TYPE_WIDTH , typeId);
+ }
+
+ private int getTypeBufferValueCapacity() {
+ return (int) typeBuffer.capacity() / TYPE_WIDTH;
+ }
+
+ private long getOffsetBufferValueCapacity() {
+ return offsetBuffer.capacity() / OFFSET_WIDTH;
+ }
+
+ @Override
+ public int hashCode(int index, ArrowBufHasher hasher) {
+ if (isNull(index)) {
+ return 0;
+ }
+ int offset = offsetBuffer.getInt((long) index * OFFSET_WIDTH);
+ return getVector(index).hashCode(offset, hasher);
+ }
+
+ @Override
+ public int hashCode(int index) {
+ return hashCode(index, SimpleHasher.INSTANCE);
+ }
+
+ @Override
+ public <OUT, IN> OUT accept(VectorVisitor<OUT, IN> visitor, IN value) {
+ return visitor.visit(this, value);
+ }
+
+ @Override
+ public String getName() {
+ return name;
+ }
+
+ private void setNegative(long start, long end) {
+ for (long i = start;i < end; i++) {
+ typeBuffer.setByte(i, -1);
+ }
+ }
+
+ @Override
+ public <T extends FieldVector> T addOrGet(String name, FieldType fieldType, Class<T> clazz) {
+ return internalStruct.addOrGet(name, fieldType, clazz);
+ }
+
+ @Override
+ public <T extends FieldVector> T getChild(String name, Class<T> clazz) {
+ return internalStruct.getChild(name, clazz);
+ }
+
+ @Override
+ public VectorWithOrdinal getChildVectorWithOrdinal(String name) {
+ return internalStruct.getChildVectorWithOrdinal(name);
+ }
+
+ @Override
+ public int size() {
+ return internalStruct.size();
+ }
+
+ @Override
+ public void setInitialCapacity(int valueCount, double density) {
+ for (final ValueVector vector : internalStruct) {
+ if (vector instanceof DensityAwareVector) {
+ ((DensityAwareVector) vector).setInitialCapacity(valueCount, density);
+ } else {
+ vector.setInitialCapacity(valueCount);
+ }
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/main/codegen/templates/DenseUnionWriter.java b/src/arrow/java/vector/src/main/codegen/templates/DenseUnionWriter.java
new file mode 100644
index 000000000..e69a62a9e
--- /dev/null
+++ b/src/arrow/java/vector/src/main/codegen/templates/DenseUnionWriter.java
@@ -0,0 +1,302 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.arrow.vector.complex.StructVector;
+import org.apache.arrow.vector.complex.impl.NullableStructWriterFactory;
+import org.apache.arrow.vector.types.Types;
+
+<@pp.dropOutputFile />
+<@pp.changeOutputFile name="/org/apache/arrow/vector/complex/impl/DenseUnionWriter.java" />
+
+
+<#include "/@includes/license.ftl" />
+
+ package org.apache.arrow.vector.complex.impl;
+
+<#include "/@includes/vv_imports.ftl" />
+ import org.apache.arrow.vector.complex.writer.BaseWriter;
+ import org.apache.arrow.vector.types.Types.MinorType;
+
+/*
+ * This class is generated using freemarker and the ${.template_name} template.
+ */
+@SuppressWarnings("unused")
+public class DenseUnionWriter extends AbstractFieldWriter implements FieldWriter {
+
+ DenseUnionVector data;
+
+ private BaseWriter[] writers = new BaseWriter[Byte.MAX_VALUE + 1];
+ private final NullableStructWriterFactory nullableStructWriterFactory;
+
+ public DenseUnionWriter(DenseUnionVector vector) {
+ this(vector, NullableStructWriterFactory.getNullableStructWriterFactoryInstance());
+ }
+
+ public DenseUnionWriter(DenseUnionVector vector, NullableStructWriterFactory nullableStructWriterFactory) {
+ data = vector;
+ this.nullableStructWriterFactory = nullableStructWriterFactory;
+ }
+
+ @Override
+ public void setPosition(int index) {
+ super.setPosition(index);
+ for (BaseWriter writer : writers) {
+ writer.setPosition(index);
+ }
+ }
+
+ @Override
+ public void start() {
+ byte typeId = data.getTypeId(idx());
+ getStructWriter((byte) idx()).start();
+ }
+
+ @Override
+ public void end() {
+ byte typeId = data.getTypeId(idx());
+ getStructWriter(typeId).end();
+ }
+
+ @Override
+ public void startList() {
+ byte typeId = data.getTypeId(idx());
+ getListWriter(typeId).startList();
+ }
+
+ @Override
+ public void endList() {
+ byte typeId = data.getTypeId(idx());
+ getListWriter(typeId).endList();
+ }
+
+ private StructWriter getStructWriter(byte typeId) {
+ StructWriter structWriter = (StructWriter) writers[typeId];
+ if (structWriter == null) {
+ structWriter = nullableStructWriterFactory.build((StructVector) data.getVectorByType(typeId));
+ writers[typeId] = structWriter;
+ }
+ return structWriter;
+ }
+
+ public StructWriter asStruct(byte typeId) {
+ data.setTypeId(idx(), typeId);
+ return getStructWriter(typeId);
+ }
+
+ private ListWriter getListWriter(byte typeId) {
+ ListWriter listWriter = (ListWriter) writers[typeId];
+ if (listWriter == null) {
+ listWriter = new UnionListWriter((ListVector) data.getVectorByType(typeId), nullableStructWriterFactory);
+ writers[typeId] = listWriter;
+ }
+ return listWriter;
+ }
+
+ public ListWriter asList(byte typeId) {
+ data.setTypeId(idx(), typeId);
+ return getListWriter(typeId);
+ }
+
+ private MapWriter getMapWriter(byte typeId) {
+ MapWriter mapWriter = (MapWriter) writers[typeId];
+ if (mapWriter == null) {
+ mapWriter = new UnionMapWriter((MapVector) data.getVectorByType(typeId));
+ writers[typeId] = mapWriter;
+ }
+ return mapWriter;
+ }
+
+ public MapWriter asMap(byte typeId) {
+ data.setTypeId(idx(), typeId);
+ return getMapWriter(typeId);
+ }
+
+ BaseWriter getWriter(byte typeId) {
+ MinorType minorType = data.getVectorByType(typeId).getMinorType();
+ switch (minorType) {
+ case STRUCT:
+ return getStructWriter(typeId);
+ case LIST:
+ return getListWriter(typeId);
+ case MAP:
+ return getMapWriter(typeId);
+ <#list vv.types as type>
+ <#list type.minor as minor>
+ <#assign name = minor.class?cap_first />
+ <#assign fields = minor.fields!type.fields />
+ <#assign uncappedName = name?uncap_first/>
+ <#if !minor.typeParams?? || minor.class?starts_with("Decimal")>
+ case ${name?upper_case}:
+ return get${name}Writer(typeId);
+ </#if>
+ </#list>
+ </#list>
+ default:
+ throw new UnsupportedOperationException("Unknown type: " + minorType);
+ }
+ }
+ <#list vv.types as type>
+ <#list type.minor as minor>
+ <#assign name = minor.class?cap_first />
+ <#assign fields = minor.fields!type.fields />
+ <#assign uncappedName = name?uncap_first/>
+ <#if !minor.typeParams?? || minor.class?starts_with("Decimal")>
+
+ private ${name}Writer get${name}Writer(byte typeId) {
+ ${name}Writer writer = (${name}Writer) writers[typeId];
+ if (writer == null) {
+ writer = new ${name}WriterImpl((${name}Vector) data.getVectorByType(typeId));
+ writers[typeId] = writer;
+ }
+ return writer;
+ }
+
+ public ${name}Writer as${name}(byte typeId) {
+ data.setTypeId(idx(), typeId);
+ return get${name}Writer(typeId);
+ }
+
+ @Override
+ public void write(${name}Holder holder) {
+ throw new UnsupportedOperationException();
+ }
+
+ public void write${minor.class}(<#list fields as field>${field.type} ${field.name}<#if field_has_next>, </#if></#list>, byte typeId<#if minor.class?starts_with("Decimal")>, ArrowType arrowType</#if>) {
+ data.setTypeId(idx(), typeId);
+ get${name}Writer(typeId).setPosition(data.getOffset(idx()));
+ get${name}Writer(typeId).write${name}(<#list fields as field>${field.name}<#if field_has_next>, </#if></#list><#if minor.class?starts_with("Decimal")>, arrowType</#if>);
+ }
+ </#if>
+ </#list>
+ </#list>
+
+ public void writeNull() {
+ }
+
+ @Override
+ public StructWriter struct() {
+ byte typeId = data.getTypeId(idx());
+ data.setTypeId(idx(), typeId);
+ getListWriter(typeId).setPosition(data.getOffset(idx()));
+ return getListWriter(typeId).struct();
+ }
+
+ @Override
+ public ListWriter list() {
+ byte typeId = data.getTypeId(idx());
+ data.setTypeId(idx(), typeId);
+ getListWriter(typeId).setPosition(data.getOffset(idx()));
+ return getListWriter(typeId).list();
+ }
+
+ @Override
+ public ListWriter list(String name) {
+ byte typeId = data.getTypeId(idx());
+ data.setTypeId(idx(), typeId);
+ getStructWriter(typeId).setPosition(data.getOffset(idx()));
+ return getStructWriter(typeId).list(name);
+ }
+
+ @Override
+ public MapWriter map() {
+ byte typeId = data.getTypeId(idx());
+ data.setTypeId(idx(), typeId);
+ getListWriter(typeId).setPosition(data.getOffset(idx()));
+ return getMapWriter(typeId).map();
+ }
+
+ @Override
+ public MapWriter map(String name) {
+ byte typeId = data.getTypeId(idx());
+ data.setTypeId(idx(), typeId);
+ getStructWriter(typeId).setPosition(data.getOffset(idx()));
+ return getStructWriter(typeId).map(name);
+ }
+
+ @Override
+ public MapWriter map(String name, boolean keysSorted) {
+ byte typeId = data.getTypeId(idx());
+ data.setTypeId(idx(), typeId);
+ getStructWriter(typeId).setPosition(data.getOffset(idx()));
+ return getStructWriter(typeId).map(name, keysSorted);
+ }
+
+ @Override
+ public StructWriter struct(String name) {
+ byte typeId = data.getTypeId(idx());
+ data.setTypeId(idx(), typeId);
+ getStructWriter(typeId).setPosition(data.getOffset(idx()));
+ return getStructWriter(typeId).struct(name);
+ }
+
+ <#list vv.types as type><#list type.minor as minor>
+ <#assign lowerName = minor.class?uncap_first />
+ <#if lowerName == "int" ><#assign lowerName = "integer" /></#if>
+ <#assign upperName = minor.class?upper_case />
+ <#assign capName = minor.class?cap_first />
+ <#if !minor.typeParams?? || minor.class?starts_with("Decimal") >
+ @Override
+ public ${capName}Writer ${lowerName}(String name) {
+ byte typeId = data.getTypeId(idx());
+ data.setTypeId(idx(), typeId);
+ getStructWriter(typeId).setPosition(data.getOffset(idx()));
+ return getStructWriter(typeId).${lowerName}(name);
+ }
+
+ @Override
+ public ${capName}Writer ${lowerName}() {
+ byte typeId = data.getTypeId(idx());
+ data.setTypeId(idx(), typeId);
+ getListWriter(typeId).setPosition(data.getOffset(idx()));
+ return getListWriter(typeId).${lowerName}();
+ }
+ </#if>
+ <#if minor.class?starts_with("Decimal")>
+ public ${capName}Writer ${lowerName}(String name<#list minor.typeParams as typeParam>, ${typeParam.type} ${typeParam.name}</#list>) {
+ byte typeId = data.getTypeId(idx());
+ data.setTypeId(idx(), typeId);
+ getStructWriter(typeId).setPosition(data.getOffset(idx()));
+ return getStructWriter(typeId).${lowerName}(name<#list minor.typeParams as typeParam>, ${typeParam.name}</#list>);
+ }
+ </#if>
+ </#list></#list>
+
+ @Override
+ public void allocate() {
+ data.allocateNew();
+ }
+
+ @Override
+ public void clear() {
+ data.clear();
+ }
+
+ @Override
+ public void close() throws Exception {
+ data.close();
+ }
+
+ @Override
+ public Field getField() {
+ return data.getField();
+ }
+
+ @Override
+ public int getValueCapacity() {
+ return data.getValueCapacity();
+ }
+}
diff --git a/src/arrow/java/vector/src/main/codegen/templates/HolderReaderImpl.java b/src/arrow/java/vector/src/main/codegen/templates/HolderReaderImpl.java
new file mode 100644
index 000000000..8394aaad4
--- /dev/null
+++ b/src/arrow/java/vector/src/main/codegen/templates/HolderReaderImpl.java
@@ -0,0 +1,173 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+<@pp.dropOutputFile />
+<#list vv.types as type>
+<#list type.minor as minor>
+<#list ["", "Nullable"] as holderMode>
+<#assign nullMode = holderMode />
+
+<#assign lowerName = minor.class?uncap_first />
+<#if lowerName == "int" ><#assign lowerName = "integer" /></#if>
+<#assign name = minor.class?cap_first />
+<#assign javaType = (minor.javaType!type.javaType) />
+<#assign friendlyType = (minor.friendlyType!minor.boxedType!type.boxedType) />
+<#assign safeType=friendlyType />
+<#if safeType=="byte[]"><#assign safeType="ByteArray" /></#if>
+<#assign fields = (minor.fields!type.fields) + minor.typeParams![]/>
+
+<@pp.changeOutputFile name="/org/apache/arrow/vector/complex/impl/${holderMode}${name}HolderReaderImpl.java" />
+<#include "/@includes/license.ftl" />
+
+package org.apache.arrow.vector.complex.impl;
+
+<#include "/@includes/vv_imports.ftl" />
+
+// Source code generated using FreeMarker template ${.template_name}
+
+@SuppressWarnings("unused")
+public class ${holderMode}${name}HolderReaderImpl extends AbstractFieldReader {
+
+ private ${nullMode}${name}Holder holder;
+ public ${holderMode}${name}HolderReaderImpl(${holderMode}${name}Holder holder) {
+ this.holder = holder;
+ }
+
+ @Override
+ public int size() {
+ throw new UnsupportedOperationException("You can't call size on a Holder value reader.");
+ }
+
+ @Override
+ public boolean next() {
+ throw new UnsupportedOperationException("You can't call next on a single value reader.");
+
+ }
+
+ @Override
+ public void setPosition(int index) {
+ throw new UnsupportedOperationException("You can't call next on a single value reader.");
+ }
+
+ @Override
+ public MinorType getMinorType() {
+ return MinorType.${name?upper_case};
+ }
+
+ @Override
+ public boolean isSet() {
+ <#if holderMode == "Nullable">
+ return this.holder.isSet == 1;
+ <#else>
+ return true;
+ </#if>
+ }
+
+ @Override
+ public void read(${name}Holder h) {
+ <#list fields as field>
+ h.${field.name} = holder.${field.name};
+ </#list>
+ }
+
+ @Override
+ public void read(Nullable${name}Holder h) {
+ <#list fields as field>
+ h.${field.name} = holder.${field.name};
+ </#list>
+ h.isSet = isSet() ? 1 : 0;
+ }
+
+ // read friendly type
+ @Override
+ public ${friendlyType} read${safeType}() {
+ <#if nullMode == "Nullable">
+ if (!isSet()) {
+ return null;
+ }
+ </#if>
+
+ <#if type.major == "VarLen">
+ <#if type.width == 4>
+ int length = holder.end - holder.start;
+ <#elseif type.width == 8>
+ int length = (int) (holder.end - holder.start);
+ </#if>
+ byte[] value = new byte [length];
+ holder.buffer.getBytes(holder.start, value, 0, length);
+ <#if minor.class == "VarBinary" || minor.class == "LargeVarBinary">
+ return value;
+ <#elseif minor.class == "VarChar" || minor.class == "LargeVarChar">
+ Text text = new Text();
+ text.set(value);
+ return text;
+ </#if>
+ <#elseif minor.class == "IntervalDay">
+ return Duration.ofDays(holder.days).plusMillis(holder.milliseconds);
+ <#elseif minor.class == "IntervalYear">
+ return Period.ofMonths(holder.value);
+ <#elseif minor.class == "IntervalMonthDayNano">
+ return new PeriodDuration(Period.ofMonths(holder.months).plusDays(holder.days),
+ Duration.ofNanos(holder.nanoseconds));
+ <#elseif minor.class == "Duration">
+ return DurationVector.toDuration(holder.value, holder.unit);
+ <#elseif minor.class == "Bit" >
+ return new Boolean(holder.value != 0);
+ <#elseif minor.class == "Decimal">
+ byte[] bytes = new byte[${type.width}];
+ holder.buffer.getBytes(holder.start, bytes, 0, ${type.width});
+ ${friendlyType} value = new BigDecimal(new BigInteger(bytes), holder.scale);
+ return value;
+ <#elseif minor.class == "Decimal256">
+ byte[] bytes = new byte[${type.width}];
+ holder.buffer.getBytes(holder.start, bytes, 0, ${type.width});
+ ${friendlyType} value = new BigDecimal(new BigInteger(bytes), holder.scale);
+ return value;
+ <#elseif minor.class == "FixedSizeBinary">
+ byte[] value = new byte [holder.byteWidth];
+ holder.buffer.getBytes(0, value, 0, holder.byteWidth);
+ return value;
+ <#elseif minor.class == "TimeStampSec">
+ final long millis = java.util.concurrent.TimeUnit.SECONDS.toMillis(holder.value);
+ return DateUtility.getLocalDateTimeFromEpochMilli(millis);
+ <#elseif minor.class == "TimeStampMilli" || minor.class == "DateMilli" || minor.class == "TimeMilli">
+ return DateUtility.getLocalDateTimeFromEpochMilli(holder.value);
+ <#elseif minor.class == "TimeStampMicro">
+ return DateUtility.getLocalDateTimeFromEpochMicro(holder.value);
+ <#elseif minor.class == "TimeStampNano">
+ return DateUtility.getLocalDateTimeFromEpochNano(holder.value);
+ <#else>
+ ${friendlyType} value = new ${friendlyType}(this.holder.value);
+ return value;
+ </#if>
+ }
+
+ @Override
+ public Object readObject() {
+ return read${safeType}();
+ }
+
+ <#if nullMode != "Nullable">
+ public void copyAsValue(${minor.class?cap_first}Writer writer){
+ writer.write(holder);
+ }
+ </#if>
+}
+
+</#list>
+</#list>
+</#list>
diff --git a/src/arrow/java/vector/src/main/codegen/templates/NullReader.java b/src/arrow/java/vector/src/main/codegen/templates/NullReader.java
new file mode 100644
index 000000000..0c65f9a56
--- /dev/null
+++ b/src/arrow/java/vector/src/main/codegen/templates/NullReader.java
@@ -0,0 +1,147 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.arrow.vector.types.pojo.ArrowType.Null;
+import org.apache.arrow.vector.types.pojo.Field;
+
+<@pp.dropOutputFile />
+<@pp.changeOutputFile name="/org/apache/arrow/vector/complex/impl/NullReader.java" />
+
+
+<#include "/@includes/license.ftl" />
+
+package org.apache.arrow.vector.complex.impl;
+
+<#include "/@includes/vv_imports.ftl" />
+
+/**
+ * Source code generated using FreeMarker template ${.template_name}
+ */
+@SuppressWarnings("unused")
+public class NullReader extends AbstractBaseReader implements FieldReader{
+
+ public static final NullReader INSTANCE = new NullReader();
+ public static final NullReader EMPTY_LIST_INSTANCE = new NullReader(MinorType.NULL);
+ public static final NullReader EMPTY_STRUCT_INSTANCE = new NullReader(MinorType.STRUCT);
+ private MinorType type;
+
+ private NullReader(){
+ super();
+ type = MinorType.NULL;
+ }
+
+ private NullReader(MinorType type){
+ super();
+ this.type = type;
+ }
+
+ @Override
+ public MinorType getMinorType() {
+ return type;
+ }
+
+ @Override
+ public Field getField() {
+ return new Field("", FieldType.nullable(new Null()), null);
+ }
+
+ public void copyAsValue(StructWriter writer) {}
+
+ public void copyAsValue(ListWriter writer) {}
+
+ public void copyAsValue(UnionWriter writer) {}
+
+ <#list vv.types as type><#list type.minor as minor><#assign name = minor.class?cap_first />
+ public void read(${name}Holder holder){
+ throw new UnsupportedOperationException("NullReader cannot write into non-nullable holder");
+ }
+
+ public void read(Nullable${name}Holder holder){
+ holder.isSet = 0;
+ }
+
+ public void read(int arrayIndex, ${name}Holder holder){
+ throw new ArrayIndexOutOfBoundsException();
+ }
+
+ public void copyAsValue(${minor.class}Writer writer){}
+ public void copyAsField(String name, ${minor.class}Writer writer){}
+
+ public void read(int arrayIndex, Nullable${name}Holder holder){
+ throw new ArrayIndexOutOfBoundsException();
+ }
+ </#list></#list>
+
+ public int size(){
+ return 0;
+ }
+
+ public boolean isSet(){
+ return false;
+ }
+
+ public boolean next(){
+ return false;
+ }
+
+ public RepeatedStructReader struct(){
+ return this;
+ }
+
+ public RepeatedListReader list(){
+ return this;
+ }
+
+ public StructReader struct(String name){
+ return this;
+ }
+
+ public ListReader list(String name){
+ return this;
+ }
+
+ public FieldReader reader(String name){
+ return this;
+ }
+
+ public FieldReader reader(){
+ return this;
+ }
+
+ private void fail(String name){
+ throw new IllegalArgumentException(String.format("You tried to read a %s type when you are using a ValueReader of type %s.", name, this.getClass().getSimpleName()));
+ }
+
+ <#list ["Object", "BigDecimal", "Short", "Integer", "Long", "Boolean",
+ "LocalDateTime", "Duration", "Period", "Double", "Float",
+ "Character", "Text", "String", "Byte", "byte[]", "PeriodDuration"] as friendlyType>
+ <#assign safeType=friendlyType />
+ <#if safeType=="byte[]"><#assign safeType="ByteArray" /></#if>
+
+ public ${friendlyType} read${safeType}(int arrayIndex){
+ return null;
+ }
+
+ public ${friendlyType} read${safeType}(){
+ return null;
+ }
+ </#list>
+
+}
+
+
+
diff --git a/src/arrow/java/vector/src/main/codegen/templates/StructWriters.java b/src/arrow/java/vector/src/main/codegen/templates/StructWriters.java
new file mode 100644
index 000000000..69693c630
--- /dev/null
+++ b/src/arrow/java/vector/src/main/codegen/templates/StructWriters.java
@@ -0,0 +1,326 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+<@pp.dropOutputFile />
+<#list ["Nullable", "Single"] as mode>
+<@pp.changeOutputFile name="/org/apache/arrow/vector/complex/impl/${mode}StructWriter.java" />
+<#assign index = "idx()">
+<#if mode == "Single">
+<#assign containerClass = "NonNullableStructVector" />
+<#else>
+<#assign containerClass = "StructVector" />
+</#if>
+
+<#include "/@includes/license.ftl" />
+
+package org.apache.arrow.vector.complex.impl;
+
+<#include "/@includes/vv_imports.ftl" />
+import java.util.Map;
+import java.util.HashMap;
+
+import org.apache.arrow.vector.holders.RepeatedStructHolder;
+import org.apache.arrow.vector.AllocationHelper;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.complex.writer.FieldWriter;
+
+/*
+ * This class is generated using FreeMarker and the ${.template_name} template.
+ */
+@SuppressWarnings("unused")
+public class ${mode}StructWriter extends AbstractFieldWriter {
+
+ protected final ${containerClass} container;
+ private int initialCapacity;
+ private final Map<String, FieldWriter> fields = new HashMap<>();
+ public ${mode}StructWriter(${containerClass} container) {
+ <#if mode == "Single">
+ if (container instanceof StructVector) {
+ throw new IllegalArgumentException("Invalid container: " + container);
+ }
+ </#if>
+ this.container = container;
+ this.initialCapacity = 0;
+ for (Field child : container.getField().getChildren()) {
+ MinorType minorType = Types.getMinorTypeForArrowType(child.getType());
+ switch (minorType) {
+ case STRUCT:
+ struct(child.getName());
+ break;
+ case LIST:
+ list(child.getName());
+ break;
+ case MAP: {
+ ArrowType.Map arrowType = (ArrowType.Map) child.getType();
+ map(child.getName(), arrowType.getKeysSorted());
+ break;
+ }
+ case UNION:
+ FieldType fieldType = new FieldType(addVectorAsNullable, MinorType.UNION.getType(), null, null);
+ UnionWriter writer = new UnionWriter(container.addOrGet(child.getName(), fieldType, UnionVector.class), getNullableStructWriterFactory());
+ fields.put(handleCase(child.getName()), writer);
+ break;
+<#list vv.types as type><#list type.minor as minor>
+<#assign lowerName = minor.class?uncap_first />
+<#if lowerName == "int" ><#assign lowerName = "integer" /></#if>
+<#assign upperName = minor.class?upper_case />
+ case ${upperName}: {
+ <#if minor.typeParams?? >
+ ${minor.arrowType} arrowType = (${minor.arrowType})child.getType();
+ ${lowerName}(child.getName()<#list minor.typeParams as typeParam>, arrowType.get${typeParam.name?cap_first}()</#list>);
+ <#else>
+ ${lowerName}(child.getName());
+ </#if>
+ break;
+ }
+</#list></#list>
+ default:
+ throw new UnsupportedOperationException("Unknown type: " + minorType);
+ }
+ }
+ }
+
+ protected String handleCase(final String input) {
+ return input.toLowerCase();
+ }
+
+ protected NullableStructWriterFactory getNullableStructWriterFactory() {
+ return NullableStructWriterFactory.getNullableStructWriterFactoryInstance();
+ }
+
+ @Override
+ public int getValueCapacity() {
+ return container.getValueCapacity();
+ }
+
+ public void setInitialCapacity(int initialCapacity) {
+ this.initialCapacity = initialCapacity;
+ container.setInitialCapacity(initialCapacity);
+ }
+
+ @Override
+ public boolean isEmptyStruct() {
+ return 0 == container.size();
+ }
+
+ @Override
+ public Field getField() {
+ return container.getField();
+ }
+
+ @Override
+ public StructWriter struct(String name) {
+ String finalName = handleCase(name);
+ FieldWriter writer = fields.get(finalName);
+ if(writer == null){
+ int vectorCount=container.size();
+ FieldType fieldType = new FieldType(addVectorAsNullable, MinorType.STRUCT.getType(), null, null);
+ StructVector vector = container.addOrGet(name, fieldType, StructVector.class);
+ writer = new PromotableWriter(vector, container, getNullableStructWriterFactory());
+ if(vectorCount != container.size()) {
+ writer.allocate();
+ }
+ writer.setPosition(idx());
+ fields.put(finalName, writer);
+ } else {
+ if (writer instanceof PromotableWriter) {
+ // ensure writers are initialized
+ ((PromotableWriter)writer).getWriter(MinorType.STRUCT);
+ }
+ }
+ return writer;
+ }
+
+ @Override
+ public void close() throws Exception {
+ clear();
+ container.close();
+ }
+
+ @Override
+ public void allocate() {
+ container.allocateNew();
+ for(final FieldWriter w : fields.values()) {
+ w.allocate();
+ }
+ }
+
+ @Override
+ public void clear() {
+ container.clear();
+ for(final FieldWriter w : fields.values()) {
+ w.clear();
+ }
+ }
+
+ @Override
+ public ListWriter list(String name) {
+ String finalName = handleCase(name);
+ FieldWriter writer = fields.get(finalName);
+ int vectorCount = container.size();
+ if(writer == null) {
+ FieldType fieldType = new FieldType(addVectorAsNullable, MinorType.LIST.getType(), null, null);
+ writer = new PromotableWriter(container.addOrGet(name, fieldType, ListVector.class), container, getNullableStructWriterFactory());
+ if (container.size() > vectorCount) {
+ writer.allocate();
+ }
+ writer.setPosition(idx());
+ fields.put(finalName, writer);
+ } else {
+ if (writer instanceof PromotableWriter) {
+ // ensure writers are initialized
+ ((PromotableWriter)writer).getWriter(MinorType.LIST);
+ }
+ }
+ return writer;
+ }
+
+ @Override
+ public MapWriter map(String name) {
+ return map(name, false);
+ }
+
+ @Override
+ public MapWriter map(String name, boolean keysSorted) {
+ FieldWriter writer = fields.get(handleCase(name));
+ if(writer == null) {
+ ValueVector vector;
+ ValueVector currentVector = container.getChild(name);
+ MapVector v = container.addOrGet(name,
+ new FieldType(addVectorAsNullable,
+ new ArrowType.Map(keysSorted)
+ ,null, null),
+ MapVector.class);
+ writer = new PromotableWriter(v, container, getNullableStructWriterFactory());
+ vector = v;
+ if (currentVector == null || currentVector != vector) {
+ if(this.initialCapacity > 0) {
+ vector.setInitialCapacity(this.initialCapacity);
+ }
+ vector.allocateNewSafe();
+ }
+ writer.setPosition(idx());
+ fields.put(handleCase(name), writer);
+ } else {
+ if (writer instanceof PromotableWriter) {
+ // ensure writers are initialized
+ ((PromotableWriter)writer).getWriter(MinorType.MAP, new ArrowType.Map(keysSorted));
+ }
+ }
+ return writer;
+ }
+
+ public void setValueCount(int count) {
+ container.setValueCount(count);
+ }
+
+ @Override
+ public void setPosition(int index) {
+ super.setPosition(index);
+ for(final FieldWriter w: fields.values()) {
+ w.setPosition(index);
+ }
+ }
+
+ <#if mode="Nullable">
+ @Override
+ public void writeNull() {
+ container.setNull(idx());
+ setValueCount(idx()+1);
+ super.setPosition(idx()+1);
+ }
+ </#if>
+
+ @Override
+ public void start() {
+ <#if mode == "Single">
+ <#else>
+ container.setIndexDefined(idx());
+ </#if>
+ }
+
+ @Override
+ public void end() {
+ setPosition(idx()+1);
+ }
+
+ <#list vv.types as type><#list type.minor as minor>
+ <#assign lowerName = minor.class?uncap_first />
+ <#if lowerName == "int" ><#assign lowerName = "integer" /></#if>
+ <#assign upperName = minor.class?upper_case />
+ <#assign capName = minor.class?cap_first />
+ <#assign vectName = capName />
+
+ <#if minor.typeParams?? >
+ @Override
+ public ${minor.class}Writer ${lowerName}(String name) {
+ // returns existing writer
+ final FieldWriter writer = fields.get(handleCase(name));
+ Preconditions.checkNotNull(writer);
+ return writer;
+ }
+
+ @Override
+ public ${minor.class}Writer ${lowerName}(String name<#list minor.typeParams as typeParam>, ${typeParam.type} ${typeParam.name}</#list>) {
+ <#else>
+ @Override
+ public ${minor.class}Writer ${lowerName}(String name) {
+ </#if>
+ FieldWriter writer = fields.get(handleCase(name));
+ if(writer == null) {
+ ValueVector vector;
+ ValueVector currentVector = container.getChild(name);
+ ${vectName}Vector v = container.addOrGet(name,
+ new FieldType(addVectorAsNullable,
+ <#if minor.typeParams??>
+ <#if minor.arrowTypeConstructorParams??>
+ <#assign constructorParams = minor.arrowTypeConstructorParams />
+ <#else>
+ <#assign constructorParams = [] />
+ <#list minor.typeParams?reverse as typeParam>
+ <#assign constructorParams = constructorParams + [ typeParam.name ] />
+ </#list>
+ </#if>
+ new ${minor.arrowType}(${constructorParams?join(", ")}<#if minor.class?starts_with("Decimal")>, ${vectName}Vector.TYPE_WIDTH * 8</#if>)
+ <#else>
+ MinorType.${upperName}.getType()
+ </#if>
+ ,null, null),
+ ${vectName}Vector.class);
+ writer = new PromotableWriter(v, container, getNullableStructWriterFactory());
+ vector = v;
+ if (currentVector == null || currentVector != vector) {
+ if(this.initialCapacity > 0) {
+ vector.setInitialCapacity(this.initialCapacity);
+ }
+ vector.allocateNewSafe();
+ }
+ writer.setPosition(idx());
+ fields.put(handleCase(name), writer);
+ } else {
+ if (writer instanceof PromotableWriter) {
+ // ensure writers are initialized
+ ((PromotableWriter)writer).getWriter(MinorType.${upperName}<#if minor.class?starts_with("Decimal")>, new ${minor.arrowType}(precision, scale, ${vectName}Vector.TYPE_WIDTH * 8)</#if>);
+ }
+ }
+ return writer;
+ }
+
+ </#list></#list>
+
+}
+</#list>
diff --git a/src/arrow/java/vector/src/main/codegen/templates/UnionFixedSizeListWriter.java b/src/arrow/java/vector/src/main/codegen/templates/UnionFixedSizeListWriter.java
new file mode 100644
index 000000000..55c661bfc
--- /dev/null
+++ b/src/arrow/java/vector/src/main/codegen/templates/UnionFixedSizeListWriter.java
@@ -0,0 +1,319 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.vector.complex.writer.Decimal256Writer;
+import org.apache.arrow.vector.complex.writer.DecimalWriter;
+import org.apache.arrow.vector.holders.Decimal256Holder;
+import org.apache.arrow.vector.holders.DecimalHolder;
+
+
+import java.lang.UnsupportedOperationException;
+import java.math.BigDecimal;
+
+<@pp.dropOutputFile />
+<@pp.changeOutputFile name="/org/apache/arrow/vector/complex/impl/UnionFixedSizeListWriter.java" />
+
+
+<#include "/@includes/license.ftl" />
+
+ package org.apache.arrow.vector.complex.impl;
+
+<#include "/@includes/vv_imports.ftl" />
+
+/*
+ * This class is generated using freemarker and the ${.template_name} template.
+ */
+
+@SuppressWarnings("unused")
+public class UnionFixedSizeListWriter extends AbstractFieldWriter {
+
+ protected FixedSizeListVector vector;
+ protected PromotableWriter writer;
+ private boolean inStruct = false;
+ private String structName;
+ private final int listSize;
+
+ public UnionFixedSizeListWriter(FixedSizeListVector vector) {
+ this(vector, NullableStructWriterFactory.getNullableStructWriterFactoryInstance());
+ }
+
+ public UnionFixedSizeListWriter(FixedSizeListVector vector, NullableStructWriterFactory nullableStructWriterFactory) {
+ this.vector = vector;
+ this.writer = new PromotableWriter(vector.getDataVector(), vector, nullableStructWriterFactory);
+ this.listSize = vector.getListSize();
+ }
+
+ public UnionFixedSizeListWriter(FixedSizeListVector vector, AbstractFieldWriter parent) {
+ this(vector);
+ }
+
+ @Override
+ public void allocate() {
+ vector.allocateNew();
+ }
+
+ @Override
+ public void clear() {
+ vector.clear();
+ }
+
+ @Override
+ public Field getField() {
+ return vector.getField();
+ }
+
+ public void setValueCount(int count) {
+ vector.setValueCount(count);
+ }
+
+ @Override
+ public int getValueCapacity() {
+ return vector.getValueCapacity();
+ }
+
+ @Override
+ public void close() throws Exception {
+ vector.close();
+ writer.close();
+ }
+
+ @Override
+ public void setPosition(int index) {
+ super.setPosition(index);
+ }
+ <#list vv.types as type><#list type.minor as minor><#assign name = minor.class?cap_first />
+ <#assign fields = minor.fields!type.fields />
+ <#assign uncappedName = name?uncap_first/>
+ <#if uncappedName == "int" ><#assign uncappedName = "integer" /></#if>
+ <#if !minor.typeParams?? >
+
+ @Override
+ public ${name}Writer ${uncappedName}() {
+ return this;
+ }
+
+ @Override
+ public ${name}Writer ${uncappedName}(String name) {
+ structName = name;
+ return writer.${uncappedName}(name);
+ }
+ </#if>
+ </#list></#list>
+
+ @Override
+ public DecimalWriter decimal() {
+ return this;
+ }
+
+ @Override
+ public DecimalWriter decimal(String name, int scale, int precision) {
+ return writer.decimal(name, scale, precision);
+ }
+
+ @Override
+ public DecimalWriter decimal(String name) {
+ return writer.decimal(name);
+ }
+
+
+ @Override
+ public Decimal256Writer decimal256() {
+ return this;
+ }
+
+ @Override
+ public Decimal256Writer decimal256(String name, int scale, int precision) {
+ return writer.decimal256(name, scale, precision);
+ }
+
+ @Override
+ public Decimal256Writer decimal256(String name) {
+ return writer.decimal256(name);
+ }
+
+ @Override
+ public StructWriter struct() {
+ inStruct = true;
+ return this;
+ }
+
+ @Override
+ public ListWriter list() {
+ return writer;
+ }
+
+ @Override
+ public ListWriter list(String name) {
+ ListWriter listWriter = writer.list(name);
+ return listWriter;
+ }
+
+ @Override
+ public StructWriter struct(String name) {
+ StructWriter structWriter = writer.struct(name);
+ return structWriter;
+ }
+
+ @Override
+ public MapWriter map() {
+ return writer;
+ }
+
+ @Override
+ public MapWriter map(String name) {
+ MapWriter mapWriter = writer.map(name);
+ return mapWriter;
+ }
+
+ @Override
+ public MapWriter map(boolean keysSorted) {
+ writer.map(keysSorted);
+ return writer;
+ }
+
+ @Override
+ public MapWriter map(String name, boolean keysSorted) {
+ MapWriter mapWriter = writer.map(name, keysSorted);
+ return mapWriter;
+ }
+
+ @Override
+ public void startList() {
+ int start = vector.startNewValue(idx());
+ writer.setPosition(start);
+ }
+
+ @Override
+ public void endList() {
+ setPosition(idx() + 1);
+ }
+
+ @Override
+ public void start() {
+ writer.start();
+ }
+
+ @Override
+ public void end() {
+ writer.end();
+ inStruct = false;
+ }
+
+ @Override
+ public void write(DecimalHolder holder) {
+ if (writer.idx() >= (idx() + 1) * listSize) {
+ throw new IllegalStateException(String.format("values at index %s is greater than listSize %s", idx(), listSize));
+ }
+ writer.write(holder);
+ writer.setPosition(writer.idx() + 1);
+ }
+
+ @Override
+ public void write(Decimal256Holder holder) {
+ if (writer.idx() >= (idx() + 1) * listSize) {
+ throw new IllegalStateException(String.format("values at index %s is greater than listSize %s", idx(), listSize));
+ }
+ writer.write(holder);
+ writer.setPosition(writer.idx() + 1);
+ }
+
+
+ @Override
+ public void writeNull() {
+ if (writer.idx() >= (idx() + 1) * listSize) {
+ throw new IllegalStateException(String.format("values at index %s is greater than listSize %s", idx(), listSize));
+ }
+ writer.writeNull();
+ }
+
+ public void writeDecimal(long start, ArrowBuf buffer, ArrowType arrowType) {
+ if (writer.idx() >= (idx() + 1) * listSize) {
+ throw new IllegalStateException(String.format("values at index %s is greater than listSize %s", idx(), listSize));
+ }
+ writer.writeDecimal(start, buffer, arrowType);
+ writer.setPosition(writer.idx() + 1);
+ }
+
+ public void writeDecimal(BigDecimal value) {
+ if (writer.idx() >= (idx() + 1) * listSize) {
+ throw new IllegalStateException(String.format("values at index %s is greater than listSize %s", idx(), listSize));
+ }
+ writer.writeDecimal(value);
+ writer.setPosition(writer.idx() + 1);
+ }
+
+ public void writeBigEndianBytesToDecimal(byte[] value, ArrowType arrowType) {
+ if (writer.idx() >= (idx() + 1) * listSize) {
+ throw new IllegalStateException(String.format("values at index %s is greater than listSize %s", idx(), listSize));
+ }
+ writer.writeBigEndianBytesToDecimal(value, arrowType);
+ writer.setPosition(writer.idx() + 1);
+ }
+
+ public void writeDecimal256(long start, ArrowBuf buffer, ArrowType arrowType) {
+ if (writer.idx() >= (idx() + 1) * listSize) {
+ throw new IllegalStateException(String.format("values at index %s is greater than listSize %s", idx(), listSize));
+ }
+ writer.writeDecimal256(start, buffer, arrowType);
+ writer.setPosition(writer.idx() + 1);
+ }
+
+ public void writeDecimal256(BigDecimal value) {
+ if (writer.idx() >= (idx() + 1) * listSize) {
+ throw new IllegalStateException(String.format("values at index %s is greater than listSize %s", idx(), listSize));
+ }
+ writer.writeDecimal256(value);
+ writer.setPosition(writer.idx() + 1);
+ }
+
+ public void writeBigEndianBytesToDecimal256(byte[] value, ArrowType arrowType) {
+ if (writer.idx() >= (idx() + 1) * listSize) {
+ throw new IllegalStateException(String.format("values at index %s is greater than listSize %s", idx(), listSize));
+ }
+ writer.writeBigEndianBytesToDecimal256(value, arrowType);
+ writer.setPosition(writer.idx() + 1);
+ }
+
+
+ <#list vv.types as type>
+ <#list type.minor as minor>
+ <#assign name = minor.class?cap_first />
+ <#assign fields = minor.fields!type.fields />
+ <#assign uncappedName = name?uncap_first/>
+ <#if !minor.typeParams?? >
+ @Override
+ public void write${name}(<#list fields as field>${field.type} ${field.name}<#if field_has_next>, </#if></#list>) {
+ if (writer.idx() >= (idx() + 1) * listSize) {
+ throw new IllegalStateException(String.format("values at index %s is greater than listSize %s", idx(), listSize));
+ }
+ writer.write${name}(<#list fields as field>${field.name}<#if field_has_next>, </#if></#list>);
+ writer.setPosition(writer.idx() + 1);
+ }
+
+ public void write(${name}Holder holder) {
+ if (writer.idx() >= (idx() + 1) * listSize) {
+ throw new IllegalStateException(String.format("values at index %s is greater than listSize %s", idx(), listSize));
+ }
+ writer.write${name}(<#list fields as field>holder.${field.name}<#if field_has_next>, </#if></#list>);
+ writer.setPosition(writer.idx() + 1);
+ }
+
+ </#if>
+ </#list>
+ </#list>
+}
diff --git a/src/arrow/java/vector/src/main/codegen/templates/UnionListWriter.java b/src/arrow/java/vector/src/main/codegen/templates/UnionListWriter.java
new file mode 100644
index 000000000..926276b5e
--- /dev/null
+++ b/src/arrow/java/vector/src/main/codegen/templates/UnionListWriter.java
@@ -0,0 +1,326 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.vector.complex.writer.Decimal256Writer;
+import org.apache.arrow.vector.complex.writer.DecimalWriter;
+import org.apache.arrow.vector.holders.Decimal256Holder;
+import org.apache.arrow.vector.holders.DecimalHolder;
+
+
+import java.lang.UnsupportedOperationException;
+import java.math.BigDecimal;
+
+<@pp.dropOutputFile />
+<#list ["List", "LargeList"] as listName>
+
+<@pp.changeOutputFile name="/org/apache/arrow/vector/complex/impl/Union${listName}Writer.java" />
+
+<#include "/@includes/license.ftl" />
+
+package org.apache.arrow.vector.complex.impl;
+
+import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt;
+<#include "/@includes/vv_imports.ftl" />
+
+/*
+ * This class is generated using freemarker and the ${.template_name} template.
+ */
+
+@SuppressWarnings("unused")
+public class Union${listName}Writer extends AbstractFieldWriter {
+
+ protected ${listName}Vector vector;
+ protected PromotableWriter writer;
+ private boolean inStruct = false;
+ private boolean listStarted = false;
+ private String structName;
+ <#if listName == "LargeList">
+ private static final long OFFSET_WIDTH = 8;
+ <#else>
+ private static final int OFFSET_WIDTH = 4;
+ </#if>
+
+ public Union${listName}Writer(${listName}Vector vector) {
+ this(vector, NullableStructWriterFactory.getNullableStructWriterFactoryInstance());
+ }
+
+ public Union${listName}Writer(${listName}Vector vector, NullableStructWriterFactory nullableStructWriterFactory) {
+ this.vector = vector;
+ this.writer = new PromotableWriter(vector.getDataVector(), vector, nullableStructWriterFactory);
+ }
+
+ public Union${listName}Writer(${listName}Vector vector, AbstractFieldWriter parent) {
+ this(vector);
+ }
+
+ @Override
+ public void allocate() {
+ vector.allocateNew();
+ }
+
+ @Override
+ public void clear() {
+ vector.clear();
+ }
+
+ @Override
+ public Field getField() {
+ return vector.getField();
+ }
+
+ public void setValueCount(int count) {
+ vector.setValueCount(count);
+ }
+
+ @Override
+ public int getValueCapacity() {
+ return vector.getValueCapacity();
+ }
+
+ @Override
+ public void close() throws Exception {
+ vector.close();
+ writer.close();
+ }
+
+ @Override
+ public void setPosition(int index) {
+ super.setPosition(index);
+ }
+
+ <#list vv.types as type><#list type.minor as minor><#assign name = minor.class?cap_first />
+ <#assign fields = minor.fields!type.fields />
+ <#assign uncappedName = name?uncap_first/>
+ <#if uncappedName == "int" ><#assign uncappedName = "integer" /></#if>
+ <#if !minor.typeParams?? >
+
+ @Override
+ public ${name}Writer ${uncappedName}() {
+ return this;
+ }
+
+ @Override
+ public ${name}Writer ${uncappedName}(String name) {
+ structName = name;
+ return writer.${uncappedName}(name);
+ }
+ </#if>
+ </#list></#list>
+
+ @Override
+ public DecimalWriter decimal() {
+ return this;
+ }
+
+ @Override
+ public DecimalWriter decimal(String name, int scale, int precision) {
+ return writer.decimal(name, scale, precision);
+ }
+
+ @Override
+ public DecimalWriter decimal(String name) {
+ return writer.decimal(name);
+ }
+
+ @Override
+ public Decimal256Writer decimal256() {
+ return this;
+ }
+
+ @Override
+ public Decimal256Writer decimal256(String name, int scale, int precision) {
+ return writer.decimal256(name, scale, precision);
+ }
+
+ @Override
+ public Decimal256Writer decimal256(String name) {
+ return writer.decimal256(name);
+ }
+
+
+ @Override
+ public StructWriter struct() {
+ inStruct = true;
+ return this;
+ }
+
+ @Override
+ public ListWriter list() {
+ return writer;
+ }
+
+ @Override
+ public ListWriter list(String name) {
+ ListWriter listWriter = writer.list(name);
+ return listWriter;
+ }
+
+ @Override
+ public StructWriter struct(String name) {
+ StructWriter structWriter = writer.struct(name);
+ return structWriter;
+ }
+
+ @Override
+ public MapWriter map() {
+ return writer;
+ }
+
+ @Override
+ public MapWriter map(String name) {
+ MapWriter mapWriter = writer.map(name);
+ return mapWriter;
+ }
+
+ @Override
+ public MapWriter map(boolean keysSorted) {
+ writer.map(keysSorted);
+ return writer;
+ }
+
+ @Override
+ public MapWriter map(String name, boolean keysSorted) {
+ MapWriter mapWriter = writer.map(name, keysSorted);
+ return mapWriter;
+ }
+
+ <#if listName == "LargeList">
+ @Override
+ public void startList() {
+ vector.startNewValue(idx());
+ writer.setPosition(checkedCastToInt(vector.getOffsetBuffer().getLong((idx() + 1L) * OFFSET_WIDTH)));
+ listStarted = true;
+ }
+
+ @Override
+ public void endList() {
+ vector.getOffsetBuffer().setLong((idx() + 1L) * OFFSET_WIDTH, writer.idx());
+ setPosition(idx() + 1);
+ listStarted = false;
+ }
+ <#else>
+ @Override
+ public void startList() {
+ vector.startNewValue(idx());
+ writer.setPosition(vector.getOffsetBuffer().getInt((idx() + 1L) * OFFSET_WIDTH));
+ listStarted = true;
+ }
+
+ @Override
+ public void endList() {
+ vector.getOffsetBuffer().setInt((idx() + 1L) * OFFSET_WIDTH, writer.idx());
+ setPosition(idx() + 1);
+ listStarted = false;
+ }
+ </#if>
+
+ @Override
+ public void start() {
+ writer.start();
+ }
+
+ @Override
+ public void end() {
+ writer.end();
+ inStruct = false;
+ }
+
+ @Override
+ public void write(DecimalHolder holder) {
+ writer.write(holder);
+ writer.setPosition(writer.idx()+1);
+ }
+
+ @Override
+ public void write(Decimal256Holder holder) {
+ writer.write(holder);
+ writer.setPosition(writer.idx()+1);
+ }
+
+ @Override
+ public void writeNull() {
+ if (!listStarted){
+ vector.setNull(idx());
+ } else {
+ writer.writeNull();
+ }
+ }
+
+ public void writeDecimal(long start, ArrowBuf buffer, ArrowType arrowType) {
+ writer.writeDecimal(start, buffer, arrowType);
+ writer.setPosition(writer.idx()+1);
+ }
+
+ public void writeDecimal(long start, ArrowBuf buffer) {
+ writer.writeDecimal(start, buffer);
+ writer.setPosition(writer.idx()+1);
+ }
+
+ public void writeDecimal(BigDecimal value) {
+ writer.writeDecimal(value);
+ writer.setPosition(writer.idx()+1);
+ }
+
+ public void writeBigEndianBytesToDecimal(byte[] value, ArrowType arrowType){
+ writer.writeBigEndianBytesToDecimal(value, arrowType);
+ writer.setPosition(writer.idx() + 1);
+ }
+
+ public void writeDecimal256(long start, ArrowBuf buffer, ArrowType arrowType) {
+ writer.writeDecimal256(start, buffer, arrowType);
+ writer.setPosition(writer.idx()+1);
+ }
+
+ public void writeDecimal256(long start, ArrowBuf buffer) {
+ writer.writeDecimal256(start, buffer);
+ writer.setPosition(writer.idx()+1);
+ }
+
+ public void writeDecimal256(BigDecimal value) {
+ writer.writeDecimal256(value);
+ writer.setPosition(writer.idx()+1);
+ }
+
+ public void writeBigEndianBytesToDecimal256(byte[] value, ArrowType arrowType){
+ writer.writeBigEndianBytesToDecimal256(value, arrowType);
+ writer.setPosition(writer.idx() + 1);
+ }
+
+
+ <#list vv.types as type>
+ <#list type.minor as minor>
+ <#assign name = minor.class?cap_first />
+ <#assign fields = minor.fields!type.fields />
+ <#assign uncappedName = name?uncap_first/>
+ <#if !minor.typeParams?? >
+ @Override
+ public void write${name}(<#list fields as field>${field.type} ${field.name}<#if field_has_next>, </#if></#list>) {
+ writer.write${name}(<#list fields as field>${field.name}<#if field_has_next>, </#if></#list>);
+ writer.setPosition(writer.idx()+1);
+ }
+
+ public void write(${name}Holder holder) {
+ writer.write${name}(<#list fields as field>holder.${field.name}<#if field_has_next>, </#if></#list>);
+ writer.setPosition(writer.idx()+1);
+ }
+
+ </#if>
+ </#list>
+ </#list>
+}
+</#list>
diff --git a/src/arrow/java/vector/src/main/codegen/templates/UnionMapWriter.java b/src/arrow/java/vector/src/main/codegen/templates/UnionMapWriter.java
new file mode 100644
index 000000000..606f88037
--- /dev/null
+++ b/src/arrow/java/vector/src/main/codegen/templates/UnionMapWriter.java
@@ -0,0 +1,222 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.vector.complex.writer.Decimal256Writer;
+import org.apache.arrow.vector.complex.writer.DecimalWriter;
+import org.apache.arrow.vector.holders.Decimal256Holder;
+import org.apache.arrow.vector.holders.DecimalHolder;
+
+import java.lang.UnsupportedOperationException;
+import java.math.BigDecimal;
+
+<@pp.dropOutputFile />
+<@pp.changeOutputFile name="/org/apache/arrow/vector/complex/impl/UnionMapWriter.java" />
+
+
+<#include "/@includes/license.ftl" />
+
+package org.apache.arrow.vector.complex.impl;
+
+<#include "/@includes/vv_imports.ftl" />
+
+/*
+ * This class is generated using freemarker and the ${.template_name} template.
+ */
+
+/**
+ * <p>Writer for MapVectors. This extends UnionListWriter to simplify writing map entries to a list
+ * of struct elements, with "key" and "value" fields. The procedure for writing a map begin with
+ * {@link #startMap()} followed by {@link #startEntry()}. An entry is written by using the
+ * {@link #key()} writer to write the key, then the {@link #value()} writer to write a value. After
+ * writing the value, call {@link #endEntry()} to complete the entry. Each map can have 1 or more
+ * entries. When done writing entries, call {@link #endMap()} to complete the map.
+ *
+ * <p>NOTE: the MapVector can have NULL values by not writing to position. If a map is started with
+ * {@link #startMap()}, then it must have a key written. The value of a map entry can be NULL by
+ * not using the {@link #value()} writer.
+ *
+ * <p>Example to write the following map to position 5 of a vector
+ * <pre>{@code
+ * // {
+ * // 1 -> 3,
+ * // 2 -> 4,
+ * // 3 -> NULL
+ * // }
+ *
+ * UnionMapWriter writer = ...
+ *
+ * writer.setPosition(5);
+ * writer.startMap();
+ * writer.startEntry();
+ * writer.key().integer().writeInt(1);
+ * writer.value().integer().writeInt(3);
+ * writer.endEntry();
+ * writer.startEntry();
+ * writer.key().integer().writeInt(2);
+ * writer.value().integer().writeInt(4);
+ * writer.endEntry();
+ * writer.startEntry();
+ * writer.key().integer().writeInt(3);
+ * writer.endEntry();
+ * writer.endMap();
+ * </pre>
+ * </p>
+ */
+@SuppressWarnings("unused")
+public class UnionMapWriter extends UnionListWriter {
+
+ /**
+ * Current mode for writing map entries, set by calling {@link #key()} or {@link #value()}
+ * and reset with a call to {@link #endEntry()}. With KEY mode, a struct writer with field
+ * named "key" is returned. With VALUE mode, a struct writer with field named "value" is
+ * returned. In OFF mode, the writer will behave like a standard UnionListWriter
+ */
+ private enum MapWriteMode {
+ OFF,
+ KEY,
+ VALUE,
+ }
+
+ private MapWriteMode mode = MapWriteMode.OFF;
+ private StructWriter entryWriter;
+
+ public UnionMapWriter(MapVector vector) {
+ super(vector);
+ entryWriter = struct();
+ }
+
+ /** Start writing a map that consists of 1 or more entries. */
+ public void startMap() {
+ startList();
+ }
+
+ /** Complete the map. */
+ public void endMap() {
+ endList();
+ }
+
+ /**
+ * Start a map entry that should be followed by calls to {@link #key()} and {@link #value()}
+ * writers. Call {@link #endEntry()} to complete the entry.
+ */
+ public void startEntry() {
+ writer.setAddVectorAsNullable(false);
+ entryWriter.start();
+ }
+
+ /** Complete the map entry. */
+ public void endEntry() {
+ entryWriter.end();
+ mode = MapWriteMode.OFF;
+ writer.setAddVectorAsNullable(true);
+ }
+
+ /** Return the key writer that is used to write to the "key" field. */
+ public UnionMapWriter key() {
+ writer.setAddVectorAsNullable(false);
+ mode = MapWriteMode.KEY;
+ return this;
+ }
+
+ /** Return the value writer that is used to write to the "value" field. */
+ public UnionMapWriter value() {
+ writer.setAddVectorAsNullable(true);
+ mode = MapWriteMode.VALUE;
+ return this;
+ }
+
+ <#list vv.types as type><#list type.minor as minor><#assign name = minor.class?cap_first />
+ <#assign fields = minor.fields!type.fields />
+ <#assign uncappedName = name?uncap_first/>
+ <#if uncappedName == "int" ><#assign uncappedName = "integer" /></#if>
+ <#if !minor.typeParams?? >
+ @Override
+ public ${name}Writer ${uncappedName}() {
+ switch (mode) {
+ case KEY:
+ return entryWriter.${uncappedName}(MapVector.KEY_NAME);
+ case VALUE:
+ return entryWriter.${uncappedName}(MapVector.VALUE_NAME);
+ default:
+ return this;
+ }
+ }
+
+ </#if>
+ </#list></#list>
+ @Override
+ public DecimalWriter decimal() {
+ switch (mode) {
+ case KEY:
+ return entryWriter.decimal(MapVector.KEY_NAME);
+ case VALUE:
+ return entryWriter.decimal(MapVector.VALUE_NAME);
+ default:
+ return this;
+ }
+ }
+
+ @Override
+ public Decimal256Writer decimal256() {
+ switch (mode) {
+ case KEY:
+ return entryWriter.decimal256(MapVector.KEY_NAME);
+ case VALUE:
+ return entryWriter.decimal256(MapVector.VALUE_NAME);
+ default:
+ return this;
+ }
+ }
+
+
+ @Override
+ public StructWriter struct() {
+ switch (mode) {
+ case KEY:
+ return entryWriter.struct(MapVector.KEY_NAME);
+ case VALUE:
+ return entryWriter.struct(MapVector.VALUE_NAME);
+ default:
+ return super.struct();
+ }
+ }
+
+ @Override
+ public ListWriter list() {
+ switch (mode) {
+ case KEY:
+ return entryWriter.list(MapVector.KEY_NAME);
+ case VALUE:
+ return entryWriter.list(MapVector.VALUE_NAME);
+ default:
+ return super.list();
+ }
+ }
+
+ @Override
+ public MapWriter map(boolean keysSorted) {
+ switch (mode) {
+ case KEY:
+ return entryWriter.map(MapVector.KEY_NAME, keysSorted);
+ case VALUE:
+ return entryWriter.map(MapVector.VALUE_NAME, keysSorted);
+ default:
+ return super.map();
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/main/codegen/templates/UnionReader.java b/src/arrow/java/vector/src/main/codegen/templates/UnionReader.java
new file mode 100644
index 000000000..444ca9ca7
--- /dev/null
+++ b/src/arrow/java/vector/src/main/codegen/templates/UnionReader.java
@@ -0,0 +1,223 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.Field;
+
+<@pp.dropOutputFile />
+<@pp.changeOutputFile name="/org/apache/arrow/vector/complex/impl/UnionReader.java" />
+
+
+<#include "/@includes/license.ftl" />
+
+package org.apache.arrow.vector.complex.impl;
+
+<#include "/@includes/vv_imports.ftl" />
+/**
+ * Source code generated using FreeMarker template ${.template_name}
+ */
+@SuppressWarnings("unused")
+public class UnionReader extends AbstractFieldReader {
+
+ private BaseReader[] readers = new BaseReader[45];
+ public UnionVector data;
+
+ public UnionReader(UnionVector data) {
+ this.data = data;
+ }
+
+ public MinorType getMinorType() {
+ return TYPES[data.getTypeValue(idx())];
+ }
+
+ private static MinorType[] TYPES = new MinorType[45];
+
+ static {
+ for (MinorType minorType : MinorType.values()) {
+ TYPES[minorType.ordinal()] = minorType;
+ }
+ }
+
+ @Override
+ public Field getField() {
+ return data.getField();
+ }
+
+ public boolean isSet(){
+ return !data.isNull(idx());
+ }
+
+ public void read(UnionHolder holder) {
+ holder.reader = this;
+ holder.isSet = this.isSet() ? 1 : 0;
+ }
+
+ public void read(int index, UnionHolder holder) {
+ getList().read(index, holder);
+ }
+
+ private FieldReader getReaderForIndex(int index) {
+ int typeValue = data.getTypeValue(index);
+ FieldReader reader = (FieldReader) readers[typeValue];
+ if (reader != null) {
+ return reader;
+ }
+ switch (MinorType.values()[typeValue]) {
+ case NULL:
+ return NullReader.INSTANCE;
+ case STRUCT:
+ return (FieldReader) getStruct();
+ case LIST:
+ return (FieldReader) getList();
+ case MAP:
+ return (FieldReader) getMap();
+ <#list vv.types as type>
+ <#list type.minor as minor>
+ <#assign name = minor.class?cap_first />
+ <#assign uncappedName = name?uncap_first/>
+ <#if !minor.typeParams?? || minor.class?starts_with("Decimal")>
+ case ${name?upper_case}:
+ return (FieldReader) get${name}();
+ </#if>
+ </#list>
+ </#list>
+ default:
+ throw new UnsupportedOperationException("Unsupported type: " + MinorType.values()[typeValue]);
+ }
+ }
+
+ private SingleStructReaderImpl structReader;
+
+ private StructReader getStruct() {
+ if (structReader == null) {
+ structReader = (SingleStructReaderImpl) data.getStruct().getReader();
+ structReader.setPosition(idx());
+ readers[MinorType.STRUCT.ordinal()] = structReader;
+ }
+ return structReader;
+ }
+
+ private UnionListReader listReader;
+
+ private FieldReader getList() {
+ if (listReader == null) {
+ listReader = new UnionListReader(data.getList());
+ listReader.setPosition(idx());
+ readers[MinorType.LIST.ordinal()] = listReader;
+ }
+ return listReader;
+ }
+
+ private UnionMapReader mapReader;
+
+ private FieldReader getMap() {
+ if (mapReader == null) {
+ mapReader = new UnionMapReader(data.getMap());
+ mapReader.setPosition(idx());
+ readers[MinorType.MAP.ordinal()] = mapReader;
+ }
+ return mapReader;
+ }
+
+ @Override
+ public java.util.Iterator<String> iterator() {
+ return getStruct().iterator();
+ }
+
+ @Override
+ public void copyAsValue(UnionWriter writer) {
+ writer.data.copyFrom(idx(), writer.idx(), data);
+ }
+
+ <#list ["Object", "BigDecimal", "Short", "Integer", "Long", "Boolean",
+ "LocalDateTime", "Duration", "Period", "Double", "Float",
+ "Character", "Text", "Byte", "byte[]", "PeriodDuration"] as friendlyType>
+ <#assign safeType=friendlyType />
+ <#if safeType=="byte[]"><#assign safeType="ByteArray" /></#if>
+
+ @Override
+ public ${friendlyType} read${safeType}() {
+ return getReaderForIndex(idx()).read${safeType}();
+ }
+
+ </#list>
+
+ public int size() {
+ return getReaderForIndex(idx()).size();
+ }
+
+ <#list vv.types as type>
+ <#list type.minor as minor>
+ <#assign name = minor.class?cap_first />
+ <#assign uncappedName = name?uncap_first/>
+ <#assign boxedType = (minor.boxedType!type.boxedType) />
+ <#assign javaType = (minor.javaType!type.javaType) />
+ <#assign friendlyType = (minor.friendlyType!minor.boxedType!type.boxedType) />
+ <#assign safeType=friendlyType />
+ <#if safeType=="byte[]"><#assign safeType="ByteArray" /></#if>
+ <#if !minor.typeParams?? || minor.class?starts_with("Decimal") >
+
+ private ${name}ReaderImpl ${uncappedName}Reader;
+
+ private ${name}ReaderImpl get${name}() {
+ if (${uncappedName}Reader == null) {
+ ${uncappedName}Reader = new ${name}ReaderImpl(data.get${name}Vector());
+ ${uncappedName}Reader.setPosition(idx());
+ readers[MinorType.${name?upper_case}.ordinal()] = ${uncappedName}Reader;
+ }
+ return ${uncappedName}Reader;
+ }
+
+ public void read(Nullable${name}Holder holder){
+ getReaderForIndex(idx()).read(holder);
+ }
+
+ public void copyAsValue(${name}Writer writer){
+ getReaderForIndex(idx()).copyAsValue(writer);
+ }
+ </#if>
+ </#list>
+ </#list>
+
+ @Override
+ public void copyAsValue(ListWriter writer) {
+ ComplexCopier.copy(this, (FieldWriter) writer);
+ }
+
+ @Override
+ public void setPosition(int index) {
+ super.setPosition(index);
+ for (BaseReader reader : readers) {
+ if (reader != null) {
+ reader.setPosition(index);
+ }
+ }
+ }
+
+ public FieldReader reader(String name){
+ return getStruct().reader(name);
+ }
+
+ public FieldReader reader() {
+ return getList().reader();
+ }
+
+ public boolean next() {
+ return getReaderForIndex(idx()).next();
+ }
+}
diff --git a/src/arrow/java/vector/src/main/codegen/templates/UnionVector.java b/src/arrow/java/vector/src/main/codegen/templates/UnionVector.java
new file mode 100644
index 000000000..1468116c7
--- /dev/null
+++ b/src/arrow/java/vector/src/main/codegen/templates/UnionVector.java
@@ -0,0 +1,854 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.ReferenceManager;
+import org.apache.arrow.memory.util.CommonUtil;
+import org.apache.arrow.memory.util.hash.ArrowBufHasher;
+import org.apache.arrow.util.Preconditions;
+import org.apache.arrow.vector.BaseValueVector;
+import org.apache.arrow.vector.BitVectorHelper;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.complex.AbstractStructVector;
+import org.apache.arrow.vector.complex.NonNullableStructVector;
+import org.apache.arrow.vector.complex.StructVector;
+import org.apache.arrow.vector.compare.VectorVisitor;
+import org.apache.arrow.vector.types.Types;
+import org.apache.arrow.vector.types.UnionMode;
+import org.apache.arrow.vector.compare.RangeEqualsVisitor;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.CallBack;
+import org.apache.arrow.vector.util.DataSizeRoundingUtil;
+
+<@pp.dropOutputFile />
+<@pp.changeOutputFile name="/org/apache/arrow/vector/complex/UnionVector.java" />
+
+
+<#include "/@includes/license.ftl" />
+
+package org.apache.arrow.vector.complex;
+
+<#include "/@includes/vv_imports.ftl" />
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Iterator;
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.util.CommonUtil;
+import org.apache.arrow.vector.compare.VectorVisitor;
+import org.apache.arrow.vector.complex.impl.ComplexCopier;
+import org.apache.arrow.vector.util.CallBack;
+import org.apache.arrow.vector.util.ValueVectorUtility;
+import org.apache.arrow.vector.ipc.message.ArrowFieldNode;
+import org.apache.arrow.memory.util.ArrowBufPointer;
+import org.apache.arrow.memory.util.hash.ArrowBufHasher;
+import org.apache.arrow.vector.BaseValueVector;
+import org.apache.arrow.vector.util.OversizedAllocationException;
+import org.apache.arrow.util.Preconditions;
+
+import static org.apache.arrow.vector.types.UnionMode.Sparse;
+import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt;
+import static org.apache.arrow.memory.util.LargeMemoryUtil.capAtMaxInt;
+
+
+
+/*
+ * This class is generated using freemarker and the ${.template_name} template.
+ */
+@SuppressWarnings("unused")
+
+
+/**
+ * A vector which can hold values of different types. It does so by using a StructVector which contains a vector for each
+ * primitive type that is stored. StructVector is used in order to take advantage of its serialization/deserialization methods,
+ * as well as the addOrGet method.
+ *
+ * For performance reasons, UnionVector stores a cached reference to each subtype vector, to avoid having to do the struct lookup
+ * each time the vector is accessed.
+ * Source code generated using FreeMarker template ${.template_name}
+ */
+public class UnionVector extends AbstractContainerVector implements FieldVector {
+ int valueCount;
+
+ NonNullableStructVector internalStruct;
+ protected ArrowBuf typeBuffer;
+
+ private StructVector structVector;
+ private ListVector listVector;
+ private MapVector mapVector;
+
+ private FieldReader reader;
+
+ private int singleType = 0;
+ private ValueVector singleVector;
+
+ private int typeBufferAllocationSizeInBytes;
+
+ private final FieldType fieldType;
+ private final Field[] typeIds = new Field[Byte.MAX_VALUE + 1];
+
+ public static final byte TYPE_WIDTH = 1;
+ private static final FieldType INTERNAL_STRUCT_TYPE = new FieldType(false /*nullable*/,
+ ArrowType.Struct.INSTANCE, null /*dictionary*/, null /*metadata*/);
+
+ public static UnionVector empty(String name, BufferAllocator allocator) {
+ FieldType fieldType = FieldType.nullable(new ArrowType.Union(
+ UnionMode.Sparse, null));
+ return new UnionVector(name, allocator, fieldType, null);
+ }
+
+ public UnionVector(String name, BufferAllocator allocator, FieldType fieldType, CallBack callBack) {
+ super(name, allocator, callBack);
+ this.fieldType = fieldType;
+ this.internalStruct = new NonNullableStructVector(
+ "internal",
+ allocator,
+ INTERNAL_STRUCT_TYPE,
+ callBack,
+ AbstractStructVector.ConflictPolicy.CONFLICT_REPLACE,
+ false);
+ this.typeBuffer = allocator.getEmpty();
+ this.typeBufferAllocationSizeInBytes = BaseValueVector.INITIAL_VALUE_ALLOCATION * TYPE_WIDTH;
+ }
+
+ public BufferAllocator getAllocator() {
+ return allocator;
+ }
+
+ @Override
+ public MinorType getMinorType() {
+ return MinorType.UNION;
+ }
+
+ @Override
+ public void initializeChildrenFromFields(List<Field> children) {
+ int count = 0;
+ for (Field child: children) {
+ int typeId = Types.getMinorTypeForArrowType(child.getType()).ordinal();
+ if (fieldType != null) {
+ int[] typeIds = ((ArrowType.Union)fieldType.getType()).getTypeIds();
+ if (typeIds != null) {
+ typeId = typeIds[count++];
+ }
+ }
+ typeIds[typeId] = child;
+ }
+ internalStruct.initializeChildrenFromFields(children);
+ }
+
+ @Override
+ public List<FieldVector> getChildrenFromFields() {
+ return internalStruct.getChildrenFromFields();
+ }
+
+ @Override
+ public void loadFieldBuffers(ArrowFieldNode fieldNode, List<ArrowBuf> ownBuffers) {
+ if (ownBuffers.size() != 1) {
+ throw new IllegalArgumentException("Illegal buffer count, expected 1, got: " + ownBuffers.size());
+ }
+ ArrowBuf buffer = ownBuffers.get(0);
+ typeBuffer.getReferenceManager().release();
+ typeBuffer = buffer.getReferenceManager().retain(buffer, allocator);
+ typeBufferAllocationSizeInBytes = checkedCastToInt(typeBuffer.capacity());
+ this.valueCount = fieldNode.getLength();
+ }
+
+ @Override
+ public List<ArrowBuf> getFieldBuffers() {
+ List<ArrowBuf> result = new ArrayList<>(1);
+ setReaderAndWriterIndex();
+ result.add(typeBuffer);
+
+ return result;
+ }
+
+ private void setReaderAndWriterIndex() {
+ typeBuffer.readerIndex(0);
+ typeBuffer.writerIndex(valueCount * TYPE_WIDTH);
+ }
+
+ /**
+ * Get the inner vectors.
+ *
+ * @deprecated This API will be removed as the current implementations no longer support inner vectors.
+ *
+ * @return the inner vectors for this field as defined by the TypeLayout
+ */
+ @Deprecated
+ @Override
+ public List<BufferBacked> getFieldInnerVectors() {
+ throw new UnsupportedOperationException("There are no inner vectors. Use geFieldBuffers");
+ }
+
+ private String fieldName(MinorType type) {
+ return type.name().toLowerCase();
+ }
+
+ private FieldType fieldType(MinorType type) {
+ return FieldType.nullable(type.getType());
+ }
+
+ private <T extends FieldVector> T addOrGet(Types.MinorType minorType, Class<T> c) {
+ return addOrGet(null, minorType, c);
+ }
+
+ private <T extends FieldVector> T addOrGet(String name, Types.MinorType minorType, ArrowType arrowType, Class<T> c) {
+ return internalStruct.addOrGet(name == null ? fieldName(minorType) : name, FieldType.nullable(arrowType), c);
+ }
+
+ private <T extends FieldVector> T addOrGet(String name, Types.MinorType minorType, Class<T> c) {
+ return internalStruct.addOrGet(name == null ? fieldName(minorType) : name, fieldType(minorType), c);
+ }
+
+
+ @Override
+ public long getValidityBufferAddress() {
+ throw new UnsupportedOperationException();
+ }
+
+ public long getTypeBufferAddress() {
+ return typeBuffer.memoryAddress();
+ }
+
+ @Override
+ public long getDataBufferAddress() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public long getOffsetBufferAddress() {
+ throw new UnsupportedOperationException();
+ }
+
+ public ArrowBuf getTypeBuffer() {
+ return typeBuffer;
+ }
+
+ @Override
+ public ArrowBuf getValidityBuffer() { throw new UnsupportedOperationException(); }
+
+ @Override
+ public ArrowBuf getDataBuffer() { throw new UnsupportedOperationException(); }
+
+ @Override
+ public ArrowBuf getOffsetBuffer() { throw new UnsupportedOperationException(); }
+
+ public StructVector getStruct() {
+ if (structVector == null) {
+ int vectorCount = internalStruct.size();
+ structVector = addOrGet(MinorType.STRUCT, StructVector.class);
+ if (internalStruct.size() > vectorCount) {
+ structVector.allocateNew();
+ if (callBack != null) {
+ callBack.doWork();
+ }
+ }
+ }
+ return structVector;
+ }
+ <#list vv.types as type>
+ <#list type.minor as minor>
+ <#assign name = minor.class?cap_first />
+ <#assign fields = minor.fields!type.fields />
+ <#assign uncappedName = name?uncap_first/>
+ <#assign lowerCaseName = name?lower_case/>
+ <#if !minor.typeParams?? || minor.class?starts_with("Decimal") >
+
+ private ${name}Vector ${uncappedName}Vector;
+
+ public ${name}Vector get${name}Vector(<#if minor.class?starts_with("Decimal")> ArrowType arrowType</#if>) {
+ return get${name}Vector(null<#if minor.class?starts_with("Decimal")>, arrowType</#if>);
+ }
+
+ public ${name}Vector get${name}Vector(String name<#if minor.class?starts_with("Decimal")>, ArrowType arrowType</#if>) {
+ if (${uncappedName}Vector == null) {
+ int vectorCount = internalStruct.size();
+ ${uncappedName}Vector = addOrGet(name, MinorType.${name?upper_case},<#if minor.class?starts_with("Decimal")> arrowType,</#if> ${name}Vector.class);
+ if (internalStruct.size() > vectorCount) {
+ ${uncappedName}Vector.allocateNew();
+ if (callBack != null) {
+ callBack.doWork();
+ }
+ }
+ }
+ return ${uncappedName}Vector;
+ }
+ <#if minor.class?starts_with("Decimal")>
+ public ${name}Vector get${name}Vector() {
+ if (${uncappedName}Vector == null) {
+ throw new IllegalArgumentException("No ${uncappedName} present. Provide ArrowType argument to create a new vector");
+ }
+ return ${uncappedName}Vector;
+ }
+ </#if>
+ </#if>
+ </#list>
+ </#list>
+
+ public ListVector getList() {
+ if (listVector == null) {
+ int vectorCount = internalStruct.size();
+ listVector = addOrGet(MinorType.LIST, ListVector.class);
+ if (internalStruct.size() > vectorCount) {
+ listVector.allocateNew();
+ if (callBack != null) {
+ callBack.doWork();
+ }
+ }
+ }
+ return listVector;
+ }
+
+ public MapVector getMap() {
+ if (mapVector == null) {
+ throw new IllegalArgumentException("No map present. Provide ArrowType argument to create a new vector");
+ }
+ return mapVector;
+ }
+
+ public MapVector getMap(ArrowType arrowType) {
+ return getMap(null, arrowType);
+ }
+
+ public MapVector getMap(String name, ArrowType arrowType) {
+ if (mapVector == null) {
+ int vectorCount = internalStruct.size();
+ mapVector = addOrGet(name, MinorType.MAP, arrowType, MapVector.class);
+ if (internalStruct.size() > vectorCount) {
+ mapVector.allocateNew();
+ if (callBack != null) {
+ callBack.doWork();
+ }
+ }
+ }
+ return mapVector;
+ }
+
+ public int getTypeValue(int index) {
+ return typeBuffer.getByte(index * TYPE_WIDTH);
+ }
+
+ @Override
+ public void allocateNew() throws OutOfMemoryException {
+ /* new allocation -- clear the current buffers */
+ clear();
+ internalStruct.allocateNew();
+ try {
+ allocateTypeBuffer();
+ } catch (Exception e) {
+ clear();
+ throw e;
+ }
+ }
+
+ @Override
+ public boolean allocateNewSafe() {
+ /* new allocation -- clear the current buffers */
+ clear();
+ boolean safe = internalStruct.allocateNewSafe();
+ if (!safe) { return false; }
+ try {
+ allocateTypeBuffer();
+ } catch (Exception e) {
+ clear();
+ return false;
+ }
+
+ return true;
+ }
+
+ private void allocateTypeBuffer() {
+ typeBuffer = allocator.buffer(typeBufferAllocationSizeInBytes);
+ typeBuffer.readerIndex(0);
+ typeBuffer.setZero(0, typeBuffer.capacity());
+ }
+
+ @Override
+ public void reAlloc() {
+ internalStruct.reAlloc();
+ reallocTypeBuffer();
+ }
+
+ private void reallocTypeBuffer() {
+ final long currentBufferCapacity = typeBuffer.capacity();
+ long newAllocationSize = currentBufferCapacity * 2;
+ if (newAllocationSize == 0) {
+ if (typeBufferAllocationSizeInBytes > 0) {
+ newAllocationSize = typeBufferAllocationSizeInBytes;
+ } else {
+ newAllocationSize = BaseValueVector.INITIAL_VALUE_ALLOCATION * TYPE_WIDTH * 2;
+ }
+ }
+ newAllocationSize = CommonUtil.nextPowerOfTwo(newAllocationSize);
+ assert newAllocationSize >= 1;
+
+ if (newAllocationSize > BaseValueVector.MAX_ALLOCATION_SIZE) {
+ throw new OversizedAllocationException("Unable to expand the buffer");
+ }
+
+ final ArrowBuf newBuf = allocator.buffer(checkedCastToInt(newAllocationSize));
+ newBuf.setBytes(0, typeBuffer, 0, currentBufferCapacity);
+ newBuf.setZero(currentBufferCapacity, newBuf.capacity() - currentBufferCapacity);
+ typeBuffer.getReferenceManager().release(1);
+ typeBuffer = newBuf;
+ typeBufferAllocationSizeInBytes = (int)newAllocationSize;
+ }
+
+ @Override
+ public void setInitialCapacity(int numRecords) { }
+
+ @Override
+ public int getValueCapacity() {
+ return Math.min(getTypeBufferValueCapacity(), internalStruct.getValueCapacity());
+ }
+
+ @Override
+ public void close() {
+ clear();
+ }
+
+ @Override
+ public void clear() {
+ valueCount = 0;
+ typeBuffer.getReferenceManager().release();
+ typeBuffer = allocator.getEmpty();
+ internalStruct.clear();
+ }
+
+ @Override
+ public void reset() {
+ valueCount = 0;
+ typeBuffer.setZero(0, typeBuffer.capacity());
+ internalStruct.reset();
+ }
+
+ @Override
+ public Field getField() {
+ List<org.apache.arrow.vector.types.pojo.Field> childFields = new ArrayList<>();
+ List<FieldVector> children = internalStruct.getChildren();
+ int[] typeIds = new int[children.size()];
+ for (ValueVector v : children) {
+ typeIds[childFields.size()] = v.getMinorType().ordinal();
+ childFields.add(v.getField());
+ }
+
+ FieldType fieldType;
+ if (this.fieldType == null) {
+ fieldType = FieldType.nullable(new ArrowType.Union(Sparse, typeIds));
+ } else {
+ final UnionMode mode = ((ArrowType.Union)this.fieldType.getType()).getMode();
+ fieldType = new FieldType(this.fieldType.isNullable(), new ArrowType.Union(mode, typeIds),
+ this.fieldType.getDictionary(), this.fieldType.getMetadata());
+ }
+
+ return new Field(name, fieldType, childFields);
+ }
+
+ @Override
+ public TransferPair getTransferPair(BufferAllocator allocator) {
+ return getTransferPair(name, allocator);
+ }
+
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
+ return getTransferPair(ref, allocator, null);
+ }
+
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator, CallBack callBack) {
+ return new org.apache.arrow.vector.complex.UnionVector.TransferImpl(ref, allocator, callBack);
+ }
+
+ @Override
+ public TransferPair makeTransferPair(ValueVector target) {
+ return new TransferImpl((UnionVector) target);
+ }
+
+ @Override
+ public void copyFrom(int inIndex, int outIndex, ValueVector from) {
+ Preconditions.checkArgument(this.getMinorType() == from.getMinorType());
+ UnionVector fromCast = (UnionVector) from;
+ fromCast.getReader().setPosition(inIndex);
+ getWriter().setPosition(outIndex);
+ ComplexCopier.copy(fromCast.reader, writer);
+ }
+
+ @Override
+ public void copyFromSafe(int inIndex, int outIndex, ValueVector from) {
+ copyFrom(inIndex, outIndex, from);
+ }
+
+ public FieldVector addVector(FieldVector v) {
+ final String name = v.getName().isEmpty() ? fieldName(v.getMinorType()) : v.getName();
+ Preconditions.checkState(internalStruct.getChild(name) == null, String.format("%s vector already exists", name));
+ final FieldVector newVector = internalStruct.addOrGet(name, v.getField().getFieldType(), v.getClass());
+ v.makeTransferPair(newVector).transfer();
+ internalStruct.putChild(name, newVector);
+ if (callBack != null) {
+ callBack.doWork();
+ }
+ return newVector;
+ }
+
+ /**
+ * Directly put a vector to internalStruct without creating a new one with same type.
+ */
+ public void directAddVector(FieldVector v) {
+ String name = fieldName(v.getMinorType());
+ Preconditions.checkState(internalStruct.getChild(name) == null, String.format("%s vector already exists", name));
+ internalStruct.putChild(name, v);
+ if (callBack != null) {
+ callBack.doWork();
+ }
+ }
+
+ private class TransferImpl implements TransferPair {
+ private final TransferPair internalStructVectorTransferPair;
+ private final UnionVector to;
+
+ public TransferImpl(String name, BufferAllocator allocator, CallBack callBack) {
+ to = new UnionVector(name, allocator, /* field type */ null, callBack);
+ internalStructVectorTransferPair = internalStruct.makeTransferPair(to.internalStruct);
+ }
+
+ public TransferImpl(UnionVector to) {
+ this.to = to;
+ internalStructVectorTransferPair = internalStruct.makeTransferPair(to.internalStruct);
+ }
+
+ @Override
+ public void transfer() {
+ to.clear();
+ ReferenceManager refManager = typeBuffer.getReferenceManager();
+ to.typeBuffer = refManager.transferOwnership(typeBuffer, to.allocator).getTransferredBuffer();
+ internalStructVectorTransferPair.transfer();
+ to.valueCount = valueCount;
+ clear();
+ }
+
+ @Override
+ public void splitAndTransfer(int startIndex, int length) {
+ Preconditions.checkArgument(startIndex >= 0 && length >= 0 && startIndex + length <= valueCount,
+ "Invalid parameters startIndex: %s, length: %s for valueCount: %s", startIndex, length, valueCount);
+ to.clear();
+
+ internalStructVectorTransferPair.splitAndTransfer(startIndex, length);
+ final int startPoint = startIndex * TYPE_WIDTH;
+ final int sliceLength = length * TYPE_WIDTH;
+ final ArrowBuf slicedBuffer = typeBuffer.slice(startPoint, sliceLength);
+ final ReferenceManager refManager = slicedBuffer.getReferenceManager();
+ to.typeBuffer = refManager.transferOwnership(slicedBuffer, to.allocator).getTransferredBuffer();
+ to.setValueCount(length);
+ }
+
+ @Override
+ public ValueVector getTo() {
+ return to;
+ }
+
+ @Override
+ public void copyValueSafe(int from, int to) {
+ this.to.copyFrom(from, to, UnionVector.this);
+ }
+ }
+
+ @Override
+ public FieldReader getReader() {
+ if (reader == null) {
+ reader = new UnionReader(this);
+ }
+ return reader;
+ }
+
+ public FieldWriter getWriter() {
+ if (writer == null) {
+ writer = new UnionWriter(this);
+ }
+ return writer;
+ }
+
+ @Override
+ public int getBufferSize() {
+ if (valueCount == 0) { return 0; }
+
+ return (valueCount * TYPE_WIDTH) + internalStruct.getBufferSize();
+ }
+
+ @Override
+ public int getBufferSizeFor(final int valueCount) {
+ if (valueCount == 0) {
+ return 0;
+ }
+
+ long bufferSize = 0;
+ for (final ValueVector v : (Iterable<ValueVector>) this) {
+ bufferSize += v.getBufferSizeFor(valueCount);
+ }
+
+ return (int) bufferSize + (valueCount * TYPE_WIDTH);
+ }
+
+ @Override
+ public ArrowBuf[] getBuffers(boolean clear) {
+ List<ArrowBuf> list = new java.util.ArrayList<>();
+ setReaderAndWriterIndex();
+ if (getBufferSize() != 0) {
+ list.add(typeBuffer);
+ list.addAll(java.util.Arrays.asList(internalStruct.getBuffers(clear)));
+ }
+ if (clear) {
+ valueCount = 0;
+ typeBuffer.getReferenceManager().retain();
+ typeBuffer.getReferenceManager().release();
+ typeBuffer = allocator.getEmpty();
+ }
+ return list.toArray(new ArrowBuf[list.size()]);
+ }
+
+ @Override
+ public Iterator<ValueVector> iterator() {
+ return internalStruct.iterator();
+ }
+
+ public ValueVector getVector(int index) {
+ return getVector(index, null);
+ }
+
+ public ValueVector getVector(int index, ArrowType arrowType) {
+ int type = typeBuffer.getByte(index * TYPE_WIDTH);
+ return getVectorByType(type, arrowType);
+ }
+
+ public ValueVector getVectorByType(int typeId) {
+ return getVectorByType(typeId, null);
+ }
+
+ public ValueVector getVectorByType(int typeId, ArrowType arrowType) {
+ Field type = typeIds[typeId];
+ Types.MinorType minorType;
+ String name = null;
+ if (type == null) {
+ minorType = Types.MinorType.values()[typeId];
+ } else {
+ minorType = Types.getMinorTypeForArrowType(type.getType());
+ name = type.getName();
+ }
+ switch (minorType) {
+ case NULL:
+ return null;
+ <#list vv.types as type>
+ <#list type.minor as minor>
+ <#assign name = minor.class?cap_first />
+ <#assign fields = minor.fields!type.fields />
+ <#assign uncappedName = name?uncap_first/>
+ <#if !minor.typeParams?? || minor.class?starts_with("Decimal") >
+ case ${name?upper_case}:
+ return get${name}Vector(name<#if minor.class?starts_with("Decimal")>, arrowType</#if>);
+ </#if>
+ </#list>
+ </#list>
+ case STRUCT:
+ return getStruct();
+ case LIST:
+ return getList();
+ case MAP:
+ return getMap(name, arrowType);
+ default:
+ throw new UnsupportedOperationException("Cannot support type: " + MinorType.values()[typeId]);
+ }
+ }
+
+ public Object getObject(int index) {
+ ValueVector vector = getVector(index);
+ if (vector != null) {
+ return vector.isNull(index) ? null : vector.getObject(index);
+ }
+ return null;
+ }
+
+ public byte[] get(int index) {
+ return null;
+ }
+
+ public void get(int index, ComplexHolder holder) {
+ }
+
+ public void get(int index, UnionHolder holder) {
+ FieldReader reader = new UnionReader(UnionVector.this);
+ reader.setPosition(index);
+ holder.reader = reader;
+ }
+
+ public int getValueCount() {
+ return valueCount;
+ }
+
+ /**
+ * IMPORTANT: Union types always return non null as there is no validity buffer.
+ *
+ * To check validity correctly you must check the underlying vector.
+ */
+ public boolean isNull(int index) {
+ return false;
+ }
+
+ @Override
+ public int getNullCount() {
+ return 0;
+ }
+
+ public int isSet(int index) {
+ return isNull(index) ? 0 : 1;
+ }
+
+ UnionWriter writer;
+
+ public void setValueCount(int valueCount) {
+ this.valueCount = valueCount;
+ while (valueCount > getTypeBufferValueCapacity()) {
+ reallocTypeBuffer();
+ }
+ internalStruct.setValueCount(valueCount);
+ }
+
+ public void setSafe(int index, UnionHolder holder) {
+ setSafe(index, holder, null);
+ }
+
+ public void setSafe(int index, UnionHolder holder, ArrowType arrowType) {
+ FieldReader reader = holder.reader;
+ if (writer == null) {
+ writer = new UnionWriter(UnionVector.this);
+ }
+ writer.setPosition(index);
+ MinorType type = reader.getMinorType();
+ switch (type) {
+ <#list vv.types as type>
+ <#list type.minor as minor>
+ <#assign name = minor.class?cap_first />
+ <#assign fields = minor.fields!type.fields />
+ <#assign uncappedName = name?uncap_first/>
+ <#if !minor.typeParams?? || minor.class?starts_with("Decimal") >
+ case ${name?upper_case}:
+ Nullable${name}Holder ${uncappedName}Holder = new Nullable${name}Holder();
+ reader.read(${uncappedName}Holder);
+ setSafe(index, ${uncappedName}Holder<#if minor.class?starts_with("Decimal")>, arrowType</#if>);
+ break;
+ </#if>
+ </#list>
+ </#list>
+ case STRUCT: {
+ ComplexCopier.copy(reader, writer);
+ break;
+ }
+ case LIST: {
+ ComplexCopier.copy(reader, writer);
+ break;
+ }
+ default:
+ throw new UnsupportedOperationException();
+ }
+ }
+ <#list vv.types as type>
+ <#list type.minor as minor>
+ <#assign name = minor.class?cap_first />
+ <#assign fields = minor.fields!type.fields />
+ <#assign uncappedName = name?uncap_first/>
+ <#if !minor.typeParams?? || minor.class?starts_with("Decimal") >
+ public void setSafe(int index, Nullable${name}Holder holder<#if minor.class?starts_with("Decimal")>, ArrowType arrowType</#if>) {
+ setType(index, MinorType.${name?upper_case});
+ get${name}Vector(null<#if minor.class?starts_with("Decimal")>, arrowType</#if>).setSafe(index, holder);
+ }
+
+ </#if>
+ </#list>
+ </#list>
+
+ public void setType(int index, MinorType type) {
+ while (index >= getTypeBufferValueCapacity()) {
+ reallocTypeBuffer();
+ }
+ typeBuffer.setByte(index * TYPE_WIDTH , (byte) type.ordinal());
+ }
+
+ private int getTypeBufferValueCapacity() {
+ return capAtMaxInt(typeBuffer.capacity() / TYPE_WIDTH);
+ }
+
+ @Override
+ public int hashCode(int index) {
+ return hashCode(index, null);
+ }
+
+ @Override
+ public int hashCode(int index, ArrowBufHasher hasher) {
+ ValueVector vec = getVector(index);
+ if (vec == null) {
+ return ArrowBufPointer.NULL_HASH_CODE;
+ }
+ return vec.hashCode(index, hasher);
+ }
+
+ @Override
+ public <OUT, IN> OUT accept(VectorVisitor<OUT, IN> visitor, IN value) {
+ return visitor.visit(this, value);
+ }
+
+ @Override
+ public String getName() {
+ return name;
+ }
+
+ @Override
+ public String toString() {
+ return ValueVectorUtility.getToString(this, 0, getValueCount());
+ }
+
+ @Override
+ public <T extends FieldVector> T addOrGet(String name, FieldType fieldType, Class<T> clazz) {
+ return internalStruct.addOrGet(name, fieldType, clazz);
+ }
+
+ @Override
+ public <T extends FieldVector> T getChild(String name, Class<T> clazz) {
+ return internalStruct.getChild(name, clazz);
+ }
+
+ @Override
+ public VectorWithOrdinal getChildVectorWithOrdinal(String name) {
+ return internalStruct.getChildVectorWithOrdinal(name);
+ }
+
+ @Override
+ public int size() {
+ return internalStruct.size();
+ }
+
+ @Override
+ public void setInitialCapacity(int valueCount, double density) {
+ for (final ValueVector vector : internalStruct) {
+ if (vector instanceof DensityAwareVector) {
+ ((DensityAwareVector) vector).setInitialCapacity(valueCount, density);
+ } else {
+ vector.setInitialCapacity(valueCount);
+ }
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/main/codegen/templates/UnionWriter.java b/src/arrow/java/vector/src/main/codegen/templates/UnionWriter.java
new file mode 100644
index 000000000..fc4fd7dd7
--- /dev/null
+++ b/src/arrow/java/vector/src/main/codegen/templates/UnionWriter.java
@@ -0,0 +1,364 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.arrow.util.Preconditions;
+import org.apache.arrow.vector.complex.impl.NullableStructWriterFactory;
+import org.apache.arrow.vector.types.Types;
+
+<@pp.dropOutputFile />
+<@pp.changeOutputFile name="/org/apache/arrow/vector/complex/impl/UnionWriter.java" />
+
+
+<#include "/@includes/license.ftl" />
+
+package org.apache.arrow.vector.complex.impl;
+
+<#include "/@includes/vv_imports.ftl" />
+import org.apache.arrow.vector.complex.writer.BaseWriter;
+import org.apache.arrow.vector.types.Types.MinorType;
+
+/*
+ * This class is generated using freemarker and the ${.template_name} template.
+ */
+@SuppressWarnings("unused")
+public class UnionWriter extends AbstractFieldWriter implements FieldWriter {
+
+ UnionVector data;
+ private StructWriter structWriter;
+ private UnionListWriter listWriter;
+ private UnionMapWriter mapWriter;
+ private List<BaseWriter> writers = new java.util.ArrayList<>();
+ private final NullableStructWriterFactory nullableStructWriterFactory;
+
+ public UnionWriter(UnionVector vector) {
+ this(vector, NullableStructWriterFactory.getNullableStructWriterFactoryInstance());
+ }
+
+ public UnionWriter(UnionVector vector, NullableStructWriterFactory nullableStructWriterFactory) {
+ data = vector;
+ this.nullableStructWriterFactory = nullableStructWriterFactory;
+ }
+
+ @Override
+ public void setPosition(int index) {
+ super.setPosition(index);
+ for (BaseWriter writer : writers) {
+ writer.setPosition(index);
+ }
+ }
+
+
+ @Override
+ public void start() {
+ data.setType(idx(), MinorType.STRUCT);
+ getStructWriter().start();
+ }
+
+ @Override
+ public void end() {
+ getStructWriter().end();
+ }
+
+ @Override
+ public void startList() {
+ getListWriter().startList();
+ data.setType(idx(), MinorType.LIST);
+ }
+
+ @Override
+ public void endList() {
+ getListWriter().endList();
+ }
+
+ @Override
+ public void startMap() {
+ getMapWriter().startMap();
+ data.setType(idx(), MinorType.MAP);
+ }
+
+ @Override
+ public void endMap() {
+ getMapWriter().endMap();
+ }
+
+ @Override
+ public void startEntry() {
+ getMapWriter().startEntry();
+ }
+
+ @Override
+ public MapWriter key() {
+ return getMapWriter().key();
+ }
+
+ @Override
+ public MapWriter value() {
+ return getMapWriter().value();
+ }
+
+ @Override
+ public void endEntry() {
+ getMapWriter().endEntry();
+ }
+
+ private StructWriter getStructWriter() {
+ if (structWriter == null) {
+ structWriter = nullableStructWriterFactory.build(data.getStruct());
+ structWriter.setPosition(idx());
+ writers.add(structWriter);
+ }
+ return structWriter;
+ }
+
+ public StructWriter asStruct() {
+ data.setType(idx(), MinorType.STRUCT);
+ return getStructWriter();
+ }
+
+ private ListWriter getListWriter() {
+ if (listWriter == null) {
+ listWriter = new UnionListWriter(data.getList(), nullableStructWriterFactory);
+ listWriter.setPosition(idx());
+ writers.add(listWriter);
+ }
+ return listWriter;
+ }
+
+ public ListWriter asList() {
+ data.setType(idx(), MinorType.LIST);
+ return getListWriter();
+ }
+
+ private MapWriter getMapWriter() {
+ if (mapWriter == null) {
+ mapWriter = new UnionMapWriter(data.getMap(new ArrowType.Map(false)));
+ mapWriter.setPosition(idx());
+ writers.add(mapWriter);
+ }
+ return mapWriter;
+ }
+
+ private MapWriter getMapWriter(ArrowType arrowType) {
+ if (mapWriter == null) {
+ mapWriter = new UnionMapWriter(data.getMap(arrowType));
+ mapWriter.setPosition(idx());
+ writers.add(mapWriter);
+ }
+ return mapWriter;
+ }
+
+ public MapWriter asMap(ArrowType arrowType) {
+ data.setType(idx(), MinorType.MAP);
+ return getMapWriter(arrowType);
+ }
+
+ BaseWriter getWriter(MinorType minorType) {
+ return getWriter(minorType, null);
+ }
+
+ BaseWriter getWriter(MinorType minorType, ArrowType arrowType) {
+ switch (minorType) {
+ case STRUCT:
+ return getStructWriter();
+ case LIST:
+ return getListWriter();
+ case MAP:
+ return getMapWriter(arrowType);
+ <#list vv.types as type>
+ <#list type.minor as minor>
+ <#assign name = minor.class?cap_first />
+ <#assign fields = minor.fields!type.fields />
+ <#assign uncappedName = name?uncap_first/>
+ <#if !minor.typeParams?? || minor.class?starts_with("Decimal")>
+ case ${name?upper_case}:
+ return get${name}Writer(<#if minor.class?starts_with("Decimal") >arrowType</#if>);
+ </#if>
+ </#list>
+ </#list>
+ default:
+ throw new UnsupportedOperationException("Unknown type: " + minorType);
+ }
+ }
+ <#list vv.types as type>
+ <#list type.minor as minor>
+ <#assign name = minor.class?cap_first />
+ <#assign fields = minor.fields!type.fields />
+ <#assign uncappedName = name?uncap_first/>
+ <#assign friendlyType = (minor.friendlyType!minor.boxedType!type.boxedType) />
+ <#if !minor.typeParams?? || minor.class?starts_with("Decimal") >
+
+ private ${name}Writer ${name?uncap_first}Writer;
+
+ private ${name}Writer get${name}Writer(<#if minor.class?starts_with("Decimal")>ArrowType arrowType</#if>) {
+ if (${uncappedName}Writer == null) {
+ ${uncappedName}Writer = new ${name}WriterImpl(data.get${name}Vector(<#if minor.class?starts_with("Decimal")>arrowType</#if>));
+ ${uncappedName}Writer.setPosition(idx());
+ writers.add(${uncappedName}Writer);
+ }
+ return ${uncappedName}Writer;
+ }
+
+ public ${name}Writer as${name}(<#if minor.class?starts_with("Decimal")>ArrowType arrowType</#if>) {
+ data.setType(idx(), MinorType.${name?upper_case});
+ return get${name}Writer(<#if minor.class?starts_with("Decimal")>arrowType</#if>);
+ }
+
+ @Override
+ public void write(${name}Holder holder) {
+ data.setType(idx(), MinorType.${name?upper_case});
+ <#if minor.class?starts_with("Decimal")>ArrowType arrowType = new ArrowType.Decimal(holder.precision, holder.scale, ${name}Holder.WIDTH * 8);</#if>
+ get${name}Writer(<#if minor.class?starts_with("Decimal")>arrowType</#if>).setPosition(idx());
+ get${name}Writer(<#if minor.class?starts_with("Decimal")>arrowType</#if>).write${name}(<#list fields as field>holder.${field.name}<#if field_has_next>, </#if></#list><#if minor.class?starts_with("Decimal")>, arrowType</#if>);
+ }
+
+ public void write${minor.class}(<#list fields as field>${field.type} ${field.name}<#if field_has_next>, </#if></#list><#if minor.class?starts_with("Decimal")>, ArrowType arrowType</#if>) {
+ data.setType(idx(), MinorType.${name?upper_case});
+ get${name}Writer(<#if minor.class?starts_with("Decimal")>arrowType</#if>).setPosition(idx());
+ get${name}Writer(<#if minor.class?starts_with("Decimal")>arrowType</#if>).write${name}(<#list fields as field>${field.name}<#if field_has_next>, </#if></#list><#if minor.class?starts_with("Decimal")>, arrowType</#if>);
+ }
+ <#if minor.class?starts_with("Decimal")>
+ public void write${name}(${friendlyType} value) {
+ data.setType(idx(), MinorType.${name?upper_case});
+ ArrowType arrowType = new ArrowType.Decimal(value.precision(), value.scale(), ${name}Vector.TYPE_WIDTH * 8);
+ get${name}Writer(arrowType).setPosition(idx());
+ get${name}Writer(arrowType).write${name}(value);
+ }
+
+ public void writeBigEndianBytesTo${name}(byte[] value, ArrowType arrowType) {
+ data.setType(idx(), MinorType.${name?upper_case});
+ get${name}Writer(arrowType).setPosition(idx());
+ get${name}Writer(arrowType).writeBigEndianBytesTo${name}(value, arrowType);
+ }
+ </#if>
+ </#if>
+ </#list>
+ </#list>
+
+ public void writeNull() {
+ }
+
+ @Override
+ public StructWriter struct() {
+ data.setType(idx(), MinorType.LIST);
+ getListWriter().setPosition(idx());
+ return getListWriter().struct();
+ }
+
+ @Override
+ public ListWriter list() {
+ data.setType(idx(), MinorType.LIST);
+ getListWriter().setPosition(idx());
+ return getListWriter().list();
+ }
+
+ @Override
+ public ListWriter list(String name) {
+ data.setType(idx(), MinorType.STRUCT);
+ getStructWriter().setPosition(idx());
+ return getStructWriter().list(name);
+ }
+
+ @Override
+ public StructWriter struct(String name) {
+ data.setType(idx(), MinorType.STRUCT);
+ getStructWriter().setPosition(idx());
+ return getStructWriter().struct(name);
+ }
+
+ @Override
+ public MapWriter map() {
+ data.setType(idx(), MinorType.MAP);
+ getListWriter().setPosition(idx());
+ return getListWriter().map();
+ }
+
+ @Override
+ public MapWriter map(boolean keysSorted) {
+ data.setType(idx(), MinorType.MAP);
+ getListWriter().setPosition(idx());
+ return getListWriter().map(keysSorted);
+ }
+
+ @Override
+ public MapWriter map(String name) {
+ data.setType(idx(), MinorType.MAP);
+ getStructWriter().setPosition(idx());
+ return getStructWriter().map(name);
+ }
+
+ @Override
+ public MapWriter map(String name, boolean keysSorted) {
+ data.setType(idx(), MinorType.MAP);
+ getStructWriter().setPosition(idx());
+ return getStructWriter().map(name, keysSorted);
+ }
+
+ <#list vv.types as type><#list type.minor as minor>
+ <#assign lowerName = minor.class?uncap_first />
+ <#if lowerName == "int" ><#assign lowerName = "integer" /></#if>
+ <#assign upperName = minor.class?upper_case />
+ <#assign capName = minor.class?cap_first />
+ <#if !minor.typeParams?? || minor.class?starts_with("Decimal") >
+ @Override
+ public ${capName}Writer ${lowerName}(String name) {
+ data.setType(idx(), MinorType.STRUCT);
+ getStructWriter().setPosition(idx());
+ return getStructWriter().${lowerName}(name);
+ }
+
+ @Override
+ public ${capName}Writer ${lowerName}() {
+ data.setType(idx(), MinorType.LIST);
+ getListWriter().setPosition(idx());
+ return getListWriter().${lowerName}();
+ }
+ </#if>
+ <#if minor.class?starts_with("Decimal")>
+ @Override
+ public ${capName}Writer ${lowerName}(String name<#list minor.typeParams as typeParam>, ${typeParam.type} ${typeParam.name}</#list>) {
+ data.setType(idx(), MinorType.STRUCT);
+ getStructWriter().setPosition(idx());
+ return getStructWriter().${lowerName}(name<#list minor.typeParams as typeParam>, ${typeParam.name}</#list>);
+ }
+ </#if>
+ </#list></#list>
+
+ @Override
+ public void allocate() {
+ data.allocateNew();
+ }
+
+ @Override
+ public void clear() {
+ data.clear();
+ }
+
+ @Override
+ public void close() throws Exception {
+ data.close();
+ }
+
+ @Override
+ public Field getField() {
+ return data.getField();
+ }
+
+ @Override
+ public int getValueCapacity() {
+ return data.getValueCapacity();
+ }
+}
diff --git a/src/arrow/java/vector/src/main/codegen/templates/ValueHolders.java b/src/arrow/java/vector/src/main/codegen/templates/ValueHolders.java
new file mode 100644
index 000000000..973efd870
--- /dev/null
+++ b/src/arrow/java/vector/src/main/codegen/templates/ValueHolders.java
@@ -0,0 +1,81 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+<@pp.dropOutputFile />
+<#list vv.modes as mode>
+<#list vv.types as type>
+<#list type.minor as minor>
+
+<#assign className="${mode.prefix}${minor.class}Holder" />
+<@pp.changeOutputFile name="/org/apache/arrow/vector/holders/${className}.java" />
+
+<#include "/@includes/license.ftl" />
+
+package org.apache.arrow.vector.holders;
+
+<#include "/@includes/vv_imports.ftl" />
+
+/**
+ * Source code generated using FreeMarker template ${.template_name}
+ */
+public final class ${className} implements ValueHolder{
+
+ <#if mode.name == "Repeated">
+
+ /** The first index (inclusive) into the Vector. **/
+ public int start;
+
+ /** The last index (exclusive) into the Vector. **/
+ public int end;
+
+ /** The Vector holding the actual values. **/
+ public ${minor.class}Vector vector;
+
+ <#else>
+ public static final int WIDTH = ${type.width};
+
+ <#if mode.name == "Optional">public int isSet;
+ <#else>public final int isSet = 1;</#if>
+ <#assign fields = (minor.fields!type.fields) + (minor.typeParams![]) />
+ <#list fields as field>
+ public ${field.type} ${field.name};
+ </#list>
+
+ /**
+ * Reason for not supporting the operation is that ValueHolders are potential scalar
+ * replacements and hence we don't want any methods to be invoked on them.
+ */
+ public int hashCode(){
+ throw new UnsupportedOperationException();
+ }
+
+ /**
+ * Reason for not supporting the operation is that ValueHolders are potential scalar
+ * replacements and hence we don't want any methods to be invoked on them.
+ */
+ public String toString(){
+ throw new UnsupportedOperationException();
+ }
+ </#if>
+
+
+
+
+}
+
+</#list>
+</#list>
+</#list> \ No newline at end of file
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/AddOrGetResult.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/AddOrGetResult.java
new file mode 100644
index 000000000..b41dbb245
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/AddOrGetResult.java
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import org.apache.arrow.util.Preconditions;
+
+/**
+ * Tuple class containing a vector and whether is was created.
+ *
+ * @param <V> The type of vector the result is for.
+ */
+public class AddOrGetResult<V extends ValueVector> {
+ private final V vector;
+ private final boolean created;
+
+ /** Constructs a new object. */
+ public AddOrGetResult(V vector, boolean created) {
+ this.vector = Preconditions.checkNotNull(vector);
+ this.created = created;
+ }
+
+ /** Returns the vector. */
+ public V getVector() {
+ return vector;
+ }
+
+ /** Returns whether the vector is created. */
+ public boolean isCreated() {
+ return created;
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/AllocationHelper.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/AllocationHelper.java
new file mode 100644
index 000000000..6824756d8
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/AllocationHelper.java
@@ -0,0 +1,95 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import org.apache.arrow.vector.complex.RepeatedFixedWidthVectorLike;
+import org.apache.arrow.vector.complex.RepeatedVariableWidthVectorLike;
+
+/** Helper utility methods for allocating storage for Vectors. */
+public class AllocationHelper {
+ private AllocationHelper() {}
+
+ /**
+ * Allocates the vector.
+ *
+ * @param v The vector to allocate.
+ * @param valueCount Number of values to allocate.
+ * @param bytesPerValue bytes per value.
+ * @throws org.apache.arrow.memory.OutOfMemoryException if it can't allocate the memory.
+ */
+ public static void allocate(ValueVector v, int valueCount, int bytesPerValue) {
+ allocate(v, valueCount, bytesPerValue, 5);
+ }
+
+ /**
+ * Allocates memory for a vector assuming given number of values and their width.
+ *
+ * @param v The vector the allocate.
+ * @param valueCount The number of elements to allocate.
+ * @param bytesPerValue The bytes per value to use for allocating underlying storage
+ * @param childValCount If <code>v</code> is a repeated vector, this is number of child elements to allocate.
+ * @throws org.apache.arrow.memory.OutOfMemoryException if it can't allocate the memory.
+ */
+ public static void allocatePrecomputedChildCount(
+ ValueVector v,
+ int valueCount,
+ int bytesPerValue,
+ int childValCount) {
+ if (v instanceof FixedWidthVector) {
+ ((FixedWidthVector) v).allocateNew(valueCount);
+ } else if (v instanceof VariableWidthVector) {
+ ((VariableWidthVector) v).allocateNew(valueCount * bytesPerValue, valueCount);
+ } else if (v instanceof RepeatedFixedWidthVectorLike) {
+ ((RepeatedFixedWidthVectorLike) v).allocateNew(valueCount, childValCount);
+ } else if (v instanceof RepeatedVariableWidthVectorLike) {
+ ((RepeatedVariableWidthVectorLike) v).allocateNew(childValCount * bytesPerValue, valueCount, childValCount);
+ } else {
+ v.allocateNew();
+ }
+ }
+
+ /**
+ * Allocates memory for a vector assuming given number of values and their width.
+ *
+ * @param v The vector the allocate.
+ * @param valueCount The number of elements to allocate.
+ * @param bytesPerValue The bytes per value to use for allocating underlying storage
+ * @param repeatedPerTop If <code>v</code> is a repeated vector, this is assumed number of elements per child.
+ * @throws org.apache.arrow.memory.OutOfMemoryException if it can't allocate the memory
+ */
+ public static void allocate(ValueVector v, int valueCount, int bytesPerValue, int repeatedPerTop) {
+ allocatePrecomputedChildCount(v, valueCount, bytesPerValue, repeatedPerTop * valueCount);
+ }
+
+ /**
+ * Allocates the exact amount if v is fixed width, otherwise falls back to dynamic allocation.
+ *
+ * @param v value vector we are trying to allocate
+ * @param valueCount size we are trying to allocate
+ * @throws org.apache.arrow.memory.OutOfMemoryException if it can't allocate the memory
+ */
+ public static void allocateNew(ValueVector v, int valueCount) {
+ if (v instanceof FixedWidthVector) {
+ ((FixedWidthVector) v).allocateNew(valueCount);
+ } else if (v instanceof VariableWidthVector) {
+ ((VariableWidthVector) v).allocateNew(valueCount);
+ } else {
+ v.allocateNew();
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/BaseFixedWidthVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/BaseFixedWidthVector.java
new file mode 100644
index 000000000..ded58b22b
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/BaseFixedWidthVector.java
@@ -0,0 +1,930 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.apache.arrow.memory.util.LargeMemoryUtil.capAtMaxInt;
+
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.util.ArrowBufPointer;
+import org.apache.arrow.memory.util.ByteFunctionHelpers;
+import org.apache.arrow.memory.util.hash.ArrowBufHasher;
+import org.apache.arrow.util.Preconditions;
+import org.apache.arrow.vector.compare.VectorVisitor;
+import org.apache.arrow.vector.ipc.message.ArrowFieldNode;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.util.CallBack;
+import org.apache.arrow.vector.util.OversizedAllocationException;
+import org.apache.arrow.vector.util.TransferPair;
+
+import io.netty.util.internal.PlatformDependent;
+
+/**
+ * BaseFixedWidthVector provides an abstract interface for
+ * implementing vectors of fixed width values. The vectors are nullable
+ * implying that zero or more elements in the vector could be NULL.
+ */
+public abstract class BaseFixedWidthVector extends BaseValueVector
+ implements FixedWidthVector, FieldVector, VectorDefinitionSetter {
+ private final int typeWidth;
+
+ protected int lastValueCapacity;
+ protected int actualValueCapacity;
+
+ protected final Field field;
+ private int allocationMonitor;
+ protected ArrowBuf validityBuffer;
+ protected ArrowBuf valueBuffer;
+ protected int valueCount;
+
+ /**
+ * Constructs a new instance.
+ *
+ * @param field field materialized by this vector
+ * @param allocator The allocator to use for allocating memory for the vector.
+ * @param typeWidth The width in bytes of the type.
+ */
+ public BaseFixedWidthVector(Field field, final BufferAllocator allocator, final int typeWidth) {
+ super(allocator);
+ this.typeWidth = typeWidth;
+ this.field = field;
+ valueCount = 0;
+ allocationMonitor = 0;
+ validityBuffer = allocator.getEmpty();
+ valueBuffer = allocator.getEmpty();
+ lastValueCapacity = INITIAL_VALUE_ALLOCATION;
+ refreshValueCapacity();
+ }
+
+
+ public int getTypeWidth() {
+ return typeWidth;
+ }
+
+ @Override
+ public String getName() {
+ return field.getName();
+ }
+
+ /* TODO:
+ * see if getNullCount() can be made faster -- O(1)
+ */
+
+ /* TODO:
+ * Once the entire hierarchy has been refactored, move common functions
+ * like getNullCount(), splitAndTransferValidityBuffer to top level
+ * base class BaseValueVector.
+ *
+ * Along with this, some class members (validityBuffer) can also be
+ * abstracted out to top level base class.
+ *
+ * Right now BaseValueVector is the top level base class for other
+ * vector types in ValueVector hierarchy (non-nullable) and those
+ * vectors have not yet been refactored/removed so moving things to
+ * the top class as of now is not a good idea.
+ */
+
+ /**
+ * Get the memory address of buffer that manages the validity
+ * (NULL or NON-NULL nature) of elements in the vector.
+ * @return starting address of the buffer
+ */
+ @Override
+ public long getValidityBufferAddress() {
+ return (validityBuffer.memoryAddress());
+ }
+
+ /**
+ * Get the memory address of buffer that stores the data for elements
+ * in the vector.
+ * @return starting address of the buffer
+ */
+ @Override
+ public long getDataBufferAddress() {
+ return (valueBuffer.memoryAddress());
+ }
+
+ /**
+ * Get the memory address of buffer that stores the offsets for elements
+ * in the vector. This operation is not supported for fixed-width vectors.
+ * @return starting address of the buffer
+ * @throws UnsupportedOperationException for fixed width vectors
+ */
+ @Override
+ public long getOffsetBufferAddress() {
+ throw new UnsupportedOperationException("not supported for fixed-width vectors");
+ }
+
+ /**
+ * Get buffer that manages the validity (NULL or NON-NULL nature) of
+ * elements in the vector. Consider it as a buffer for internal bit vector
+ * data structure.
+ * @return buffer
+ */
+ @Override
+ public ArrowBuf getValidityBuffer() {
+ return validityBuffer;
+ }
+
+ /**
+ * Get the buffer that stores the data for elements in the vector.
+ * @return buffer
+ */
+ @Override
+ public ArrowBuf getDataBuffer() {
+ return valueBuffer;
+ }
+
+ /**
+ * buffer that stores the offsets for elements
+ * in the vector. This operation is not supported for fixed-width vectors.
+ * @return buffer
+ * @throws UnsupportedOperationException for fixed width vectors
+ */
+ @Override
+ public ArrowBuf getOffsetBuffer() {
+ throw new UnsupportedOperationException("not supported for fixed-width vectors");
+ }
+
+ /**
+ * Sets the desired value capacity for the vector. This function doesn't
+ * allocate any memory for the vector.
+ * @param valueCount desired number of elements in the vector
+ */
+ @Override
+ public void setInitialCapacity(int valueCount) {
+ computeAndCheckBufferSize(valueCount);
+ lastValueCapacity = valueCount;
+ }
+
+ /**
+ * Get the current value capacity for the vector.
+ *
+ * @return number of elements that vector can hold.
+ */
+ @Override
+ public int getValueCapacity() {
+ return actualValueCapacity;
+ }
+
+ /**
+ * Call this if you change the capacity of valueBuffer or validityBuffer.
+ */
+ protected void refreshValueCapacity() {
+ actualValueCapacity = Math.min(getValueBufferValueCapacity(), getValidityBufferValueCapacity());
+ }
+
+ protected int getValueBufferValueCapacity() {
+ return capAtMaxInt(valueBuffer.capacity() / typeWidth);
+ }
+
+ protected int getValidityBufferValueCapacity() {
+ return capAtMaxInt(validityBuffer.capacity() * 8);
+ }
+
+ /**
+ * zero out the vector and the data in associated buffers.
+ */
+ @Override
+ public void zeroVector() {
+ initValidityBuffer();
+ initValueBuffer();
+ }
+
+ /* zero out the validity buffer */
+ private void initValidityBuffer() {
+ validityBuffer.setZero(0, validityBuffer.capacity());
+ }
+
+ /* zero out the data buffer */
+ private void initValueBuffer() {
+ valueBuffer.setZero(0, valueBuffer.capacity());
+ }
+
+ /**
+ * Reset the vector to initial state. Same as {@link #zeroVector()}.
+ * Note that this method doesn't release any memory.
+ */
+ @Override
+ public void reset() {
+ valueCount = 0;
+ zeroVector();
+ }
+
+ /**
+ * Close the vector and release the associated buffers.
+ */
+ @Override
+ public void close() {
+ clear();
+ }
+
+ /**
+ * Same as {@link #close()}.
+ */
+ @Override
+ public void clear() {
+ valueCount = 0;
+ validityBuffer = releaseBuffer(validityBuffer);
+ valueBuffer = releaseBuffer(valueBuffer);
+ refreshValueCapacity();
+ }
+
+ /* used to step down the memory allocation */
+ protected void incrementAllocationMonitor() {
+ if (allocationMonitor < 0) {
+ allocationMonitor = 0;
+ }
+ allocationMonitor++;
+ }
+
+ /* used to step up the memory allocation */
+ protected void decrementAllocationMonitor() {
+ if (allocationMonitor > 0) {
+ allocationMonitor = 0;
+ }
+ allocationMonitor--;
+ }
+
+ /**
+ * Same as {@link #allocateNewSafe()}.
+ */
+ @Override
+ public void allocateNew() {
+ allocateNew(lastValueCapacity);
+ }
+
+ /**
+ * Allocate memory for the vector. We internally use a default value count
+ * of 4096 to allocate memory for at least these many elements in the
+ * vector. See {@link #allocateNew(int)} for allocating memory for specific
+ * number of elements in the vector.
+ *
+ * @return false if memory allocation fails, true otherwise.
+ */
+ @Override
+ public boolean allocateNewSafe() {
+ try {
+ allocateNew(lastValueCapacity);
+ return true;
+ } catch (Exception e) {
+ return false;
+ }
+ }
+
+ /**
+ * Allocate memory for the vector to support storing at least the provided number of
+ * elements in the vector. This method must be called prior to using the ValueVector.
+ *
+ * @param valueCount the desired number of elements in the vector
+ * @throws org.apache.arrow.memory.OutOfMemoryException on error
+ */
+ public void allocateNew(int valueCount) {
+ computeAndCheckBufferSize(valueCount);
+
+ /* we are doing a new allocation -- release the current buffers */
+ clear();
+
+ try {
+ allocateBytes(valueCount);
+ } catch (Exception e) {
+ clear();
+ throw e;
+ }
+ }
+
+ /*
+ * Compute the buffer size required for 'valueCount', and check if it's within bounds.
+ */
+ private long computeAndCheckBufferSize(int valueCount) {
+ final long size = computeCombinedBufferSize(valueCount, typeWidth);
+ if (size > MAX_ALLOCATION_SIZE) {
+ throw new OversizedAllocationException("Memory required for vector capacity " +
+ valueCount +
+ " is (" + size + "), which is more than max allowed (" + MAX_ALLOCATION_SIZE + ")");
+ }
+ return size;
+ }
+
+ /**
+ * Actual memory allocation is done by this function. All the calculations
+ * and knowledge about what size to allocate is upto the callers of this
+ * method.
+ * Callers appropriately handle errors if memory allocation fails here.
+ * Callers should also take care of determining that desired size is
+ * within the bounds of max allocation allowed and any other error
+ * conditions.
+ */
+ private void allocateBytes(int valueCount) {
+ DataAndValidityBuffers buffers = allocFixedDataAndValidityBufs(valueCount, typeWidth);
+ valueBuffer = buffers.getDataBuf();
+ validityBuffer = buffers.getValidityBuf();
+ zeroVector();
+
+ refreshValueCapacity();
+ lastValueCapacity = getValueCapacity();
+ }
+
+ /**
+ * During splitAndTransfer, if we splitting from a random position within a byte,
+ * we can't just slice the source buffer so we have to explicitly allocate the
+ * validityBuffer of the target vector. This is unlike the databuffer which we can
+ * always slice for the target vector.
+ */
+ private void allocateValidityBuffer(final int validityBufferSize) {
+ validityBuffer = allocator.buffer(validityBufferSize);
+ validityBuffer.readerIndex(0);
+ refreshValueCapacity();
+ }
+
+ /**
+ * Get the potential buffer size for a particular number of records.
+ * @param count desired number of elements in the vector
+ * @return estimated size of underlying buffers if the vector holds
+ * a given number of elements
+ */
+ @Override
+ public int getBufferSizeFor(final int count) {
+ if (count == 0) {
+ return 0;
+ }
+ return (count * typeWidth) + getValidityBufferSizeFromCount(count);
+ }
+
+ /**
+ * Get the size (number of bytes) of underlying buffers used by this
+ * vector.
+ * @return size of underlying buffers.
+ */
+ @Override
+ public int getBufferSize() {
+ if (valueCount == 0) {
+ return 0;
+ }
+ return (valueCount * typeWidth) + getValidityBufferSizeFromCount(valueCount);
+ }
+
+ /**
+ * Get information about how this field is materialized.
+ * @return the field corresponding to this vector
+ */
+ @Override
+ public Field getField() {
+ return field;
+ }
+
+ /**
+ * Return the underlying buffers associated with this vector. Note that this doesn't
+ * impact the reference counts for this buffer so it only should be used for in-context
+ * access. Also note that this buffer changes regularly thus
+ * external classes shouldn't hold a reference to it (unless they change it).
+ *
+ * @param clear Whether to clear vector before returning; the buffers will still be refcounted
+ * but the returned array will be the only reference to them
+ * @return The underlying {@link ArrowBuf buffers} that is used by this
+ * vector instance.
+ */
+ @Override
+ public ArrowBuf[] getBuffers(boolean clear) {
+ final ArrowBuf[] buffers;
+ setReaderAndWriterIndex();
+ if (getBufferSize() == 0) {
+ buffers = new ArrowBuf[0];
+ } else {
+ buffers = new ArrowBuf[2];
+ buffers[0] = validityBuffer;
+ buffers[1] = valueBuffer;
+ }
+ if (clear) {
+ for (final ArrowBuf buffer : buffers) {
+ buffer.getReferenceManager().retain(1);
+ }
+ clear();
+ }
+ return buffers;
+ }
+
+ /**
+ * Resize the vector to increase the capacity. The internal behavior is to
+ * double the current value capacity.
+ */
+ @Override
+ public void reAlloc() {
+ int targetValueCount = getValueCapacity() * 2;
+ if (targetValueCount == 0) {
+ if (lastValueCapacity > 0) {
+ targetValueCount = lastValueCapacity;
+ } else {
+ targetValueCount = INITIAL_VALUE_ALLOCATION * 2;
+ }
+ }
+ computeAndCheckBufferSize(targetValueCount);
+
+ DataAndValidityBuffers buffers = allocFixedDataAndValidityBufs(targetValueCount, typeWidth);
+ final ArrowBuf newValueBuffer = buffers.getDataBuf();
+ newValueBuffer.setBytes(0, valueBuffer, 0, valueBuffer.capacity());
+ newValueBuffer.setZero(valueBuffer.capacity(), newValueBuffer.capacity() - valueBuffer.capacity());
+ valueBuffer.getReferenceManager().release();
+ valueBuffer = newValueBuffer;
+
+ final ArrowBuf newValidityBuffer = buffers.getValidityBuf();
+ newValidityBuffer.setBytes(0, validityBuffer, 0, validityBuffer.capacity());
+ newValidityBuffer.setZero(validityBuffer.capacity(), newValidityBuffer.capacity() - validityBuffer.capacity());
+ validityBuffer.getReferenceManager().release();
+ validityBuffer = newValidityBuffer;
+
+ refreshValueCapacity();
+ lastValueCapacity = getValueCapacity();
+ }
+
+ /**
+ * Get the inner vectors.
+ *
+ * @deprecated This API will be removed as the current implementations no longer support inner vectors.
+ *
+ * @return the inner vectors for this field as defined by the TypeLayout
+ */
+ @Deprecated
+ @Override
+ public List<BufferBacked> getFieldInnerVectors() {
+ throw new UnsupportedOperationException("There are no inner vectors. Use getFieldBuffers");
+ }
+
+ /**
+ * Initialize the children in schema for this Field. This operation is a
+ * NO-OP for scalar types since they don't have any children.
+ * @param children the schema
+ * @throws IllegalArgumentException if children is a non-empty list for scalar types.
+ */
+ @Override
+ public void initializeChildrenFromFields(List<Field> children) {
+ if (!children.isEmpty()) {
+ throw new IllegalArgumentException("primitive type vector can not have children");
+ }
+ }
+
+ /**
+ * Get the inner child vectors.
+ * @return list of child vectors for complex types, empty list for scalar vector types
+ */
+ @Override
+ public List<FieldVector> getChildrenFromFields() {
+ return Collections.emptyList();
+ }
+
+ /**
+ * Load the buffers of this vector with provided source buffers.
+ * The caller manages the source buffers and populates them before invoking
+ * this method.
+ * @param fieldNode the fieldNode indicating the value count
+ * @param ownBuffers the buffers for this Field (own buffers only, children not included)
+ */
+ @Override
+ public void loadFieldBuffers(ArrowFieldNode fieldNode, List<ArrowBuf> ownBuffers) {
+ if (ownBuffers.size() != 2) {
+ throw new IllegalArgumentException("Illegal buffer count, expected " + 2 + ", got: " + ownBuffers.size());
+ }
+
+ ArrowBuf bitBuffer = ownBuffers.get(0);
+ ArrowBuf dataBuffer = ownBuffers.get(1);
+
+ validityBuffer.getReferenceManager().release();
+ validityBuffer = BitVectorHelper.loadValidityBuffer(fieldNode, bitBuffer, allocator);
+ valueBuffer.getReferenceManager().release();
+ valueBuffer = dataBuffer.getReferenceManager().retain(dataBuffer, allocator);
+ refreshValueCapacity();
+
+ valueCount = fieldNode.getLength();
+ }
+
+ /**
+ * Get the buffers belonging to this vector.
+ *
+ * @return the inner buffers.
+ */
+ public List<ArrowBuf> getFieldBuffers() {
+ List<ArrowBuf> result = new ArrayList<>(2);
+ setReaderAndWriterIndex();
+ result.add(validityBuffer);
+ result.add(valueBuffer);
+
+ return result;
+ }
+
+ /**
+ * Set the reader and writer indexes for the inner buffers.
+ */
+ private void setReaderAndWriterIndex() {
+ validityBuffer.readerIndex(0);
+ valueBuffer.readerIndex(0);
+ if (valueCount == 0) {
+ validityBuffer.writerIndex(0);
+ valueBuffer.writerIndex(0);
+ } else {
+ validityBuffer.writerIndex(getValidityBufferSizeFromCount(valueCount));
+ if (typeWidth == 0) {
+ /* specialized handling for BitVector */
+ valueBuffer.writerIndex(getValidityBufferSizeFromCount(valueCount));
+ } else {
+ valueBuffer.writerIndex((long) valueCount * typeWidth);
+ }
+ }
+ }
+
+ /**
+ * Construct a transfer pair of this vector and another vector of same type.
+ * @param ref name of the target vector
+ * @param allocator allocator for the target vector
+ * @param callBack not used
+ * @return TransferPair
+ */
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator, CallBack callBack) {
+ return getTransferPair(ref, allocator);
+ }
+
+ /**
+ * Construct a transfer pair of this vector and another vector of same type.
+ * @param allocator allocator for the target vector
+ * @return TransferPair
+ */
+ @Override
+ public TransferPair getTransferPair(BufferAllocator allocator) {
+ return getTransferPair(getName(), allocator);
+ }
+
+ /**
+ * Construct a transfer pair of this vector and another vector of same type.
+ * @param ref name of the target vector
+ * @param allocator allocator for the target vector
+ * @return TransferPair
+ */
+ public abstract TransferPair getTransferPair(String ref, BufferAllocator allocator);
+
+ /**
+ * Transfer this vector'data to another vector. The memory associated
+ * with this vector is transferred to the allocator of target vector
+ * for accounting and management purposes.
+ * @param target destination vector for transfer
+ */
+ public void transferTo(BaseFixedWidthVector target) {
+ compareTypes(target, "transferTo");
+ target.clear();
+ target.validityBuffer = transferBuffer(validityBuffer, target.allocator);
+ target.valueBuffer = transferBuffer(valueBuffer, target.allocator);
+ target.valueCount = valueCount;
+ target.refreshValueCapacity();
+ clear();
+ }
+
+ /**
+ * Slice this vector at desired index and length and transfer the
+ * corresponding data to the target vector.
+ * @param startIndex start position of the split in source vector.
+ * @param length length of the split.
+ * @param target destination vector
+ */
+ public void splitAndTransferTo(int startIndex, int length,
+ BaseFixedWidthVector target) {
+ Preconditions.checkArgument(startIndex >= 0 && length >= 0 && startIndex + length <= valueCount,
+ "Invalid parameters startIndex: %s, length: %s for valueCount: %s", startIndex, length, valueCount);
+ compareTypes(target, "splitAndTransferTo");
+ target.clear();
+ splitAndTransferValidityBuffer(startIndex, length, target);
+ splitAndTransferValueBuffer(startIndex, length, target);
+ target.setValueCount(length);
+ }
+
+ /**
+ * Data buffer can always be split and transferred using slicing.
+ */
+ private void splitAndTransferValueBuffer(int startIndex, int length,
+ BaseFixedWidthVector target) {
+ final int startPoint = startIndex * typeWidth;
+ final int sliceLength = length * typeWidth;
+ final ArrowBuf slicedBuffer = valueBuffer.slice(startPoint, sliceLength);
+ target.valueBuffer = transferBuffer(slicedBuffer, target.allocator);
+ target.refreshValueCapacity();
+ }
+
+ /**
+ * Validity buffer has multiple cases of split and transfer depending on
+ * the starting position of the source index.
+ */
+ private void splitAndTransferValidityBuffer(int startIndex, int length,
+ BaseFixedWidthVector target) {
+ int firstByteSource = BitVectorHelper.byteIndex(startIndex);
+ int lastByteSource = BitVectorHelper.byteIndex(valueCount - 1);
+ int byteSizeTarget = getValidityBufferSizeFromCount(length);
+ int offset = startIndex % 8;
+
+ if (length > 0) {
+ if (offset == 0) {
+ /* slice */
+ if (target.validityBuffer != null) {
+ target.validityBuffer.getReferenceManager().release();
+ }
+ target.validityBuffer = validityBuffer.slice(firstByteSource, byteSizeTarget);
+ target.validityBuffer.getReferenceManager().retain(1);
+ target.refreshValueCapacity();
+ } else {
+ /* Copy data
+ * When the first bit starts from the middle of a byte (offset != 0),
+ * copy data from src BitVector.
+ * Each byte in the target is composed by a part in i-th byte,
+ * another part in (i+1)-th byte.
+ */
+ target.allocateValidityBuffer(byteSizeTarget);
+
+ for (int i = 0; i < byteSizeTarget - 1; i++) {
+ byte b1 = BitVectorHelper.getBitsFromCurrentByte(this.validityBuffer,
+ firstByteSource + i, offset);
+ byte b2 = BitVectorHelper.getBitsFromNextByte(this.validityBuffer,
+ firstByteSource + i + 1, offset);
+
+ target.validityBuffer.setByte(i, (b1 + b2));
+ }
+
+ /* Copying the last piece is done in the following manner:
+ * if the source vector has 1 or more bytes remaining, we copy
+ * the last piece as a byte formed by shifting data
+ * from the current byte and the next byte.
+ *
+ * if the source vector has no more bytes remaining
+ * (we are at the last byte), we copy the last piece as a byte
+ * by shifting data from the current byte.
+ */
+ if ((firstByteSource + byteSizeTarget - 1) < lastByteSource) {
+ byte b1 = BitVectorHelper.getBitsFromCurrentByte(this.validityBuffer,
+ firstByteSource + byteSizeTarget - 1, offset);
+ byte b2 = BitVectorHelper.getBitsFromNextByte(this.validityBuffer,
+ firstByteSource + byteSizeTarget, offset);
+
+ target.validityBuffer.setByte(byteSizeTarget - 1, b1 + b2);
+ } else {
+ byte b1 = BitVectorHelper.getBitsFromCurrentByte(this.validityBuffer,
+ firstByteSource + byteSizeTarget - 1, offset);
+ target.validityBuffer.setByte(byteSizeTarget - 1, b1);
+ }
+ }
+ }
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | common getters and setters |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Get the number of elements that are null in the vector.
+ *
+ * @return the number of null elements.
+ */
+ @Override
+ public int getNullCount() {
+ return BitVectorHelper.getNullCount(validityBuffer, valueCount);
+ }
+
+ /**
+ * Get the value count of vector. This will always be zero unless
+ * {@link #setValueCount(int)} has been called prior to calling this.
+ *
+ * @return valueCount for the vector
+ */
+ @Override
+ public int getValueCount() {
+ return valueCount;
+ }
+
+ /**
+ * Set value count for the vector.
+ *
+ * @param valueCount value count to set
+ */
+ @Override
+ public void setValueCount(int valueCount) {
+ this.valueCount = valueCount;
+ final int currentValueCapacity = getValueCapacity();
+ while (valueCount > getValueCapacity()) {
+ reAlloc();
+ }
+ /*
+ * We are trying to understand the pattern of memory allocation.
+ * If initially, the user did vector.allocateNew(), we would have
+ * allocated memory of default size (4096 * type width).
+ * Later on user invokes setValueCount(count).
+ *
+ * If the existing value capacity is twice as large as the
+ * valueCount, we know that we over-provisioned memory in the
+ * first place when default memory allocation was done because user
+ * really needs a much less value count in the vector.
+ *
+ * We record this by bumping up the allocationMonitor. If this pattern
+ * happens for certain number of times and allocationMonitor
+ * reaches the threshold (internal hardcoded) value, subsequent
+ * call to allocateNew() will take care of stepping down the
+ * default memory allocation size.
+ *
+ * Another case would be under-provisioning the initial memory and
+ * thus going through a lot of realloc(). Here the goal is to
+ * see if we can minimize the number of reallocations. Again the
+ * state is recorded in allocationMonitor by decrementing it
+ * (negative value). If a threshold is hit, realloc will try to
+ * allocate more memory in order to possibly avoid a future realloc.
+ * This case is also applicable to setSafe() methods which can trigger
+ * a realloc() and thus we record the state there as well.
+ */
+ if (valueCount > 0) {
+ if (currentValueCapacity >= (valueCount * 2)) {
+ incrementAllocationMonitor();
+ } else if (currentValueCapacity <= (valueCount / 2)) {
+ decrementAllocationMonitor();
+ }
+ }
+ setReaderAndWriterIndex();
+ }
+
+ /**
+ * Check if the given index is within the current value capacity
+ * of the vector.
+ *
+ * @param index position to check
+ * @return true if index is within the current value capacity
+ */
+ public boolean isSafe(int index) {
+ return index < getValueCapacity();
+ }
+
+ /**
+ * Check if element at given index is null.
+ *
+ * @param index position of element
+ * @return true if element at given index is null, false otherwise
+ */
+ @Override
+ public boolean isNull(int index) {
+ return (isSet(index) == 0);
+ }
+
+ /**
+ * Same as {@link #isNull(int)}.
+ *
+ * @param index position of element
+ * @return 1 if element at given index is not null, 0 otherwise
+ */
+ public int isSet(int index) {
+ final int byteIndex = index >> 3;
+ final byte b = validityBuffer.getByte(byteIndex);
+ final int bitIndex = index & 7;
+ return (b >> bitIndex) & 0x01;
+ }
+
+ /**
+ * Mark the particular position in the vector as non-null.
+ *
+ * @param index position of the element.
+ */
+ @Override
+ public void setIndexDefined(int index) {
+ handleSafe(index);
+ BitVectorHelper.setBit(validityBuffer, index);
+ }
+
+ public void set(int index, byte[] value, int start, int length) {
+ throw new UnsupportedOperationException();
+ }
+
+ public void setSafe(int index, byte[] value, int start, int length) {
+ throw new UnsupportedOperationException();
+ }
+
+ public void set(int index, ByteBuffer value, int start, int length) {
+ throw new UnsupportedOperationException();
+ }
+
+ public void setSafe(int index, ByteBuffer value, int start, int length) {
+ throw new UnsupportedOperationException();
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | helper methods for setters |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ protected void handleSafe(int index) {
+ while (index >= getValueCapacity()) {
+ decrementAllocationMonitor();
+ reAlloc();
+ }
+ }
+
+ /**
+ * Copy a cell value from a particular index in source vector to a particular
+ * position in this vector. The source vector should be of the same type as this one.
+ *
+ * @param fromIndex position to copy from in source vector
+ * @param thisIndex position to copy to in this vector
+ * @param from source vector
+ */
+ @Override
+ public void copyFrom(int fromIndex, int thisIndex, ValueVector from) {
+ Preconditions.checkArgument(this.getMinorType() == from.getMinorType());
+ if (from.isNull(fromIndex)) {
+ BitVectorHelper.unsetBit(this.getValidityBuffer(), thisIndex);
+ } else {
+ BitVectorHelper.setBit(this.getValidityBuffer(), thisIndex);
+ PlatformDependent.copyMemory(from.getDataBuffer().memoryAddress() + (long) fromIndex * typeWidth,
+ this.getDataBuffer().memoryAddress() + (long) thisIndex * typeWidth, typeWidth);
+ }
+ }
+
+ /**
+ * Same as {@link #copyFrom(int, int, ValueVector)} except that
+ * it handles the case when the capacity of the vector needs to be expanded
+ * before copy.
+ *
+ * @param fromIndex position to copy from in source vector
+ * @param thisIndex position to copy to in this vector
+ * @param from source vector
+ */
+ @Override
+ public void copyFromSafe(int fromIndex, int thisIndex, ValueVector from) {
+ Preconditions.checkArgument(this.getMinorType() == from.getMinorType());
+ handleSafe(thisIndex);
+ copyFrom(fromIndex, thisIndex, from);
+ }
+
+ /**
+ * Set the element at the given index to null.
+ *
+ * @param index position of element
+ */
+ public void setNull(int index) {
+ handleSafe(index);
+ // not really needed to set the bit to 0 as long as
+ // the buffer always starts from 0.
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+
+ @Override
+ public ArrowBufPointer getDataPointer(int index) {
+ return getDataPointer(index, new ArrowBufPointer());
+ }
+
+ @Override
+ public ArrowBufPointer getDataPointer(int index, ArrowBufPointer reuse) {
+ if (isNull(index)) {
+ reuse.set(null, 0, 0);
+ } else {
+ reuse.set(valueBuffer, (long) index * typeWidth, typeWidth);
+ }
+ return reuse;
+ }
+
+ @Override
+ public int hashCode(int index) {
+ return hashCode(index, null);
+ }
+
+ @Override
+ public int hashCode(int index, ArrowBufHasher hasher) {
+ if (isNull(index)) {
+ return ArrowBufPointer.NULL_HASH_CODE;
+ }
+ long start = (long) typeWidth * index;
+ long end = (long) typeWidth * (index + 1);
+ return ByteFunctionHelpers.hash(hasher, this.getDataBuffer(), start, end);
+ }
+
+ @Override
+ public <OUT, IN> OUT accept(VectorVisitor<OUT, IN> visitor, IN value) {
+ return visitor.visit(this, value);
+ }
+
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/BaseIntVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/BaseIntVector.java
new file mode 100644
index 000000000..556411c86
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/BaseIntVector.java
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+/**
+ * Interface for all int type vectors.
+ */
+public interface BaseIntVector extends FieldVector {
+
+ /**
+ * Sets the value at index, note this value may need to be need truncated.
+ * Note this is safe version (i.e. call setSafe method in vector)
+ */
+ void setWithPossibleTruncate(int index, long value);
+
+ /**
+ * Sets the value at index, note this value may need to be need truncated.
+ * Note this is unsafe version (i.e. call set method in vector)
+ */
+ void setUnsafeWithPossibleTruncate(int index, long value);
+
+ /**
+ * Gets the value at index.
+ * This value may have been extended to long and will throw {@link NullPointerException}
+ * if the value is null. Note null check could be turned off via {@link NullCheckingForGet}.
+ */
+ long getValueAsLong(int index);
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/BaseLargeVariableWidthVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/BaseLargeVariableWidthVector.java
new file mode 100644
index 000000000..90694db83
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/BaseLargeVariableWidthVector.java
@@ -0,0 +1,1370 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.apache.arrow.memory.util.LargeMemoryUtil.capAtMaxInt;
+
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.OutOfMemoryException;
+import org.apache.arrow.memory.util.ArrowBufPointer;
+import org.apache.arrow.memory.util.ByteFunctionHelpers;
+import org.apache.arrow.memory.util.CommonUtil;
+import org.apache.arrow.memory.util.hash.ArrowBufHasher;
+import org.apache.arrow.util.Preconditions;
+import org.apache.arrow.vector.compare.VectorVisitor;
+import org.apache.arrow.vector.ipc.message.ArrowFieldNode;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.util.CallBack;
+import org.apache.arrow.vector.util.OversizedAllocationException;
+import org.apache.arrow.vector.util.TransferPair;
+
+/**
+ * BaseLargeVariableWidthVector is a base class providing functionality for large strings/large bytes types.
+ */
+public abstract class BaseLargeVariableWidthVector extends BaseValueVector
+ implements VariableWidthVector, FieldVector, VectorDefinitionSetter {
+ private static final int DEFAULT_RECORD_BYTE_COUNT = 12;
+ private static final int INITIAL_BYTE_COUNT = INITIAL_VALUE_ALLOCATION * DEFAULT_RECORD_BYTE_COUNT;
+ private int lastValueCapacity;
+ private long lastValueAllocationSizeInBytes;
+
+ /* protected members */
+ public static final int OFFSET_WIDTH = 8; /* 8 byte unsigned int to track offsets */
+ protected static final byte[] emptyByteArray = new byte[]{};
+ protected ArrowBuf validityBuffer;
+ protected ArrowBuf valueBuffer;
+ protected ArrowBuf offsetBuffer;
+ protected int valueCount;
+ protected int lastSet;
+ protected final Field field;
+
+ /**
+ * Constructs a new instance.
+ *
+ * @param field The field materialized by this vector.
+ * @param allocator The allocator to use for creating/resizing buffers
+ */
+ public BaseLargeVariableWidthVector(Field field, final BufferAllocator allocator) {
+ super(allocator);
+ this.field = field;
+ lastValueAllocationSizeInBytes = INITIAL_BYTE_COUNT;
+ // -1 because we require one extra slot for the offset array.
+ lastValueCapacity = INITIAL_VALUE_ALLOCATION - 1;
+ valueCount = 0;
+ lastSet = -1;
+ offsetBuffer = allocator.getEmpty();
+ validityBuffer = allocator.getEmpty();
+ valueBuffer = allocator.getEmpty();
+ }
+
+ @Override
+ public String getName() {
+ return field.getName();
+ }
+
+ /**
+ * Get buffer that manages the validity (NULL or NON-NULL nature) of
+ * elements in the vector. Consider it as a buffer for internal bit vector
+ * data structure.
+ * @return buffer
+ */
+ @Override
+ public ArrowBuf getValidityBuffer() {
+ return validityBuffer;
+ }
+
+ /**
+ * Get the buffer that stores the data for elements in the vector.
+ * @return buffer
+ */
+ @Override
+ public ArrowBuf getDataBuffer() {
+ return valueBuffer;
+ }
+
+ /**
+ * buffer that stores the offsets for elements
+ * in the vector. This operation is not supported for fixed-width vectors.
+ * @return buffer
+ */
+ @Override
+ public ArrowBuf getOffsetBuffer() {
+ return offsetBuffer;
+ }
+
+ /**
+ * Get the memory address of buffer that stores the offsets for elements
+ * in the vector.
+ * @return starting address of the buffer
+ */
+ @Override
+ public long getOffsetBufferAddress() {
+ return offsetBuffer.memoryAddress();
+ }
+
+ /**
+ * Get the memory address of buffer that manages the validity
+ * (NULL or NON-NULL nature) of elements in the vector.
+ * @return starting address of the buffer
+ */
+ @Override
+ public long getValidityBufferAddress() {
+ return validityBuffer.memoryAddress();
+ }
+
+ /**
+ * Get the memory address of buffer that stores the data for elements
+ * in the vector.
+ * @return starting address of the buffer
+ */
+ @Override
+ public long getDataBufferAddress() {
+ return valueBuffer.memoryAddress();
+ }
+
+ /**
+ * Sets the desired value capacity for the vector. This function doesn't
+ * allocate any memory for the vector.
+ * @param valueCount desired number of elements in the vector
+ */
+ @Override
+ public void setInitialCapacity(int valueCount) {
+ final long size = (long) valueCount * DEFAULT_RECORD_BYTE_COUNT;
+ checkDataBufferSize(size);
+ computeAndCheckOffsetsBufferSize(valueCount);
+ lastValueAllocationSizeInBytes = size;
+ lastValueCapacity = valueCount;
+ }
+
+ /**
+ * Sets the desired value capacity for the vector. This function doesn't
+ * allocate any memory for the vector.
+ * @param valueCount desired number of elements in the vector
+ * @param density average number of bytes per variable width element
+ */
+ @Override
+ public void setInitialCapacity(int valueCount, double density) {
+ long size = Math.max((long) (valueCount * density), 1L);
+ checkDataBufferSize(size);
+ computeAndCheckOffsetsBufferSize(valueCount);
+ lastValueAllocationSizeInBytes = size;
+ lastValueCapacity = valueCount;
+ }
+
+ /**
+ * Get the density of this ListVector.
+ * @return density
+ */
+ public double getDensity() {
+ if (valueCount == 0) {
+ return 0.0D;
+ }
+ final long startOffset = offsetBuffer.getLong(0);
+ final long endOffset = offsetBuffer.getLong((long) valueCount * OFFSET_WIDTH);
+ final double totalListSize = endOffset - startOffset;
+ return totalListSize / valueCount;
+ }
+
+ /**
+ * Get the current capacity which does not exceed either validity buffer or offset buffer.
+ * Note: Here the `getValueCapacity` has no relationship with the value buffer.
+ * @return number of elements that vector can hold.
+ */
+ @Override
+ public int getValueCapacity() {
+ final long offsetValueCapacity = Math.max(getOffsetBufferValueCapacity() - 1, 0);
+ return capAtMaxInt(Math.min(offsetValueCapacity, getValidityBufferValueCapacity()));
+ }
+
+ private long getValidityBufferValueCapacity() {
+ return validityBuffer.capacity() * 8;
+ }
+
+ private long getOffsetBufferValueCapacity() {
+ return offsetBuffer.capacity() / OFFSET_WIDTH;
+ }
+
+ /**
+ * zero out the vector and the data in associated buffers.
+ */
+ public void zeroVector() {
+ initValidityBuffer();
+ initOffsetBuffer();
+ valueBuffer.setZero(0, valueBuffer.capacity());
+ }
+
+ /* zero out the validity buffer */
+ private void initValidityBuffer() {
+ validityBuffer.setZero(0, validityBuffer.capacity());
+ }
+
+ /* zero out the offset buffer */
+ private void initOffsetBuffer() {
+ offsetBuffer.setZero(0, offsetBuffer.capacity());
+ }
+
+ /**
+ * Reset the vector to initial state. Same as {@link #zeroVector()}.
+ * Note that this method doesn't release any memory.
+ */
+ public void reset() {
+ zeroVector();
+ lastSet = -1;
+ valueCount = 0;
+ }
+
+ /**
+ * Close the vector and release the associated buffers.
+ */
+ @Override
+ public void close() {
+ clear();
+ }
+
+ /**
+ * Same as {@link #close()}.
+ */
+ @Override
+ public void clear() {
+ validityBuffer = releaseBuffer(validityBuffer);
+ valueBuffer = releaseBuffer(valueBuffer);
+ offsetBuffer = releaseBuffer(offsetBuffer);
+ lastSet = -1;
+ valueCount = 0;
+ }
+
+ /**
+ * Get the inner vectors.
+ *
+ * @deprecated This API will be removed as the current implementations no longer support inner vectors.
+ *
+ * @return the inner vectors for this field as defined by the TypeLayout
+ */
+ @Override
+ @Deprecated
+ public List<BufferBacked> getFieldInnerVectors() {
+ throw new UnsupportedOperationException("There are no inner vectors. Use getFieldBuffers");
+ }
+
+ /**
+ * Initialize the children in schema for this Field. This operation is a
+ * NO-OP for scalar types since they don't have any children.
+ * @param children the schema
+ * @throws IllegalArgumentException if children is a non-empty list for scalar types.
+ */
+ @Override
+ public void initializeChildrenFromFields(List<Field> children) {
+ if (!children.isEmpty()) {
+ throw new IllegalArgumentException("primitive type vector can not have children");
+ }
+ }
+
+ /**
+ * Get the inner child vectors.
+ * @return list of child vectors for complex types, empty list for scalar vector types
+ */
+ @Override
+ public List<FieldVector> getChildrenFromFields() {
+ return Collections.emptyList();
+ }
+
+
+ /**
+ * Load the buffers of this vector with provided source buffers.
+ * The caller manages the source buffers and populates them before invoking
+ * this method.
+ * @param fieldNode the fieldNode indicating the value count
+ * @param ownBuffers the buffers for this Field (own buffers only, children not included)
+ */
+ @Override
+ public void loadFieldBuffers(ArrowFieldNode fieldNode, List<ArrowBuf> ownBuffers) {
+ ArrowBuf bitBuffer = ownBuffers.get(0);
+ ArrowBuf offBuffer = ownBuffers.get(1);
+ ArrowBuf dataBuffer = ownBuffers.get(2);
+
+ validityBuffer.getReferenceManager().release();
+ validityBuffer = BitVectorHelper.loadValidityBuffer(fieldNode, bitBuffer, allocator);
+ offsetBuffer.getReferenceManager().release();
+ offsetBuffer = offBuffer.getReferenceManager().retain(offBuffer, allocator);
+ valueBuffer.getReferenceManager().release();
+ valueBuffer = dataBuffer.getReferenceManager().retain(dataBuffer, allocator);
+
+ lastSet = fieldNode.getLength() - 1;
+ valueCount = fieldNode.getLength();
+ }
+
+ /**
+ * Get the buffers belonging to this vector.
+ * @return the inner buffers.
+ */
+ public List<ArrowBuf> getFieldBuffers() {
+ // before flight/IPC, we must bring the vector to a consistent state.
+ // this is because, it is possible that the offset buffers of some trailing values
+ // are not updated. this may cause some data in the data buffer being lost.
+ // for details, please see TestValueVector#testUnloadVariableWidthVector.
+ fillHoles(valueCount);
+
+ List<ArrowBuf> result = new ArrayList<>(3);
+ setReaderAndWriterIndex();
+ result.add(validityBuffer);
+ result.add(offsetBuffer);
+ result.add(valueBuffer);
+
+ return result;
+ }
+
+ /**
+ * Set the reader and writer indexes for the inner buffers.
+ */
+ private void setReaderAndWriterIndex() {
+ validityBuffer.readerIndex(0);
+ offsetBuffer.readerIndex(0);
+ valueBuffer.readerIndex(0);
+ if (valueCount == 0) {
+ validityBuffer.writerIndex(0);
+ offsetBuffer.writerIndex(0);
+ valueBuffer.writerIndex(0);
+ } else {
+ final long lastDataOffset = getStartOffset(valueCount);
+ validityBuffer.writerIndex(getValidityBufferSizeFromCount(valueCount));
+ offsetBuffer.writerIndex((long) (valueCount + 1) * OFFSET_WIDTH);
+ valueBuffer.writerIndex(lastDataOffset);
+ }
+ }
+
+ /**
+ * Same as {@link #allocateNewSafe()}.
+ */
+ @Override
+ public void allocateNew() {
+ allocateNew(lastValueAllocationSizeInBytes, lastValueCapacity);
+ }
+
+ /**
+ * Allocate memory for the vector. We internally use a default value count
+ * of 4096 to allocate memory for at least these many elements in the
+ * vector. See {@link #allocateNew(long, int)} for allocating memory for specific
+ * number of elements in the vector.
+ *
+ * @return false if memory allocation fails, true otherwise.
+ */
+ @Override
+ public boolean allocateNewSafe() {
+ try {
+ allocateNew(lastValueAllocationSizeInBytes, lastValueCapacity);
+ return true;
+ } catch (Exception e) {
+ return false;
+ }
+ }
+
+ /**
+ * Allocate memory for the vector to support storing at least the provided number of
+ * elements in the vector. This method must be called prior to using the ValueVector.
+ *
+ * @param totalBytes desired total memory capacity
+ * @param valueCount the desired number of elements in the vector
+ * @throws org.apache.arrow.memory.OutOfMemoryException if memory allocation fails
+ */
+ @Override
+ public void allocateNew(long totalBytes, int valueCount) {
+ assert totalBytes >= 0;
+
+ checkDataBufferSize(totalBytes);
+ computeAndCheckOffsetsBufferSize(valueCount);
+
+ /* we are doing a new allocation -- release the current buffers */
+ clear();
+
+ try {
+ allocateBytes(totalBytes, valueCount);
+ } catch (Exception e) {
+ clear();
+ throw e;
+ }
+ }
+
+ @Override
+ public void allocateNew(int valueCount) {
+ allocateNew(lastValueAllocationSizeInBytes, valueCount);
+ }
+
+ /* Check if the data buffer size is within bounds. */
+ private void checkDataBufferSize(long size) {
+ if (size > MAX_ALLOCATION_SIZE || size < 0) {
+ throw new OversizedAllocationException("Memory required for vector " +
+ " is (" + size + "), which is more than max allowed (" + MAX_ALLOCATION_SIZE + ")");
+ }
+ }
+
+ /**
+ * Compute the buffer size required for 'valueCount' offsets and validity, and check if it's
+ * within bounds.
+ */
+ private long computeAndCheckOffsetsBufferSize(int valueCount) {
+ /* to track the end offset of last data element in vector, we need
+ * an additional slot in offset buffer.
+ */
+ final long size = computeCombinedBufferSize(valueCount + 1, OFFSET_WIDTH);
+ if (size > MAX_ALLOCATION_SIZE) {
+ throw new OversizedAllocationException("Memory required for vector capacity " +
+ valueCount +
+ " is (" + size + "), which is more than max allowed (" + MAX_ALLOCATION_SIZE + ")");
+ }
+ return size;
+ }
+
+ /* allocate the inner buffers */
+ private void allocateBytes(final long valueBufferSize, final int valueCount) {
+ /* allocate data buffer */
+ long curSize = valueBufferSize;
+ valueBuffer = allocator.buffer(curSize);
+ valueBuffer.readerIndex(0);
+
+ /* allocate offset buffer and validity buffer */
+ DataAndValidityBuffers buffers = allocFixedDataAndValidityBufs(valueCount + 1, OFFSET_WIDTH);
+ offsetBuffer = buffers.getDataBuf();
+ validityBuffer = buffers.getValidityBuf();
+ initOffsetBuffer();
+ initValidityBuffer();
+
+ lastValueCapacity = getValueCapacity();
+ lastValueAllocationSizeInBytes = capAtMaxInt(valueBuffer.capacity());
+ }
+
+ /* allocate offset buffer */
+ private void allocateOffsetBuffer(final long size) {
+ offsetBuffer = allocator.buffer(size);
+ offsetBuffer.readerIndex(0);
+ initOffsetBuffer();
+ }
+
+ /* allocate validity buffer */
+ private void allocateValidityBuffer(final long size) {
+ validityBuffer = allocator.buffer(size);
+ validityBuffer.readerIndex(0);
+ initValidityBuffer();
+ }
+
+ /**
+ * Resize the vector to increase the capacity. The internal behavior is to
+ * double the current value capacity.
+ */
+ public void reAlloc() {
+ reallocDataBuffer();
+ reallocValidityAndOffsetBuffers();
+ }
+
+ /**
+ * Reallocate the data buffer. Data Buffer stores the actual data for
+ * LARGEVARCHAR or LARGEVARBINARY elements in the vector. The behavior is to double
+ * the size of buffer.
+ * @throws OversizedAllocationException if the desired new size is more than
+ * max allowed
+ * @throws OutOfMemoryException if the internal memory allocation fails
+ */
+ public void reallocDataBuffer() {
+ final long currentBufferCapacity = valueBuffer.capacity();
+ long newAllocationSize = currentBufferCapacity * 2;
+ if (newAllocationSize == 0) {
+ if (lastValueAllocationSizeInBytes > 0) {
+ newAllocationSize = lastValueAllocationSizeInBytes;
+ } else {
+ newAllocationSize = INITIAL_BYTE_COUNT * 2;
+ }
+ }
+ newAllocationSize = CommonUtil.nextPowerOfTwo(newAllocationSize);
+ assert newAllocationSize >= 1;
+
+ checkDataBufferSize(newAllocationSize);
+
+ final ArrowBuf newBuf = allocator.buffer(newAllocationSize);
+ newBuf.setBytes(0, valueBuffer, 0, currentBufferCapacity);
+ valueBuffer.getReferenceManager().release();
+ valueBuffer = newBuf;
+ lastValueAllocationSizeInBytes = valueBuffer.capacity();
+ }
+
+ /**
+ * Reallocate the validity and offset buffers for this vector. Validity
+ * buffer is used to track the NULL or NON-NULL nature of elements in
+ * the vector and offset buffer is used to store the lengths of variable
+ * width elements in the vector.
+ *
+ * <p>Note that data buffer for variable length vectors moves independent
+ * of the companion validity and offset buffers. This is in
+ * contrast to what we have for fixed width vectors.
+ *
+ * <p>So even though we may have setup an initial capacity of 1024
+ * elements in the vector, it is quite possible
+ * that we need to reAlloc() the data buffer when we are setting
+ * the 5th element in the vector simply because previous
+ * variable length elements have exhausted the buffer capacity.
+ * However, we really don't need to reAlloc() validity and
+ * offset buffers until we try to set the 1025th element
+ * This is why we do a separate check for safe methods to
+ * determine which buffer needs reallocation.
+ * @throws OversizedAllocationException if the desired new size is more than
+ * max allowed
+ * @throws OutOfMemoryException if the internal memory allocation fails
+ */
+ public void reallocValidityAndOffsetBuffers() {
+ int targetOffsetCount = capAtMaxInt((offsetBuffer.capacity() / OFFSET_WIDTH) * 2);
+ if (targetOffsetCount == 0) {
+ if (lastValueCapacity > 0) {
+ targetOffsetCount = (lastValueCapacity + 1);
+ } else {
+ targetOffsetCount = 2 * (INITIAL_VALUE_ALLOCATION + 1);
+ }
+ }
+ computeAndCheckOffsetsBufferSize(targetOffsetCount);
+
+ DataAndValidityBuffers buffers = allocFixedDataAndValidityBufs(targetOffsetCount, OFFSET_WIDTH);
+ final ArrowBuf newOffsetBuffer = buffers.getDataBuf();
+ newOffsetBuffer.setBytes(0, offsetBuffer, 0, offsetBuffer.capacity());
+ newOffsetBuffer.setZero(offsetBuffer.capacity(), newOffsetBuffer.capacity() - offsetBuffer.capacity());
+ offsetBuffer.getReferenceManager().release();
+ offsetBuffer = newOffsetBuffer;
+
+ final ArrowBuf newValidityBuffer = buffers.getValidityBuf();
+ newValidityBuffer.setBytes(0, validityBuffer, 0, validityBuffer.capacity());
+ newValidityBuffer.setZero(validityBuffer.capacity(), newValidityBuffer.capacity() - validityBuffer.capacity());
+ validityBuffer.getReferenceManager().release();
+ validityBuffer = newValidityBuffer;
+
+ lastValueCapacity = getValueCapacity();
+ }
+
+ /**
+ * Get the size (number of bytes) of underlying data buffer.
+ * @return number of bytes in the data buffer
+ */
+ @Override
+ public int getByteCapacity() {
+ return capAtMaxInt(valueBuffer.capacity());
+ }
+
+ @Override
+ public int sizeOfValueBuffer() {
+ if (valueCount == 0) {
+ return 0;
+ }
+ return capAtMaxInt(offsetBuffer.getLong((long) valueCount * OFFSET_WIDTH));
+ }
+
+ /**
+ * Get the size (number of bytes) of underlying buffers used by this
+ * vector.
+ * @return size of underlying buffers.
+ */
+ @Override
+ public int getBufferSize() {
+ return getBufferSizeFor(this.valueCount);
+ }
+
+ /**
+ * Get the potential buffer size for a particular number of records.
+ * @param valueCount desired number of elements in the vector
+ * @return estimated size of underlying buffers if the vector holds
+ * a given number of elements
+ */
+ @Override
+ public int getBufferSizeFor(final int valueCount) {
+ if (valueCount == 0) {
+ return 0;
+ }
+
+ final long validityBufferSize = getValidityBufferSizeFromCount(valueCount);
+ final long offsetBufferSize = (long) (valueCount + 1) * OFFSET_WIDTH;
+ /* get the end offset for this valueCount */
+ final long dataBufferSize = offsetBuffer.getLong((long) valueCount * OFFSET_WIDTH);
+ return capAtMaxInt(validityBufferSize + offsetBufferSize + dataBufferSize);
+ }
+
+ /**
+ * Get information about how this field is materialized.
+ * @return the field corresponding to this vector
+ */
+ @Override
+ public Field getField() {
+ return field;
+ }
+
+ /**
+ * Return the underlying buffers associated with this vector. Note that this doesn't
+ * impact the reference counts for this buffer so it only should be used for in-context
+ * access. Also note that this buffer changes regularly thus
+ * external classes shouldn't hold a reference to it (unless they change it).
+ *
+ * @param clear Whether to clear vector before returning; the buffers will still be refcounted
+ * but the returned array will be the only reference to them
+ * @return The underlying {@link io.netty.buffer.ArrowBuf buffers} that is used by this
+ * vector instance.
+ */
+ @Override
+ public ArrowBuf[] getBuffers(boolean clear) {
+ final ArrowBuf[] buffers;
+ setReaderAndWriterIndex();
+ if (getBufferSize() == 0) {
+ buffers = new ArrowBuf[0];
+ } else {
+ buffers = new ArrowBuf[3];
+ buffers[0] = validityBuffer;
+ buffers[1] = offsetBuffer;
+ buffers[2] = valueBuffer;
+ }
+ if (clear) {
+ for (final ArrowBuf buffer : buffers) {
+ buffer.getReferenceManager().retain();
+ }
+ clear();
+ }
+ return buffers;
+ }
+
+ /**
+ * Construct a transfer pair of this vector and another vector of same type.
+ * @param ref name of the target vector
+ * @param allocator allocator for the target vector
+ * @param callBack not used
+ * @return TransferPair
+ */
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator, CallBack callBack) {
+ return getTransferPair(ref, allocator);
+ }
+
+ /**
+ * Construct a transfer pair of this vector and another vector of same type.
+ * @param allocator allocator for the target vector
+ * @return TransferPair
+ */
+ @Override
+ public TransferPair getTransferPair(BufferAllocator allocator) {
+ return getTransferPair(getName(), allocator);
+ }
+
+ /**
+ * Construct a transfer pair of this vector and another vector of same type.
+ * @param ref name of the target vector
+ * @param allocator allocator for the target vector
+ * @return TransferPair
+ */
+ public abstract TransferPair getTransferPair(String ref, BufferAllocator allocator);
+
+ /**
+ * Transfer this vector'data to another vector. The memory associated
+ * with this vector is transferred to the allocator of target vector
+ * for accounting and management purposes.
+ * @param target destination vector for transfer
+ */
+ public void transferTo(BaseLargeVariableWidthVector target) {
+ compareTypes(target, "transferTo");
+ target.clear();
+ target.validityBuffer = transferBuffer(validityBuffer, target.allocator);
+ target.valueBuffer = transferBuffer(valueBuffer, target.allocator);
+ target.offsetBuffer = transferBuffer(offsetBuffer, target.allocator);
+ target.setLastSet(this.lastSet);
+ if (this.valueCount > 0) {
+ target.setValueCount(this.valueCount);
+ }
+ clear();
+ }
+
+ /**
+ * Slice this vector at desired index and length and transfer the
+ * corresponding data to the target vector.
+ * @param startIndex start position of the split in source vector.
+ * @param length length of the split.
+ * @param target destination vector
+ */
+ public void splitAndTransferTo(int startIndex, int length,
+ BaseLargeVariableWidthVector target) {
+ Preconditions.checkArgument(startIndex >= 0 && startIndex < valueCount,
+ "Invalid startIndex: %s", startIndex);
+ Preconditions.checkArgument(startIndex + length <= valueCount,
+ "Invalid length: %s", length);
+ compareTypes(target, "splitAndTransferTo");
+ target.clear();
+ splitAndTransferValidityBuffer(startIndex, length, target);
+ splitAndTransferOffsetBuffer(startIndex, length, target);
+ target.setLastSet(length - 1);
+ if (length > 0) {
+ target.setValueCount(length);
+ }
+ }
+
+ /**
+ * Transfer the offsets along with data. Unlike the data buffer, we cannot simply
+ * slice the offset buffer for split and transfer. The reason is that offsets
+ * in the target vector have to be adjusted and made relative to the staring
+ * offset in source vector from the start index of split. This is why, we
+ * need to explicitly allocate the offset buffer and set the adjusted offsets
+ * in the target vector.
+ */
+ private void splitAndTransferOffsetBuffer(int startIndex, int length, BaseLargeVariableWidthVector target) {
+ final long start = offsetBuffer.getLong((long) startIndex * OFFSET_WIDTH);
+ final long end = offsetBuffer.getLong((long) (startIndex + length) * OFFSET_WIDTH);
+ final long dataLength = end - start;
+ target.allocateOffsetBuffer((long) (length + 1) * OFFSET_WIDTH);
+ for (int i = 0; i < length + 1; i++) {
+ final long relativeSourceOffset = offsetBuffer.getLong((long) (startIndex + i) * OFFSET_WIDTH) - start;
+ target.offsetBuffer.setLong((long) i * OFFSET_WIDTH, relativeSourceOffset);
+ }
+ final ArrowBuf slicedBuffer = valueBuffer.slice(start, dataLength);
+ target.valueBuffer = transferBuffer(slicedBuffer, target.allocator);
+ }
+
+ /*
+ * Transfer the validity.
+ */
+ private void splitAndTransferValidityBuffer(int startIndex, int length,
+ BaseLargeVariableWidthVector target) {
+ int firstByteSource = BitVectorHelper.byteIndex(startIndex);
+ int lastByteSource = BitVectorHelper.byteIndex(valueCount - 1);
+ int byteSizeTarget = getValidityBufferSizeFromCount(length);
+ int offset = startIndex % 8;
+
+ if (length > 0) {
+ if (offset == 0) {
+ // slice
+ if (target.validityBuffer != null) {
+ target.validityBuffer.getReferenceManager().release();
+ }
+ target.validityBuffer = validityBuffer.slice(firstByteSource, byteSizeTarget);
+ target.validityBuffer.getReferenceManager().retain();
+ } else {
+ /* Copy data
+ * When the first bit starts from the middle of a byte (offset != 0),
+ * copy data from src BitVector.
+ * Each byte in the target is composed by a part in i-th byte,
+ * another part in (i+1)-th byte.
+ */
+ target.allocateValidityBuffer(byteSizeTarget);
+
+ for (int i = 0; i < byteSizeTarget - 1; i++) {
+ byte b1 = BitVectorHelper.getBitsFromCurrentByte(this.validityBuffer, firstByteSource + i, offset);
+ byte b2 = BitVectorHelper.getBitsFromNextByte(this.validityBuffer, firstByteSource + i + 1, offset);
+
+ target.validityBuffer.setByte(i, (b1 + b2));
+ }
+ /* Copying the last piece is done in the following manner:
+ * if the source vector has 1 or more bytes remaining, we copy
+ * the last piece as a byte formed by shifting data
+ * from the current byte and the next byte.
+ *
+ * if the source vector has no more bytes remaining
+ * (we are at the last byte), we copy the last piece as a byte
+ * by shifting data from the current byte.
+ */
+ if ((firstByteSource + byteSizeTarget - 1) < lastByteSource) {
+ byte b1 = BitVectorHelper.getBitsFromCurrentByte(this.validityBuffer,
+ firstByteSource + byteSizeTarget - 1, offset);
+ byte b2 = BitVectorHelper.getBitsFromNextByte(this.validityBuffer,
+ firstByteSource + byteSizeTarget, offset);
+
+ target.validityBuffer.setByte(byteSizeTarget - 1, b1 + b2);
+ } else {
+ byte b1 = BitVectorHelper.getBitsFromCurrentByte(this.validityBuffer,
+ firstByteSource + byteSizeTarget - 1, offset);
+ target.validityBuffer.setByte(byteSizeTarget - 1, b1);
+ }
+ }
+ }
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | common getters and setters |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Get the number of elements that are null in the vector.
+ *
+ * @return the number of null elements.
+ */
+ public int getNullCount() {
+ return BitVectorHelper.getNullCount(validityBuffer, valueCount);
+ }
+
+ /**
+ * Check if the given index is within the current value capacity
+ * of the vector.
+ *
+ * @param index position to check
+ * @return true if index is within the current value capacity
+ */
+ public boolean isSafe(int index) {
+ return index < getValueCapacity();
+ }
+
+ /**
+ * Check if element at given index is null.
+ *
+ * @param index position of element
+ * @return true if element at given index is null
+ */
+ public boolean isNull(int index) {
+ return (isSet(index) == 0);
+ }
+
+ /**
+ * Same as {@link #isNull(int)}.
+ *
+ * @param index position of element
+ * @return 1 if element at given index is not null, 0 otherwise
+ */
+ public int isSet(int index) {
+ final int byteIndex = index >> 3;
+ final byte b = validityBuffer.getByte(byteIndex);
+ final int bitIndex = index & 7;
+ return (b >> bitIndex) & 0x01;
+ }
+
+ /**
+ * Get the value count of vector. This will always be zero unless
+ * setValueCount(int) has been called prior to calling this.
+ *
+ * @return valueCount for the vector
+ */
+ public int getValueCount() {
+ return valueCount;
+ }
+
+ /**
+ * Sets the value count for the vector.
+ *
+ * @param valueCount value count
+ */
+ public void setValueCount(int valueCount) {
+ assert valueCount >= 0;
+ this.valueCount = valueCount;
+ while (valueCount > getValueCapacity()) {
+ reallocValidityAndOffsetBuffers();
+ }
+ fillHoles(valueCount);
+ lastSet = valueCount - 1;
+ setReaderAndWriterIndex();
+ }
+
+ /**
+ * Create holes in the vector upto the given index (exclusive).
+ * Holes will be created from the current last set position in
+ * the vector.
+ *
+ * @param index target index
+ */
+ public void fillEmpties(int index) {
+ handleSafe(index, emptyByteArray.length);
+ fillHoles(index);
+ lastSet = index - 1;
+ }
+
+ /**
+ * Set the index of last non-null element in the vector.
+ * It is important to call this method with appropriate value
+ * before calling {@link #setValueCount(int)}.
+ *
+ * @param value desired index of last non-null element.
+ */
+ public void setLastSet(int value) {
+ lastSet = value;
+ }
+
+ /**
+ * Get the index of last non-null element in the vector.
+ *
+ * @return index of the last non-null element
+ */
+ public int getLastSet() {
+ return lastSet;
+ }
+
+ /**
+ * Mark the particular position in the vector as non-null.
+ *
+ * @param index position of the element.
+ */
+ @Override
+ public void setIndexDefined(int index) {
+ // We need to check and realloc both validity and offset buffer
+ while (index >= getValueCapacity()) {
+ reallocValidityAndOffsetBuffers();
+ }
+ BitVectorHelper.setBit(validityBuffer, index);
+ }
+
+ /**
+ * Sets the value length for an element.
+ *
+ * @param index position of the element to set
+ * @param length length of the element
+ */
+ public void setValueLengthSafe(int index, int length) {
+ assert index >= 0;
+ handleSafe(index, length);
+ fillHoles(index);
+ final long startOffset = getStartOffset(index);
+ offsetBuffer.setLong((long) (index + 1) * OFFSET_WIDTH, startOffset + length);
+ lastSet = index;
+ }
+
+ /**
+ * Get the variable length element at specified index as Text.
+ *
+ * @param index position of element to get
+ * @return greater than 0 length for non-null element, 0 otherwise
+ */
+ public int getValueLength(int index) {
+ assert index >= 0;
+ if (isSet(index) == 0) {
+ return 0;
+ }
+ final long startOffset = getStartOffset(index);
+ final int dataLength =
+ (int) (offsetBuffer.getLong((long) (index + 1) * OFFSET_WIDTH) - startOffset);
+ return dataLength;
+ }
+
+ /**
+ * Set the variable length element at the specified index to the supplied
+ * byte array. This is same as using {@link #set(int, byte[], int, int)}
+ * with start as 0 and length as value.length
+ *
+ * @param index position of the element to set
+ * @param value array of bytes to write
+ */
+ public void set(int index, byte[] value) {
+ assert index >= 0;
+ fillHoles(index);
+ BitVectorHelper.setBit(validityBuffer, index);
+ setBytes(index, value, 0, value.length);
+ lastSet = index;
+ }
+
+ /**
+ * Same as {@link #set(int, byte[])} except that it handles the
+ * case where index and length of new element are beyond the existing
+ * capacity of the vector.
+ *
+ * @param index position of the element to set
+ * @param value array of bytes to write
+ */
+ public void setSafe(int index, byte[] value) {
+ assert index >= 0;
+ handleSafe(index, value.length);
+ fillHoles(index);
+ BitVectorHelper.setBit(validityBuffer, index);
+ setBytes(index, value, 0, value.length);
+ lastSet = index;
+ }
+
+ /**
+ * Set the variable length element at the specified index to the supplied
+ * byte array.
+ *
+ * @param index position of the element to set
+ * @param value array of bytes to write
+ * @param start start index in array of bytes
+ * @param length length of data in array of bytes
+ */
+ public void set(int index, byte[] value, int start, int length) {
+ assert index >= 0;
+ fillHoles(index);
+ BitVectorHelper.setBit(validityBuffer, index);
+ setBytes(index, value, start, length);
+ lastSet = index;
+ }
+
+ /**
+ * Same as {@link #set(int, byte[], int, int)} except that it handles the
+ * case where index and length of new element are beyond the existing
+ * capacity of the vector.
+ *
+ * @param index position of the element to set
+ * @param value array of bytes to write
+ * @param start start index in array of bytes
+ * @param length length of data in array of bytes
+ */
+ public void setSafe(int index, byte[] value, int start, int length) {
+ assert index >= 0;
+ handleSafe(index, length);
+ fillHoles(index);
+ BitVectorHelper.setBit(validityBuffer, index);
+ setBytes(index, value, start, length);
+ lastSet = index;
+ }
+
+ /**
+ * Set the variable length element at the specified index to the
+ * content in supplied ByteBuffer.
+ *
+ * @param index position of the element to set
+ * @param value ByteBuffer with data
+ * @param start start index in ByteBuffer
+ * @param length length of data in ByteBuffer
+ */
+ public void set(int index, ByteBuffer value, int start, int length) {
+ assert index >= 0;
+ fillHoles(index);
+ BitVectorHelper.setBit(validityBuffer, index);
+ final long startOffset = getStartOffset(index);
+ offsetBuffer.setLong((long) (index + 1) * OFFSET_WIDTH, startOffset + length);
+ valueBuffer.setBytes(startOffset, value, start, length);
+ lastSet = index;
+ }
+
+ /**
+ * Same as {@link #set(int, ByteBuffer, int, int)} except that it handles the
+ * case where index and length of new element are beyond the existing
+ * capacity of the vector.
+ *
+ * @param index position of the element to set
+ * @param value ByteBuffer with data
+ * @param start start index in ByteBuffer
+ * @param length length of data in ByteBuffer
+ */
+ public void setSafe(int index, ByteBuffer value, int start, int length) {
+ assert index >= 0;
+ handleSafe(index, length);
+ fillHoles(index);
+ BitVectorHelper.setBit(validityBuffer, index);
+ final long startOffset = getStartOffset(index);
+ offsetBuffer.setLong((long) (index + 1) * OFFSET_WIDTH, startOffset + length);
+ valueBuffer.setBytes(startOffset, value, start, length);
+ lastSet = index;
+ }
+
+ /**
+ * Set the element at the given index to null.
+ *
+ * @param index position of element
+ */
+ public void setNull(int index) {
+ // We need to check and realloc both validity and offset buffer
+ while (index >= getValueCapacity()) {
+ reallocValidityAndOffsetBuffers();
+ }
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+
+ /**
+ * Store the given value at a particular position in the vector. isSet indicates
+ * whether the value is NULL or not.
+ * @param index position of the new value
+ * @param isSet 0 for NULL value, 1 otherwise
+ * @param start start position of data in buffer
+ * @param end end position of data in buffer
+ * @param buffer data buffer containing the variable width element to be stored
+ * in the vector
+ */
+ public void set(int index, int isSet, long start, long end, ArrowBuf buffer) {
+ assert index >= 0;
+ final long dataLength = end - start;
+ fillHoles(index);
+ BitVectorHelper.setValidityBit(validityBuffer, index, isSet);
+ final long startOffset = offsetBuffer.getLong((long) index * OFFSET_WIDTH);
+ offsetBuffer.setLong((long) (index + 1) * OFFSET_WIDTH, end);
+ valueBuffer.setBytes(startOffset, buffer, start, dataLength);
+ lastSet = index;
+ }
+
+ /**
+ * Same as {@link #set(int, int, long, long, ArrowBuf)} except that it handles the case
+ * when index is greater than or equal to current value capacity of the
+ * vector.
+ * @param index position of the new value
+ * @param isSet 0 for NULL value, 1 otherwise
+ * @param start start position of data in buffer
+ * @param end end position of data in buffer
+ * @param buffer data buffer containing the variable width element to be stored
+ * in the vector
+ */
+ public void setSafe(int index, int isSet, long start, long end, ArrowBuf buffer) {
+ assert index >= 0;
+ final long dataLength = end - start;
+ handleSafe(index, (int) dataLength);
+ fillHoles(index);
+ BitVectorHelper.setValidityBit(validityBuffer, index, isSet);
+ final long startOffset = offsetBuffer.getLong((long) index * OFFSET_WIDTH);
+ offsetBuffer.setLong((long) (index + 1) * OFFSET_WIDTH, startOffset + dataLength);
+ valueBuffer.setBytes(startOffset, buffer, start, dataLength);
+ lastSet = index;
+ }
+
+ /**
+ * Store the given value at a particular position in the vector. isSet indicates
+ * whether the value is NULL or not.
+ * @param index position of the new value
+ * @param start start position of data in buffer
+ * @param length length of data in buffer
+ * @param buffer data buffer containing the variable width element to be stored
+ * in the vector
+ */
+ public void set(int index, long start, int length, ArrowBuf buffer) {
+ assert index >= 0;
+ fillHoles(index);
+ BitVectorHelper.setBit(validityBuffer, index);
+ final long startOffset = offsetBuffer.getLong((long) index * OFFSET_WIDTH);
+ offsetBuffer.setLong((long) (index + 1) * OFFSET_WIDTH, startOffset + length);
+ final ArrowBuf bb = buffer.slice(start, length);
+ valueBuffer.setBytes(startOffset, bb);
+ lastSet = index;
+ }
+
+ /**
+ * Same as {@link #set(int, int, long, long, ArrowBuf)} except that it handles the case
+ * when index is greater than or equal to current value capacity of the
+ * vector.
+ * @param index position of the new value
+ * @param start start position of data in buffer
+ * @param length length of data in buffer
+ * @param buffer data buffer containing the variable width element to be stored
+ * in the vector
+ */
+ public void setSafe(int index, long start, int length, ArrowBuf buffer) {
+ assert index >= 0;
+ handleSafe(index, length);
+ fillHoles(index);
+ BitVectorHelper.setBit(validityBuffer, index);
+ final long startOffset = offsetBuffer.getLong((long) index * OFFSET_WIDTH);
+ offsetBuffer.setLong((long) (index + 1) * OFFSET_WIDTH, startOffset + length);
+ final ArrowBuf bb = buffer.slice(start, length);
+ valueBuffer.setBytes(startOffset, bb);
+ lastSet = index;
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | helper methods for setters |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ protected final void fillHoles(int index) {
+ for (int i = lastSet + 1; i < index; i++) {
+ setBytes(i, emptyByteArray, 0, emptyByteArray.length);
+ }
+ lastSet = index - 1;
+ }
+
+ protected final void setBytes(int index, byte[] value, int start, int length) {
+ /* end offset of current last element in the vector. this will
+ * be the start offset of new element we are trying to store.
+ */
+ final long startOffset = getStartOffset(index);
+ /* set new end offset */
+ offsetBuffer.setLong((long) (index + 1) * OFFSET_WIDTH, startOffset + length);
+ /* store the var length data in value buffer */
+ valueBuffer.setBytes(startOffset, value, start, length);
+ }
+
+ /**
+ * Gets the starting offset of a record, given its index.
+ * @param index index of the record.
+ * @return the starting offset of the record.
+ */
+ protected final long getStartOffset(int index) {
+ return offsetBuffer.getLong((long) index * OFFSET_WIDTH);
+ }
+
+ protected final void handleSafe(int index, int dataLength) {
+ /*
+ * IMPORTANT:
+ * value buffer for variable length vectors moves independent
+ * of the companion validity and offset buffers. This is in
+ * contrast to what we have for fixed width vectors.
+ *
+ * Here there is no concept of getValueCapacity() in the
+ * data stream. getValueCapacity() is applicable only to validity
+ * and offset buffers.
+ *
+ * So even though we may have setup an initial capacity of 1024
+ * elements in the vector, it is quite possible
+ * that we need to reAlloc() the data buffer when we are setting
+ * the 5th element in the vector simply because previous
+ * variable length elements have exhausted the buffer capacity.
+ * However, we really don't need to reAlloc() validity and
+ * offset buffers until we try to set the 1025th element
+ * This is why we do a separate check for safe methods to
+ * determine which buffer needs reallocation.
+ */
+ while (index >= getValueCapacity()) {
+ reallocValidityAndOffsetBuffers();
+ }
+ final long startOffset = lastSet < 0 ? 0L : getStartOffset(lastSet + 1);
+ while (valueBuffer.capacity() < (startOffset + dataLength)) {
+ reallocDataBuffer();
+ }
+ }
+
+ /**
+ * Method used by Json Writer to read a variable width element from
+ * the variable width vector and write to Json.
+ *
+ * <p>This method should not be used externally.
+ *
+ * @param data buffer storing the variable width vector elements
+ * @param offset buffer storing the offsets of variable width vector elements
+ * @param index position of the element in the vector
+ * @return array of bytes
+ */
+ public static byte[] get(final ArrowBuf data, final ArrowBuf offset, int index) {
+ final long currentStartOffset = offset.getLong((long) index * OFFSET_WIDTH);
+ final int dataLength =
+ (int) (offset.getLong((long) (index + 1) * OFFSET_WIDTH) - currentStartOffset);
+ final byte[] result = new byte[dataLength];
+ data.getBytes(currentStartOffset, result, 0, dataLength);
+ return result;
+ }
+
+ /**
+ * Method used by Json Reader to explicitly set the offsets of the variable
+ * width vector data. The method takes care of allocating the memory for
+ * offsets if the caller hasn't done so.
+ *
+ * <p>This method should not be used externally.
+ *
+ * @param buffer ArrowBuf to store offsets for variable width elements
+ * @param allocator memory allocator
+ * @param valueCount number of elements
+ * @param index position of the element
+ * @param value offset of the element
+ * @return buffer holding the offsets
+ */
+ public static ArrowBuf set(ArrowBuf buffer, BufferAllocator allocator,
+ int valueCount, int index, long value) {
+ if (buffer == null) {
+ buffer = allocator.buffer((long) valueCount * OFFSET_WIDTH);
+ }
+ buffer.setLong((long) index * OFFSET_WIDTH, value);
+ if (index == (valueCount - 1)) {
+ buffer.writerIndex((long) valueCount * OFFSET_WIDTH);
+ }
+
+ return buffer;
+ }
+
+ /**
+ * Copy a cell value from a particular index in source vector to a particular
+ * position in this vector.
+ *
+ * @param fromIndex position to copy from in source vector
+ * @param thisIndex position to copy to in this vector
+ * @param from source vector
+ */
+ @Override
+ public void copyFrom(int fromIndex, int thisIndex, ValueVector from) {
+ Preconditions.checkArgument(this.getMinorType() == from.getMinorType());
+ if (from.isNull(fromIndex)) {
+ fillHoles(thisIndex);
+ BitVectorHelper.unsetBit(this.validityBuffer, thisIndex);
+ final long copyStart = offsetBuffer.getLong((long) thisIndex * OFFSET_WIDTH);
+ offsetBuffer.setLong((long) (thisIndex + 1) * OFFSET_WIDTH, copyStart);
+ } else {
+ final long start = from.getOffsetBuffer().getLong((long) fromIndex * OFFSET_WIDTH);
+ final long end = from.getOffsetBuffer().getLong((long) (fromIndex + 1) * OFFSET_WIDTH);
+ final long length = end - start;
+ fillHoles(thisIndex);
+ BitVectorHelper.setBit(this.validityBuffer, thisIndex);
+ final long copyStart = offsetBuffer.getLong((long) thisIndex * OFFSET_WIDTH);
+ from.getDataBuffer().getBytes(start, this.valueBuffer, copyStart, (int) length);
+ offsetBuffer.setLong((long) (thisIndex + 1) * OFFSET_WIDTH, copyStart + length);
+ }
+ lastSet = thisIndex;
+ }
+
+ /**
+ * Same as {@link #copyFrom(int, int, ValueVector)} except that
+ * it handles the case when the capacity of the vector needs to be expanded
+ * before copy.
+ *
+ * @param fromIndex position to copy from in source vector
+ * @param thisIndex position to copy to in this vector
+ * @param from source vector
+ */
+ @Override
+ public void copyFromSafe(int fromIndex, int thisIndex, ValueVector from) {
+ Preconditions.checkArgument(this.getMinorType() == from.getMinorType());
+ if (from.isNull(fromIndex)) {
+ handleSafe(thisIndex, 0);
+ fillHoles(thisIndex);
+ BitVectorHelper.unsetBit(this.validityBuffer, thisIndex);
+ final long copyStart = offsetBuffer.getLong((long) thisIndex * OFFSET_WIDTH);
+ offsetBuffer.setLong((long) (thisIndex + 1) * OFFSET_WIDTH, copyStart);
+ } else {
+ final long start = from.getOffsetBuffer().getLong((long) fromIndex * OFFSET_WIDTH);
+ final long end = from.getOffsetBuffer().getLong((long) (fromIndex + 1) * OFFSET_WIDTH);
+ final int length = (int) (end - start);
+ handleSafe(thisIndex, length);
+ fillHoles(thisIndex);
+ BitVectorHelper.setBit(this.validityBuffer, thisIndex);
+ final long copyStart = offsetBuffer.getLong((long) thisIndex * OFFSET_WIDTH);
+ from.getDataBuffer().getBytes(start, this.valueBuffer, copyStart, length);
+ offsetBuffer.setLong((long) (thisIndex + 1) * OFFSET_WIDTH, copyStart + length);
+ }
+ lastSet = thisIndex;
+ }
+
+ @Override
+ public ArrowBufPointer getDataPointer(int index) {
+ return getDataPointer(index, new ArrowBufPointer());
+ }
+
+ @Override
+ public ArrowBufPointer getDataPointer(int index, ArrowBufPointer reuse) {
+ if (isNull(index)) {
+ reuse.set(null, 0, 0);
+ } else {
+ long offset = offsetBuffer.getLong((long) index * OFFSET_WIDTH);
+ int length = (int) (offsetBuffer.getLong((long) (index + 1) * OFFSET_WIDTH) - offset);
+ reuse.set(valueBuffer, offset, length);
+ }
+ return reuse;
+ }
+
+ @Override
+ public int hashCode(int index) {
+ return hashCode(index, null);
+ }
+
+ @Override
+ public int hashCode(int index, ArrowBufHasher hasher) {
+ if (isNull(index)) {
+ return ArrowBufPointer.NULL_HASH_CODE;
+ }
+ final long start = getStartOffset(index);
+ final long end = getStartOffset(index + 1);
+ return ByteFunctionHelpers.hash(hasher, this.getDataBuffer(), start, end);
+ }
+
+ @Override
+ public <OUT, IN> OUT accept(VectorVisitor<OUT, IN> visitor, IN value) {
+ return visitor.visit(this, value);
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/BaseValueVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/BaseValueVector.java
new file mode 100644
index 000000000..22fe4254f
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/BaseValueVector.java
@@ -0,0 +1,231 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import java.util.Collections;
+import java.util.Iterator;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.ReferenceManager;
+import org.apache.arrow.util.Preconditions;
+import org.apache.arrow.vector.util.DataSizeRoundingUtil;
+import org.apache.arrow.vector.util.TransferPair;
+import org.apache.arrow.vector.util.ValueVectorUtility;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Base class for other Arrow Vector Types. Provides basic functionality around
+ * memory management.
+ */
+public abstract class BaseValueVector implements ValueVector {
+ private static final Logger logger = LoggerFactory.getLogger(BaseValueVector.class);
+
+ public static final String MAX_ALLOCATION_SIZE_PROPERTY = "arrow.vector.max_allocation_bytes";
+ public static final long MAX_ALLOCATION_SIZE = Long.getLong(MAX_ALLOCATION_SIZE_PROPERTY, Long.MAX_VALUE);
+ /*
+ * For all fixed width vectors, the value and validity buffers are sliced from a single buffer.
+ * Similarly, for variable width vectors, the offsets and validity buffers are sliced from a
+ * single buffer. To ensure the single buffer is power-of-2 size, the initial value allocation
+ * should be less than power-of-2. For IntVectors, this comes to 3970*4 (15880) for the data
+ * buffer and 504 bytes for the validity buffer, totalling to 16384 (2^16).
+ */
+ public static final int INITIAL_VALUE_ALLOCATION = 3970;
+
+ protected final BufferAllocator allocator;
+
+ protected BaseValueVector(BufferAllocator allocator) {
+ this.allocator = Preconditions.checkNotNull(allocator, "allocator cannot be null");
+ }
+
+ @Override
+ public abstract String getName();
+
+ /**
+ * Representation of vector suitable for debugging.
+ */
+ @Override
+ public String toString() {
+ return ValueVectorUtility.getToString(this, 0, getValueCount());
+ }
+
+ @Override
+ public void clear() {
+ }
+
+ @Override
+ public void close() {
+ clear();
+ }
+
+ @Override
+ public TransferPair getTransferPair(BufferAllocator allocator) {
+ return getTransferPair(getName(), allocator);
+ }
+
+ @Override
+ public Iterator<ValueVector> iterator() {
+ return Collections.emptyIterator();
+ }
+
+ /**
+ * Checks to ensure that every buffer <code>vv</code> uses
+ * has a positive reference count, throws if this precondition
+ * isn't met. Returns true otherwise.
+ */
+ public static boolean checkBufRefs(final ValueVector vv) {
+ for (final ArrowBuf buffer : vv.getBuffers(false)) {
+ if (buffer.refCnt() <= 0) {
+ throw new IllegalStateException("zero refcount");
+ }
+ }
+
+ return true;
+ }
+
+ @Override
+ public BufferAllocator getAllocator() {
+ return allocator;
+ }
+
+ void compareTypes(BaseValueVector target, String caller) {
+ if (this.getMinorType() != target.getMinorType()) {
+ throw new UnsupportedOperationException(caller + " should have vectors of exact same type");
+ }
+ }
+
+ protected ArrowBuf releaseBuffer(ArrowBuf buffer) {
+ buffer.getReferenceManager().release();
+ buffer = allocator.getEmpty();
+ return buffer;
+ }
+
+ /* number of bytes for the validity buffer for the given valueCount */
+ protected static int getValidityBufferSizeFromCount(final int valueCount) {
+ return DataSizeRoundingUtil.divideBy8Ceil(valueCount);
+ }
+
+ /* round up bytes for the validity buffer for the given valueCount */
+ private static long roundUp8ForValidityBuffer(long valueCount) {
+ return ((valueCount + 63) >> 6) << 3;
+ }
+
+ long computeCombinedBufferSize(int valueCount, int typeWidth) {
+ Preconditions.checkArgument(valueCount >= 0, "valueCount must be >= 0");
+ Preconditions.checkArgument(typeWidth >= 0, "typeWidth must be >= 0");
+
+ // compute size of validity buffer.
+ long bufferSize = roundUp8ForValidityBuffer(valueCount);
+
+ // add the size of the value buffer.
+ if (typeWidth == 0) {
+ // for boolean type, value-buffer and validity-buffer are of same size.
+ bufferSize *= 2;
+ } else {
+ bufferSize += DataSizeRoundingUtil.roundUpTo8Multiple((long) valueCount * typeWidth);
+ }
+ return allocator.getRoundingPolicy().getRoundedSize(bufferSize);
+ }
+
+ /**
+ * Container for primitive vectors (1 for the validity bit-mask and one to hold the values).
+ */
+ class DataAndValidityBuffers {
+ private ArrowBuf dataBuf;
+ private ArrowBuf validityBuf;
+
+ DataAndValidityBuffers(ArrowBuf dataBuf, ArrowBuf validityBuf) {
+ this.dataBuf = dataBuf;
+ this.validityBuf = validityBuf;
+ }
+
+ ArrowBuf getDataBuf() {
+ return dataBuf;
+ }
+
+ ArrowBuf getValidityBuf() {
+ return validityBuf;
+ }
+ }
+
+ DataAndValidityBuffers allocFixedDataAndValidityBufs(int valueCount, int typeWidth) {
+ long bufferSize = computeCombinedBufferSize(valueCount, typeWidth);
+ assert bufferSize <= MAX_ALLOCATION_SIZE;
+
+ long validityBufferSize;
+ long dataBufferSize;
+ if (typeWidth == 0) {
+ validityBufferSize = dataBufferSize = bufferSize / 2;
+ } else {
+ // Due to the rounding policy, the bufferSize could be greater than the
+ // requested size. Utilize the allocated buffer fully.;
+ long actualCount = (long) ((bufferSize * 8.0) / (8 * typeWidth + 1));
+ do {
+ validityBufferSize = roundUp8ForValidityBuffer(actualCount);
+ dataBufferSize = DataSizeRoundingUtil.roundUpTo8Multiple(actualCount * typeWidth);
+ if (validityBufferSize + dataBufferSize <= bufferSize) {
+ break;
+ }
+ --actualCount;
+ }
+ while (true);
+ }
+
+
+ /* allocate combined buffer */
+ ArrowBuf combinedBuffer = allocator.buffer(bufferSize);
+
+ /* slice into requested lengths */
+ ArrowBuf dataBuf = null;
+ ArrowBuf validityBuf = null;
+ long bufferOffset = 0;
+ for (int numBuffers = 0; numBuffers < 2; ++numBuffers) {
+ long len = (numBuffers == 0 ? dataBufferSize : validityBufferSize);
+ ArrowBuf buf = combinedBuffer.slice(bufferOffset, len);
+ buf.getReferenceManager().retain();
+ buf.readerIndex(0);
+ buf.writerIndex(0);
+
+ bufferOffset += len;
+ if (numBuffers == 0) {
+ dataBuf = buf;
+ } else {
+ validityBuf = buf;
+ }
+ }
+ combinedBuffer.getReferenceManager().release();
+ return new DataAndValidityBuffers(dataBuf, validityBuf);
+ }
+
+ public static ArrowBuf transferBuffer(final ArrowBuf srcBuffer, final BufferAllocator targetAllocator) {
+ final ReferenceManager referenceManager = srcBuffer.getReferenceManager();
+ return referenceManager.transferOwnership(srcBuffer, targetAllocator).getTransferredBuffer();
+ }
+
+ @Override
+ public void copyFrom(int fromIndex, int thisIndex, ValueVector from) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public void copyFromSafe(int fromIndex, int thisIndex, ValueVector from) {
+ throw new UnsupportedOperationException();
+ }
+}
+
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java
new file mode 100644
index 000000000..866dd9e21
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java
@@ -0,0 +1,1410 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.apache.arrow.memory.util.LargeMemoryUtil.capAtMaxInt;
+
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.OutOfMemoryException;
+import org.apache.arrow.memory.util.ArrowBufPointer;
+import org.apache.arrow.memory.util.ByteFunctionHelpers;
+import org.apache.arrow.memory.util.CommonUtil;
+import org.apache.arrow.memory.util.hash.ArrowBufHasher;
+import org.apache.arrow.util.Preconditions;
+import org.apache.arrow.vector.compare.VectorVisitor;
+import org.apache.arrow.vector.ipc.message.ArrowFieldNode;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.util.CallBack;
+import org.apache.arrow.vector.util.OversizedAllocationException;
+import org.apache.arrow.vector.util.TransferPair;
+
+/**
+ * BaseVariableWidthVector is a base class providing functionality for strings/bytes types.
+ */
+public abstract class BaseVariableWidthVector extends BaseValueVector
+ implements VariableWidthVector, FieldVector, VectorDefinitionSetter {
+ private static final int DEFAULT_RECORD_BYTE_COUNT = 8;
+ private static final int INITIAL_BYTE_COUNT = INITIAL_VALUE_ALLOCATION * DEFAULT_RECORD_BYTE_COUNT;
+ private int lastValueCapacity;
+ private long lastValueAllocationSizeInBytes;
+
+ /* protected members */
+ public static final int OFFSET_WIDTH = 4; /* 4 byte unsigned int to track offsets */
+ protected static final byte[] emptyByteArray = new byte[]{};
+ protected ArrowBuf validityBuffer;
+ protected ArrowBuf valueBuffer;
+ protected ArrowBuf offsetBuffer;
+ protected int valueCount;
+ protected int lastSet;
+ protected final Field field;
+
+ /**
+ * Constructs a new instance.
+ *
+ * @param field The field materialized by this vector.
+ * @param allocator The allocator to use for creating/resizing buffers
+ */
+ public BaseVariableWidthVector(Field field, final BufferAllocator allocator) {
+ super(allocator);
+ this.field = field;
+ lastValueAllocationSizeInBytes = INITIAL_BYTE_COUNT;
+ // -1 because we require one extra slot for the offset array.
+ lastValueCapacity = INITIAL_VALUE_ALLOCATION - 1;
+ valueCount = 0;
+ lastSet = -1;
+ offsetBuffer = allocator.getEmpty();
+ validityBuffer = allocator.getEmpty();
+ valueBuffer = allocator.getEmpty();
+ }
+
+ @Override
+ public String getName() {
+ return field.getName();
+ }
+
+ /* TODO:
+ * see if getNullCount() can be made faster -- O(1)
+ */
+
+ /* TODO:
+ * Once the entire hierarchy has been refactored, move common functions
+ * like getNullCount(), splitAndTransferValidityBuffer to top level
+ * base class BaseValueVector.
+ *
+ * Along with this, some class members (validityBuffer) can also be
+ * abstracted out to top level base class.
+ *
+ * Right now BaseValueVector is the top level base class for other
+ * vector types in ValueVector hierarchy (non-nullable) and those
+ * vectors have not yet been refactored/removed so moving things to
+ * the top class as of now is not a good idea.
+ */
+
+ /**
+ * Get buffer that manages the validity (NULL or NON-NULL nature) of
+ * elements in the vector. Consider it as a buffer for internal bit vector
+ * data structure.
+ * @return buffer
+ */
+ @Override
+ public ArrowBuf getValidityBuffer() {
+ return validityBuffer;
+ }
+
+ /**
+ * Get the buffer that stores the data for elements in the vector.
+ * @return buffer
+ */
+ @Override
+ public ArrowBuf getDataBuffer() {
+ return valueBuffer;
+ }
+
+ /**
+ * buffer that stores the offsets for elements
+ * in the vector. This operation is not supported for fixed-width vectors.
+ * @return buffer
+ */
+ @Override
+ public ArrowBuf getOffsetBuffer() {
+ return offsetBuffer;
+ }
+
+ /**
+ * Get the memory address of buffer that stores the offsets for elements
+ * in the vector.
+ * @return starting address of the buffer
+ */
+ @Override
+ public long getOffsetBufferAddress() {
+ return offsetBuffer.memoryAddress();
+ }
+
+ /**
+ * Get the memory address of buffer that manages the validity
+ * (NULL or NON-NULL nature) of elements in the vector.
+ * @return starting address of the buffer
+ */
+ @Override
+ public long getValidityBufferAddress() {
+ return validityBuffer.memoryAddress();
+ }
+
+ /**
+ * Get the memory address of buffer that stores the data for elements
+ * in the vector.
+ * @return starting address of the buffer
+ */
+ @Override
+ public long getDataBufferAddress() {
+ return valueBuffer.memoryAddress();
+ }
+
+ /**
+ * Sets the desired value capacity for the vector. This function doesn't
+ * allocate any memory for the vector.
+ * @param valueCount desired number of elements in the vector
+ */
+ @Override
+ public void setInitialCapacity(int valueCount) {
+ final long size = (long) valueCount * DEFAULT_RECORD_BYTE_COUNT;
+ checkDataBufferSize(size);
+ computeAndCheckOffsetsBufferSize(valueCount);
+ lastValueAllocationSizeInBytes = (int) size;
+ lastValueCapacity = valueCount;
+ }
+
+ /**
+ * Sets the desired value capacity for the vector. This function doesn't
+ * allocate any memory for the vector.
+ * @param valueCount desired number of elements in the vector
+ * @param density average number of bytes per variable width element
+ */
+ @Override
+ public void setInitialCapacity(int valueCount, double density) {
+ long size = Math.max((long) (valueCount * density), 1L);
+ checkDataBufferSize(size);
+ computeAndCheckOffsetsBufferSize(valueCount);
+ lastValueAllocationSizeInBytes = (int) size;
+ lastValueCapacity = valueCount;
+ }
+
+ /**
+ * Get the density of this ListVector.
+ * @return density
+ */
+ public double getDensity() {
+ if (valueCount == 0) {
+ return 0.0D;
+ }
+ final int startOffset = offsetBuffer.getInt(0);
+ final int endOffset = offsetBuffer.getInt((long) valueCount * OFFSET_WIDTH);
+ final double totalListSize = endOffset - startOffset;
+ return totalListSize / valueCount;
+ }
+
+ /**
+ * Get the current capacity which does not exceed either validity buffer or offset buffer.
+ * Note: Here the `getValueCapacity` has no relationship with the value buffer.
+ * @return number of elements that vector can hold.
+ */
+ @Override
+ public int getValueCapacity() {
+ final int offsetValueCapacity = Math.max(getOffsetBufferValueCapacity() - 1, 0);
+ return Math.min(offsetValueCapacity, getValidityBufferValueCapacity());
+ }
+
+ private int getValidityBufferValueCapacity() {
+ return capAtMaxInt(validityBuffer.capacity() * 8);
+ }
+
+ private int getOffsetBufferValueCapacity() {
+ return capAtMaxInt(offsetBuffer.capacity() / OFFSET_WIDTH);
+ }
+
+ /**
+ * zero out the vector and the data in associated buffers.
+ */
+ public void zeroVector() {
+ initValidityBuffer();
+ initOffsetBuffer();
+ valueBuffer.setZero(0, valueBuffer.capacity());
+ }
+
+ /* zero out the validity buffer */
+ private void initValidityBuffer() {
+ validityBuffer.setZero(0, validityBuffer.capacity());
+ }
+
+ /* zero out the offset buffer */
+ private void initOffsetBuffer() {
+ offsetBuffer.setZero(0, offsetBuffer.capacity());
+ }
+
+ /**
+ * Reset the vector to initial state. Same as {@link #zeroVector()}.
+ * Note that this method doesn't release any memory.
+ */
+ public void reset() {
+ zeroVector();
+ lastSet = -1;
+ valueCount = 0;
+ }
+
+ /**
+ * Close the vector and release the associated buffers.
+ */
+ @Override
+ public void close() {
+ clear();
+ }
+
+ /**
+ * Same as {@link #close()}.
+ */
+ @Override
+ public void clear() {
+ validityBuffer = releaseBuffer(validityBuffer);
+ valueBuffer = releaseBuffer(valueBuffer);
+ offsetBuffer = releaseBuffer(offsetBuffer);
+ lastSet = -1;
+ valueCount = 0;
+ }
+
+ /**
+ * Get the inner vectors.
+ *
+ * @deprecated This API will be removed as the current implementations no longer support inner vectors.
+ *
+ * @return the inner vectors for this field as defined by the TypeLayout
+ */
+ @Deprecated
+ @Override
+ public List<BufferBacked> getFieldInnerVectors() {
+ throw new UnsupportedOperationException("There are no inner vectors. Use getFieldBuffers");
+ }
+
+ /**
+ * Initialize the children in schema for this Field. This operation is a
+ * NO-OP for scalar types since they don't have any children.
+ * @param children the schema
+ * @throws IllegalArgumentException if children is a non-empty list for scalar types.
+ */
+ @Override
+ public void initializeChildrenFromFields(List<Field> children) {
+ if (!children.isEmpty()) {
+ throw new IllegalArgumentException("primitive type vector can not have children");
+ }
+ }
+
+ /**
+ * Get the inner child vectors.
+ * @return list of child vectors for complex types, empty list for scalar vector types
+ */
+ @Override
+ public List<FieldVector> getChildrenFromFields() {
+ return Collections.emptyList();
+ }
+
+
+ /**
+ * Load the buffers of this vector with provided source buffers.
+ * The caller manages the source buffers and populates them before invoking
+ * this method.
+ * @param fieldNode the fieldNode indicating the value count
+ * @param ownBuffers the buffers for this Field (own buffers only, children not included)
+ */
+ @Override
+ public void loadFieldBuffers(ArrowFieldNode fieldNode, List<ArrowBuf> ownBuffers) {
+ ArrowBuf bitBuffer = ownBuffers.get(0);
+ ArrowBuf offBuffer = ownBuffers.get(1);
+ ArrowBuf dataBuffer = ownBuffers.get(2);
+
+ validityBuffer.getReferenceManager().release();
+ validityBuffer = BitVectorHelper.loadValidityBuffer(fieldNode, bitBuffer, allocator);
+ offsetBuffer.getReferenceManager().release();
+ offsetBuffer = offBuffer.getReferenceManager().retain(offBuffer, allocator);
+ valueBuffer.getReferenceManager().release();
+ valueBuffer = dataBuffer.getReferenceManager().retain(dataBuffer, allocator);
+
+ lastSet = fieldNode.getLength() - 1;
+ valueCount = fieldNode.getLength();
+ }
+
+ /**
+ * Get the buffers belonging to this vector.
+ * @return the inner buffers.
+ */
+ public List<ArrowBuf> getFieldBuffers() {
+ // before flight/IPC, we must bring the vector to a consistent state.
+ // this is because, it is possible that the offset buffers of some trailing values
+ // are not updated. this may cause some data in the data buffer being lost.
+ // for details, please see TestValueVector#testUnloadVariableWidthVector.
+ fillHoles(valueCount);
+
+ List<ArrowBuf> result = new ArrayList<>(3);
+ setReaderAndWriterIndex();
+ result.add(validityBuffer);
+ result.add(offsetBuffer);
+ result.add(valueBuffer);
+
+ return result;
+ }
+
+ /**
+ * Set the reader and writer indexes for the inner buffers.
+ */
+ private void setReaderAndWriterIndex() {
+ validityBuffer.readerIndex(0);
+ offsetBuffer.readerIndex(0);
+ valueBuffer.readerIndex(0);
+ if (valueCount == 0) {
+ validityBuffer.writerIndex(0);
+ offsetBuffer.writerIndex(0);
+ valueBuffer.writerIndex(0);
+ } else {
+ final int lastDataOffset = getStartOffset(valueCount);
+ validityBuffer.writerIndex(getValidityBufferSizeFromCount(valueCount));
+ offsetBuffer.writerIndex((long) (valueCount + 1) * OFFSET_WIDTH);
+ valueBuffer.writerIndex(lastDataOffset);
+ }
+ }
+
+ /**
+ * Same as {@link #allocateNewSafe()}.
+ */
+ @Override
+ public void allocateNew() {
+ allocateNew(lastValueAllocationSizeInBytes, lastValueCapacity);
+ }
+
+ /**
+ * Allocate memory for the vector. We internally use a default value count
+ * of 4096 to allocate memory for at least these many elements in the
+ * vector. See {@link #allocateNew(long, int)} for allocating memory for specific
+ * number of elements in the vector.
+ *
+ * @return false if memory allocation fails, true otherwise.
+ */
+ @Override
+ public boolean allocateNewSafe() {
+ try {
+ allocateNew(lastValueAllocationSizeInBytes, lastValueCapacity);
+ return true;
+ } catch (Exception e) {
+ return false;
+ }
+ }
+
+ /**
+ * Allocate memory for the vector to support storing at least the provided number of
+ * elements in the vector. This method must be called prior to using the ValueVector.
+ *
+ * @param totalBytes desired total memory capacity
+ * @param valueCount the desired number of elements in the vector
+ * @throws org.apache.arrow.memory.OutOfMemoryException if memory allocation fails
+ */
+ @Override
+ public void allocateNew(long totalBytes, int valueCount) {
+ assert totalBytes >= 0;
+
+ checkDataBufferSize(totalBytes);
+ computeAndCheckOffsetsBufferSize(valueCount);
+
+ /* we are doing a new allocation -- release the current buffers */
+ clear();
+
+ try {
+ allocateBytes(totalBytes, valueCount);
+ } catch (Exception e) {
+ clear();
+ throw e;
+ }
+ }
+
+ @Override
+ public void allocateNew(int valueCount) {
+ allocateNew(lastValueAllocationSizeInBytes, valueCount);
+ }
+
+ /* Check if the data buffer size is within bounds. */
+ private void checkDataBufferSize(long size) {
+ if (size > MAX_ALLOCATION_SIZE || size < 0) {
+ throw new OversizedAllocationException("Memory required for vector " +
+ " is (" + size + "), which is more than max allowed (" + MAX_ALLOCATION_SIZE + ")");
+ }
+ }
+
+ /*
+ * Compute the buffer size required for 'valueCount' offsets and validity, and check if it's
+ * within bounds.
+ */
+ private long computeAndCheckOffsetsBufferSize(int valueCount) {
+ /* to track the end offset of last data element in vector, we need
+ * an additional slot in offset buffer.
+ */
+ final long size = computeCombinedBufferSize(valueCount + 1, OFFSET_WIDTH);
+ if (size > MAX_ALLOCATION_SIZE) {
+ throw new OversizedAllocationException("Memory required for vector capacity " +
+ valueCount +
+ " is (" + size + "), which is more than max allowed (" + MAX_ALLOCATION_SIZE + ")");
+ }
+ return size;
+ }
+
+ /* allocate the inner buffers */
+ private void allocateBytes(final long valueBufferSize, final int valueCount) {
+ /* allocate data buffer */
+ long curSize = valueBufferSize;
+ valueBuffer = allocator.buffer(curSize);
+ valueBuffer.readerIndex(0);
+
+ /* allocate offset buffer and validity buffer */
+ DataAndValidityBuffers buffers = allocFixedDataAndValidityBufs(valueCount + 1, OFFSET_WIDTH);
+ offsetBuffer = buffers.getDataBuf();
+ validityBuffer = buffers.getValidityBuf();
+ initOffsetBuffer();
+ initValidityBuffer();
+
+ lastValueCapacity = getValueCapacity();
+ lastValueAllocationSizeInBytes = capAtMaxInt(valueBuffer.capacity());
+ }
+
+ /* allocate offset buffer */
+ private void allocateOffsetBuffer(final long size) {
+ final int curSize = (int) size;
+ offsetBuffer = allocator.buffer(curSize);
+ offsetBuffer.readerIndex(0);
+ initOffsetBuffer();
+ }
+
+ /* allocate validity buffer */
+ private void allocateValidityBuffer(final long size) {
+ final int curSize = (int) size;
+ validityBuffer = allocator.buffer(curSize);
+ validityBuffer.readerIndex(0);
+ initValidityBuffer();
+ }
+
+ /**
+ * Resize the vector to increase the capacity. The internal behavior is to
+ * double the current value capacity.
+ */
+ public void reAlloc() {
+ reallocDataBuffer();
+ reallocValidityAndOffsetBuffers();
+ }
+
+ /**
+ * Reallocate the data buffer. Data Buffer stores the actual data for
+ * VARCHAR or VARBINARY elements in the vector. The behavior is to double
+ * the size of buffer.
+ * @throws OversizedAllocationException if the desired new size is more than
+ * max allowed
+ * @throws OutOfMemoryException if the internal memory allocation fails
+ */
+ public void reallocDataBuffer() {
+ final long currentBufferCapacity = valueBuffer.capacity();
+ long newAllocationSize = currentBufferCapacity * 2;
+ if (newAllocationSize == 0) {
+ if (lastValueAllocationSizeInBytes > 0) {
+ newAllocationSize = lastValueAllocationSizeInBytes;
+ } else {
+ newAllocationSize = INITIAL_BYTE_COUNT * 2L;
+ }
+ }
+ newAllocationSize = CommonUtil.nextPowerOfTwo(newAllocationSize);
+ assert newAllocationSize >= 1;
+
+ checkDataBufferSize(newAllocationSize);
+
+ final ArrowBuf newBuf = allocator.buffer(newAllocationSize);
+ newBuf.setBytes(0, valueBuffer, 0, currentBufferCapacity);
+ valueBuffer.getReferenceManager().release();
+ valueBuffer = newBuf;
+ lastValueAllocationSizeInBytes = valueBuffer.capacity();
+ }
+
+ /**
+ * Reallocate the validity and offset buffers for this vector. Validity
+ * buffer is used to track the NULL or NON-NULL nature of elements in
+ * the vector and offset buffer is used to store the lengths of variable
+ * width elements in the vector.
+ *
+ * <p>Note that data buffer for variable length vectors moves independent
+ * of the companion validity and offset buffers. This is in
+ * contrast to what we have for fixed width vectors.
+ *
+ * <p>So even though we may have setup an initial capacity of 1024
+ * elements in the vector, it is quite possible
+ * that we need to reAlloc() the data buffer when we are setting
+ * the 5th element in the vector simply because previous
+ * variable length elements have exhausted the buffer capacity.
+ * However, we really don't need to reAlloc() validity and
+ * offset buffers until we try to set the 1025th element
+ * This is why we do a separate check for safe methods to
+ * determine which buffer needs reallocation.
+ * @throws OversizedAllocationException if the desired new size is more than
+ * max allowed
+ * @throws OutOfMemoryException if the internal memory allocation fails
+ */
+ public void reallocValidityAndOffsetBuffers() {
+ int targetOffsetCount = capAtMaxInt((offsetBuffer.capacity() / OFFSET_WIDTH) * 2);
+ if (targetOffsetCount == 0) {
+ if (lastValueCapacity > 0) {
+ targetOffsetCount = (lastValueCapacity + 1);
+ } else {
+ targetOffsetCount = 2 * (INITIAL_VALUE_ALLOCATION + 1);
+ }
+ }
+ computeAndCheckOffsetsBufferSize(targetOffsetCount);
+
+ DataAndValidityBuffers buffers = allocFixedDataAndValidityBufs(targetOffsetCount, OFFSET_WIDTH);
+ final ArrowBuf newOffsetBuffer = buffers.getDataBuf();
+ newOffsetBuffer.setBytes(0, offsetBuffer, 0, offsetBuffer.capacity());
+ newOffsetBuffer.setZero(offsetBuffer.capacity(), newOffsetBuffer.capacity() - offsetBuffer.capacity());
+ offsetBuffer.getReferenceManager().release();
+ offsetBuffer = newOffsetBuffer;
+
+ final ArrowBuf newValidityBuffer = buffers.getValidityBuf();
+ newValidityBuffer.setBytes(0, validityBuffer, 0, validityBuffer.capacity());
+ newValidityBuffer.setZero(validityBuffer.capacity(), newValidityBuffer.capacity() - validityBuffer.capacity());
+ validityBuffer.getReferenceManager().release();
+ validityBuffer = newValidityBuffer;
+
+ lastValueCapacity = getValueCapacity();
+ }
+
+ /**
+ * Get the size (number of bytes) of underlying data buffer.
+ * @return number of bytes in the data buffer
+ */
+ @Override
+ public int getByteCapacity() {
+ return capAtMaxInt(valueBuffer.capacity());
+ }
+
+ @Override
+ public int sizeOfValueBuffer() {
+ if (valueCount == 0) {
+ return 0;
+ }
+ return offsetBuffer.getInt((long) valueCount * OFFSET_WIDTH);
+ }
+
+ /**
+ * Get the size (number of bytes) of underlying buffers used by this
+ * vector.
+ * @return size of underlying buffers.
+ */
+ @Override
+ public int getBufferSize() {
+ return getBufferSizeFor(this.valueCount);
+ }
+
+ /**
+ * Get the potential buffer size for a particular number of records.
+ * @param valueCount desired number of elements in the vector
+ * @return estimated size of underlying buffers if the vector holds
+ * a given number of elements
+ */
+ @Override
+ public int getBufferSizeFor(final int valueCount) {
+ if (valueCount == 0) {
+ return 0;
+ }
+
+ final int validityBufferSize = getValidityBufferSizeFromCount(valueCount);
+ final int offsetBufferSize = (valueCount + 1) * OFFSET_WIDTH;
+ /* get the end offset for this valueCount */
+ final int dataBufferSize = offsetBuffer.getInt((long) valueCount * OFFSET_WIDTH);
+ return validityBufferSize + offsetBufferSize + dataBufferSize;
+ }
+
+ /**
+ * Get information about how this field is materialized.
+ * @return the field corresponding to this vector
+ */
+ @Override
+ public Field getField() {
+ return field;
+ }
+
+ /**
+ * Return the underlying buffers associated with this vector. Note that this doesn't
+ * impact the reference counts for this buffer so it only should be used for in-context
+ * access. Also note that this buffer changes regularly thus
+ * external classes shouldn't hold a reference to it (unless they change it).
+ *
+ * @param clear Whether to clear vector before returning; the buffers will still be refcounted
+ * but the returned array will be the only reference to them
+ * @return The underlying {@link ArrowBuf buffers} that is used by this
+ * vector instance.
+ */
+ @Override
+ public ArrowBuf[] getBuffers(boolean clear) {
+ final ArrowBuf[] buffers;
+ setReaderAndWriterIndex();
+ if (getBufferSize() == 0) {
+ buffers = new ArrowBuf[0];
+ } else {
+ buffers = new ArrowBuf[3];
+ buffers[0] = validityBuffer;
+ buffers[1] = offsetBuffer;
+ buffers[2] = valueBuffer;
+ }
+ if (clear) {
+ for (final ArrowBuf buffer : buffers) {
+ buffer.getReferenceManager().retain();
+ }
+ clear();
+ }
+ return buffers;
+ }
+
+ /**
+ * Construct a transfer pair of this vector and another vector of same type.
+ * @param ref name of the target vector
+ * @param allocator allocator for the target vector
+ * @param callBack not used
+ * @return TransferPair
+ */
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator, CallBack callBack) {
+ return getTransferPair(ref, allocator);
+ }
+
+ /**
+ * Construct a transfer pair of this vector and another vector of same type.
+ * @param allocator allocator for the target vector
+ * @return TransferPair
+ */
+ @Override
+ public TransferPair getTransferPair(BufferAllocator allocator) {
+ return getTransferPair(getName(), allocator);
+ }
+
+ /**
+ * Construct a transfer pair of this vector and another vector of same type.
+ * @param ref name of the target vector
+ * @param allocator allocator for the target vector
+ * @return TransferPair
+ */
+ public abstract TransferPair getTransferPair(String ref, BufferAllocator allocator);
+
+ /**
+ * Transfer this vector'data to another vector. The memory associated
+ * with this vector is transferred to the allocator of target vector
+ * for accounting and management purposes.
+ * @param target destination vector for transfer
+ */
+ public void transferTo(BaseVariableWidthVector target) {
+ compareTypes(target, "transferTo");
+ target.clear();
+ target.validityBuffer = transferBuffer(validityBuffer, target.allocator);
+ target.valueBuffer = transferBuffer(valueBuffer, target.allocator);
+ target.offsetBuffer = transferBuffer(offsetBuffer, target.allocator);
+ target.setLastSet(this.lastSet);
+ if (this.valueCount > 0) {
+ target.setValueCount(this.valueCount);
+ }
+ clear();
+ }
+
+ /**
+ * Slice this vector at desired index and length and transfer the
+ * corresponding data to the target vector.
+ * @param startIndex start position of the split in source vector.
+ * @param length length of the split.
+ * @param target destination vector
+ */
+ public void splitAndTransferTo(int startIndex, int length,
+ BaseVariableWidthVector target) {
+ Preconditions.checkArgument(startIndex >= 0 && length >= 0 && startIndex + length <= valueCount,
+ "Invalid parameters startIndex: %s, length: %s for valueCount: %s", startIndex, length, valueCount);
+ compareTypes(target, "splitAndTransferTo");
+ target.clear();
+ splitAndTransferValidityBuffer(startIndex, length, target);
+ splitAndTransferOffsetBuffer(startIndex, length, target);
+ target.setLastSet(length - 1);
+ if (length > 0) {
+ target.setValueCount(length);
+ }
+ }
+
+ /**
+ * Transfer the offsets along with data. Unlike the data buffer, we cannot simply
+ * slice the offset buffer for split and transfer. The reason is that offsets
+ * in the target vector have to be adjusted and made relative to the staring
+ * offset in source vector from the start index of split. This is why, we
+ * need to explicitly allocate the offset buffer and set the adjusted offsets
+ * in the target vector.
+ */
+ private void splitAndTransferOffsetBuffer(int startIndex, int length, BaseVariableWidthVector target) {
+ final int start = offsetBuffer.getInt((long) startIndex * OFFSET_WIDTH);
+ final int end = offsetBuffer.getInt((long) (startIndex + length) * OFFSET_WIDTH);
+ final int dataLength = end - start;
+
+ if (start == 0) {
+ final ArrowBuf slicedOffsetBuffer = offsetBuffer.slice(startIndex * OFFSET_WIDTH, (1 + length) * OFFSET_WIDTH);
+ target.offsetBuffer = transferBuffer(slicedOffsetBuffer, target.allocator);
+ } else {
+ target.allocateOffsetBuffer((long) (length + 1) * OFFSET_WIDTH);
+ for (int i = 0; i < length + 1; i++) {
+ final int relativeSourceOffset = offsetBuffer.getInt((long) (startIndex + i) * OFFSET_WIDTH) - start;
+ target.offsetBuffer.setInt((long) i * OFFSET_WIDTH, relativeSourceOffset);
+ }
+ }
+ final ArrowBuf slicedBuffer = valueBuffer.slice(start, dataLength);
+ target.valueBuffer = transferBuffer(slicedBuffer, target.allocator);
+ }
+
+ /*
+ * Transfer the validity.
+ */
+ private void splitAndTransferValidityBuffer(int startIndex, int length,
+ BaseVariableWidthVector target) {
+ if (length <= 0) {
+ return;
+ }
+
+ final int firstByteSource = BitVectorHelper.byteIndex(startIndex);
+ final int lastByteSource = BitVectorHelper.byteIndex(valueCount - 1);
+ final int byteSizeTarget = getValidityBufferSizeFromCount(length);
+ final int offset = startIndex % 8;
+
+ if (offset == 0) {
+ // slice
+ if (target.validityBuffer != null) {
+ target.validityBuffer.getReferenceManager().release();
+ }
+ final ArrowBuf slicedValidityBuffer = validityBuffer.slice(firstByteSource, byteSizeTarget);
+ target.validityBuffer = transferBuffer(slicedValidityBuffer, target.allocator);
+ return;
+ }
+
+ /* Copy data
+ * When the first bit starts from the middle of a byte (offset != 0),
+ * copy data from src BitVector.
+ * Each byte in the target is composed by a part in i-th byte,
+ * another part in (i+1)-th byte.
+ */
+ target.allocateValidityBuffer(byteSizeTarget);
+
+ for (int i = 0; i < byteSizeTarget - 1; i++) {
+ byte b1 = BitVectorHelper.getBitsFromCurrentByte(this.validityBuffer, firstByteSource + i, offset);
+ byte b2 = BitVectorHelper.getBitsFromNextByte(this.validityBuffer, firstByteSource + i + 1, offset);
+
+ target.validityBuffer.setByte(i, (b1 + b2));
+ }
+ /* Copying the last piece is done in the following manner:
+ * if the source vector has 1 or more bytes remaining, we copy
+ * the last piece as a byte formed by shifting data
+ * from the current byte and the next byte.
+ *
+ * if the source vector has no more bytes remaining
+ * (we are at the last byte), we copy the last piece as a byte
+ * by shifting data from the current byte.
+ */
+ if ((firstByteSource + byteSizeTarget - 1) < lastByteSource) {
+ byte b1 = BitVectorHelper.getBitsFromCurrentByte(this.validityBuffer,
+ firstByteSource + byteSizeTarget - 1, offset);
+ byte b2 = BitVectorHelper.getBitsFromNextByte(this.validityBuffer,
+ firstByteSource + byteSizeTarget, offset);
+
+ target.validityBuffer.setByte(byteSizeTarget - 1, b1 + b2);
+ } else {
+ byte b1 = BitVectorHelper.getBitsFromCurrentByte(this.validityBuffer,
+ firstByteSource + byteSizeTarget - 1, offset);
+ target.validityBuffer.setByte(byteSizeTarget - 1, b1);
+ }
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | common getters and setters |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Get the number of elements that are null in the vector.
+ *
+ * @return the number of null elements.
+ */
+ public int getNullCount() {
+ return BitVectorHelper.getNullCount(validityBuffer, valueCount);
+ }
+
+ /**
+ * Check if the given index is within the current value capacity
+ * of the vector.
+ *
+ * @param index position to check
+ * @return true if index is within the current value capacity
+ */
+ public boolean isSafe(int index) {
+ return index < getValueCapacity();
+ }
+
+ /**
+ * Check if element at given index is null.
+ *
+ * @param index position of element
+ * @return true if element at given index is null
+ */
+ public boolean isNull(int index) {
+ return (isSet(index) == 0);
+ }
+
+ /**
+ * Same as {@link #isNull(int)}.
+ *
+ * @param index position of element
+ * @return 1 if element at given index is not null, 0 otherwise
+ */
+ public int isSet(int index) {
+ final int byteIndex = index >> 3;
+ final byte b = validityBuffer.getByte(byteIndex);
+ final int bitIndex = index & 7;
+ return (b >> bitIndex) & 0x01;
+ }
+
+ /**
+ * Get the value count of vector. This will always be zero unless
+ * setValueCount(int) has been called prior to calling this.
+ *
+ * @return valueCount for the vector
+ */
+ public int getValueCount() {
+ return valueCount;
+ }
+
+ /**
+ * Sets the value count for the vector.
+ *
+ * @param valueCount value count
+ */
+ public void setValueCount(int valueCount) {
+ assert valueCount >= 0;
+ this.valueCount = valueCount;
+ while (valueCount > getValueCapacity()) {
+ reallocValidityAndOffsetBuffers();
+ }
+ fillHoles(valueCount);
+ lastSet = valueCount - 1;
+ setReaderAndWriterIndex();
+ }
+
+ /**
+ * Create holes in the vector upto the given index (exclusive).
+ * Holes will be created from the current last set position in
+ * the vector.
+ *
+ * @param index target index
+ */
+ public void fillEmpties(int index) {
+ handleSafe(index, emptyByteArray.length);
+ fillHoles(index);
+ lastSet = index - 1;
+ }
+
+ /**
+ * Set the index of last non-null element in the vector.
+ * It is important to call this method with appropriate value
+ * before calling {@link #setValueCount(int)}.
+ *
+ * @param value desired index of last non-null element.
+ */
+ public void setLastSet(int value) {
+ lastSet = value;
+ }
+
+ /**
+ * Get the index of last non-null element in the vector.
+ *
+ * @return index of the last non-null element
+ */
+ public int getLastSet() {
+ return lastSet;
+ }
+
+ /**
+ * Get the starting position (offset) in the data stream for a given
+ * element in the vector.
+ *
+ * @param index position of the element in the vector
+ * @return starting offset for the element
+ */
+ public long getStartEnd(int index) {
+ return offsetBuffer.getLong((long) index * OFFSET_WIDTH);
+ }
+
+ /**
+ * Mark the particular position in the vector as non-null.
+ *
+ * @param index position of the element.
+ */
+ @Override
+ public void setIndexDefined(int index) {
+ // We need to check and realloc both validity and offset buffer
+ while (index >= getValueCapacity()) {
+ reallocValidityAndOffsetBuffers();
+ }
+ BitVectorHelper.setBit(validityBuffer, index);
+ }
+
+ /**
+ * Sets the value length for an element.
+ *
+ * @param index position of the element to set
+ * @param length length of the element
+ */
+ public void setValueLengthSafe(int index, int length) {
+ assert index >= 0;
+ handleSafe(index, length);
+ fillHoles(index);
+ final int startOffset = getStartOffset(index);
+ offsetBuffer.setInt((index + 1) * OFFSET_WIDTH, startOffset + length);
+ lastSet = index;
+ }
+
+ /**
+ * Get the variable length element at specified index as Text.
+ *
+ * @param index position of element to get
+ * @return greater than 0 length for non-null element, 0 otherwise
+ */
+ public int getValueLength(int index) {
+ assert index >= 0;
+ if (isSet(index) == 0) {
+ return 0;
+ }
+ final int startOffset = getStartOffset(index);
+ final int dataLength =
+ offsetBuffer.getInt((index + 1) * OFFSET_WIDTH) - startOffset;
+ return dataLength;
+ }
+
+ /**
+ * Set the variable length element at the specified index to the supplied
+ * byte array. This is same as using {@link #set(int, byte[], int, int)}
+ * with start as 0 and length as value.length
+ *
+ * @param index position of the element to set
+ * @param value array of bytes to write
+ */
+ public void set(int index, byte[] value) {
+ assert index >= 0;
+ fillHoles(index);
+ BitVectorHelper.setBit(validityBuffer, index);
+ setBytes(index, value, 0, value.length);
+ lastSet = index;
+ }
+
+ /**
+ * Same as {@link #set(int, byte[])} except that it handles the
+ * case where index and length of new element are beyond the existing
+ * capacity of the vector.
+ *
+ * @param index position of the element to set
+ * @param value array of bytes to write
+ */
+ public void setSafe(int index, byte[] value) {
+ assert index >= 0;
+ handleSafe(index, value.length);
+ fillHoles(index);
+ BitVectorHelper.setBit(validityBuffer, index);
+ setBytes(index, value, 0, value.length);
+ lastSet = index;
+ }
+
+ /**
+ * Set the variable length element at the specified index to the supplied
+ * byte array.
+ *
+ * @param index position of the element to set
+ * @param value array of bytes to write
+ * @param start start index in array of bytes
+ * @param length length of data in array of bytes
+ */
+ public void set(int index, byte[] value, int start, int length) {
+ assert index >= 0;
+ fillHoles(index);
+ BitVectorHelper.setBit(validityBuffer, index);
+ setBytes(index, value, start, length);
+ lastSet = index;
+ }
+
+ /**
+ * Same as {@link #set(int, byte[], int, int)} except that it handles the
+ * case where index and length of new element are beyond the existing
+ * capacity of the vector.
+ *
+ * @param index position of the element to set
+ * @param value array of bytes to write
+ * @param start start index in array of bytes
+ * @param length length of data in array of bytes
+ */
+ public void setSafe(int index, byte[] value, int start, int length) {
+ assert index >= 0;
+ handleSafe(index, length);
+ fillHoles(index);
+ BitVectorHelper.setBit(validityBuffer, index);
+ setBytes(index, value, start, length);
+ lastSet = index;
+ }
+
+ /**
+ * Set the variable length element at the specified index to the
+ * content in supplied ByteBuffer.
+ *
+ * @param index position of the element to set
+ * @param value ByteBuffer with data
+ * @param start start index in ByteBuffer
+ * @param length length of data in ByteBuffer
+ */
+ public void set(int index, ByteBuffer value, int start, int length) {
+ assert index >= 0;
+ fillHoles(index);
+ BitVectorHelper.setBit(validityBuffer, index);
+ final int startOffset = getStartOffset(index);
+ offsetBuffer.setInt((index + 1) * OFFSET_WIDTH, startOffset + length);
+ valueBuffer.setBytes(startOffset, value, start, length);
+ lastSet = index;
+ }
+
+ /**
+ * Same as {@link #set(int, ByteBuffer, int, int)} except that it handles the
+ * case where index and length of new element are beyond the existing
+ * capacity of the vector.
+ *
+ * @param index position of the element to set
+ * @param value ByteBuffer with data
+ * @param start start index in ByteBuffer
+ * @param length length of data in ByteBuffer
+ */
+ public void setSafe(int index, ByteBuffer value, int start, int length) {
+ assert index >= 0;
+ handleSafe(index, length);
+ fillHoles(index);
+ BitVectorHelper.setBit(validityBuffer, index);
+ final int startOffset = getStartOffset(index);
+ offsetBuffer.setInt((index + 1) * OFFSET_WIDTH, startOffset + length);
+ valueBuffer.setBytes(startOffset, value, start, length);
+ lastSet = index;
+ }
+
+ /**
+ * Set the element at the given index to null.
+ *
+ * @param index position of element
+ */
+ public void setNull(int index) {
+ // We need to check and realloc both validity and offset buffer
+ while (index >= getValueCapacity()) {
+ reallocValidityAndOffsetBuffers();
+ }
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+
+ /**
+ * Store the given value at a particular position in the vector. isSet indicates
+ * whether the value is NULL or not.
+ * @param index position of the new value
+ * @param isSet 0 for NULL value, 1 otherwise
+ * @param start start position of data in buffer
+ * @param end end position of data in buffer
+ * @param buffer data buffer containing the variable width element to be stored
+ * in the vector
+ */
+ public void set(int index, int isSet, int start, int end, ArrowBuf buffer) {
+ assert index >= 0;
+ final int dataLength = end - start;
+ fillHoles(index);
+ BitVectorHelper.setValidityBit(validityBuffer, index, isSet);
+ final int startOffset = offsetBuffer.getInt((long) index * OFFSET_WIDTH);
+ offsetBuffer.setInt((index + 1) * OFFSET_WIDTH, startOffset + dataLength);
+ valueBuffer.setBytes(startOffset, buffer, start, dataLength);
+ lastSet = index;
+ }
+
+ /**
+ * Same as {@link #set(int, int, int, int, ArrowBuf)} except that it handles the case
+ * when index is greater than or equal to current value capacity of the
+ * vector.
+ * @param index position of the new value
+ * @param isSet 0 for NULL value, 1 otherwise
+ * @param start start position of data in buffer
+ * @param end end position of data in buffer
+ * @param buffer data buffer containing the variable width element to be stored
+ * in the vector
+ */
+ public void setSafe(int index, int isSet, int start, int end, ArrowBuf buffer) {
+ assert index >= 0;
+ final int dataLength = end - start;
+ handleSafe(index, dataLength);
+ fillHoles(index);
+ BitVectorHelper.setValidityBit(validityBuffer, index, isSet);
+ final int startOffset = offsetBuffer.getInt((long) index * OFFSET_WIDTH);
+ offsetBuffer.setInt((long) (index + 1) * OFFSET_WIDTH, startOffset + dataLength);
+ valueBuffer.setBytes(startOffset, buffer, start, dataLength);
+ lastSet = index;
+ }
+
+ /**
+ * Store the given value at a particular position in the vector. isSet indicates
+ * whether the value is NULL or not.
+ * @param index position of the new value
+ * @param start start position of data in buffer
+ * @param length length of data in buffer
+ * @param buffer data buffer containing the variable width element to be stored
+ * in the vector
+ */
+ public void set(int index, int start, int length, ArrowBuf buffer) {
+ assert index >= 0;
+ fillHoles(index);
+ BitVectorHelper.setBit(validityBuffer, index);
+ final int startOffset = offsetBuffer.getInt((long) index * OFFSET_WIDTH);
+ offsetBuffer.setInt((long) (index + 1) * OFFSET_WIDTH, startOffset + length);
+ final ArrowBuf bb = buffer.slice(start, length);
+ valueBuffer.setBytes(startOffset, bb);
+ lastSet = index;
+ }
+
+ /**
+ * Same as {@link #set(int, int, int, int, ArrowBuf)} except that it handles the case
+ * when index is greater than or equal to current value capacity of the
+ * vector.
+ * @param index position of the new value
+ * @param start start position of data in buffer
+ * @param length length of data in buffer
+ * @param buffer data buffer containing the variable width element to be stored
+ * in the vector
+ */
+ public void setSafe(int index, int start, int length, ArrowBuf buffer) {
+ assert index >= 0;
+ handleSafe(index, length);
+ fillHoles(index);
+ BitVectorHelper.setBit(validityBuffer, index);
+ final int startOffset = offsetBuffer.getInt((long) index * OFFSET_WIDTH);
+ offsetBuffer.setInt((long) (index + 1) * OFFSET_WIDTH, startOffset + length);
+ final ArrowBuf bb = buffer.slice(start, length);
+ valueBuffer.setBytes(startOffset, bb);
+ lastSet = index;
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | helper methods for setters |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ protected final void fillHoles(int index) {
+ for (int i = lastSet + 1; i < index; i++) {
+ setBytes(i, emptyByteArray, 0, emptyByteArray.length);
+ }
+ lastSet = index - 1;
+ }
+
+ protected final void setBytes(int index, byte[] value, int start, int length) {
+ /* end offset of current last element in the vector. this will
+ * be the start offset of new element we are trying to store.
+ */
+ final int startOffset = getStartOffset(index);
+ /* set new end offset */
+ offsetBuffer.setInt((long) (index + 1) * OFFSET_WIDTH, startOffset + length);
+ /* store the var length data in value buffer */
+ valueBuffer.setBytes(startOffset, value, start, length);
+ }
+
+ public final int getStartOffset(int index) {
+ return offsetBuffer.getInt((long) index * OFFSET_WIDTH);
+ }
+
+ protected final void handleSafe(int index, int dataLength) {
+ /*
+ * IMPORTANT:
+ * value buffer for variable length vectors moves independent
+ * of the companion validity and offset buffers. This is in
+ * contrast to what we have for fixed width vectors.
+ *
+ * Here there is no concept of getValueCapacity() in the
+ * data stream. getValueCapacity() is applicable only to validity
+ * and offset buffers.
+ *
+ * So even though we may have setup an initial capacity of 1024
+ * elements in the vector, it is quite possible
+ * that we need to reAlloc() the data buffer when we are setting
+ * the 5th element in the vector simply because previous
+ * variable length elements have exhausted the buffer capacity.
+ * However, we really don't need to reAlloc() validity and
+ * offset buffers until we try to set the 1025th element
+ * This is why we do a separate check for safe methods to
+ * determine which buffer needs reallocation.
+ */
+ while (index >= getValueCapacity()) {
+ reallocValidityAndOffsetBuffers();
+ }
+ final int startOffset = lastSet < 0 ? 0 : getStartOffset(lastSet + 1);
+ while (valueBuffer.capacity() < (startOffset + dataLength)) {
+ reallocDataBuffer();
+ }
+ }
+
+ /**
+ * Method used by Json Writer to read a variable width element from
+ * the variable width vector and write to Json.
+ *
+ * <p>This method should not be used externally.
+ *
+ * @param data buffer storing the variable width vector elements
+ * @param offset buffer storing the offsets of variable width vector elements
+ * @param index position of the element in the vector
+ * @return array of bytes
+ */
+ public static byte[] get(final ArrowBuf data, final ArrowBuf offset, int index) {
+ final int currentStartOffset = offset.getInt((long) index * OFFSET_WIDTH);
+ final int dataLength =
+ offset.getInt((long) (index + 1) * OFFSET_WIDTH) - currentStartOffset;
+ final byte[] result = new byte[dataLength];
+ data.getBytes(currentStartOffset, result, 0, dataLength);
+ return result;
+ }
+
+ /**
+ * Method used by Json Reader to explicitly set the offsets of the variable
+ * width vector data. The method takes care of allocating the memory for
+ * offsets if the caller hasn't done so.
+ *
+ * <p>This method should not be used externally.
+ *
+ * @param buffer ArrowBuf to store offsets for variable width elements
+ * @param allocator memory allocator
+ * @param valueCount number of elements
+ * @param index position of the element
+ * @param value offset of the element
+ * @return buffer holding the offsets
+ */
+ public static ArrowBuf set(ArrowBuf buffer, BufferAllocator allocator,
+ int valueCount, int index, int value) {
+ if (buffer == null) {
+ buffer = allocator.buffer((long) valueCount * OFFSET_WIDTH);
+ }
+ buffer.setInt((long) index * OFFSET_WIDTH, value);
+ if (index == (valueCount - 1)) {
+ buffer.writerIndex((long) valueCount * OFFSET_WIDTH);
+ }
+
+ return buffer;
+ }
+
+ /**
+ * Copy a cell value from a particular index in source vector to a particular
+ * position in this vector.
+ *
+ * @param fromIndex position to copy from in source vector
+ * @param thisIndex position to copy to in this vector
+ * @param from source vector
+ */
+ @Override
+ public void copyFrom(int fromIndex, int thisIndex, ValueVector from) {
+ Preconditions.checkArgument(this.getMinorType() == from.getMinorType());
+ if (from.isNull(fromIndex)) {
+ fillHoles(thisIndex);
+ BitVectorHelper.unsetBit(this.validityBuffer, thisIndex);
+ final int copyStart = offsetBuffer.getInt((long) thisIndex * OFFSET_WIDTH);
+ offsetBuffer.setInt((long) (thisIndex + 1) * OFFSET_WIDTH, copyStart);
+ } else {
+ final int start = from.getOffsetBuffer().getInt((long) fromIndex * OFFSET_WIDTH);
+ final int end = from.getOffsetBuffer().getInt((long) (fromIndex + 1) * OFFSET_WIDTH);
+ final int length = end - start;
+ fillHoles(thisIndex);
+ BitVectorHelper.setBit(this.validityBuffer, thisIndex);
+ final int copyStart = offsetBuffer.getInt((long) thisIndex * OFFSET_WIDTH);
+ from.getDataBuffer().getBytes(start, this.valueBuffer, copyStart, length);
+ offsetBuffer.setInt((long) (thisIndex + 1) * OFFSET_WIDTH, copyStart + length);
+ }
+ lastSet = thisIndex;
+ }
+
+ /**
+ * Same as {@link #copyFrom(int, int, ValueVector)} except that
+ * it handles the case when the capacity of the vector needs to be expanded
+ * before copy.
+ *
+ * @param fromIndex position to copy from in source vector
+ * @param thisIndex position to copy to in this vector
+ * @param from source vector
+ */
+ @Override
+ public void copyFromSafe(int fromIndex, int thisIndex, ValueVector from) {
+ Preconditions.checkArgument(this.getMinorType() == from.getMinorType());
+ if (from.isNull(fromIndex)) {
+ handleSafe(thisIndex, 0);
+ fillHoles(thisIndex);
+ BitVectorHelper.unsetBit(this.validityBuffer, thisIndex);
+ final int copyStart = offsetBuffer.getInt(thisIndex * OFFSET_WIDTH);
+ offsetBuffer.setInt((long) (thisIndex + 1) * OFFSET_WIDTH, copyStart);
+ } else {
+ final int start = from.getOffsetBuffer().getInt((long) fromIndex * OFFSET_WIDTH);
+ final int end = from.getOffsetBuffer().getInt((long) (fromIndex + 1) * OFFSET_WIDTH);
+ final int length = end - start;
+ handleSafe(thisIndex, length);
+ fillHoles(thisIndex);
+ BitVectorHelper.setBit(this.validityBuffer, thisIndex);
+ final int copyStart = offsetBuffer.getInt((long) thisIndex * OFFSET_WIDTH);
+ from.getDataBuffer().getBytes(start, this.valueBuffer, copyStart, length);
+ offsetBuffer.setInt((long) (thisIndex + 1) * OFFSET_WIDTH, copyStart + length);
+ }
+ lastSet = thisIndex;
+ }
+
+ @Override
+ public ArrowBufPointer getDataPointer(int index) {
+ return getDataPointer(index, new ArrowBufPointer());
+ }
+
+ @Override
+ public ArrowBufPointer getDataPointer(int index, ArrowBufPointer reuse) {
+ if (isNull(index)) {
+ reuse.set(null, 0, 0);
+ } else {
+ int offset = offsetBuffer.getInt((long) index * OFFSET_WIDTH);
+ int length = offsetBuffer.getInt((long) (index + 1) * OFFSET_WIDTH) - offset;
+ reuse.set(valueBuffer, offset, length);
+ }
+ return reuse;
+ }
+
+ @Override
+ public int hashCode(int index) {
+ return hashCode(index, null);
+ }
+
+ @Override
+ public int hashCode(int index, ArrowBufHasher hasher) {
+ if (isNull(index)) {
+ return ArrowBufPointer.NULL_HASH_CODE;
+ }
+ final int start = getStartOffset(index);
+ final int end = getStartOffset(index + 1);
+ return ByteFunctionHelpers.hash(hasher, this.getDataBuffer(), start, end);
+ }
+
+ @Override
+ public <OUT, IN> OUT accept(VectorVisitor<OUT, IN> visitor, IN value) {
+ return visitor.visit(this, value);
+ }
+
+ /**
+ * Gets the ending offset of a record, given its index.
+ */
+ public final int getEndOffset(int index) {
+ return offsetBuffer.getInt((long) (index + 1) * OFFSET_WIDTH);
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/BigIntVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/BigIntVector.java
new file mode 100644
index 000000000..c19955b54
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/BigIntVector.java
@@ -0,0 +1,358 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.complex.impl.BigIntReaderImpl;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.holders.BigIntHolder;
+import org.apache.arrow.vector.holders.NullableBigIntHolder;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.TransferPair;
+
+/**
+ * BigIntVector implements a fixed width vector (8 bytes) of
+ * integer values which could be null. A validity buffer (bit vector) is
+ * maintained to track which elements in the vector are null.
+ */
+public final class BigIntVector extends BaseFixedWidthVector implements BaseIntVector {
+ public static final byte TYPE_WIDTH = 8;
+ private final FieldReader reader;
+
+ /**
+ * Instantiate a BigIntVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param allocator allocator for memory management.
+ */
+ public BigIntVector(String name, BufferAllocator allocator) {
+ this(name, FieldType.nullable(MinorType.BIGINT.getType()), allocator);
+ }
+
+ /**
+ * Instantiate a BigIntVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param fieldType type of Field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public BigIntVector(String name, FieldType fieldType, BufferAllocator allocator) {
+ this(new Field(name, fieldType, null), allocator);
+ }
+
+ /**
+ * Instantiate a BigIntVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param field field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public BigIntVector(Field field, BufferAllocator allocator) {
+ super(field, allocator, TYPE_WIDTH);
+ reader = new BigIntReaderImpl(BigIntVector.this);
+ }
+
+ /**
+ * Get a reader that supports reading values from this vector.
+ *
+ * @return Field Reader for this vector
+ */
+ @Override
+ public FieldReader getReader() {
+ return reader;
+ }
+
+ /**
+ * Get minor type for this vector. The vector holds values belonging
+ * to a particular type.
+ * @return {@link org.apache.arrow.vector.types.Types.MinorType}
+ */
+ @Override
+ public MinorType getMinorType() {
+ return MinorType.BIGINT;
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value retrieval methods |
+ | |
+ *----------------------------------------------------------------*/
+
+ /**
+ * Get the element at the given index from the vector.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public long get(int index) throws IllegalStateException {
+ if (NULL_CHECKING_ENABLED && isSet(index) == 0) {
+ throw new IllegalStateException("Value at index is null");
+ }
+ return valueBuffer.getLong((long) index * TYPE_WIDTH);
+ }
+
+ /**
+ * Get the element at the given index from the vector and
+ * sets the state in holder. If element at given index
+ * is null, holder.isSet will be zero.
+ *
+ * @param index position of element
+ */
+ public void get(int index, NullableBigIntHolder holder) {
+ if (isSet(index) == 0) {
+ holder.isSet = 0;
+ return;
+ }
+ holder.isSet = 1;
+ holder.value = valueBuffer.getLong((long) index * TYPE_WIDTH);
+ }
+
+ /**
+ * Same as {@link #get(int)}.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public Long getObject(int index) {
+ if (isSet(index) == 0) {
+ return null;
+ } else {
+ return valueBuffer.getLong((long) index * TYPE_WIDTH);
+ }
+ }
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value setter methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ private void setValue(int index, long value) {
+ valueBuffer.setLong((long) index * TYPE_WIDTH, value);
+ }
+
+ /**
+ * Set the element at the given index to the given value.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void set(int index, long value) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, value);
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ * If the value in holder is not indicated as set, element in the
+ * at the given index will be null.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void set(int index, NullableBigIntHolder holder) throws IllegalArgumentException {
+ if (holder.isSet < 0) {
+ throw new IllegalArgumentException();
+ } else if (holder.isSet > 0) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, holder.value);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void set(int index, BigIntHolder holder) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, holder.value);
+ }
+
+ /**
+ * Same as {@link #set(int, long)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void setSafe(int index, long value) {
+ handleSafe(index);
+ set(index, value);
+ }
+
+ /**
+ * Same as {@link #set(int, NullableBigIntHolder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void setSafe(int index, NullableBigIntHolder holder) throws IllegalArgumentException {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Same as {@link #set(int, BigIntHolder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void setSafe(int index, BigIntHolder holder) {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Store the given value at a particular position in the vector. isSet indicates
+ * whether the value is NULL or not.
+ * @param index position of the new value
+ * @param isSet 0 for NULL value, 1 otherwise
+ * @param value element value
+ */
+ public void set(int index, int isSet, long value) {
+ if (isSet > 0) {
+ set(index, value);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Same as {@link #set(int, int, long)} except that it handles the case
+ * when index is greater than or equal to current value capacity of the
+ * vector.
+ * @param index position of the new value
+ * @param isSet 0 for NULL value, 1 otherwise
+ * @param value element value
+ */
+ public void setSafe(int index, int isSet, long value) {
+ handleSafe(index);
+ set(index, isSet, value);
+ }
+
+ /**
+ * Given a data buffer, get the value stored at a particular position
+ * in the vector.
+ *
+ * <p>This method should not be used externally.
+ *
+ * @param buffer data buffer
+ * @param index position of the element.
+ * @return value stored at the index.
+ */
+ public static long get(final ArrowBuf buffer, final int index) {
+ return buffer.getLong((long) index * TYPE_WIDTH);
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector transfer |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Construct a TransferPair comprising of this and a target vector of
+ * the same type.
+ *
+ * @param ref name of the target vector
+ * @param allocator allocator for the target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
+ return new TransferImpl(ref, allocator);
+ }
+
+ /**
+ * Construct a TransferPair with a desired target vector of the same type.
+ *
+ * @param to target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair makeTransferPair(ValueVector to) {
+ return new TransferImpl((BigIntVector) to);
+ }
+
+ @Override
+ public void setWithPossibleTruncate(int index, long value) {
+ this.setSafe(index, value);
+ }
+
+ @Override
+ public void setUnsafeWithPossibleTruncate(int index, long value) {
+ this.set(index, value);
+ }
+
+ @Override
+ public long getValueAsLong(int index) {
+ return this.get(index);
+ }
+
+ private class TransferImpl implements TransferPair {
+ BigIntVector to;
+
+ public TransferImpl(String ref, BufferAllocator allocator) {
+ to = new BigIntVector(ref, field.getFieldType(), allocator);
+ }
+
+ public TransferImpl(BigIntVector to) {
+ this.to = to;
+ }
+
+ @Override
+ public BigIntVector getTo() {
+ return to;
+ }
+
+ @Override
+ public void transfer() {
+ transferTo(to);
+ }
+
+ @Override
+ public void splitAndTransfer(int startIndex, int length) {
+ splitAndTransferTo(startIndex, length, to);
+ }
+
+ @Override
+ public void copyValueSafe(int fromIndex, int toIndex) {
+ to.copyFromSafe(fromIndex, toIndex, BigIntVector.this);
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/BitVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/BitVector.java
new file mode 100644
index 000000000..3bcfd983e
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/BitVector.java
@@ -0,0 +1,599 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.apache.arrow.memory.util.LargeMemoryUtil.capAtMaxInt;
+import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.util.ArrowBufPointer;
+import org.apache.arrow.memory.util.hash.ArrowBufHasher;
+import org.apache.arrow.util.Preconditions;
+import org.apache.arrow.vector.complex.impl.BitReaderImpl;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.holders.BitHolder;
+import org.apache.arrow.vector.holders.NullableBitHolder;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.OversizedAllocationException;
+import org.apache.arrow.vector.util.TransferPair;
+
+/**
+ * BitVector implements a fixed width (1 bit) vector of
+ * boolean values which could be null. Each value in the vector corresponds
+ * to a single bit in the underlying data stream backing the vector.
+ */
+public final class BitVector extends BaseFixedWidthVector {
+
+ private static final int HASH_CODE_FOR_ZERO = 17;
+
+ private static final int HASH_CODE_FOR_ONE = 19;
+
+ private final FieldReader reader;
+
+ /**
+ * Instantiate a BitVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param allocator allocator for memory management.
+ */
+ public BitVector(String name, BufferAllocator allocator) {
+ this(name, FieldType.nullable(MinorType.BIT.getType()), allocator);
+ }
+
+ /**
+ * Instantiate a BitVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param fieldType type of Field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public BitVector(String name, FieldType fieldType, BufferAllocator allocator) {
+ this(new Field(name, fieldType, null), allocator);
+ }
+
+ /**
+ * Instantiate a BitVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param field the Field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public BitVector(Field field, BufferAllocator allocator) {
+ super(field, allocator, 0);
+ reader = new BitReaderImpl(BitVector.this);
+ }
+
+ /**
+ * Get a reader that supports reading values from this vector.
+ *
+ * @return Field Reader for this vector
+ */
+ @Override
+ public FieldReader getReader() {
+ return reader;
+ }
+
+ /**
+ * Get minor type for this vector. The vector holds values belonging
+ * to a particular type.
+ *
+ * @return {@link org.apache.arrow.vector.types.Types.MinorType}
+ */
+ @Override
+ public MinorType getMinorType() {
+ return MinorType.BIT;
+ }
+
+ /**
+ * Sets the desired value capacity for the vector. This function doesn't
+ * allocate any memory for the vector.
+ *
+ * @param valueCount desired number of elements in the vector
+ */
+ @Override
+ public void setInitialCapacity(int valueCount) {
+ final int size = getValidityBufferSizeFromCount(valueCount);
+ if (size * 2 > MAX_ALLOCATION_SIZE) {
+ throw new OversizedAllocationException("Requested amount of memory is more than max allowed");
+ }
+ lastValueCapacity = valueCount;
+ }
+
+ @Override
+ protected int getValueBufferValueCapacity() {
+ return capAtMaxInt(valueBuffer.capacity() * 8);
+ }
+
+ /**
+ * Get the potential buffer size for a particular number of records.
+ *
+ * @param count desired number of elements in the vector
+ * @return estimated size of underlying buffers if the vector holds
+ * a given number of elements
+ */
+ @Override
+ public int getBufferSizeFor(final int count) {
+ if (count == 0) {
+ return 0;
+ }
+ return 2 * getValidityBufferSizeFromCount(count);
+ }
+
+ /**
+ * Get the size (number of bytes) of underlying buffers used by this vector.
+ *
+ * @return size of underlying buffers.
+ */
+ @Override
+ public int getBufferSize() {
+ return getBufferSizeFor(valueCount);
+ }
+
+ /**
+ * Slice this vector at desired index and length and transfer the
+ * corresponding data to the target vector.
+ *
+ * @param startIndex start position of the split in source vector.
+ * @param length length of the split.
+ * @param target destination vector
+ */
+ public void splitAndTransferTo(int startIndex, int length, BaseFixedWidthVector target) {
+ Preconditions.checkArgument(startIndex >= 0 && length >= 0 && startIndex + length <= valueCount,
+ "Invalid parameters startIndex: %s, length: %s for valueCount: %s", startIndex, length, valueCount);
+ compareTypes(target, "splitAndTransferTo");
+ target.clear();
+ target.validityBuffer = splitAndTransferBuffer(startIndex, length, target,
+ validityBuffer, target.validityBuffer);
+ target.valueBuffer = splitAndTransferBuffer(startIndex, length, target,
+ valueBuffer, target.valueBuffer);
+ target.refreshValueCapacity();
+
+ target.setValueCount(length);
+ }
+
+ private ArrowBuf splitAndTransferBuffer(
+ int startIndex,
+ int length,
+ BaseFixedWidthVector target,
+ ArrowBuf sourceBuffer,
+ ArrowBuf destBuffer) {
+ int firstByteSource = BitVectorHelper.byteIndex(startIndex);
+ int lastByteSource = BitVectorHelper.byteIndex(valueCount - 1);
+ int byteSizeTarget = getValidityBufferSizeFromCount(length);
+ int offset = startIndex % 8;
+
+ if (length > 0) {
+ if (offset == 0) {
+ /* slice */
+ if (destBuffer != null) {
+ destBuffer.getReferenceManager().release();
+ }
+ destBuffer = sourceBuffer.slice(firstByteSource, byteSizeTarget);
+ destBuffer.getReferenceManager().retain(1);
+ } else {
+ /* Copy data
+ * When the first bit starts from the middle of a byte (offset != 0),
+ * copy data from src BitVector.
+ * Each byte in the target is composed by a part in i-th byte,
+ * another part in (i+1)-th byte.
+ */
+ destBuffer = allocator.buffer(byteSizeTarget);
+ destBuffer.readerIndex(0);
+ destBuffer.setZero(0, destBuffer.capacity());
+
+ for (int i = 0; i < byteSizeTarget - 1; i++) {
+ byte b1 = BitVectorHelper.getBitsFromCurrentByte(sourceBuffer, firstByteSource + i, offset);
+ byte b2 = BitVectorHelper.getBitsFromNextByte(sourceBuffer, firstByteSource + i + 1, offset);
+
+ destBuffer.setByte(i, (b1 + b2));
+ }
+
+ /* Copying the last piece is done in the following manner:
+ * if the source vector has 1 or more bytes remaining, we copy
+ * the last piece as a byte formed by shifting data
+ * from the current byte and the next byte.
+ *
+ * if the source vector has no more bytes remaining
+ * (we are at the last byte), we copy the last piece as a byte
+ * by shifting data from the current byte.
+ */
+ if ((firstByteSource + byteSizeTarget - 1) < lastByteSource) {
+ byte b1 = BitVectorHelper.getBitsFromCurrentByte(sourceBuffer,
+ firstByteSource + byteSizeTarget - 1, offset);
+ byte b2 = BitVectorHelper.getBitsFromNextByte(sourceBuffer,
+ firstByteSource + byteSizeTarget, offset);
+
+ destBuffer.setByte(byteSizeTarget - 1, b1 + b2);
+ } else {
+ byte b1 = BitVectorHelper.getBitsFromCurrentByte(sourceBuffer,
+ firstByteSource + byteSizeTarget - 1, offset);
+ destBuffer.setByte(byteSizeTarget - 1, b1);
+ }
+ }
+ }
+
+ return destBuffer;
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value retrieval methods |
+ | |
+ *----------------------------------------------------------------*/
+
+ private int getBit(int index) {
+ final int byteIndex = index >> 3;
+ final byte b = valueBuffer.getByte(byteIndex);
+ final int bitIndex = index & 7;
+ return (b >> bitIndex) & 0x01;
+ }
+
+ /**
+ * Get the element at the given index from the vector.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public int get(int index) throws IllegalStateException {
+ if (NULL_CHECKING_ENABLED && isSet(index) == 0) {
+ throw new IllegalStateException("Value at index is null");
+ }
+ return getBit(index);
+ }
+
+ /**
+ * Get the element at the given index from the vector and
+ * sets the state in holder. If element at given index
+ * is null, holder.isSet will be zero.
+ *
+ * @param index position of element
+ */
+ public void get(int index, NullableBitHolder holder) {
+ if (isSet(index) == 0) {
+ holder.isSet = 0;
+ return;
+ }
+ holder.isSet = 1;
+ holder.value = getBit(index);
+ }
+
+ /**
+ * Same as {@link #get(int)}.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public Boolean getObject(int index) {
+ if (isSet(index) == 0) {
+ return null;
+ } else {
+ return new Boolean(getBit(index) != 0);
+ }
+ }
+
+ /**
+ * Copy a cell value from a particular index in source vector to a particular
+ * position in this vector.
+ *
+ * @param fromIndex position to copy from in source vector
+ * @param thisIndex position to copy to in this vector
+ * @param from source vector
+ */
+ @Override
+ public void copyFrom(int fromIndex, int thisIndex, ValueVector from) {
+ Preconditions.checkArgument(this.getMinorType() == from.getMinorType());
+ boolean fromIsSet = BitVectorHelper.get(from.getValidityBuffer(), fromIndex) != 0;
+ if (fromIsSet) {
+ BitVectorHelper.setBit(validityBuffer, thisIndex);
+ BitVectorHelper.setValidityBit(valueBuffer, thisIndex, ((BitVector) from).getBit(fromIndex));
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, thisIndex);
+ }
+ }
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value setter methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Set the element at the given index to the given value.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void set(int index, int value) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ if (value != 0) {
+ BitVectorHelper.setBit(valueBuffer, index);
+ } else {
+ BitVectorHelper.unsetBit(valueBuffer, index);
+ }
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ * If the value in holder is not indicated as set, element in the
+ * at the given index will be null.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void set(int index, NullableBitHolder holder) throws IllegalArgumentException {
+ if (holder.isSet < 0) {
+ throw new IllegalArgumentException();
+ } else if (holder.isSet > 0) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ if (holder.value != 0) {
+ BitVectorHelper.setBit(valueBuffer, index);
+ } else {
+ BitVectorHelper.unsetBit(valueBuffer, index);
+ }
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void set(int index, BitHolder holder) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ if (holder.value != 0) {
+ BitVectorHelper.setBit(valueBuffer, index);
+ } else {
+ BitVectorHelper.unsetBit(valueBuffer, index);
+ }
+ }
+
+ /**
+ * Same as {@link #set(int, int)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void setSafe(int index, int value) {
+ handleSafe(index);
+ set(index, value);
+ }
+
+ /**
+ * Same as {@link #set(int, NullableBitHolder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void setSafe(int index, NullableBitHolder holder) throws IllegalArgumentException {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Same as {@link #set(int, BitHolder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void setSafe(int index, BitHolder holder) {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Store the given value at a particular position in the vector. isSet indicates
+ * whether the value is NULL or not.
+ *
+ * @param index position of the new value
+ * @param isSet 0 for NULL value, 1 otherwise
+ * @param value element value
+ */
+ public void set(int index, int isSet, int value) {
+ if (isSet > 0) {
+ set(index, value);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Same as {@link #set(int, int, int)} except that it handles the case
+ * when index is greater than or equal to current value capacity of the
+ * vector.
+ *
+ * @param index position of the new value
+ * @param isSet 0 for NULL value, 1 otherwise
+ * @param value element value
+ */
+ public void setSafe(int index, int isSet, int value) {
+ handleSafe(index);
+ set(index, isSet, value);
+ }
+
+ /**
+ * Set the element at the given index to one.
+ *
+ * @param index position of element
+ */
+ public void setToOne(int index) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ BitVectorHelper.setBit(valueBuffer, index);
+ }
+
+ /**
+ * Same as {@link #setToOne(int)} except that it handles the case when
+ * index is greater than or equal to current value capacity of the vector.
+ *
+ * @param index position of the element
+ */
+ public void setSafeToOne(int index) {
+ handleSafe(index);
+ setToOne(index);
+ }
+
+ @Override
+ public ArrowBufPointer getDataPointer(int index) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public ArrowBufPointer getDataPointer(int index, ArrowBufPointer reuse) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public int hashCode(int index) {
+ if (isNull(index)) {
+ return ArrowBufPointer.NULL_HASH_CODE;
+ } else {
+ if (get(index) == 0) {
+ return HASH_CODE_FOR_ZERO;
+ } else {
+ return HASH_CODE_FOR_ONE;
+ }
+ }
+ }
+
+ @Override
+ public int hashCode(int index, ArrowBufHasher hasher) {
+ return hashCode(index);
+ }
+
+ /**
+ * Set count bits to 1 in data starting at firstBitIndex.
+ *
+ * @param firstBitIndex the index of the first bit to set
+ * @param count the number of bits to set
+ */
+ public void setRangeToOne(int firstBitIndex, int count) {
+ int startByteIndex = BitVectorHelper.byteIndex(firstBitIndex);
+ final int lastBitIndex = firstBitIndex + count;
+ final int endByteIndex = BitVectorHelper.byteIndex(lastBitIndex);
+ final int startByteBitIndex = BitVectorHelper.bitIndex(firstBitIndex);
+ final int endBytebitIndex = BitVectorHelper.bitIndex(lastBitIndex);
+ if (count < 8 && startByteIndex == endByteIndex) {
+ // handles the case where we don't have a first and a last byte
+ byte bitMask = 0;
+ for (int i = startByteBitIndex; i < endBytebitIndex; ++i) {
+ bitMask |= (byte) (1L << i);
+ }
+ BitVectorHelper.setBitMaskedByte(validityBuffer, startByteIndex, bitMask);
+ BitVectorHelper.setBitMaskedByte(valueBuffer, startByteIndex, bitMask);
+ } else {
+ // fill in first byte (if it's not full)
+ if (startByteBitIndex != 0) {
+ final byte bitMask = (byte) (0xFFL << startByteBitIndex);
+ BitVectorHelper.setBitMaskedByte(validityBuffer, startByteIndex, bitMask);
+ BitVectorHelper.setBitMaskedByte(valueBuffer, startByteIndex, bitMask);
+ ++startByteIndex;
+ }
+
+ // fill in one full byte at a time
+ validityBuffer.setOne(startByteIndex, endByteIndex - startByteIndex);
+ valueBuffer.setOne(startByteIndex, endByteIndex - startByteIndex);
+
+ // fill in the last byte (if it's not full)
+ if (endBytebitIndex != 0) {
+ final int byteIndex = BitVectorHelper.byteIndex(lastBitIndex - endBytebitIndex);
+ final byte bitMask = (byte) (0xFFL >>> ((8 - endBytebitIndex) & 7));
+ BitVectorHelper.setBitMaskedByte(validityBuffer, byteIndex, bitMask);
+ BitVectorHelper.setBitMaskedByte(valueBuffer, byteIndex, bitMask);
+ }
+ }
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector transfer |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Construct a TransferPair comprising of this and a target vector of
+ * the same type.
+ *
+ * @param ref name of the target vector
+ * @param allocator allocator for the target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
+ return new TransferImpl(ref, allocator);
+ }
+
+ /**
+ * Construct a TransferPair with a desired target vector of the same type.
+ *
+ * @param to target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair makeTransferPair(ValueVector to) {
+ return new TransferImpl((BitVector) to);
+ }
+
+ private class TransferImpl implements TransferPair {
+ BitVector to;
+
+ public TransferImpl(String ref, BufferAllocator allocator) {
+ to = new BitVector(ref, field.getFieldType(), allocator);
+ }
+
+ public TransferImpl(BitVector to) {
+ this.to = to;
+ }
+
+ @Override
+ public BitVector getTo() {
+ return to;
+ }
+
+ @Override
+ public void transfer() {
+ transferTo(to);
+ }
+
+ @Override
+ public void splitAndTransfer(int startIndex, int length) {
+ splitAndTransferTo(startIndex, length, to);
+ }
+
+ @Override
+ public void copyValueSafe(int fromIndex, int toIndex) {
+ to.copyFromSafe(fromIndex, toIndex, BitVector.this);
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/BitVectorHelper.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/BitVectorHelper.java
new file mode 100644
index 000000000..3745c5a75
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/BitVectorHelper.java
@@ -0,0 +1,449 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static io.netty.util.internal.PlatformDependent.getByte;
+import static io.netty.util.internal.PlatformDependent.getInt;
+import static io.netty.util.internal.PlatformDependent.getLong;
+import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BoundsChecking;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.ipc.message.ArrowFieldNode;
+import org.apache.arrow.vector.util.DataSizeRoundingUtil;
+
+import io.netty.util.internal.PlatformDependent;
+
+/**
+ * Helper class for performing generic operations on a bit vector buffer.
+ * External use of this class is not recommended.
+ */
+public class BitVectorHelper {
+
+ private BitVectorHelper() {}
+
+ /**
+ * Get the index of byte corresponding to bit index in validity buffer.
+ */
+ public static long byteIndex(long absoluteBitIndex) {
+ return absoluteBitIndex >> 3;
+ }
+
+ /**
+ * Get the relative index of bit within the byte in validity buffer.
+ */
+ public static int bitIndex(long absoluteBitIndex) {
+ return checkedCastToInt(absoluteBitIndex & 7);
+ }
+
+ /**
+ * Get the index of byte corresponding to bit index in validity buffer.
+ */
+ public static int byteIndex(int absoluteBitIndex) {
+ return absoluteBitIndex >> 3;
+ }
+
+ /**
+ * Get the relative index of bit within the byte in validity buffer.
+ */
+ public static int bitIndex(int absoluteBitIndex) {
+ return absoluteBitIndex & 7;
+ }
+
+ /**
+ * Set the bit at provided index to 1.
+ *
+ * @param validityBuffer validity buffer of the vector
+ * @param index index to be set
+ */
+ public static void setBit(ArrowBuf validityBuffer, long index) {
+ // it can be observed that some logic is duplicate of the logic in setValidityBit.
+ // this is because JIT cannot always remove the if branch in setValidityBit,
+ // so we give a dedicated implementation for setting bits.
+ final long byteIndex = byteIndex(index);
+ final int bitIndex = bitIndex(index);
+
+ // the byte is promoted to an int, because according to Java specification,
+ // bytes will be promoted to ints automatically, upon expression evaluation.
+ // by promoting it manually, we avoid the unnecessary conversions.
+ int currentByte = validityBuffer.getByte(byteIndex);
+ final int bitMask = 1 << bitIndex;
+ currentByte |= bitMask;
+ validityBuffer.setByte(byteIndex, currentByte);
+ }
+
+ /**
+ * Set the bit at provided index to 0.
+ *
+ * @param validityBuffer validity buffer of the vector
+ * @param index index to be set
+ */
+ public static void unsetBit(ArrowBuf validityBuffer, int index) {
+ // it can be observed that some logic is duplicate of the logic in setValidityBit.
+ // this is because JIT cannot always remove the if branch in setValidityBit,
+ // so we give a dedicated implementation for unsetting bits.
+ final int byteIndex = byteIndex(index);
+ final int bitIndex = bitIndex(index);
+
+ // the byte is promoted to an int, because according to Java specification,
+ // bytes will be promoted to ints automatically, upon expression evaluation.
+ // by promoting it manually, we avoid the unnecessary conversions.
+ int currentByte = validityBuffer.getByte(byteIndex);
+ final int bitMask = 1 << bitIndex;
+ currentByte &= ~bitMask;
+ validityBuffer.setByte(byteIndex, currentByte);
+ }
+
+ /**
+ * Set the bit at a given index to provided value (1 or 0).
+ *
+ * @param validityBuffer validity buffer of the vector
+ * @param index index to be set
+ * @param value value to set
+ */
+ public static void setValidityBit(ArrowBuf validityBuffer, int index, int value) {
+ final int byteIndex = byteIndex(index);
+ final int bitIndex = bitIndex(index);
+
+ // the byte is promoted to an int, because according to Java specification,
+ // bytes will be promoted to ints automatically, upon expression evaluation.
+ // by promoting it manually, we avoid the unnecessary conversions.
+ int currentByte = validityBuffer.getByte(byteIndex);
+ final int bitMask = 1 << bitIndex;
+ if (value != 0) {
+ currentByte |= bitMask;
+ } else {
+ currentByte &= ~bitMask;
+ }
+ validityBuffer.setByte(byteIndex, currentByte);
+ }
+
+ /**
+ * Set the bit at a given index to provided value (1 or 0). Internally
+ * takes care of allocating the buffer if the caller didn't do so.
+ *
+ * @param validityBuffer validity buffer of the vector
+ * @param allocator allocator for the buffer
+ * @param valueCount number of values to allocate/set
+ * @param index index to be set
+ * @param value value to set
+ * @return ArrowBuf
+ */
+ public static ArrowBuf setValidityBit(ArrowBuf validityBuffer, BufferAllocator allocator,
+ int valueCount, int index, int value) {
+ if (validityBuffer == null) {
+ validityBuffer = allocator.buffer(getValidityBufferSize(valueCount));
+ }
+ setValidityBit(validityBuffer, index, value);
+ if (index == (valueCount - 1)) {
+ validityBuffer.writerIndex(getValidityBufferSize(valueCount));
+ }
+
+ return validityBuffer;
+ }
+
+ /**
+ * Check if a bit at a given index is set or not.
+ *
+ * @param buffer buffer to check
+ * @param index index of the buffer
+ * @return 1 if bit is set, 0 otherwise.
+ */
+ public static int get(final ArrowBuf buffer, int index) {
+ final int byteIndex = index >> 3;
+ final byte b = buffer.getByte(byteIndex);
+ final int bitIndex = index & 7;
+ return (b >> bitIndex) & 0x01;
+ }
+
+ /**
+ * Compute the size of validity buffer required to manage a given number
+ * of elements in a vector.
+ *
+ * @param valueCount number of elements in the vector
+ * @return buffer size
+ */
+ public static int getValidityBufferSize(int valueCount) {
+ return DataSizeRoundingUtil.divideBy8Ceil(valueCount);
+ }
+
+ /**
+ * Given a validity buffer, find the number of bits that are not set.
+ * This is used to compute the number of null elements in a nullable vector.
+ *
+ * @param validityBuffer validity buffer of the vector
+ * @param valueCount number of values in the vector
+ * @return number of bits not set.
+ */
+ public static int getNullCount(final ArrowBuf validityBuffer, final int valueCount) {
+ if (valueCount == 0) {
+ return 0;
+ }
+ int count = 0;
+ final int sizeInBytes = getValidityBufferSize(valueCount);
+ // If value count is not a multiple of 8, then calculate number of used bits in the last byte
+ final int remainder = valueCount % 8;
+ final int fullBytesCount = remainder == 0 ? sizeInBytes : sizeInBytes - 1;
+
+ int index = 0;
+ while (index + 8 <= fullBytesCount) {
+ long longValue = validityBuffer.getLong(index);
+ count += Long.bitCount(longValue);
+ index += 8;
+ }
+
+ if (index + 4 <= fullBytesCount) {
+ int intValue = validityBuffer.getInt(index);
+ count += Integer.bitCount(intValue);
+ index += 4;
+ }
+
+ while (index < fullBytesCount) {
+ byte byteValue = validityBuffer.getByte(index);
+ count += Integer.bitCount(byteValue & 0xFF);
+ index += 1;
+ }
+
+ // handling with the last bits
+ if (remainder != 0) {
+ byte byteValue = validityBuffer.getByte(sizeInBytes - 1);
+
+ // making the remaining bits all 1s if it is not fully filled
+ byte mask = (byte) (0xFF << remainder);
+ byteValue = (byte) (byteValue | mask);
+ count += Integer.bitCount(byteValue & 0xFF);
+ }
+
+ return 8 * sizeInBytes - count;
+ }
+
+ /**
+ * Tests if all bits in a validity buffer are equal 0 or 1, according to the specified parameter.
+ * @param validityBuffer the validity buffer.
+ * @param valueCount the bit count.
+ * @param checkOneBits if set to true, the method checks if all bits are equal to 1;
+ * otherwise, it checks if all bits are equal to 0.
+ * @return true if all bits are 0 or 1 according to the parameter, and false otherwise.
+ */
+ public static boolean checkAllBitsEqualTo(
+ final ArrowBuf validityBuffer, final int valueCount, final boolean checkOneBits) {
+ if (valueCount == 0) {
+ return true;
+ }
+ final int sizeInBytes = getValidityBufferSize(valueCount);
+
+ // boundary check
+ validityBuffer.checkBytes(0, sizeInBytes);
+
+ // If value count is not a multiple of 8, then calculate number of used bits in the last byte
+ final int remainder = valueCount % 8;
+ final int fullBytesCount = remainder == 0 ? sizeInBytes : sizeInBytes - 1;
+
+ // the integer number to compare against
+ final int intToCompare = checkOneBits ? -1 : 0;
+
+ int index = 0;
+ while (index + 8 <= fullBytesCount) {
+ long longValue = getLong(validityBuffer.memoryAddress() + index);
+ if (longValue != (long) intToCompare) {
+ return false;
+ }
+ index += 8;
+ }
+
+ if (index + 4 <= fullBytesCount) {
+ int intValue = getInt(validityBuffer.memoryAddress() + index);
+ if (intValue != intToCompare) {
+ return false;
+ }
+ index += 4;
+ }
+
+ while (index < fullBytesCount) {
+ byte byteValue = getByte(validityBuffer.memoryAddress() + index);
+ if (byteValue != (byte) intToCompare) {
+ return false;
+ }
+ index += 1;
+ }
+
+ // handling with the last bits
+ if (remainder != 0) {
+ byte byteValue = getByte(validityBuffer.memoryAddress() + sizeInBytes - 1);
+ byte mask = (byte) ((1 << remainder) - 1);
+ byteValue = (byte) (byteValue & mask);
+ if (checkOneBits) {
+ if ((mask & byteValue) != mask) {
+ return false;
+ }
+ } else {
+ if (byteValue != (byte) 0) {
+ return false;
+ }
+ }
+ }
+ return true;
+ }
+
+ /** Returns the byte at index from data right-shifted by offset. */
+ public static byte getBitsFromCurrentByte(final ArrowBuf data, final int index, final int offset) {
+ return (byte) ((data.getByte(index) & 0xFF) >>> offset);
+ }
+
+ /**
+ * Returns the byte at <code>index</code> from left-shifted by (8 - <code>offset</code>).
+ */
+ public static byte getBitsFromNextByte(ArrowBuf data, int index, int offset) {
+ return (byte) ((data.getByte(index) << (8 - offset)));
+ }
+
+ /**
+ * Returns a new buffer if the source validity buffer is either all null or all
+ * not-null, otherwise returns a buffer pointing to the same memory as source.
+ *
+ * @param fieldNode The fieldNode containing the null count
+ * @param sourceValidityBuffer The source validity buffer that will have its
+ * position copied if there is a mix of null and non-null values
+ * @param allocator The allocator to use for creating a new buffer if necessary.
+ * @return A new buffer that is either allocated or points to the same memory as sourceValidityBuffer.
+ */
+ public static ArrowBuf loadValidityBuffer(final ArrowFieldNode fieldNode,
+ final ArrowBuf sourceValidityBuffer,
+ final BufferAllocator allocator) {
+ final int valueCount = fieldNode.getLength();
+ ArrowBuf newBuffer = null;
+ /* either all NULLs or all non-NULLs */
+ if (fieldNode.getNullCount() == 0 || fieldNode.getNullCount() == valueCount) {
+ newBuffer = allocator.buffer(getValidityBufferSize(valueCount));
+ newBuffer.setZero(0, newBuffer.capacity());
+ if (fieldNode.getNullCount() != 0) {
+ /* all NULLs */
+ return newBuffer;
+ }
+ /* all non-NULLs */
+ int fullBytesCount = valueCount / 8;
+ newBuffer.setOne(0, fullBytesCount);
+ int remainder = valueCount % 8;
+ if (remainder > 0) {
+ byte bitMask = (byte) (0xFFL >>> ((8 - remainder) & 7));
+ newBuffer.setByte(fullBytesCount, bitMask);
+ }
+ } else {
+ /* mixed byte pattern -- create another ArrowBuf associated with the
+ * target allocator
+ */
+ newBuffer = sourceValidityBuffer.getReferenceManager().retain(sourceValidityBuffer, allocator);
+ }
+
+ return newBuffer;
+ }
+
+ /**
+ * Set the byte of the given index in the data buffer by applying a bit mask to
+ * the current byte at that index.
+ *
+ * @param data buffer to set
+ * @param byteIndex byteIndex within the buffer
+ * @param bitMask bit mask to be set
+ */
+ static void setBitMaskedByte(ArrowBuf data, int byteIndex, byte bitMask) {
+ byte currentByte = data.getByte(byteIndex);
+ currentByte |= bitMask;
+ data.setByte(byteIndex, currentByte);
+ }
+
+ /**
+ * Concat two validity buffers.
+ * @param input1 the first validity buffer.
+ * @param numBits1 the number of bits in the first validity buffer.
+ * @param input2 the second validity buffer.
+ * @param numBits2 the number of bits in the second validity buffer.
+ * @param output the output validity buffer. It can be the same one as the first input.
+ * The caller must make sure the output buffer has enough capacity.
+ */
+ public static void concatBits(ArrowBuf input1, int numBits1, ArrowBuf input2, int numBits2, ArrowBuf output) {
+ int numBytes1 = DataSizeRoundingUtil.divideBy8Ceil(numBits1);
+ int numBytes2 = DataSizeRoundingUtil.divideBy8Ceil(numBits2);
+ int numBytesOut = DataSizeRoundingUtil.divideBy8Ceil(numBits1 + numBits2);
+
+ if (BoundsChecking.BOUNDS_CHECKING_ENABLED) {
+ output.checkBytes(0, numBytesOut);
+ }
+
+ // copy the first bit set
+ if (input1 != output) {
+ PlatformDependent.copyMemory(input1.memoryAddress(), output.memoryAddress(), numBytes1);
+ }
+
+ if (bitIndex(numBits1) == 0) {
+ // The number of bits for the first bit set is a multiple of 8, so the boundary is at byte boundary.
+ // For this case, we have a shortcut to copy all bytes from the second set after the byte boundary.
+ PlatformDependent.copyMemory(input2.memoryAddress(), output.memoryAddress() + numBytes1, numBytes2);
+ return;
+ }
+
+ // the number of bits to fill a full byte after the first input is processed
+ int numBitsToFill = 8 - bitIndex(numBits1);
+
+ // mask to clear high bits
+ int mask = (1 << (8 - numBitsToFill)) - 1;
+
+ int numFullBytes = numBits2 / 8;
+
+ int prevByte = output.getByte(numBytes1 - 1) & mask;
+ for (int i = 0; i < numFullBytes; i++) {
+ int curByte = input2.getByte(i) & 0xff;
+
+ // first fill the bits to a full byte
+ int byteToFill = (curByte << (8 - numBitsToFill)) & 0xff;
+ output.setByte(numBytes1 + i - 1, byteToFill | prevByte);
+
+ // fill remaining bits in the current byte
+ // note that it is also the previous byte for the next iteration
+ prevByte = curByte >>> numBitsToFill;
+ }
+
+ int lastOutputByte = prevByte;
+
+ // the number of extra bits for the second input, relative to full bytes
+ int numTrailingBits = bitIndex(numBits2);
+
+ if (numTrailingBits == 0) {
+ output.setByte(numBytes1 + numFullBytes - 1, lastOutputByte);
+ return;
+ }
+
+ // process remaining bits from input2
+ int remByte = input2.getByte(numBytes2 - 1) & 0xff;
+
+ int byteToFill = remByte << (8 - numBitsToFill);
+ lastOutputByte |= byteToFill;
+
+ output.setByte(numBytes1 + numFullBytes - 1, lastOutputByte);
+
+ if (numTrailingBits > numBitsToFill) {
+ // clear all bits for the last byte before writing
+ output.setByte(numBytes1 + numFullBytes, 0);
+
+ // some remaining bits cannot be filled in the previous byte
+ int leftByte = remByte >>> numBitsToFill;
+ output.setByte(numBytes1 + numFullBytes, leftByte);
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/BufferBacked.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/BufferBacked.java
new file mode 100644
index 000000000..ccba5b26c
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/BufferBacked.java
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.vector.ipc.message.ArrowFieldNode;
+
+/**
+ * Content is backed by a buffer and can be loaded/unloaded.
+ */
+public interface BufferBacked {
+
+ void load(ArrowFieldNode fieldNode, ArrowBuf data);
+
+ ArrowBuf unLoad();
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/BufferLayout.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/BufferLayout.java
new file mode 100644
index 000000000..09c874e39
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/BufferLayout.java
@@ -0,0 +1,153 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import org.apache.arrow.util.Preconditions;
+
+/**
+ * Metadata class that captures the "type" of an Arrow buffer.
+ * (e.g. data buffers, offset buffers for variable width types and validity
+ * buffers).
+ */
+public class BufferLayout {
+
+ /**
+ * Enumeration of the different logical types a buffer can have.
+ */
+ public enum BufferType {
+ DATA("DATA"),
+ OFFSET("OFFSET"),
+ VALIDITY("VALIDITY"),
+ TYPE("TYPE_ID");
+
+ private final String name;
+
+ BufferType(String name) {
+ this.name = name;
+ }
+
+ public String getName() {
+ return name;
+ }
+ }
+
+ private static final BufferLayout VALIDITY_BUFFER = new BufferLayout(BufferType.VALIDITY, 1);
+ private static final BufferLayout OFFSET_BUFFER = new BufferLayout(BufferType.OFFSET, 32);
+ private static final BufferLayout LARGE_OFFSET_BUFFER = new BufferLayout(BufferType.OFFSET, 64);
+ private static final BufferLayout TYPE_BUFFER = new BufferLayout(BufferType.TYPE, 32);
+ private static final BufferLayout BIT_BUFFER = new BufferLayout(BufferType.DATA, 1);
+ private static final BufferLayout VALUES_256 = new BufferLayout(BufferType.DATA, 256);
+ private static final BufferLayout VALUES_128 = new BufferLayout(BufferType.DATA, 128);
+ private static final BufferLayout VALUES_64 = new BufferLayout(BufferType.DATA, 64);
+ private static final BufferLayout VALUES_32 = new BufferLayout(BufferType.DATA, 32);
+ private static final BufferLayout VALUES_16 = new BufferLayout(BufferType.DATA, 16);
+ private static final BufferLayout VALUES_8 = new BufferLayout(BufferType.DATA, 8);
+
+ public static BufferLayout typeBuffer() {
+ return TYPE_BUFFER;
+ }
+
+ public static BufferLayout offsetBuffer() {
+ return OFFSET_BUFFER;
+ }
+
+ public static BufferLayout largeOffsetBuffer() {
+ return LARGE_OFFSET_BUFFER;
+ }
+
+ /**
+ * Returns a databuffer for the given bitwidth. Only supports powers of two between 8 and 128
+ * inclusive.
+ */
+ public static BufferLayout dataBuffer(int typeBitWidth) {
+ switch (typeBitWidth) {
+ case 8:
+ return VALUES_8;
+ case 16:
+ return VALUES_16;
+ case 32:
+ return VALUES_32;
+ case 64:
+ return VALUES_64;
+ case 128:
+ return VALUES_128;
+ case 256:
+ return VALUES_256;
+ default:
+ throw new IllegalArgumentException("only 8, 16, 32, 64, 128, or 256 bits supported");
+ }
+ }
+
+ public static BufferLayout booleanVector() {
+ return BIT_BUFFER;
+ }
+
+ public static BufferLayout validityVector() {
+ return VALIDITY_BUFFER;
+ }
+
+ public static BufferLayout byteVector() {
+ return dataBuffer(8);
+ }
+
+ private final short typeBitWidth;
+
+ private final BufferType type;
+
+ BufferLayout(BufferType type, int typeBitWidth) {
+ super();
+ this.type = Preconditions.checkNotNull(type);
+ this.typeBitWidth = (short) typeBitWidth;
+ if (typeBitWidth <= 0) {
+ throw new IllegalArgumentException("bitWidth invalid: " + typeBitWidth);
+ }
+ }
+
+ public int getTypeBitWidth() {
+ return typeBitWidth;
+ }
+
+ public BufferType getType() {
+ return type;
+ }
+
+ @Override
+ public String toString() {
+ return String.format("%s(%s)", type, typeBitWidth);
+ }
+
+ @Override
+ public int hashCode() {
+ return 31 * (31 + type.hashCode()) + typeBitWidth;
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ if (this == obj) {
+ return true;
+ }
+ if (obj == null) {
+ return false;
+ }
+ if (getClass() != obj.getClass()) {
+ return false;
+ }
+ BufferLayout other = (BufferLayout) obj;
+ return type.equals(other.type) && (typeBitWidth == other.typeBitWidth);
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/DateDayVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/DateDayVector.java
new file mode 100644
index 000000000..3e8826845
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/DateDayVector.java
@@ -0,0 +1,347 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.complex.impl.DateDayReaderImpl;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.holders.DateDayHolder;
+import org.apache.arrow.vector.holders.NullableDateDayHolder;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.TransferPair;
+
+/**
+ * DateDayVector implements a fixed width (4 bytes) vector of
+ * date values which could be null. A validity buffer (bit vector) is
+ * maintained to track which elements in the vector are null.
+ */
+public final class DateDayVector extends BaseFixedWidthVector {
+
+ public static final byte TYPE_WIDTH = 4;
+ private final FieldReader reader;
+
+ /**
+ * Instantiate a DateDayVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param allocator allocator for memory management.
+ */
+ public DateDayVector(String name, BufferAllocator allocator) {
+ this(name, FieldType.nullable(MinorType.DATEDAY.getType()), allocator);
+ }
+
+ /**
+ * Instantiate a DateDayVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param fieldType type of Field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public DateDayVector(String name, FieldType fieldType, BufferAllocator allocator) {
+ this(new Field(name, fieldType, null), allocator);
+ }
+
+ /**
+ * Instantiate a DateDayVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param field Field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public DateDayVector(Field field, BufferAllocator allocator) {
+ super(field, allocator, TYPE_WIDTH);
+ reader = new DateDayReaderImpl(DateDayVector.this);
+ }
+
+ /**
+ * Get a reader that supports reading values from this vector.
+ *
+ * @return Field Reader for this vector
+ */
+ @Override
+ public FieldReader getReader() {
+ return reader;
+ }
+
+ /**
+ * Get minor type for this vector. The vector holds values belonging
+ * to a particular type.
+ * @return {@link org.apache.arrow.vector.types.Types.MinorType}
+ */
+ @Override
+ public MinorType getMinorType() {
+ return MinorType.DATEDAY;
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value retrieval methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Get the element at the given index from the vector.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public int get(int index) throws IllegalStateException {
+ if (NULL_CHECKING_ENABLED && isSet(index) == 0) {
+ throw new IllegalStateException("Value at index is null");
+ }
+ return valueBuffer.getInt((long) index * TYPE_WIDTH);
+ }
+
+ /**
+ * Get the element at the given index from the vector and
+ * sets the state in holder. If element at given index
+ * is null, holder.isSet will be zero.
+ *
+ * @param index position of element
+ */
+ public void get(int index, NullableDateDayHolder holder) {
+ if (isSet(index) == 0) {
+ holder.isSet = 0;
+ return;
+ }
+ holder.isSet = 1;
+ holder.value = valueBuffer.getInt((long) index * TYPE_WIDTH);
+ }
+
+ /**
+ * Same as {@link #get(int)}.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public Integer getObject(int index) {
+ if (isSet(index) == 0) {
+ return null;
+ } else {
+ return valueBuffer.getInt((long) index * TYPE_WIDTH);
+ }
+ }
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value setter methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ private void setValue(int index, int value) {
+ valueBuffer.setInt((long) index * TYPE_WIDTH, value);
+ }
+
+ /**
+ * Set the element at the given index to the given value.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void set(int index, int value) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, value);
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ * If the value in holder is not indicated as set, element in the
+ * at the given index will be null.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void set(int index, NullableDateDayHolder holder) throws IllegalArgumentException {
+ if (holder.isSet < 0) {
+ throw new IllegalArgumentException();
+ } else if (holder.isSet > 0) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, holder.value);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void set(int index, DateDayHolder holder) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, holder.value);
+ }
+
+ /**
+ * Same as {@link #set(int, int)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void setSafe(int index, int value) {
+ handleSafe(index);
+ set(index, value);
+ }
+
+ /**
+ * Same as {@link #set(int, NullableDateDayHolder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void setSafe(int index, NullableDateDayHolder holder) throws IllegalArgumentException {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Same as {@link #set(int, DateDayHolder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void setSafe(int index, DateDayHolder holder) {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Store the given value at a particular position in the vector. isSet indicates
+ * whether the value is NULL or not.
+ *
+ * @param index position of the new value
+ * @param isSet 0 for NULL value, 1 otherwise
+ * @param value element value
+ */
+ public void set(int index, int isSet, int value) {
+ if (isSet > 0) {
+ set(index, value);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Same as {@link #set(int, int, int)} except that it handles the case
+ * when index is greater than or equal to current value capacity of the
+ * vector.
+ *
+ * @param index position of the new value
+ * @param isSet 0 for NULL value, 1 otherwise
+ * @param value element value
+ */
+ public void setSafe(int index, int isSet, int value) {
+ handleSafe(index);
+ set(index, isSet, value);
+ }
+
+ /**
+ * Given a data buffer, get the value stored at a particular position
+ * in the vector.
+ *
+ * <p>This method should not be used externally.
+ *
+ * @param buffer data buffer
+ * @param index position of the element.
+ * @return value stored at the index.
+ */
+ public static int get(final ArrowBuf buffer, final int index) {
+ return buffer.getInt((long) index * TYPE_WIDTH);
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector transfer |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Construct a TransferPair comprising of this and a target vector of
+ * the same type.
+ *
+ * @param ref name of the target vector
+ * @param allocator allocator for the target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
+ return new TransferImpl(ref, allocator);
+ }
+
+ /**
+ * Construct a TransferPair with a desired target vector of the same type.
+ *
+ * @param to target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair makeTransferPair(ValueVector to) {
+ return new TransferImpl((DateDayVector) to);
+ }
+
+ private class TransferImpl implements TransferPair {
+ DateDayVector to;
+
+ public TransferImpl(String ref, BufferAllocator allocator) {
+ to = new DateDayVector(ref, field.getFieldType(), allocator);
+ }
+
+ public TransferImpl(DateDayVector to) {
+ this.to = to;
+ }
+
+ @Override
+ public DateDayVector getTo() {
+ return to;
+ }
+
+ @Override
+ public void transfer() {
+ transferTo(to);
+ }
+
+ @Override
+ public void splitAndTransfer(int startIndex, int length) {
+ splitAndTransferTo(startIndex, length, to);
+ }
+
+ @Override
+ public void copyValueSafe(int fromIndex, int toIndex) {
+ to.copyFromSafe(fromIndex, toIndex, DateDayVector.this);
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/DateMilliVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/DateMilliVector.java
new file mode 100644
index 000000000..73738d771
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/DateMilliVector.java
@@ -0,0 +1,350 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED;
+
+import java.time.LocalDateTime;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.complex.impl.DateMilliReaderImpl;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.holders.DateMilliHolder;
+import org.apache.arrow.vector.holders.NullableDateMilliHolder;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.DateUtility;
+import org.apache.arrow.vector.util.TransferPair;
+
+/**
+ * DateMilliVector implements a fixed width vector (8 bytes) of
+ * date values which could be null. A validity buffer (bit vector) is
+ * maintained to track which elements in the vector are null.
+ */
+public final class DateMilliVector extends BaseFixedWidthVector {
+ public static final byte TYPE_WIDTH = 8;
+ private final FieldReader reader;
+
+ /**
+ * Instantiate a DateMilliVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param allocator allocator for memory management.
+ */
+ public DateMilliVector(String name, BufferAllocator allocator) {
+ this(name, FieldType.nullable(MinorType.DATEMILLI.getType()), allocator);
+ }
+
+ /**
+ * Instantiate a DateMilliVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param fieldType type of Field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public DateMilliVector(String name, FieldType fieldType, BufferAllocator allocator) {
+ this(new Field(name, fieldType, null), allocator);
+ }
+
+ /**
+ * Instantiate a DateMilliVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param field field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public DateMilliVector(Field field, BufferAllocator allocator) {
+ super(field, allocator, TYPE_WIDTH);
+ reader = new DateMilliReaderImpl(DateMilliVector.this);
+ }
+
+ /**
+ * Get a reader that supports reading values from this vector.
+ *
+ * @return Field Reader for this vector
+ */
+ @Override
+ public FieldReader getReader() {
+ return reader;
+ }
+
+ /**
+ * Get minor type for this vector. The vector holds values belonging
+ * to a particular type.
+ * @return {@link org.apache.arrow.vector.types.Types.MinorType}
+ */
+ @Override
+ public MinorType getMinorType() {
+ return MinorType.DATEMILLI;
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value retrieval methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Get the element at the given index from the vector.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public long get(int index) throws IllegalStateException {
+ if (NULL_CHECKING_ENABLED && isSet(index) == 0) {
+ throw new IllegalStateException("Value at index is null");
+ }
+ return valueBuffer.getLong((long) index * TYPE_WIDTH);
+ }
+
+ /**
+ * Get the element at the given index from the vector and
+ * sets the state in holder. If element at given index
+ * is null, holder.isSet will be zero.
+ *
+ * @param index position of element
+ */
+ public void get(int index, NullableDateMilliHolder holder) {
+ if (isSet(index) == 0) {
+ holder.isSet = 0;
+ return;
+ }
+ holder.isSet = 1;
+ holder.value = valueBuffer.getLong((long) index * TYPE_WIDTH);
+ }
+
+ /**
+ * Same as {@link #get(int)}.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public LocalDateTime getObject(int index) {
+ if (isSet(index) == 0) {
+ return null;
+ } else {
+ final long millis = valueBuffer.getLong((long) index * TYPE_WIDTH);
+ return DateUtility.getLocalDateTimeFromEpochMilli(millis);
+ }
+ }
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value setter methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ private void setValue(int index, long value) {
+ valueBuffer.setLong((long) index * TYPE_WIDTH, value);
+ }
+
+ /**
+ * Set the element at the given index to the given value.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void set(int index, long value) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, value);
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ * If the value in holder is not indicated as set, element in the
+ * at the given index will be null.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void set(int index, NullableDateMilliHolder holder) throws IllegalArgumentException {
+ if (holder.isSet < 0) {
+ throw new IllegalArgumentException();
+ } else if (holder.isSet > 0) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, holder.value);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void set(int index, DateMilliHolder holder) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, holder.value);
+ }
+
+ /**
+ * Same as {@link #set(int, long)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void setSafe(int index, long value) {
+ handleSafe(index);
+ set(index, value);
+ }
+
+ /**
+ * Same as {@link #set(int, NullableDateMilliHolder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void setSafe(int index, NullableDateMilliHolder holder) throws IllegalArgumentException {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Same as {@link #set(int, DateMilliHolder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void setSafe(int index, DateMilliHolder holder) {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Store the given value at a particular position in the vector. isSet indicates
+ * whether the value is NULL or not.
+ *
+ * @param index position of the new value
+ * @param isSet 0 for NULL value, 1 otherwise
+ * @param value element value
+ */
+ public void set(int index, int isSet, long value) {
+ if (isSet > 0) {
+ set(index, value);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Same as {@link #set(int, int, long)} except that it handles the case
+ * when index is greater than or equal to current value capacity of the
+ * vector.
+ *
+ * @param index position of the new value
+ * @param isSet 0 for NULL value, 1 otherwise
+ * @param value element value
+ */
+ public void setSafe(int index, int isSet, long value) {
+ handleSafe(index);
+ set(index, isSet, value);
+ }
+
+ /**
+ * Given a data buffer, get the value stored at a particular position
+ * in the vector.
+ *
+ * <p>This method should not be used externally.
+ *
+ * @param buffer data buffer
+ * @param index position of the element.
+ * @return value stored at the index.
+ */
+ public static long get(final ArrowBuf buffer, final int index) {
+ return buffer.getLong((long) index * TYPE_WIDTH);
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector transfer |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Construct a TransferPair comprising of this and a target vector of
+ * the same type.
+ *
+ * @param ref name of the target vector
+ * @param allocator allocator for the target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
+ return new TransferImpl(ref, allocator);
+ }
+
+ /**
+ * Construct a TransferPair with a desired target vector of the same type.
+ *
+ * @param to target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair makeTransferPair(ValueVector to) {
+ return new TransferImpl((DateMilliVector) to);
+ }
+
+ private class TransferImpl implements TransferPair {
+ DateMilliVector to;
+
+ public TransferImpl(String ref, BufferAllocator allocator) {
+ to = new DateMilliVector(ref, field.getFieldType(), allocator);
+ }
+
+ public TransferImpl(DateMilliVector to) {
+ this.to = to;
+ }
+
+ @Override
+ public DateMilliVector getTo() {
+ return to;
+ }
+
+ @Override
+ public void transfer() {
+ transferTo(to);
+ }
+
+ @Override
+ public void splitAndTransfer(int startIndex, int length) {
+ splitAndTransferTo(startIndex, length, to);
+ }
+
+ @Override
+ public void copyValueSafe(int fromIndex, int toIndex) {
+ to.copyFromSafe(fromIndex, toIndex, DateMilliVector.this);
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/Decimal256Vector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/Decimal256Vector.java
new file mode 100644
index 000000000..c5fef82d0
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/Decimal256Vector.java
@@ -0,0 +1,584 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED;
+
+import java.math.BigDecimal;
+import java.nio.ByteOrder;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.complex.impl.Decimal256ReaderImpl;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.holders.Decimal256Holder;
+import org.apache.arrow.vector.holders.NullableDecimal256Holder;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.DecimalUtility;
+import org.apache.arrow.vector.util.TransferPair;
+
+import io.netty.util.internal.PlatformDependent;
+
+/**
+ * Decimal256Vector implements a fixed width vector (32 bytes) of
+ * decimal values which could be null. A validity buffer (bit vector) is
+ * maintained to track which elements in the vector are null.
+ */
+public final class Decimal256Vector extends BaseFixedWidthVector {
+ public static final byte TYPE_WIDTH = 32;
+ private static final boolean LITTLE_ENDIAN = ByteOrder.nativeOrder() == ByteOrder.LITTLE_ENDIAN;
+ private final FieldReader reader;
+
+ private final int precision;
+ private final int scale;
+
+ /**
+ * Instantiate a Decimal256Vector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param allocator allocator for memory management.
+ */
+ public Decimal256Vector(String name, BufferAllocator allocator,
+ int precision, int scale) {
+ this(name, FieldType.nullable(new ArrowType.Decimal(precision, scale, /*bitWidth=*/TYPE_WIDTH * 8)), allocator);
+ }
+
+ /**
+ * Instantiate a Decimal256Vector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param fieldType type of Field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public Decimal256Vector(String name, FieldType fieldType, BufferAllocator allocator) {
+ this(new Field(name, fieldType, null), allocator);
+ }
+
+ /**
+ * Instantiate a Decimal256Vector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param field field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public Decimal256Vector(Field field, BufferAllocator allocator) {
+ super(field, allocator, TYPE_WIDTH);
+ ArrowType.Decimal arrowType = (ArrowType.Decimal) field.getFieldType().getType();
+ reader = new Decimal256ReaderImpl(Decimal256Vector.this);
+ this.precision = arrowType.getPrecision();
+ this.scale = arrowType.getScale();
+ }
+
+ /**
+ * Get a reader that supports reading values from this vector.
+ *
+ * @return Field Reader for this vector
+ */
+ @Override
+ public FieldReader getReader() {
+ return reader;
+ }
+
+ /**
+ * Get minor type for this vector. The vector holds values belonging
+ * to a particular type.
+ *
+ * @return {@link org.apache.arrow.vector.types.Types.MinorType}
+ */
+ @Override
+ public MinorType getMinorType() {
+ return MinorType.DECIMAL256;
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value retrieval methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Get the element at the given index from the vector.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public ArrowBuf get(int index) throws IllegalStateException {
+ if (NULL_CHECKING_ENABLED && isSet(index) == 0) {
+ throw new IllegalStateException("Value at index is null");
+ }
+ return valueBuffer.slice((long) index * TYPE_WIDTH, TYPE_WIDTH);
+ }
+
+ /**
+ * Get the element at the given index from the vector and
+ * sets the state in holder. If element at given index
+ * is null, holder.isSet will be zero.
+ *
+ * @param index position of element
+ */
+ public void get(int index, NullableDecimal256Holder holder) {
+ if (isSet(index) == 0) {
+ holder.isSet = 0;
+ return;
+ }
+ holder.isSet = 1;
+ holder.buffer = valueBuffer;
+ holder.precision = precision;
+ holder.scale = scale;
+ holder.start = ((long) index) * TYPE_WIDTH;
+ }
+
+ /**
+ * Same as {@link #get(int)}.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public BigDecimal getObject(int index) {
+ if (isSet(index) == 0) {
+ return null;
+ } else {
+ return DecimalUtility.getBigDecimalFromArrowBuf(valueBuffer, index, scale, TYPE_WIDTH);
+ }
+ }
+
+ /**
+ * Return precision for the decimal value.
+ */
+ public int getPrecision() {
+ return precision;
+ }
+
+ /**
+ * Return scale for the decimal value.
+ */
+ public int getScale() {
+ return scale;
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value setter methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Set the element at the given index to the given value.
+ *
+ * @param index position of element
+ * @param buffer ArrowBuf containing decimal value.
+ */
+ public void set(int index, ArrowBuf buffer) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ valueBuffer.setBytes((long) index * TYPE_WIDTH, buffer, 0, TYPE_WIDTH);
+ }
+
+ /**
+ * Set the decimal element at given index to the provided array of bytes.
+ * Decimal256 is now implemented as Native Endian. This API allows the user
+ * to pass a decimal value in the form of byte array in BE byte order.
+ *
+ * <p>Consumers of Arrow code can use this API instead of first swapping
+ * the source bytes (doing a write and read) and then finally writing to
+ * ArrowBuf of decimal vector.
+ *
+ * <p>This method takes care of adding the necessary padding if the length
+ * of byte array is less then 32 (length of decimal type).
+ *
+ * @param index position of element
+ * @param value array of bytes containing decimal in big endian byte order.
+ */
+ public void setBigEndian(int index, byte[] value) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ final int length = value.length;
+
+ // do the bound check.
+ valueBuffer.checkBytes((long) index * TYPE_WIDTH, (long) (index + 1) * TYPE_WIDTH);
+
+ long outAddress = valueBuffer.memoryAddress() + (long) index * TYPE_WIDTH;
+ if (length == 0) {
+ PlatformDependent.setMemory(outAddress, Decimal256Vector.TYPE_WIDTH, (byte) 0);
+ return;
+ }
+ if (LITTLE_ENDIAN) {
+ // swap bytes to convert BE to LE
+ for (int byteIdx = 0; byteIdx < length; ++byteIdx) {
+ PlatformDependent.putByte(outAddress + byteIdx, value[length - 1 - byteIdx]);
+ }
+
+ if (length == TYPE_WIDTH) {
+ return;
+ }
+
+ if (length < TYPE_WIDTH) {
+ // sign extend
+ final byte pad = (byte) (value[0] < 0 ? 0xFF : 0x00);
+ PlatformDependent.setMemory(outAddress + length, Decimal256Vector.TYPE_WIDTH - length, pad);
+ return;
+ }
+ } else {
+ if (length <= TYPE_WIDTH) {
+ // copy data from value to outAddress
+ PlatformDependent.copyMemory(value, 0, outAddress + Decimal256Vector.TYPE_WIDTH - length, length);
+ // sign extend
+ final byte pad = (byte) (value[0] < 0 ? 0xFF : 0x00);
+ PlatformDependent.setMemory(outAddress, Decimal256Vector.TYPE_WIDTH - length, pad);
+ return;
+ }
+ }
+ throw new IllegalArgumentException(
+ "Invalid decimal value length. Valid length in [1 - 32], got " + length);
+ }
+
+ /**
+ * Set the element at the given index to the given value.
+ *
+ * @param index position of element
+ * @param start start index of data in the buffer
+ * @param buffer ArrowBuf containing decimal value.
+ */
+ public void set(int index, long start, ArrowBuf buffer) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ valueBuffer.setBytes((long) index * TYPE_WIDTH, buffer, start, TYPE_WIDTH);
+ }
+
+ /**
+ * Sets the element at given index using the buffer whose size maybe <= 32 bytes.
+ * @param index index to write the decimal to
+ * @param start start of value in the buffer
+ * @param buffer contains the decimal in native endian bytes
+ * @param length length of the value in the buffer
+ */
+ public void setSafe(int index, long start, ArrowBuf buffer, int length) {
+ handleSafe(index);
+ BitVectorHelper.setBit(validityBuffer, index);
+
+ // do the bound checks.
+ buffer.checkBytes(start, start + length);
+ valueBuffer.checkBytes((long) index * TYPE_WIDTH, (long) (index + 1) * TYPE_WIDTH);
+
+ long inAddress = buffer.memoryAddress() + start;
+ long outAddress = valueBuffer.memoryAddress() + (long) index * TYPE_WIDTH;
+ if (LITTLE_ENDIAN) {
+ PlatformDependent.copyMemory(inAddress, outAddress, length);
+ // sign extend
+ if (length < TYPE_WIDTH) {
+ byte msb = PlatformDependent.getByte(inAddress + length - 1);
+ final byte pad = (byte) (msb < 0 ? 0xFF : 0x00);
+ PlatformDependent.setMemory(outAddress + length, Decimal256Vector.TYPE_WIDTH - length, pad);
+ }
+ } else {
+ PlatformDependent.copyMemory(inAddress, outAddress + Decimal256Vector.TYPE_WIDTH - length, length);
+ // sign extend
+ if (length < TYPE_WIDTH) {
+ byte msb = PlatformDependent.getByte(inAddress);
+ final byte pad = (byte) (msb < 0 ? 0xFF : 0x00);
+ PlatformDependent.setMemory(outAddress, Decimal256Vector.TYPE_WIDTH - length, pad);
+ }
+ }
+ }
+
+
+ /**
+ * Sets the element at given index using the buffer whose size maybe <= 32 bytes.
+ * @param index index to write the decimal to
+ * @param start start of value in the buffer
+ * @param buffer contains the decimal in big endian bytes
+ * @param length length of the value in the buffer
+ */
+ public void setBigEndianSafe(int index, long start, ArrowBuf buffer, int length) {
+ handleSafe(index);
+ BitVectorHelper.setBit(validityBuffer, index);
+
+ // do the bound checks.
+ buffer.checkBytes(start, start + length);
+ valueBuffer.checkBytes((long) index * TYPE_WIDTH, (long) (index + 1) * TYPE_WIDTH);
+
+ // not using buffer.getByte() to avoid boundary checks for every byte.
+ long inAddress = buffer.memoryAddress() + start;
+ long outAddress = valueBuffer.memoryAddress() + (long) index * TYPE_WIDTH;
+ if (LITTLE_ENDIAN) {
+ // swap bytes to convert BE to LE
+ for (int byteIdx = 0; byteIdx < length; ++byteIdx) {
+ byte val = PlatformDependent.getByte((inAddress + length - 1) - byteIdx);
+ PlatformDependent.putByte(outAddress + byteIdx, val);
+ }
+ // sign extend
+ if (length < 32) {
+ byte msb = PlatformDependent.getByte(inAddress);
+ final byte pad = (byte) (msb < 0 ? 0xFF : 0x00);
+ PlatformDependent.setMemory(outAddress + length, Decimal256Vector.TYPE_WIDTH - length, pad);
+ }
+ } else {
+ PlatformDependent.copyMemory(inAddress, outAddress + Decimal256Vector.TYPE_WIDTH - length, length);
+ // sign extend
+ if (length < TYPE_WIDTH) {
+ byte msb = PlatformDependent.getByte(inAddress);
+ final byte pad = (byte) (msb < 0 ? 0xFF : 0x00);
+ PlatformDependent.setMemory(outAddress, Decimal256Vector.TYPE_WIDTH - length, pad);
+ }
+ }
+ }
+
+ /**
+ * Set the element at the given index to the given value.
+ *
+ * @param index position of element
+ * @param value BigDecimal containing decimal value.
+ */
+ public void set(int index, BigDecimal value) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ DecimalUtility.checkPrecisionAndScale(value, precision, scale);
+ DecimalUtility.writeBigDecimalToArrowBuf(value, valueBuffer, index, TYPE_WIDTH);
+ }
+
+ /**
+ * Set the element at the given index to the given value.
+ *
+ * @param index position of element
+ * @param value long value.
+ */
+ public void set(int index, long value) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ DecimalUtility.writeLongToArrowBuf(value, valueBuffer, index, TYPE_WIDTH);
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ * If the value in holder is not indicated as set, element in the
+ * at the given index will be null.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void set(int index, NullableDecimal256Holder holder) throws IllegalArgumentException {
+ if (holder.isSet < 0) {
+ throw new IllegalArgumentException();
+ } else if (holder.isSet > 0) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ valueBuffer.setBytes((long) index * TYPE_WIDTH, holder.buffer, holder.start, TYPE_WIDTH);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void set(int index, Decimal256Holder holder) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ valueBuffer.setBytes((long) index * TYPE_WIDTH, holder.buffer, holder.start, TYPE_WIDTH);
+ }
+
+ /**
+ * Same as {@link #set(int, ArrowBuf)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param buffer ArrowBuf containing decimal value.
+ */
+ public void setSafe(int index, ArrowBuf buffer) {
+ handleSafe(index);
+ set(index, buffer);
+ }
+
+ /**
+ * Same as {@link #setBigEndian(int, byte[])} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ */
+ public void setBigEndianSafe(int index, byte[] value) {
+ handleSafe(index);
+ setBigEndian(index, value);
+ }
+
+ /**
+ * Same as {@link #set(int, int, ArrowBuf)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param start start index of data in the buffer
+ * @param buffer ArrowBuf containing decimal value.
+ */
+ public void setSafe(int index, long start, ArrowBuf buffer) {
+ handleSafe(index);
+ set(index, start, buffer);
+ }
+
+ /**
+ * Same as {@link #set(int, BigDecimal)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param value BigDecimal containing decimal value.
+ */
+ public void setSafe(int index, BigDecimal value) {
+ handleSafe(index);
+ set(index, value);
+ }
+
+ /**
+ * Same as {@link #set(int, long)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param value long value.
+ */
+ public void setSafe(int index, long value) {
+ handleSafe(index);
+ set(index, value);
+ }
+
+ /**
+ * Same as {@link #set(int, NullableDecimalHolder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void setSafe(int index, NullableDecimal256Holder holder) throws IllegalArgumentException {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Same as {@link #set(int, Decimal256Holder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void setSafe(int index, Decimal256Holder holder) {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Store the given value at a particular position in the vector. isSet indicates
+ * whether the value is NULL or not.
+ *
+ * @param index position of the new value
+ * @param isSet 0 for NULL value, 1 otherwise
+ * @param start start position of the value in the buffer
+ * @param buffer buffer containing the value to be stored in the vector
+ */
+ public void set(int index, int isSet, long start, ArrowBuf buffer) {
+ if (isSet > 0) {
+ set(index, start, buffer);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Same as {@link #setSafe(int, int, int, ArrowBuf)} except that it handles
+ * the case when the position of new value is beyond the current value
+ * capacity of the vector.
+ *
+ * @param index position of the new value
+ * @param isSet 0 for NULL value, 1 otherwise
+ * @param start start position of the value in the buffer
+ * @param buffer buffer containing the value to be stored in the vector
+ */
+ public void setSafe(int index, int isSet, long start, ArrowBuf buffer) {
+ handleSafe(index);
+ set(index, isSet, start, buffer);
+ }
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector transfer |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Construct a TransferPair comprising of this and a target vector of
+ * the same type.
+ *
+ * @param ref name of the target vector
+ * @param allocator allocator for the target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
+ return new TransferImpl(ref, allocator);
+ }
+
+ /**
+ * Construct a TransferPair with a desired target vector of the same type.
+ *
+ * @param to target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair makeTransferPair(ValueVector to) {
+ return new TransferImpl((Decimal256Vector) to);
+ }
+
+ private class TransferImpl implements TransferPair {
+ Decimal256Vector to;
+
+ public TransferImpl(String ref, BufferAllocator allocator) {
+ to = new Decimal256Vector(ref, allocator, Decimal256Vector.this.precision,
+ Decimal256Vector.this.scale);
+ }
+
+ public TransferImpl(Decimal256Vector to) {
+ this.to = to;
+ }
+
+ @Override
+ public Decimal256Vector getTo() {
+ return to;
+ }
+
+ @Override
+ public void transfer() {
+ transferTo(to);
+ }
+
+ @Override
+ public void splitAndTransfer(int startIndex, int length) {
+ splitAndTransferTo(startIndex, length, to);
+ }
+
+ @Override
+ public void copyValueSafe(int fromIndex, int toIndex) {
+ to.copyFromSafe(fromIndex, toIndex, Decimal256Vector.this);
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/DecimalVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/DecimalVector.java
new file mode 100644
index 000000000..f988f4f94
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/DecimalVector.java
@@ -0,0 +1,584 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED;
+
+import java.math.BigDecimal;
+import java.nio.ByteOrder;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.complex.impl.DecimalReaderImpl;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.holders.DecimalHolder;
+import org.apache.arrow.vector.holders.NullableDecimalHolder;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.DecimalUtility;
+import org.apache.arrow.vector.util.TransferPair;
+
+import io.netty.util.internal.PlatformDependent;
+
+/**
+ * DecimalVector implements a fixed width vector (16 bytes) of
+ * decimal values which could be null. A validity buffer (bit vector) is
+ * maintained to track which elements in the vector are null.
+ */
+public final class DecimalVector extends BaseFixedWidthVector {
+ public static final byte TYPE_WIDTH = 16;
+ private static final boolean LITTLE_ENDIAN = ByteOrder.nativeOrder() == ByteOrder.LITTLE_ENDIAN;
+ private final FieldReader reader;
+
+ private final int precision;
+ private final int scale;
+
+ /**
+ * Instantiate a DecimalVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param allocator allocator for memory management.
+ */
+ public DecimalVector(String name, BufferAllocator allocator,
+ int precision, int scale) {
+ this(name, FieldType.nullable(new ArrowType.Decimal(precision, scale, TYPE_WIDTH * 8)), allocator);
+ }
+
+ /**
+ * Instantiate a DecimalVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param fieldType type of Field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public DecimalVector(String name, FieldType fieldType, BufferAllocator allocator) {
+ this(new Field(name, fieldType, null), allocator);
+ }
+
+ /**
+ * Instantiate a DecimalVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param field field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public DecimalVector(Field field, BufferAllocator allocator) {
+ super(field, allocator, TYPE_WIDTH);
+ ArrowType.Decimal arrowType = (ArrowType.Decimal) field.getFieldType().getType();
+ reader = new DecimalReaderImpl(DecimalVector.this);
+ this.precision = arrowType.getPrecision();
+ this.scale = arrowType.getScale();
+ }
+
+ /**
+ * Get a reader that supports reading values from this vector.
+ *
+ * @return Field Reader for this vector
+ */
+ @Override
+ public FieldReader getReader() {
+ return reader;
+ }
+
+ /**
+ * Get minor type for this vector. The vector holds values belonging
+ * to a particular type.
+ *
+ * @return {@link org.apache.arrow.vector.types.Types.MinorType}
+ */
+ @Override
+ public MinorType getMinorType() {
+ return MinorType.DECIMAL;
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value retrieval methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Get the element at the given index from the vector.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public ArrowBuf get(int index) throws IllegalStateException {
+ if (NULL_CHECKING_ENABLED && isSet(index) == 0) {
+ throw new IllegalStateException("Value at index is null");
+ }
+ return valueBuffer.slice((long) index * TYPE_WIDTH, TYPE_WIDTH);
+ }
+
+ /**
+ * Get the element at the given index from the vector and
+ * sets the state in holder. If element at given index
+ * is null, holder.isSet will be zero.
+ *
+ * @param index position of element
+ */
+ public void get(int index, NullableDecimalHolder holder) {
+ if (isSet(index) == 0) {
+ holder.isSet = 0;
+ return;
+ }
+ holder.isSet = 1;
+ holder.buffer = valueBuffer;
+ holder.precision = precision;
+ holder.scale = scale;
+ holder.start = (long) index * TYPE_WIDTH;
+ }
+
+ /**
+ * Same as {@link #get(int)}.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public BigDecimal getObject(int index) {
+ if (isSet(index) == 0) {
+ return null;
+ } else {
+ return DecimalUtility.getBigDecimalFromArrowBuf(valueBuffer, index, scale, TYPE_WIDTH);
+ }
+ }
+
+ /**
+ * Return precision for the decimal value.
+ */
+ public int getPrecision() {
+ return precision;
+ }
+
+ /**
+ * Return scale for the decimal value.
+ */
+ public int getScale() {
+ return scale;
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value setter methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Set the element at the given index to the given value.
+ *
+ * @param index position of element
+ * @param buffer ArrowBuf containing decimal value.
+ */
+ public void set(int index, ArrowBuf buffer) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ valueBuffer.setBytes((long) index * TYPE_WIDTH, buffer, 0, TYPE_WIDTH);
+ }
+
+ /**
+ * Set the decimal element at given index to the provided array of bytes.
+ * Decimal is now implemented as Native Endian. This API allows the user
+ * to pass a decimal value in the form of byte array in BE byte order.
+ *
+ * <p>Consumers of Arrow code can use this API instead of first swapping
+ * the source bytes (doing a write and read) and then finally writing to
+ * ArrowBuf of decimal vector.
+ *
+ * <p>This method takes care of adding the necessary padding if the length
+ * of byte array is less then 16 (length of decimal type).
+ *
+ * @param index position of element
+ * @param value array of bytes containing decimal in big endian byte order.
+ */
+ public void setBigEndian(int index, byte[] value) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ final int length = value.length;
+
+ // do the bound check.
+ valueBuffer.checkBytes((long) index * TYPE_WIDTH, (long) (index + 1) * TYPE_WIDTH);
+
+ long outAddress = valueBuffer.memoryAddress() + (long) index * TYPE_WIDTH;
+ if (length == 0) {
+ PlatformDependent.setMemory(outAddress, DecimalVector.TYPE_WIDTH, (byte) 0);
+ return;
+ }
+ if (LITTLE_ENDIAN) {
+ // swap bytes to convert BE to LE
+ for (int byteIdx = 0; byteIdx < length; ++byteIdx) {
+ PlatformDependent.putByte(outAddress + byteIdx, value[length - 1 - byteIdx]);
+ }
+
+ if (length == TYPE_WIDTH) {
+ return;
+ }
+
+ if (length < TYPE_WIDTH) {
+ // sign extend
+ final byte pad = (byte) (value[0] < 0 ? 0xFF : 0x00);
+ PlatformDependent.setMemory(outAddress + length, DecimalVector.TYPE_WIDTH - length, pad);
+ return;
+ }
+ } else {
+ if (length <= TYPE_WIDTH) {
+ // copy data from value to outAddress
+ PlatformDependent.copyMemory(value, 0, outAddress + DecimalVector.TYPE_WIDTH - length, length);
+ // sign extend
+ final byte pad = (byte) (value[0] < 0 ? 0xFF : 0x00);
+ PlatformDependent.setMemory(outAddress, DecimalVector.TYPE_WIDTH - length, pad);
+ return;
+ }
+ }
+ throw new IllegalArgumentException(
+ "Invalid decimal value length. Valid length in [1 - 16], got " + length);
+ }
+
+ /**
+ * Set the element at the given index to the given value.
+ *
+ * @param index position of element
+ * @param start start index of data in the buffer
+ * @param buffer ArrowBuf containing decimal value.
+ */
+ public void set(int index, long start, ArrowBuf buffer) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ valueBuffer.setBytes((long) index * TYPE_WIDTH, buffer, start, TYPE_WIDTH);
+ }
+
+ /**
+ * Sets the element at given index using the buffer whose size maybe <= 16 bytes.
+ * @param index index to write the decimal to
+ * @param start start of value in the buffer
+ * @param buffer contains the decimal in native endian bytes
+ * @param length length of the value in the buffer
+ */
+ public void setSafe(int index, long start, ArrowBuf buffer, int length) {
+ handleSafe(index);
+ BitVectorHelper.setBit(validityBuffer, index);
+
+ // do the bound checks.
+ buffer.checkBytes(start, start + length);
+ valueBuffer.checkBytes((long) index * TYPE_WIDTH, (long) (index + 1) * TYPE_WIDTH);
+
+ long inAddress = buffer.memoryAddress() + start;
+ long outAddress = valueBuffer.memoryAddress() + (long) index * TYPE_WIDTH;
+ if (LITTLE_ENDIAN) {
+ PlatformDependent.copyMemory(inAddress, outAddress, length);
+ // sign extend
+ if (length < TYPE_WIDTH) {
+ byte msb = PlatformDependent.getByte(inAddress + length - 1);
+ final byte pad = (byte) (msb < 0 ? 0xFF : 0x00);
+ PlatformDependent.setMemory(outAddress + length, DecimalVector.TYPE_WIDTH - length, pad);
+ }
+ } else {
+ PlatformDependent.copyMemory(inAddress, outAddress + DecimalVector.TYPE_WIDTH - length, length);
+ // sign extend
+ if (length < TYPE_WIDTH) {
+ byte msb = PlatformDependent.getByte(inAddress);
+ final byte pad = (byte) (msb < 0 ? 0xFF : 0x00);
+ PlatformDependent.setMemory(outAddress, DecimalVector.TYPE_WIDTH - length, pad);
+ }
+ }
+ }
+
+
+ /**
+ * Sets the element at given index using the buffer whose size maybe <= 16 bytes.
+ * @param index index to write the decimal to
+ * @param start start of value in the buffer
+ * @param buffer contains the decimal in big endian bytes
+ * @param length length of the value in the buffer
+ */
+ public void setBigEndianSafe(int index, long start, ArrowBuf buffer, int length) {
+ handleSafe(index);
+ BitVectorHelper.setBit(validityBuffer, index);
+
+ // do the bound checks.
+ buffer.checkBytes(start, start + length);
+ valueBuffer.checkBytes((long) index * TYPE_WIDTH, (long) (index + 1) * TYPE_WIDTH);
+
+ // not using buffer.getByte() to avoid boundary checks for every byte.
+ long inAddress = buffer.memoryAddress() + start;
+ long outAddress = valueBuffer.memoryAddress() + (long) index * TYPE_WIDTH;
+ if (LITTLE_ENDIAN) {
+ // swap bytes to convert BE to LE
+ for (int byteIdx = 0; byteIdx < length; ++byteIdx) {
+ byte val = PlatformDependent.getByte((inAddress + length - 1) - byteIdx);
+ PlatformDependent.putByte(outAddress + byteIdx, val);
+ }
+ // sign extend
+ if (length < TYPE_WIDTH) {
+ byte msb = PlatformDependent.getByte(inAddress);
+ final byte pad = (byte) (msb < 0 ? 0xFF : 0x00);
+ PlatformDependent.setMemory(outAddress + length, DecimalVector.TYPE_WIDTH - length, pad);
+ }
+ } else {
+ PlatformDependent.copyMemory(inAddress, outAddress + DecimalVector.TYPE_WIDTH - length, length);
+ // sign extend
+ if (length < TYPE_WIDTH) {
+ byte msb = PlatformDependent.getByte(inAddress);
+ final byte pad = (byte) (msb < 0 ? 0xFF : 0x00);
+ PlatformDependent.setMemory(outAddress, DecimalVector.TYPE_WIDTH - length, pad);
+ }
+ }
+ }
+
+ /**
+ * Set the element at the given index to the given value.
+ *
+ * @param index position of element
+ * @param value BigDecimal containing decimal value.
+ */
+ public void set(int index, BigDecimal value) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ DecimalUtility.checkPrecisionAndScale(value, precision, scale);
+ DecimalUtility.writeBigDecimalToArrowBuf(value, valueBuffer, index, TYPE_WIDTH);
+ }
+
+ /**
+ * Set the element at the given index to the given value.
+ *
+ * @param index position of element
+ * @param value long value.
+ */
+ public void set(int index, long value) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ DecimalUtility.writeLongToArrowBuf(value, valueBuffer, index, TYPE_WIDTH);
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ * If the value in holder is not indicated as set, element in the
+ * at the given index will be null.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void set(int index, NullableDecimalHolder holder) throws IllegalArgumentException {
+ if (holder.isSet < 0) {
+ throw new IllegalArgumentException();
+ } else if (holder.isSet > 0) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ valueBuffer.setBytes((long) index * TYPE_WIDTH, holder.buffer, holder.start, TYPE_WIDTH);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void set(int index, DecimalHolder holder) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ valueBuffer.setBytes((long) index * TYPE_WIDTH, holder.buffer, holder.start, TYPE_WIDTH);
+ }
+
+ /**
+ * Same as {@link #set(int, ArrowBuf)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param buffer ArrowBuf containing decimal value.
+ */
+ public void setSafe(int index, ArrowBuf buffer) {
+ handleSafe(index);
+ set(index, buffer);
+ }
+
+ /**
+ * Same as {@link #setBigEndian(int, byte[])} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ */
+ public void setBigEndianSafe(int index, byte[] value) {
+ handleSafe(index);
+ setBigEndian(index, value);
+ }
+
+ /**
+ * Same as {@link #set(int, long, ArrowBuf)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param start start index of data in the buffer
+ * @param buffer ArrowBuf containing decimal value.
+ */
+ public void setSafe(int index, long start, ArrowBuf buffer) {
+ handleSafe(index);
+ set(index, start, buffer);
+ }
+
+ /**
+ * Same as {@link #set(int, BigDecimal)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param value BigDecimal containing decimal value.
+ */
+ public void setSafe(int index, BigDecimal value) {
+ handleSafe(index);
+ set(index, value);
+ }
+
+ /**
+ * Same as {@link #set(int, long)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param value long value.
+ */
+ public void setSafe(int index, long value) {
+ handleSafe(index);
+ set(index, value);
+ }
+
+ /**
+ * Same as {@link #set(int, NullableDecimalHolder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void setSafe(int index, NullableDecimalHolder holder) throws IllegalArgumentException {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Same as {@link #set(int, DecimalHolder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void setSafe(int index, DecimalHolder holder) {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Store the given value at a particular position in the vector. isSet indicates
+ * whether the value is NULL or not.
+ *
+ * @param index position of the new value
+ * @param isSet 0 for NULL value, 1 otherwise
+ * @param start start position of the value in the buffer
+ * @param buffer buffer containing the value to be stored in the vector
+ */
+ public void set(int index, int isSet, long start, ArrowBuf buffer) {
+ if (isSet > 0) {
+ set(index, start, buffer);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Same as {@link #set(int, int, long, ArrowBuf)} except that it handles
+ * the case when the position of new value is beyond the current value
+ * capacity of the vector.
+ *
+ * @param index position of the new value
+ * @param isSet 0 for NULL value, 1 otherwise
+ * @param start start position of the value in the buffer
+ * @param buffer buffer containing the value to be stored in the vector
+ */
+ public void setSafe(int index, int isSet, long start, ArrowBuf buffer) {
+ handleSafe(index);
+ set(index, isSet, start, buffer);
+ }
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector transfer |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Construct a TransferPair comprising of this and a target vector of
+ * the same type.
+ *
+ * @param ref name of the target vector
+ * @param allocator allocator for the target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
+ return new TransferImpl(ref, allocator);
+ }
+
+ /**
+ * Construct a TransferPair with a desired target vector of the same type.
+ *
+ * @param to target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair makeTransferPair(ValueVector to) {
+ return new TransferImpl((DecimalVector) to);
+ }
+
+ private class TransferImpl implements TransferPair {
+ DecimalVector to;
+
+ public TransferImpl(String ref, BufferAllocator allocator) {
+ to = new DecimalVector(ref, allocator, DecimalVector.this.precision,
+ DecimalVector.this.scale);
+ }
+
+ public TransferImpl(DecimalVector to) {
+ this.to = to;
+ }
+
+ @Override
+ public DecimalVector getTo() {
+ return to;
+ }
+
+ @Override
+ public void transfer() {
+ transferTo(to);
+ }
+
+ @Override
+ public void splitAndTransfer(int startIndex, int length) {
+ splitAndTransferTo(startIndex, length, to);
+ }
+
+ @Override
+ public void copyValueSafe(int fromIndex, int toIndex) {
+ to.copyFromSafe(fromIndex, toIndex, DecimalVector.this);
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/DensityAwareVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/DensityAwareVector.java
new file mode 100644
index 000000000..c16db40f7
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/DensityAwareVector.java
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+/**
+ * Vector that support density aware initial capacity settings.
+ * We use this for ListVector and VarCharVector as of now to
+ * control the memory allocated.
+ *
+ * <p>For ListVector, we have been using a multiplier of 5
+ * to compute the initial capacity of the inner data vector.
+ * For deeply nested lists and lists with lots of NULL values,
+ * this is over-allocation upfront. So density helps to be
+ * conservative when computing the value capacity of the
+ * inner vector.
+ *
+ * <p>For example, a density value of 10 implies each position in the
+ * list vector has a list of 10 values. So we will provision
+ * an initial capacity of (valuecount * 10) for the inner vector.
+ * A density value of 0.1 implies out of 10 positions in the list vector,
+ * 1 position has a list of size 1 and remaining positions are
+ * null (no lists) or empty lists. This helps in tightly controlling
+ * the memory we provision for inner data vector.
+ *
+ * <p>Similar analogy is applicable for VarCharVector where the capacity
+ * of the data buffer can be controlled using density multiplier
+ * instead of default multiplier of 8 (default size of average
+ * varchar length).
+ *
+ * <p>Also from container vectors, we propagate the density down
+ * the inner vectors so that they can use it appropriately.
+ */
+public interface DensityAwareVector {
+
+ /**
+ * Set value with density.
+ *
+ * @param valueCount the number of values in this vector
+ * @param density the density of the vector
+ */
+ void setInitialCapacity(int valueCount, double density);
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/DurationVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/DurationVector.java
new file mode 100644
index 000000000..9671b34e0
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/DurationVector.java
@@ -0,0 +1,406 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static java.util.concurrent.TimeUnit.MICROSECONDS;
+import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED;
+
+import java.time.Duration;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.complex.impl.DurationReaderImpl;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.holders.DurationHolder;
+import org.apache.arrow.vector.holders.NullableDurationHolder;
+import org.apache.arrow.vector.types.TimeUnit;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.TransferPair;
+
+/**
+ * DurationVector implements a fixed width vector (8 bytes) of
+ * a configurable TimeUnit granularity duration values which could be null.
+ * A validity buffer (bit vector) is maintained to track which elements in the
+ * vector are null.
+ */
+public final class DurationVector extends BaseFixedWidthVector {
+ public static final byte TYPE_WIDTH = 8;
+ private final FieldReader reader;
+
+ private final TimeUnit unit;
+
+ /**
+ * Instantiate a DurationVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param fieldType type of Field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public DurationVector(String name, FieldType fieldType, BufferAllocator allocator) {
+ this(new Field(name, fieldType, null), allocator);
+ }
+
+ /**
+ * Instantiate a DurationVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param field field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public DurationVector(Field field, BufferAllocator allocator) {
+ super(field, allocator, TYPE_WIDTH);
+ reader = new DurationReaderImpl(DurationVector.this);
+ this.unit = ((ArrowType.Duration) field.getFieldType().getType()).getUnit();
+ }
+
+ /**
+ * Get a reader that supports reading values from this vector.
+ *
+ * @return Field Reader for this vector
+ */
+ @Override
+ public FieldReader getReader() {
+ return reader;
+ }
+
+ /**
+ * Get minor type for this vector. The vector holds values belonging
+ * to a particular type.
+ *
+ * @return {@link MinorType}
+ */
+ @Override
+ public MinorType getMinorType() {
+ return MinorType.DURATION;
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value retrieval methods |
+ | |
+ *----------------------------------------------------------------*/
+
+ /**
+ * Given a data buffer, get the value stored at a particular position
+ * in the vector.
+ *
+ * <p>This method should not be used externally.
+ *
+ * @param buffer data buffer
+ * @param index position of the element.
+ * @return value stored at the index.
+ */
+ public static long get(final ArrowBuf buffer, final int index) {
+ return buffer.getLong((long) index * TYPE_WIDTH);
+ }
+
+ /**
+ * Get the element at the given index from the vector.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public ArrowBuf get(int index) throws IllegalStateException {
+ if (NULL_CHECKING_ENABLED && isSet(index) == 0) {
+ return null;
+ }
+ return valueBuffer.slice((long) index * TYPE_WIDTH, TYPE_WIDTH);
+ }
+
+ /**
+ * Get the element at the given index from the vector and
+ * sets the state in holder. If element at given index
+ * is null, holder.isSet will be zero.
+ *
+ * @param index position of element
+ */
+ public void get(int index, NullableDurationHolder holder) {
+ if (isSet(index) == 0) {
+ holder.isSet = 0;
+ return;
+ }
+ holder.isSet = 1;
+ holder.value = get(valueBuffer, index);
+ }
+
+ /**
+ * Same as {@link #get(int)}.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public Duration getObject(int index) {
+ if (isSet(index) == 0) {
+ return null;
+ } else {
+ final long value = get(valueBuffer, index);
+ return toDuration(value, unit);
+ }
+ }
+
+ /**
+ * Converts the given value and unit to the appropriate {@link Duration}.
+ */
+ public static Duration toDuration(long value, TimeUnit unit) {
+ switch (unit) {
+ case SECOND:
+ return Duration.ofSeconds(value);
+ case MILLISECOND:
+ return Duration.ofMillis(value);
+ case NANOSECOND:
+ return Duration.ofNanos(value);
+ case MICROSECOND:
+ return Duration.ofNanos(MICROSECONDS.toNanos(value));
+ default:
+ throw new IllegalArgumentException("Unknown timeunit: " + unit);
+ }
+ }
+
+ /**
+ * Get the Interval value at a given index as a {@link StringBuilder} object.
+ *
+ * @param index position of the element
+ * @return String Builder object with Interval in java.time.Duration format.
+ */
+ public StringBuilder getAsStringBuilder(int index) {
+ if (isSet(index) == 0) {
+ return null;
+ } else {
+ return getAsStringBuilderHelper(index);
+ }
+ }
+
+ private StringBuilder getAsStringBuilderHelper(int index) {
+ return new StringBuilder(getObject(index).toString());
+ }
+
+ /**
+ * Gets the time unit of the duration.
+ */
+ public TimeUnit getUnit() {
+ return unit;
+ }
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value setter methods |
+ | |
+ *----------------------------------------------------------------*/
+
+ /**
+ * Set the element at the given index to the given value.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void set(int index, ArrowBuf value) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ valueBuffer.setBytes((long) index * TYPE_WIDTH, value, 0, TYPE_WIDTH);
+ }
+
+ /**
+ * Set the element at the given index to the given value.
+ *
+ * @param index position of element
+ * @param value The duration value (in the timeunit associated with this vector)
+ */
+ public void set(int index, long value) {
+ final long offsetIndex = (long) index * TYPE_WIDTH;
+ BitVectorHelper.setBit(validityBuffer, index);
+ valueBuffer.setLong(offsetIndex, value);
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ * If the value in holder is not indicated as set, element in the
+ * at the given index will be null.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void set(int index, NullableDurationHolder holder) throws IllegalArgumentException {
+ if (holder.isSet < 0) {
+ throw new IllegalArgumentException();
+ } else if (holder.isSet > 0) {
+ set(index, holder.value);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void set(int index, DurationHolder holder) {
+ set(index, holder.value);
+ }
+
+ /**
+ * Same as {@link #set(int, ArrowBuf)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void setSafe(int index, ArrowBuf value) {
+ handleSafe(index);
+ set(index, value);
+ }
+
+ /**
+ * Same as {@link #set(int, long)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param value duration in the time unit this vector was constructed with
+ */
+ public void setSafe(int index, long value) {
+ handleSafe(index);
+ set(index, value);
+ }
+
+ /**
+ * Same as {@link #set(int, NullableDurationHolder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void setSafe(int index, NullableDurationHolder holder) throws IllegalArgumentException {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Same as {@link #set(int, DurationHolder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void setSafe(int index, DurationHolder holder) {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Store the given value at a particular position in the vector. isSet indicates
+ * whether the value is NULL or not.
+ *
+ * @param index position of the new value
+ * @param isSet 0 for NULL value, 1 otherwise
+ * @param value The duration value (in the TimeUnit associated with this vector).
+ */
+ public void set(int index, int isSet, long value) {
+ if (isSet > 0) {
+ set(index, value);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Same as {@link #set(int, int, long)} except that it handles the case
+ * when index is greater than or equal to current value capacity of the
+ * vector.
+ *
+ * @param index position of the new value
+ * @param isSet 0 for NULL value, 1 otherwise
+ * @param value The duration value (in the timeunit associated with this vector)
+ */
+ public void setSafe(int index, int isSet, long value) {
+ handleSafe(index);
+ set(index, isSet, value);
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector transfer |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Construct a TransferPair comprising of this and a target vector of
+ * the same type.
+ *
+ * @param ref name of the target vector
+ * @param allocator allocator for the target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
+ return new TransferImpl(ref, allocator);
+ }
+
+ /**
+ * Construct a TransferPair with a desired target vector of the same type.
+ *
+ * @param to target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair makeTransferPair(ValueVector to) {
+ return new TransferImpl((DurationVector) to);
+ }
+
+ private class TransferImpl implements TransferPair {
+ DurationVector to;
+
+ public TransferImpl(String ref, BufferAllocator allocator) {
+ to = new DurationVector(ref, field.getFieldType(), allocator);
+ }
+
+ public TransferImpl(DurationVector to) {
+ this.to = to;
+ }
+
+ @Override
+ public DurationVector getTo() {
+ return to;
+ }
+
+ @Override
+ public void transfer() {
+ transferTo(to);
+ }
+
+ @Override
+ public void splitAndTransfer(int startIndex, int length) {
+ splitAndTransferTo(startIndex, length, to);
+ }
+
+ @Override
+ public void copyValueSafe(int fromIndex, int toIndex) {
+ to.copyFromSafe(fromIndex, toIndex, DurationVector.this);
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ElementAddressableVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ElementAddressableVector.java
new file mode 100644
index 000000000..f37a50100
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ElementAddressableVector.java
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import org.apache.arrow.memory.util.ArrowBufPointer;
+
+/**
+ * Vector for which each data element resides in a continuous memory region,
+ * so it can be pointed to by an {@link org.apache.arrow.memory.util.ArrowBufPointer}.
+ */
+public interface ElementAddressableVector extends ValueVector {
+
+ /**
+ * Gets the pointer for the data at the given index.
+ * @param index the index for the data.
+ * @return the pointer to the data.
+ */
+ ArrowBufPointer getDataPointer(int index);
+
+ /**
+ * Gets the pointer for the data at the given index.
+ * @param index the index for the data.
+ * @param reuse the data pointer to fill, this avoids creating a new pointer object.
+ * @return the pointer to the data, it should be the same one as the input parameter
+ */
+ ArrowBufPointer getDataPointer(int index, ArrowBufPointer reuse);
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ExtensionTypeVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ExtensionTypeVector.java
new file mode 100644
index 000000000..2041227fc
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ExtensionTypeVector.java
@@ -0,0 +1,274 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import java.util.Iterator;
+import java.util.List;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.OutOfMemoryException;
+import org.apache.arrow.util.Preconditions;
+import org.apache.arrow.vector.compare.VectorVisitor;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.ipc.message.ArrowFieldNode;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.util.CallBack;
+import org.apache.arrow.vector.util.TransferPair;
+
+/**
+ * A vector that wraps an underlying vector, used to help implement extension types.
+ * @param <T> The wrapped vector type.
+ */
+public abstract class ExtensionTypeVector<T extends ValueVector & FieldVector> extends BaseValueVector implements
+ FieldVector {
+
+ private final T underlyingVector;
+ private final String name;
+
+ /**
+ * Instantiate an extension type vector.
+ * @param name name of the vector
+ * @param allocator allocator for memory management
+ * @param underlyingVector underlying filed vector
+ */
+ public ExtensionTypeVector(String name, BufferAllocator allocator, T underlyingVector) {
+ super(allocator);
+ Preconditions.checkNotNull(underlyingVector, "underlyingVector can not be null.");
+ this.name = name;
+ this.underlyingVector = underlyingVector;
+ }
+
+ /**
+ * Instantiate an extension type vector.
+ * @param field field materialized by this vector.
+ * @param allocator allocator for memory management
+ * @param underlyingVector underlying filed vector
+ */
+ public ExtensionTypeVector(Field field, BufferAllocator allocator, T underlyingVector) {
+ this(field.getName(), allocator, underlyingVector);
+ }
+
+ @Override
+ public String getName() {
+ return name;
+ }
+
+ /** Get the underlying vector. */
+ public T getUnderlyingVector() {
+ return underlyingVector;
+ }
+
+ @Override
+ public void allocateNew() throws OutOfMemoryException {
+ this.underlyingVector.allocateNew();
+ }
+
+ @Override
+ public boolean allocateNewSafe() {
+ return this.underlyingVector.allocateNewSafe();
+ }
+
+ @Override
+ public void reAlloc() {
+ this.underlyingVector.reAlloc();
+ }
+
+ @Override
+ public void setInitialCapacity(int numRecords) {
+ this.underlyingVector.setInitialCapacity(numRecords);
+ }
+
+ @Override
+ public int getValueCapacity() {
+ return this.underlyingVector.getValueCapacity();
+ }
+
+ @Override
+ public void reset() {
+ this.underlyingVector.reset();
+ }
+
+ @Override
+ public Field getField() {
+ return this.underlyingVector.getField();
+ }
+
+ @Override
+ public MinorType getMinorType() {
+ return MinorType.EXTENSIONTYPE;
+ }
+
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
+ return underlyingVector.getTransferPair(ref, allocator);
+ }
+
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator, CallBack callBack) {
+ return underlyingVector.getTransferPair(ref, allocator, callBack);
+ }
+
+ @Override
+ public TransferPair makeTransferPair(ValueVector target) {
+ return underlyingVector.makeTransferPair(target);
+ }
+
+ @Override
+ public FieldReader getReader() {
+ return underlyingVector.getReader();
+ }
+
+ @Override
+ public int getBufferSize() {
+ return underlyingVector.getBufferSize();
+ }
+
+ @Override
+ public int getBufferSizeFor(int valueCount) {
+ return underlyingVector.getBufferSizeFor(valueCount);
+ }
+
+ @Override
+ public ArrowBuf[] getBuffers(boolean clear) {
+ return underlyingVector.getBuffers(clear);
+ }
+
+ @Override
+ public ArrowBuf getValidityBuffer() {
+ return underlyingVector.getValidityBuffer();
+ }
+
+ @Override
+ public ArrowBuf getDataBuffer() {
+ return underlyingVector.getDataBuffer();
+ }
+
+ @Override
+ public ArrowBuf getOffsetBuffer() {
+ return underlyingVector.getOffsetBuffer();
+ }
+
+ @Override
+ public int getValueCount() {
+ return underlyingVector.getValueCount();
+ }
+
+ @Override
+ public void setValueCount(int valueCount) {
+ underlyingVector.setValueCount(valueCount);
+ }
+
+ /**
+ * Get the extension object at the specified index.
+ *
+ * <p>Generally, this should access the underlying vector and construct the corresponding Java object from the raw
+ * data.
+ */
+ @Override
+ public abstract Object getObject(int index);
+
+ @Override
+ public int getNullCount() {
+ return underlyingVector.getNullCount();
+ }
+
+ @Override
+ public boolean isNull(int index) {
+ return underlyingVector.isNull(index);
+ }
+
+ @Override
+ public void initializeChildrenFromFields(List<Field> children) {
+ underlyingVector.initializeChildrenFromFields(children);
+ }
+
+ @Override
+ public List<FieldVector> getChildrenFromFields() {
+ return underlyingVector.getChildrenFromFields();
+ }
+
+ @Override
+ public void loadFieldBuffers(ArrowFieldNode fieldNode, List<ArrowBuf> ownBuffers) {
+ underlyingVector.loadFieldBuffers(fieldNode, ownBuffers);
+ }
+
+ @Override
+ public List<ArrowBuf> getFieldBuffers() {
+ return underlyingVector.getFieldBuffers();
+ }
+
+ /**
+ * Get the inner vectors.
+ *
+ * @deprecated This API will be removed as the current implementations no longer support inner vectors.
+ *
+ * @return the inner vectors for this field as defined by the TypeLayout
+ */
+ @Deprecated
+ @Override
+ public List<BufferBacked> getFieldInnerVectors() {
+ return underlyingVector.getFieldInnerVectors();
+ }
+
+ @Override
+ public long getValidityBufferAddress() {
+ return underlyingVector.getValidityBufferAddress();
+ }
+
+ @Override
+ public long getDataBufferAddress() {
+ return underlyingVector.getDataBufferAddress();
+ }
+
+ @Override
+ public long getOffsetBufferAddress() {
+ return underlyingVector.getOffsetBufferAddress();
+ }
+
+ @Override
+ public void clear() {
+ underlyingVector.clear();
+ }
+
+ @Override
+ public void close() {
+ underlyingVector.close();
+ }
+
+ @Override
+ public TransferPair getTransferPair(BufferAllocator allocator) {
+ return underlyingVector.getTransferPair(allocator);
+ }
+
+ @Override
+ public Iterator<ValueVector> iterator() {
+ return underlyingVector.iterator();
+ }
+
+ @Override
+ public BufferAllocator getAllocator() {
+ return underlyingVector.getAllocator();
+ }
+
+ @Override
+ public <OUT, IN> OUT accept(VectorVisitor<OUT, IN> visitor, IN value) {
+ return visitor.visit(this, value);
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/FieldVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/FieldVector.java
new file mode 100644
index 000000000..b00581a04
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/FieldVector.java
@@ -0,0 +1,93 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import java.util.List;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.vector.ipc.message.ArrowFieldNode;
+import org.apache.arrow.vector.types.pojo.Field;
+
+/**
+ * A vector corresponding to a Field in the schema.
+ * It has inner vectors backed by buffers (validity, offsets, data, ...)
+ */
+public interface FieldVector extends ValueVector {
+
+ /**
+ * Initializes the child vectors
+ * to be later loaded with loadBuffers.
+ *
+ * @param children the schema
+ */
+ void initializeChildrenFromFields(List<Field> children);
+
+ /**
+ * The returned list is the same size as the list passed to initializeChildrenFromFields.
+ *
+ * @return the children according to schema (empty for primitive types)
+ */
+ List<FieldVector> getChildrenFromFields();
+
+ /**
+ * Loads data in the vectors.
+ * (ownBuffers must be the same size as getFieldVectors())
+ *
+ * @param fieldNode the fieldNode
+ * @param ownBuffers the buffers for this Field (own buffers only, children not included)
+ */
+ void loadFieldBuffers(ArrowFieldNode fieldNode, List<ArrowBuf> ownBuffers);
+
+ /**
+ * Get the buffers of the fields, (same size as getFieldVectors() since it is their content).
+ *
+ * @return the buffers containing the data for this vector (ready for reading)
+ */
+ List<ArrowBuf> getFieldBuffers();
+
+ /**
+ * Get the inner vectors.
+ *
+ * @deprecated This API will be removed as the current implementations no longer support inner vectors.
+ *
+ * @return the inner vectors for this field as defined by the TypeLayout
+ */
+ @Deprecated
+ List<BufferBacked> getFieldInnerVectors();
+
+ /**
+ * Gets the starting address of the underlying buffer associated with validity vector.
+ *
+ * @return buffer address
+ */
+ long getValidityBufferAddress();
+
+ /**
+ * Gets the starting address of the underlying buffer associated with data vector.
+ *
+ * @return buffer address
+ */
+ long getDataBufferAddress();
+
+ /**
+ * Gets the starting address of the underlying buffer associated with offset vector.
+ *
+ * @return buffer address
+ */
+ long getOffsetBufferAddress();
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/FixedSizeBinaryVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/FixedSizeBinaryVector.java
new file mode 100644
index 000000000..e1847e4bb
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/FixedSizeBinaryVector.java
@@ -0,0 +1,386 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.util.Preconditions;
+import org.apache.arrow.vector.complex.impl.FixedSizeBinaryReaderImpl;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.holders.FixedSizeBinaryHolder;
+import org.apache.arrow.vector.holders.NullableFixedSizeBinaryHolder;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.ArrowType.FixedSizeBinary;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.TransferPair;
+
+/**
+ * FixedSizeBinaryVector implements a fixed width vector of
+ * binary values which could be null. A validity buffer (bit vector) is
+ * maintained to track which elements in the vector are null.
+ */
+public class FixedSizeBinaryVector extends BaseFixedWidthVector {
+ private final int byteWidth;
+ private final FieldReader reader;
+
+ /**
+ * Instantiate a FixedSizeBinaryVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param allocator allocator for memory management.
+ * @param byteWidth byte width of the binary values
+ */
+ public FixedSizeBinaryVector(String name, BufferAllocator allocator, int byteWidth) {
+ this(name, FieldType.nullable(new FixedSizeBinary(byteWidth)), allocator);
+ }
+
+ /**
+ * Instantiate a FixedSizeBinaryVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param fieldType type of Field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public FixedSizeBinaryVector(String name, FieldType fieldType, BufferAllocator allocator) {
+ this(new Field(name, fieldType, null), allocator);
+ }
+
+ /**
+ * Instantiate a FixedSizeBinaryVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param field field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public FixedSizeBinaryVector(Field field, BufferAllocator allocator) {
+ super(field, allocator, ((FixedSizeBinary) field.getFieldType().getType()).getByteWidth());
+ reader = new FixedSizeBinaryReaderImpl(FixedSizeBinaryVector.this);
+ byteWidth = ((FixedSizeBinary) field.getFieldType().getType()).getByteWidth();
+ }
+
+ /**
+ * Get a reader that supports reading values from this vector.
+ *
+ * @return Field Reader for this vector
+ */
+ @Override
+ public FieldReader getReader() {
+ return reader;
+ }
+
+ /**
+ * Get minor type for this vector. The vector holds values belonging
+ * to a particular type.
+ *
+ * @return {@link org.apache.arrow.vector.types.Types.MinorType}
+ */
+ @Override
+ public MinorType getMinorType() {
+ return MinorType.FIXEDSIZEBINARY;
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value retrieval methods |
+ | |
+ *----------------------------------------------------------------*/
+
+ /**
+ * Get the element at the given index from the vector.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public byte[] get(int index) {
+ assert index >= 0;
+ if (NULL_CHECKING_ENABLED && isSet(index) == 0) {
+ return null;
+ }
+ final byte[] dst = new byte[byteWidth];
+ valueBuffer.getBytes((long) index * byteWidth, dst, 0, byteWidth);
+ return dst;
+ }
+
+ /**
+ * Get the element at the given index from the vector and
+ * sets the state in holder. If element at given index
+ * is null, holder.isSet will be zero.
+ *
+ * @param index position of element
+ * @param holder nullable holder to carry the buffer
+ */
+ public void get(int index, NullableFixedSizeBinaryHolder holder) {
+ assert index >= 0;
+ if (isSet(index) == 0) {
+ holder.isSet = 0;
+ return;
+ }
+ holder.isSet = 1;
+ holder.buffer = valueBuffer.slice((long) index * byteWidth, byteWidth);
+ }
+
+ /**
+ * Same as {@link #get(int)}.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ @Override
+ public byte[] getObject(int index) {
+ return get(index);
+ }
+
+ public int getByteWidth() {
+ return byteWidth;
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value setter methods |
+ | |
+ *----------------------------------------------------------------*/
+
+ /** Sets the value at index to the provided one. */
+ public void set(int index, byte[] value) {
+ assert index >= 0;
+ Preconditions.checkNotNull(value, "expecting a valid byte array");
+ assert byteWidth <= value.length;
+ BitVectorHelper.setBit(validityBuffer, index);
+ valueBuffer.setBytes((long) index * byteWidth, value, 0, byteWidth);
+ }
+
+ /**
+ * Same as {@link #set(int, byte[])} but reallocates if <code>index</code>
+ * is larger than capacity.
+ */
+ public void setSafe(int index, byte[] value) {
+ handleSafe(index);
+ set(index, value);
+ }
+
+ /**
+ * Sets the value if isSet is positive, otherwise sets the index to null/invalid.
+ */
+ public void set(int index, int isSet, byte[] value) {
+ if (isSet > 0) {
+ set(index, value);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ public void setSafe(int index, int isSet, byte[] value) {
+ handleSafe(index);
+ set(index, isSet, value);
+ }
+
+ /**
+ * Set the element at the given index to the given value.
+ *
+ * @param index position of element
+ * @param buffer ArrowBuf containing binary value.
+ */
+ public void set(int index, ArrowBuf buffer) {
+ assert index >= 0;
+ assert byteWidth <= buffer.capacity();
+ BitVectorHelper.setBit(validityBuffer, index);
+ valueBuffer.setBytes((long) index * byteWidth, buffer, 0, byteWidth);
+ }
+
+ /**
+ * Same as {@link #set(int, ArrowBuf)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param buffer ArrowBuf containing binary value.
+ */
+ public void setSafe(int index, ArrowBuf buffer) {
+ handleSafe(index);
+ set(index, buffer);
+ }
+
+ /**
+ * Set the element at the given index to the given value.
+ *
+ * @param index position of element
+ * @param buffer ArrowBuf containing binary value.
+ */
+ public void set(int index, int isSet, ArrowBuf buffer) {
+ if (isSet > 0) {
+ set(index, buffer);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Same as {@link #set(int, ArrowBuf)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param buffer ArrowBuf containing binary value.
+ */
+ public void setSafe(int index, int isSet, ArrowBuf buffer) {
+ handleSafe(index);
+ set(index, isSet, buffer);
+ }
+
+ /**
+ * Set the variable length element at the specified index to the data
+ * buffer supplied in the holder.
+ *
+ * @param index position of the element to set
+ * @param holder holder that carries data buffer.
+ */
+ public void set(int index, FixedSizeBinaryHolder holder) {
+ assert holder.byteWidth == byteWidth;
+ set(index, holder.buffer);
+ }
+
+ /**
+ * Same as {@link #set(int, FixedSizeBinaryHolder)} except that it handles the
+ * case where index and length of new element are beyond the existing
+ * capacity of the vector.
+ *
+ * @param index position of the element to set
+ * @param holder holder that carries data buffer.
+ */
+ public void setSafe(int index, FixedSizeBinaryHolder holder) {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Set the variable length element at the specified index to the data
+ * buffer supplied in the holder.
+ *
+ * @param index position of the element to set
+ * @param holder holder that carries data buffer.
+ */
+ public void set(int index, NullableFixedSizeBinaryHolder holder) {
+ assert holder.byteWidth == byteWidth;
+ if (holder.isSet < 0) {
+ throw new IllegalArgumentException("holder has a negative isSet value");
+ } else if (holder.isSet > 0) {
+ set(index, holder.buffer);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Same as {@link #set(int, NullableFixedSizeBinaryHolder)} except that it handles the
+ * case where index and length of new element are beyond the existing
+ * capacity of the vector.
+ *
+ * @param index position of the element to set
+ * @param holder holder that carries data buffer.
+ */
+ public void setSafe(int index, NullableFixedSizeBinaryHolder holder) {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Given a data buffer, get the value stored at a particular position
+ * in the vector.
+ *
+ * <p>This method should not be used externally.
+ *
+ * @param buffer data buffer
+ * @param index position of the element.
+ * @return value stored at the index.
+ */
+ public static byte[] get(final ArrowBuf buffer, final int index, final int byteWidth) {
+ final byte[] dst = new byte[byteWidth];
+ buffer.getBytes((long) index * byteWidth, dst, 0, byteWidth);
+ return dst;
+ }
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector transfer |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Construct a TransferPair comprising of this and a target vector of
+ * the same type.
+ *
+ * @param ref name of the target vector
+ * @param allocator allocator for the target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
+ return new TransferImpl(ref, allocator);
+ }
+
+ /**
+ * Construct a TransferPair with a desired target vector of the same type.
+ *
+ * @param to target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair makeTransferPair(ValueVector to) {
+ return new TransferImpl((FixedSizeBinaryVector) to);
+ }
+
+ private class TransferImpl implements TransferPair {
+ FixedSizeBinaryVector to;
+
+ public TransferImpl(String ref, BufferAllocator allocator) {
+ to = new FixedSizeBinaryVector(ref, allocator, FixedSizeBinaryVector.this.byteWidth);
+ }
+
+ public TransferImpl(FixedSizeBinaryVector to) {
+ this.to = to;
+ }
+
+ @Override
+ public FixedSizeBinaryVector getTo() {
+ return to;
+ }
+
+ @Override
+ public void transfer() {
+ transferTo(to);
+ }
+
+ @Override
+ public void splitAndTransfer(int startIndex, int length) {
+ splitAndTransferTo(startIndex, length, to);
+ }
+
+ @Override
+ public void copyValueSafe(int fromIndex, int toIndex) {
+ to.copyFromSafe(fromIndex, toIndex, FixedSizeBinaryVector.this);
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/FixedWidthVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/FixedWidthVector.java
new file mode 100644
index 000000000..58effeecb
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/FixedWidthVector.java
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+/**
+ * Interface for all fixed width {@link ElementAddressableVector} (e.g. integer, fixed size binary, etc).
+ */
+public interface FixedWidthVector extends ElementAddressableVector {
+
+ /**
+ * Allocate a new memory space for this vector. Must be called prior to using the ValueVector.
+ *
+ * @param valueCount Number of values in the vector.
+ */
+ void allocateNew(int valueCount);
+
+ /**
+ * Zero out the underlying buffer backing this vector.
+ */
+ void zeroVector();
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/Float4Vector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/Float4Vector.java
new file mode 100644
index 000000000..365a1529b
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/Float4Vector.java
@@ -0,0 +1,361 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.complex.impl.Float4ReaderImpl;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.holders.Float4Holder;
+import org.apache.arrow.vector.holders.NullableFloat4Holder;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.TransferPair;
+
+/**
+ * Float4Vector implements a fixed width vector (4 bytes) of
+ * float values which could be null. A validity buffer (bit vector) is
+ * maintained to track which elements in the vector are null.
+ */
+public final class Float4Vector extends BaseFixedWidthVector implements FloatingPointVector {
+ public static final byte TYPE_WIDTH = 4;
+ private final FieldReader reader;
+
+ /**
+ * Instantiate a Float4Vector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param allocator allocator for memory management.
+ */
+ public Float4Vector(String name, BufferAllocator allocator) {
+ this(name, FieldType.nullable(MinorType.FLOAT4.getType()), allocator);
+ }
+
+ /**
+ * Instantiate a Float4Vector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param fieldType type of Field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public Float4Vector(String name, FieldType fieldType, BufferAllocator allocator) {
+ this(new Field(name, fieldType, null), allocator);
+ }
+
+ /**
+ * Instantiate a Float4Vector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param field field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public Float4Vector(Field field, BufferAllocator allocator) {
+ super(field, allocator, TYPE_WIDTH);
+ reader = new Float4ReaderImpl(Float4Vector.this);
+ }
+
+ /**
+ * Get a reader that supports reading values from this vector.
+ *
+ * @return Field Reader for this vector
+ */
+ @Override
+ public FieldReader getReader() {
+ return reader;
+ }
+
+ /**
+ * Get minor type for this vector. The vector holds values belonging
+ * to a particular type.
+ *
+ * @return {@link org.apache.arrow.vector.types.Types.MinorType}
+ */
+ @Override
+ public MinorType getMinorType() {
+ return MinorType.FLOAT4;
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value retrieval methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Get the element at the given index from the vector.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public float get(int index) throws IllegalStateException {
+ if (NULL_CHECKING_ENABLED && isSet(index) == 0) {
+ throw new IllegalStateException("Value at index is null");
+ }
+ return valueBuffer.getFloat((long) index * TYPE_WIDTH);
+ }
+
+ /**
+ * Get the element at the given index from the vector and
+ * sets the state in holder. If element at given index
+ * is null, holder.isSet will be zero.
+ *
+ * @param index position of element
+ */
+ public void get(int index, NullableFloat4Holder holder) {
+ if (isSet(index) == 0) {
+ holder.isSet = 0;
+ return;
+ }
+ holder.isSet = 1;
+ holder.value = valueBuffer.getFloat((long) index * TYPE_WIDTH);
+ }
+
+ /**
+ * Same as {@link #get(int)}.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public Float getObject(int index) {
+ if (isSet(index) == 0) {
+ return null;
+ } else {
+ return valueBuffer.getFloat((long) index * TYPE_WIDTH);
+ }
+ }
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value setter methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ private void setValue(int index, float value) {
+ valueBuffer.setFloat((long) index * TYPE_WIDTH, value);
+ }
+
+ /**
+ * Set the element at the given index to the given value.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void set(int index, float value) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, value);
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ * If the value in holder is not indicated as set, element in the
+ * at the given index will be null.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void set(int index, NullableFloat4Holder holder) throws IllegalArgumentException {
+ if (holder.isSet < 0) {
+ throw new IllegalArgumentException();
+ } else if (holder.isSet > 0) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, holder.value);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void set(int index, Float4Holder holder) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, holder.value);
+ }
+
+ /**
+ * Same as {@link #set(int, float)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void setSafe(int index, float value) {
+ handleSafe(index);
+ set(index, value);
+ }
+
+ /**
+ * Same as {@link #set(int, NullableFloat4Holder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void setSafe(int index, NullableFloat4Holder holder) throws IllegalArgumentException {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Same as {@link #set(int, Float4Holder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void setSafe(int index, Float4Holder holder) {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Store the given value at a particular position in the vector. isSet indicates
+ * whether the value is NULL or not.
+ *
+ * @param index position of the new value
+ * @param isSet 0 for NULL value, 1 otherwise
+ * @param value element value
+ */
+ public void set(int index, int isSet, float value) {
+ if (isSet > 0) {
+ set(index, value);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Same as {@link #set(int, int, float)} except that it handles the case
+ * when index is greater than or equal to current value capacity of the
+ * vector.
+ *
+ * @param index position of the new value
+ * @param isSet 0 for NULL value, 1 otherwise
+ * @param value element value
+ */
+ public void setSafe(int index, int isSet, float value) {
+ handleSafe(index);
+ set(index, isSet, value);
+ }
+
+ /**
+ * Given a data buffer, get the value stored at a particular position
+ * in the vector.
+ *
+ * <p>This method should not be used externally.
+ *
+ * @param buffer data buffer
+ * @param index position of the element.
+ * @return value stored at the index.
+ */
+ public static float get(final ArrowBuf buffer, final int index) {
+ return buffer.getFloat((long) index * TYPE_WIDTH);
+ }
+
+ @Override
+ public void setWithPossibleTruncate(int index, double value) {
+ set(index, (float) value);
+ }
+
+ @Override
+ public void setSafeWithPossibleTruncate(int index, double value) {
+ setSafe(index, (float) value);
+ }
+
+ @Override
+ public double getValueAsDouble(int index) {
+ return get(index);
+ }
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector transfer |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Construct a TransferPair comprising of this and a target vector of
+ * the same type.
+ *
+ * @param ref name of the target vector
+ * @param allocator allocator for the target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
+ return new TransferImpl(ref, allocator);
+ }
+
+ /**
+ * Construct a TransferPair with a desired target vector of the same type.
+ *
+ * @param to target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair makeTransferPair(ValueVector to) {
+ return new TransferImpl((Float4Vector) to);
+ }
+
+ private class TransferImpl implements TransferPair {
+ Float4Vector to;
+
+ public TransferImpl(String ref, BufferAllocator allocator) {
+ to = new Float4Vector(ref, field.getFieldType(), allocator);
+ }
+
+ public TransferImpl(Float4Vector to) {
+ this.to = to;
+ }
+
+ @Override
+ public Float4Vector getTo() {
+ return to;
+ }
+
+ @Override
+ public void transfer() {
+ transferTo(to);
+ }
+
+ @Override
+ public void splitAndTransfer(int startIndex, int length) {
+ splitAndTransferTo(startIndex, length, to);
+ }
+
+ @Override
+ public void copyValueSafe(int fromIndex, int toIndex) {
+ to.copyFromSafe(fromIndex, toIndex, Float4Vector.this);
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/Float8Vector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/Float8Vector.java
new file mode 100644
index 000000000..948390d46
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/Float8Vector.java
@@ -0,0 +1,362 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.complex.impl.Float8ReaderImpl;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.holders.Float8Holder;
+import org.apache.arrow.vector.holders.NullableFloat8Holder;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.TransferPair;
+
+/**
+ * Float8Vector implements a fixed width vector (8 bytes) of
+ * double values which could be null. A validity buffer (bit vector) is
+ * maintained to track which elements in the vector are null.
+ */
+public final class Float8Vector extends BaseFixedWidthVector implements FloatingPointVector {
+ public static final byte TYPE_WIDTH = 8;
+ private final FieldReader reader;
+
+ /**
+ * Instantiate a Float8Vector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param allocator allocator for memory management.
+ */
+ public Float8Vector(String name, BufferAllocator allocator) {
+ this(name, FieldType.nullable(MinorType.FLOAT8.getType()), allocator);
+ }
+
+ /**
+ * Instantiate a Float8Vector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param fieldType type of Field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public Float8Vector(String name, FieldType fieldType, BufferAllocator allocator) {
+ this(new Field(name, fieldType, null), allocator);
+ }
+
+ /**
+ * Instantiate a Float8Vector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param field field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public Float8Vector(Field field, BufferAllocator allocator) {
+ super(field, allocator, TYPE_WIDTH);
+ reader = new Float8ReaderImpl(Float8Vector.this);
+ }
+
+ /**
+ * Get a reader that supports reading values from this vector.
+ *
+ * @return Field Reader for this vector
+ */
+ @Override
+ public FieldReader getReader() {
+ return reader;
+ }
+
+ /**
+ * Get minor type for this vector. The vector holds values belonging
+ * to a particular type.
+ *
+ * @return {@link org.apache.arrow.vector.types.Types.MinorType}
+ */
+ @Override
+ public MinorType getMinorType() {
+ return MinorType.FLOAT8;
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value retrieval methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Get the element at the given index from the vector.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public double get(int index) throws IllegalStateException {
+ if (NULL_CHECKING_ENABLED && isSet(index) == 0) {
+ throw new IllegalStateException("Value at index is null");
+ }
+ return valueBuffer.getDouble((long) index * TYPE_WIDTH);
+ }
+
+ /**
+ * Get the element at the given index from the vector and
+ * sets the state in holder. If element at given index
+ * is null, holder.isSet will be zero.
+ *
+ * @param index position of element
+ */
+ public void get(int index, NullableFloat8Holder holder) {
+ if (isSet(index) == 0) {
+ holder.isSet = 0;
+ return;
+ }
+ holder.isSet = 1;
+ holder.value = valueBuffer.getDouble((long) index * TYPE_WIDTH);
+ }
+
+ /**
+ * Same as {@link #get(int)}.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public Double getObject(int index) {
+ if (isSet(index) == 0) {
+ return null;
+ } else {
+ return valueBuffer.getDouble((long) index * TYPE_WIDTH);
+ }
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value setter methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ private void setValue(int index, double value) {
+ valueBuffer.setDouble((long) index * TYPE_WIDTH, value);
+ }
+
+ /**
+ * Set the element at the given index to the given value.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void set(int index, double value) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, value);
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ * If the value in holder is not indicated as set, element in the
+ * at the given index will be null.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void set(int index, NullableFloat8Holder holder) throws IllegalArgumentException {
+ if (holder.isSet < 0) {
+ throw new IllegalArgumentException();
+ } else if (holder.isSet > 0) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, holder.value);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void set(int index, Float8Holder holder) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, holder.value);
+ }
+
+ /**
+ * Same as {@link #set(int, double)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void setSafe(int index, double value) {
+ handleSafe(index);
+ set(index, value);
+ }
+
+ /**
+ * Same as {@link #set(int, NullableFloat8Holder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void setSafe(int index, NullableFloat8Holder holder) throws IllegalArgumentException {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Same as {@link #set(int, Float8Holder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void setSafe(int index, Float8Holder holder) {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Store the given value at a particular position in the vector. isSet indicates
+ * whether the value is NULL or not.
+ *
+ * @param index position of the new value
+ * @param isSet 0 for NULL value, 1 otherwise
+ * @param value element value
+ */
+ public void set(int index, int isSet, double value) {
+ if (isSet > 0) {
+ set(index, value);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Same as {@link #set(int, int, double)} except that it handles the case
+ * when index is greater than or equal to current value capacity of the
+ * vector.
+ *
+ * @param index position of the new value
+ * @param isSet 0 for NULL value, 1 otherwise
+ * @param value element value
+ */
+ public void setSafe(int index, int isSet, double value) {
+ handleSafe(index);
+ set(index, isSet, value);
+ }
+
+ /**
+ * Given a data buffer, get the value stored at a particular position
+ * in the vector.
+ *
+ * <p>This method should not be used externally.
+ *
+ * @param buffer data buffer
+ * @param index position of the element.
+ * @return value stored at the index.
+ */
+ public static double get(final ArrowBuf buffer, final int index) {
+ return buffer.getDouble((long) index * TYPE_WIDTH);
+ }
+
+ @Override
+ public void setWithPossibleTruncate(int index, double value) {
+ set(index, value);
+ }
+
+ @Override
+ public void setSafeWithPossibleTruncate(int index, double value) {
+ setSafe(index, value);
+ }
+
+ @Override
+ public double getValueAsDouble(int index) {
+ return get(index);
+ }
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector transfer |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Construct a TransferPair comprising of this and a target vector of
+ * the same type.
+ *
+ * @param ref name of the target vector
+ * @param allocator allocator for the target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
+ return new TransferImpl(ref, allocator);
+ }
+
+ /**
+ * Construct a TransferPair with a desired target vector of the same type.
+ *
+ * @param to target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair makeTransferPair(ValueVector to) {
+ return new TransferImpl((Float8Vector) to);
+ }
+
+ private class TransferImpl implements TransferPair {
+ Float8Vector to;
+
+ public TransferImpl(String ref, BufferAllocator allocator) {
+ to = new Float8Vector(ref, field.getFieldType(), allocator);
+ }
+
+ public TransferImpl(Float8Vector to) {
+ this.to = to;
+ }
+
+ @Override
+ public Float8Vector getTo() {
+ return to;
+ }
+
+ @Override
+ public void transfer() {
+ transferTo(to);
+ }
+
+ @Override
+ public void splitAndTransfer(int startIndex, int length) {
+ splitAndTransferTo(startIndex, length, to);
+ }
+
+ @Override
+ public void copyValueSafe(int fromIndex, int toIndex) {
+ to.copyFromSafe(fromIndex, toIndex, Float8Vector.this);
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/FloatingPointVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/FloatingPointVector.java
new file mode 100644
index 000000000..4c5143de6
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/FloatingPointVector.java
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+/**
+ * The interface for vectors with floating point values.
+ */
+public interface FloatingPointVector extends ValueVector {
+
+ /**
+ * Sets the value at the given index, note this value may be truncated internally.
+ * @param index the index to set.
+ * @param value the value to set.
+ */
+ void setWithPossibleTruncate(int index, double value);
+
+ /**
+ * Sets the value at the given index, note this value may be truncated internally.
+ * Any expansion/reallocation is handled automatically.
+ * @param index the index to set.
+ * @param value the value to set.
+ */
+ void setSafeWithPossibleTruncate(int index, double value);
+
+ /**
+ * Gets the value at the given index.
+ * @param index the index to retrieve the value.
+ * @return the value at the index.
+ */
+ double getValueAsDouble(int index);
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/GenerateSampleData.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/GenerateSampleData.java
new file mode 100644
index 000000000..3da915541
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/GenerateSampleData.java
@@ -0,0 +1,337 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import java.math.BigDecimal;
+import java.nio.charset.Charset;
+
+/**
+ * Helper class to generate test data for Nullable fixed and variable
+ * width scalar vectors. Previous implementations of java vector classes
+ * provided generateTestData(now deprecated) API to populate the vector
+ * with sample data. This class should be used for that purpose.
+ */
+public class GenerateSampleData {
+ private GenerateSampleData() {}
+
+ /** Populates <code>vector</code> with <code>valueCount</code> random values. */
+ public static void generateTestData(final ValueVector vector, final int valueCount) {
+ if (vector instanceof IntVector) {
+ writeIntData((IntVector) vector, valueCount);
+ } else if (vector instanceof DecimalVector) {
+ writeDecimalData((DecimalVector) vector, valueCount);
+ } else if (vector instanceof BitVector) {
+ writeBooleanData((BitVector) vector, valueCount);
+ } else if (vector instanceof VarCharVector) {
+ writeVarCharData((VarCharVector) vector, valueCount);
+ } else if (vector instanceof VarBinaryVector) {
+ writeVarBinaryData((VarBinaryVector) vector, valueCount);
+ } else if (vector instanceof BigIntVector) {
+ writeBigIntData((BigIntVector) vector, valueCount);
+ } else if (vector instanceof Float4Vector) {
+ writeFloatData((Float4Vector) vector, valueCount);
+ } else if (vector instanceof Float8Vector) {
+ writeDoubleData((Float8Vector) vector, valueCount);
+ } else if (vector instanceof DateDayVector) {
+ writeDateDayData((DateDayVector) vector, valueCount);
+ } else if (vector instanceof DateMilliVector) {
+ writeDateMilliData((DateMilliVector) vector, valueCount);
+ } else if (vector instanceof IntervalDayVector) {
+ writeIntervalDayData((IntervalDayVector) vector, valueCount);
+ } else if (vector instanceof IntervalYearVector) {
+ writeIntervalYearData((IntervalYearVector) vector, valueCount);
+ } else if (vector instanceof SmallIntVector) {
+ writeSmallIntData((SmallIntVector) vector, valueCount);
+ } else if (vector instanceof TinyIntVector) {
+ writeTinyIntData((TinyIntVector) vector, valueCount);
+ } else if (vector instanceof TimeMicroVector) {
+ writeTimeMicroData((TimeMicroVector) vector, valueCount);
+ } else if (vector instanceof TimeMilliVector) {
+ writeTimeMilliData((TimeMilliVector) vector, valueCount);
+ } else if (vector instanceof TimeNanoVector) {
+ writeTimeNanoData((TimeNanoVector) vector, valueCount);
+ } else if (vector instanceof TimeSecVector) {
+ writeTimeSecData((TimeSecVector) vector, valueCount);
+ } else if (vector instanceof TimeStampSecVector) {
+ writeTimeStampData((TimeStampSecVector) vector, valueCount);
+ } else if (vector instanceof TimeStampMicroVector) {
+ writeTimeStampData((TimeStampMicroVector) vector, valueCount);
+ } else if (vector instanceof TimeStampMilliVector) {
+ writeTimeStampData((TimeStampMilliVector) vector, valueCount);
+ } else if (vector instanceof TimeStampNanoVector) {
+ writeTimeStampData((TimeStampNanoVector) vector, valueCount);
+ } else if (vector instanceof TimeStampSecTZVector) {
+ writeTimeStampData((TimeStampSecTZVector) vector, valueCount);
+ } else if (vector instanceof TimeStampMicroTZVector) {
+ writeTimeStampData((TimeStampMicroTZVector) vector, valueCount);
+ } else if (vector instanceof TimeStampMilliTZVector) {
+ writeTimeStampData((TimeStampMilliTZVector) vector, valueCount);
+ } else if (vector instanceof TimeStampNanoTZVector) {
+ writeTimeStampData((TimeStampNanoTZVector) vector, valueCount);
+ }
+ }
+
+ private static void writeTimeStampData(TimeStampVector vector, int valueCount) {
+ final long even = 100000;
+ final long odd = 200000;
+ for (int i = 0; i < valueCount; i++) {
+ if (i % 2 == 0) {
+ vector.setSafe(i, even);
+ } else {
+ vector.setSafe(i, odd);
+ }
+ }
+ vector.setValueCount(valueCount);
+ }
+
+ private static void writeDecimalData(DecimalVector vector, int valueCount) {
+ final BigDecimal even = new BigDecimal(0.0543278923);
+ final BigDecimal odd = new BigDecimal(2.0543278923);
+ for (int i = 0; i < valueCount; i++) {
+ if (i % 2 == 0) {
+ vector.setSafe(i, even);
+ } else {
+ vector.setSafe(i, odd);
+ }
+ }
+ vector.setValueCount(valueCount);
+ }
+
+ private static void writeIntData(IntVector vector, int valueCount) {
+ final int even = 1000;
+ final int odd = 2000;
+ for (int i = 0; i < valueCount; i++) {
+ if (i % 2 == 0) {
+ vector.setSafe(i, even);
+ } else {
+ vector.setSafe(i, odd);
+ }
+ }
+ vector.setValueCount(valueCount);
+ }
+
+ private static void writeBooleanData(BitVector vector, int valueCount) {
+ final int even = 0;
+ final int odd = 1;
+ for (int i = 0; i < valueCount; i++) {
+ if (i % 2 == 0) {
+ vector.setSafe(i, even);
+ } else {
+ vector.setSafe(i, odd);
+ }
+ }
+ vector.setValueCount(valueCount);
+ }
+
+ private static void writeIntervalYearData(IntervalYearVector vector, int valueCount) {
+ final int even = 1;
+ final int odd = 2;
+ for (int i = 0; i < valueCount; i++) {
+ if (i % 2 == 0) {
+ vector.setSafe(i, even);
+ } else {
+ vector.setSafe(i, odd);
+ }
+ }
+ vector.setValueCount(valueCount);
+ }
+
+ private static void writeIntervalDayData(IntervalDayVector vector, int valueCount) {
+ for (int i = 0; i < valueCount; i++) {
+ if (i % 2 == 0) {
+ vector.setSafe(i, 1, 50);
+ } else {
+ vector.setSafe(i, 2, 100);
+ }
+ }
+ vector.setValueCount(valueCount);
+ }
+
+ private static void writeTimeSecData(TimeSecVector vector, int valueCount) {
+ final int even = 500;
+ final int odd = 900;
+ for (int i = 0; i < valueCount; i++) {
+ if (i % 2 == 0) {
+ vector.setSafe(i, even);
+ } else {
+ vector.setSafe(i, odd);
+ }
+ }
+ vector.setValueCount(valueCount);
+ }
+
+ private static void writeTimeMilliData(TimeMilliVector vector, int valueCount) {
+ final int even = 1000;
+ final int odd = 2000;
+ for (int i = 0; i < valueCount; i++) {
+ if (i % 2 == 0) {
+ vector.setSafe(i, even);
+ } else {
+ vector.setSafe(i, odd);
+ }
+ }
+ vector.setValueCount(valueCount);
+ }
+
+ private static void writeTimeMicroData(TimeMicroVector vector, int valueCount) {
+ final long even = 1000000000;
+ final long odd = 2000000000;
+ for (int i = 0; i < valueCount; i++) {
+ if (i % 2 == 0) {
+ vector.setSafe(i, even);
+ } else {
+ vector.setSafe(i, odd);
+ }
+ }
+ vector.setValueCount(valueCount);
+
+ }
+
+ private static void writeTimeNanoData(TimeNanoVector vector, int valueCount) {
+ final long even = 1000000000;
+ final long odd = 2000000000;
+ for (int i = 0; i < valueCount; i++) {
+ if (i % 2 == 0) {
+ vector.setSafe(i, even);
+ } else {
+ vector.setSafe(i, odd);
+ }
+ }
+ vector.setValueCount(valueCount);
+ }
+
+ private static void writeDateDayData(DateDayVector vector, int valueCount) {
+ final int even = 1000;
+ final int odd = 2000;
+ for (int i = 0; i < valueCount; i++) {
+ if (i % 2 == 0) {
+ vector.setSafe(i, even);
+ } else {
+ vector.setSafe(i, odd);
+ }
+ }
+ vector.setValueCount(valueCount);
+ }
+
+ private static void writeDateMilliData(DateMilliVector vector, int valueCount) {
+ final long even = 1000000000;
+ final long odd = 2000000000;
+ for (int i = 0; i < valueCount; i++) {
+ if (i % 2 == 0) {
+ vector.setSafe(i, even);
+ } else {
+ vector.setSafe(i, odd);
+ }
+ }
+ vector.setValueCount(valueCount);
+ }
+
+ private static void writeSmallIntData(SmallIntVector vector, int valueCount) {
+ final short even = 10;
+ final short odd = 20;
+ for (int i = 0; i < valueCount; i++) {
+ if (i % 2 == 0) {
+ vector.setSafe(i, even);
+ } else {
+ vector.setSafe(i, odd);
+ }
+ }
+ vector.setValueCount(valueCount);
+ }
+
+ private static void writeTinyIntData(TinyIntVector vector, int valueCount) {
+ final byte even = 1;
+ final byte odd = 2;
+ for (int i = 0; i < valueCount; i++) {
+ if (i % 2 == 0) {
+ vector.setSafe(i, even);
+ } else {
+ vector.setSafe(i, odd);
+ }
+ }
+ vector.setValueCount(valueCount);
+ }
+
+ private static void writeBigIntData(BigIntVector vector, int valueCount) {
+ final long even = 1000000000;
+ final long odd = 2000000000;
+ for (int i = 0; i < valueCount; i++) {
+ if (i % 2 == 0) {
+ vector.setSafe(i, even);
+ } else {
+ vector.setSafe(i, odd);
+ }
+ }
+ vector.setValueCount(valueCount);
+ }
+
+ private static void writeFloatData(Float4Vector vector, int valueCount) {
+ final float even = 20.3f;
+ final float odd = 40.2f;
+ for (int i = 0; i < valueCount; i++) {
+ if (i % 2 == 0) {
+ vector.setSafe(i, even);
+ } else {
+ vector.setSafe(i, odd);
+ }
+ }
+ vector.setValueCount(valueCount);
+ }
+
+ private static void writeDoubleData(Float8Vector vector, int valueCount) {
+ final double even = 20.2373;
+ final double odd = 40.2378;
+ for (int i = 0; i < valueCount; i++) {
+ if (i % 2 == 0) {
+ vector.setSafe(i, even);
+ } else {
+ vector.setSafe(i, odd);
+ }
+ }
+ vector.setValueCount(valueCount);
+ }
+
+ private static void writeVarBinaryData(VarBinaryVector vector, int valueCount) {
+ Charset utf8Charset = Charset.forName("UTF-8");
+ final byte[] even = "AAAAA1".getBytes(utf8Charset);
+ final byte[] odd = "BBBBBBBBB2".getBytes(utf8Charset);
+ for (int i = 0; i < valueCount; i++) {
+ if (i % 2 == 0) {
+ vector.setSafe(i, even);
+ } else {
+ vector.setSafe(i, odd);
+ }
+ }
+ vector.setValueCount(valueCount);
+ }
+
+ private static void writeVarCharData(VarCharVector vector, int valueCount) {
+ Charset utf8Charset = Charset.forName("UTF-8");
+ final byte[] even = "AAAAA1".getBytes(utf8Charset);
+ final byte[] odd = "BBBBBBBBB2".getBytes(utf8Charset);
+ for (int i = 0; i < valueCount; i++) {
+ if (i % 2 == 0) {
+ vector.setSafe(i, even);
+ } else {
+ vector.setSafe(i, odd);
+ }
+ }
+ vector.setValueCount(valueCount);
+ }
+}
+
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/IntVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/IntVector.java
new file mode 100644
index 000000000..e591ec1e8
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/IntVector.java
@@ -0,0 +1,362 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.complex.impl.IntReaderImpl;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.holders.IntHolder;
+import org.apache.arrow.vector.holders.NullableIntHolder;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.TransferPair;
+
+/**
+ * IntVector implements a fixed width (4 bytes) vector of
+ * integer values which could be null. A validity buffer (bit vector) is
+ * maintained to track which elements in the vector are null.
+ */
+public final class IntVector extends BaseFixedWidthVector implements BaseIntVector {
+ public static final byte TYPE_WIDTH = 4;
+ private final FieldReader reader;
+
+ /**
+ * Instantiate a IntVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param allocator allocator for memory management.
+ */
+ public IntVector(String name, BufferAllocator allocator) {
+ this(name, FieldType.nullable(MinorType.INT.getType()), allocator);
+ }
+
+ /**
+ * Instantiate a IntVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param fieldType type of Field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public IntVector(String name, FieldType fieldType, BufferAllocator allocator) {
+ this(new Field(name, fieldType, null), allocator);
+ }
+
+ /**
+ * Instantiate a IntVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param field field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public IntVector(Field field, BufferAllocator allocator) {
+ super(field, allocator, TYPE_WIDTH);
+ reader = new IntReaderImpl(IntVector.this);
+ }
+
+ /**
+ * Get a reader that supports reading values from this vector.
+ *
+ * @return Field Reader for this vector
+ */
+ @Override
+ public FieldReader getReader() {
+ return reader;
+ }
+
+ /**
+ * Get minor type for this vector. The vector holds values belonging
+ * to a particular type.
+ *
+ * @return {@link org.apache.arrow.vector.types.Types.MinorType}
+ */
+ @Override
+ public MinorType getMinorType() {
+ return MinorType.INT;
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value retrieval methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Get the element at the given index from the vector.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public int get(int index) throws IllegalStateException {
+ if (NULL_CHECKING_ENABLED && isSet(index) == 0) {
+ throw new IllegalStateException("Value at index is null");
+ }
+ return valueBuffer.getInt((long) index * TYPE_WIDTH);
+ }
+
+ /**
+ * Get the element at the given index from the vector and
+ * sets the state in holder. If element at given index
+ * is null, holder.isSet will be zero.
+ *
+ * @param index position of element
+ */
+ public void get(int index, NullableIntHolder holder) {
+ if (isSet(index) == 0) {
+ holder.isSet = 0;
+ return;
+ }
+ holder.isSet = 1;
+ holder.value = valueBuffer.getInt((long) index * TYPE_WIDTH);
+ }
+
+ /**
+ * Same as {@link #get(int)}.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public Integer getObject(int index) {
+ if (isSet(index) == 0) {
+ return null;
+ } else {
+ return valueBuffer.getInt((long) index * TYPE_WIDTH);
+ }
+ }
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value setter methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ private void setValue(int index, int value) {
+ valueBuffer.setInt((long) index * TYPE_WIDTH, value);
+ }
+
+ /**
+ * Set the element at the given index to the given value.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void set(int index, int value) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, value);
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ * If the value in holder is not indicated as set, element in the
+ * at the given index will be null.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void set(int index, NullableIntHolder holder) throws IllegalArgumentException {
+ if (holder.isSet < 0) {
+ throw new IllegalArgumentException();
+ } else if (holder.isSet > 0) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, holder.value);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void set(int index, IntHolder holder) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, holder.value);
+ }
+
+ /**
+ * Same as {@link #set(int, int)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void setSafe(int index, int value) {
+ handleSafe(index);
+ set(index, value);
+ }
+
+ /**
+ * Same as {@link #set(int, NullableIntHolder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void setSafe(int index, NullableIntHolder holder) throws IllegalArgumentException {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Same as {@link #set(int, IntHolder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void setSafe(int index, IntHolder holder) {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Store the given value at a particular position in the vector. isSet indicates
+ * whether the value is NULL or not.
+ *
+ * @param index position of the new value
+ * @param isSet 0 for NULL value, 1 otherwise
+ * @param value element value
+ */
+ public void set(int index, int isSet, int value) {
+ if (isSet > 0) {
+ set(index, value);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Same as {@link #set(int, int, int)} except that it handles the case
+ * when index is greater than or equal to current value capacity of the
+ * vector.
+ *
+ * @param index position of the new value
+ * @param isSet 0 for NULL value, 1 otherwise
+ * @param value element value
+ */
+ public void setSafe(int index, int isSet, int value) {
+ handleSafe(index);
+ set(index, isSet, value);
+ }
+
+ /**
+ * Given a data buffer, get the value stored at a particular position
+ * in the vector.
+ *
+ * <p>This method should not be used externally.
+ *
+ * @param buffer data buffer
+ * @param index position of the element.
+ * @return value stored at the index.
+ */
+ public static int get(final ArrowBuf buffer, final int index) {
+ return buffer.getInt((long) index * TYPE_WIDTH);
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector transfer |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Construct a TransferPair comprising of this and a target vector of
+ * the same type.
+ *
+ * @param ref name of the target vector
+ * @param allocator allocator for the target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
+ return new TransferImpl(ref, allocator);
+ }
+
+ /**
+ * Construct a TransferPair with a desired target vector of the same type.
+ *
+ * @param to target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair makeTransferPair(ValueVector to) {
+ return new TransferImpl((IntVector) to);
+ }
+
+ @Override
+ public void setWithPossibleTruncate(int index, long value) {
+ this.setSafe(index, (int) value);
+ }
+
+ @Override
+ public void setUnsafeWithPossibleTruncate(int index, long value) {
+ this.set(index, (int) value);
+ }
+
+ @Override
+ public long getValueAsLong(int index) {
+ return this.get(index);
+ }
+
+ private class TransferImpl implements TransferPair {
+ IntVector to;
+
+ public TransferImpl(String ref, BufferAllocator allocator) {
+ to = new IntVector(ref, field.getFieldType(), allocator);
+ }
+
+ public TransferImpl(IntVector to) {
+ this.to = to;
+ }
+
+ @Override
+ public IntVector getTo() {
+ return to;
+ }
+
+ @Override
+ public void transfer() {
+ transferTo(to);
+ }
+
+ @Override
+ public void splitAndTransfer(int startIndex, int length) {
+ splitAndTransferTo(startIndex, length, to);
+ }
+
+ @Override
+ public void copyValueSafe(int fromIndex, int toIndex) {
+ to.copyFromSafe(fromIndex, toIndex, IntVector.this);
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/IntervalDayVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/IntervalDayVector.java
new file mode 100644
index 000000000..0dc860e6b
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/IntervalDayVector.java
@@ -0,0 +1,433 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED;
+
+import java.time.Duration;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.complex.impl.IntervalDayReaderImpl;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.holders.IntervalDayHolder;
+import org.apache.arrow.vector.holders.NullableIntervalDayHolder;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.TransferPair;
+
+/**
+ * IntervalDayVector implements a fixed width vector (8 bytes) of
+ * interval (days and milliseconds) values which could be null.
+ * A validity buffer (bit vector) is maintained to track which elements in the
+ * vector are null.
+ */
+public final class IntervalDayVector extends BaseFixedWidthVector {
+ public static final byte TYPE_WIDTH = 8;
+ private static final byte MILLISECOND_OFFSET = 4;
+ private final FieldReader reader;
+
+ /**
+ * Instantiate a IntervalDayVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param allocator allocator for memory management.
+ */
+ public IntervalDayVector(String name, BufferAllocator allocator) {
+ this(name, FieldType.nullable(MinorType.INTERVALDAY.getType()), allocator);
+ }
+
+ /**
+ * Instantiate a IntervalDayVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param fieldType type of Field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public IntervalDayVector(String name, FieldType fieldType, BufferAllocator allocator) {
+ this(new Field(name, fieldType, null), allocator);
+ }
+
+ /**
+ * Instantiate a IntervalDayVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param field field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public IntervalDayVector(Field field, BufferAllocator allocator) {
+ super(field, allocator, TYPE_WIDTH);
+ reader = new IntervalDayReaderImpl(IntervalDayVector.this);
+ }
+
+ /**
+ * Get a reader that supports reading values from this vector.
+ *
+ * @return Field Reader for this vector
+ */
+ @Override
+ public FieldReader getReader() {
+ return reader;
+ }
+
+ /**
+ * Get minor type for this vector. The vector holds values belonging
+ * to a particular type.
+ *
+ * @return {@link org.apache.arrow.vector.types.Types.MinorType}
+ */
+ @Override
+ public MinorType getMinorType() {
+ return MinorType.INTERVALDAY;
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value retrieval methods |
+ | |
+ *----------------------------------------------------------------*/
+
+ /**
+ * Given a data buffer, get the number of days stored at a particular position
+ * in the vector.
+ *
+ * <p>This method should not be used externally.
+ *
+ * @param buffer data buffer
+ * @param index position of the element.
+ * @return day value stored at the index.
+ */
+ public static int getDays(final ArrowBuf buffer, final int index) {
+ return buffer.getInt((long) index * TYPE_WIDTH);
+ }
+
+ /**
+ * Given a data buffer, get the get the number of milliseconds stored at a particular position
+ * in the vector.
+ *
+ * <p>This method should not be used externally.
+ *
+ * @param buffer data buffer
+ * @param index position of the element.
+ * @return milliseconds value stored at the index.
+ */
+ public static int getMilliseconds(final ArrowBuf buffer, final int index) {
+ return buffer.getInt((long) index * TYPE_WIDTH + MILLISECOND_OFFSET);
+ }
+
+ /**
+ * Get the element at the given index from the vector.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public ArrowBuf get(int index) throws IllegalStateException {
+ if (NULL_CHECKING_ENABLED && isSet(index) == 0) {
+ return null;
+ }
+ return valueBuffer.slice((long) index * TYPE_WIDTH, TYPE_WIDTH);
+ }
+
+ /**
+ * Get the element at the given index from the vector and
+ * sets the state in holder. If element at given index
+ * is null, holder.isSet will be zero.
+ *
+ * @param index position of element
+ */
+ public void get(int index, NullableIntervalDayHolder holder) {
+ if (isSet(index) == 0) {
+ holder.isSet = 0;
+ return;
+ }
+ final long startIndex = (long) index * TYPE_WIDTH;
+ holder.isSet = 1;
+ holder.days = valueBuffer.getInt(startIndex);
+ holder.milliseconds = valueBuffer.getInt(startIndex + MILLISECOND_OFFSET);
+ }
+
+ /**
+ * Same as {@link #get(int)}.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public Duration getObject(int index) {
+ if (isSet(index) == 0) {
+ return null;
+ } else {
+ final long startIndex = (long) index * TYPE_WIDTH;
+ final int days = valueBuffer.getInt(startIndex);
+ final int milliseconds = valueBuffer.getInt(startIndex + MILLISECOND_OFFSET);
+ return Duration.ofDays(days).plusMillis(milliseconds);
+ }
+ }
+
+ /**
+ * Get the Interval value at a given index as a {@link StringBuilder} object.
+ *
+ * @param index position of the element
+ * @return String Builder object with Interval value as
+ * [days, hours, minutes, seconds, millis]
+ */
+ public StringBuilder getAsStringBuilder(int index) {
+ if (isSet(index) == 0) {
+ return null;
+ } else {
+ return getAsStringBuilderHelper(index);
+ }
+ }
+
+ private StringBuilder getAsStringBuilderHelper(int index) {
+ final long startIndex = (long) index * TYPE_WIDTH;
+
+ final int days = valueBuffer.getInt(startIndex);
+ int millis = valueBuffer.getInt(startIndex + MILLISECOND_OFFSET);
+
+ final int hours = millis / (org.apache.arrow.vector.util.DateUtility.hoursToMillis);
+ millis = millis % (org.apache.arrow.vector.util.DateUtility.hoursToMillis);
+
+ final int minutes = millis / (org.apache.arrow.vector.util.DateUtility.minutesToMillis);
+ millis = millis % (org.apache.arrow.vector.util.DateUtility.minutesToMillis);
+
+ final int seconds = millis / (org.apache.arrow.vector.util.DateUtility.secondsToMillis);
+ millis = millis % (org.apache.arrow.vector.util.DateUtility.secondsToMillis);
+
+ final String dayString = (Math.abs(days) == 1) ? " day " : " days ";
+
+ return (new StringBuilder()
+ .append(days).append(dayString)
+ .append(hours).append(":")
+ .append(minutes).append(":")
+ .append(seconds).append(".")
+ .append(millis));
+ }
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value setter methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Set the element at the given index to the given value.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void set(int index, ArrowBuf value) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ valueBuffer.setBytes((long) index * TYPE_WIDTH, value, 0, TYPE_WIDTH);
+ }
+
+ /**
+ * Set the element at the given index to the given value.
+ *
+ * @param index position of element
+ * @param days days for the interval
+ * @param milliseconds milliseconds for the interval
+ */
+ public void set(int index, int days, int milliseconds) {
+ final long offsetIndex = (long) index * TYPE_WIDTH;
+ BitVectorHelper.setBit(validityBuffer, index);
+ valueBuffer.setInt(offsetIndex, days);
+ valueBuffer.setInt((offsetIndex + MILLISECOND_OFFSET), milliseconds);
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ * If the value in holder is not indicated as set, element in the
+ * at the given index will be null.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void set(int index, NullableIntervalDayHolder holder) throws IllegalArgumentException {
+ if (holder.isSet < 0) {
+ throw new IllegalArgumentException();
+ } else if (holder.isSet > 0) {
+ set(index, holder.days, holder.milliseconds);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void set(int index, IntervalDayHolder holder) {
+ set(index, holder.days, holder.milliseconds);
+ }
+
+ /**
+ * Same as {@link #set(int, ArrowBuf)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void setSafe(int index, ArrowBuf value) {
+ handleSafe(index);
+ set(index, value);
+ }
+
+ /**
+ * Same as {@link #set(int, int, int)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param days days for the interval
+ * @param milliseconds milliseconds for the interval
+ */
+ public void setSafe(int index, int days, int milliseconds) {
+ handleSafe(index);
+ set(index, days, milliseconds);
+ }
+
+ /**
+ * Same as {@link #set(int, NullableIntervalDayHolder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void setSafe(int index, NullableIntervalDayHolder holder) throws IllegalArgumentException {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Same as {@link #set(int, IntervalDayHolder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void setSafe(int index, IntervalDayHolder holder) {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Store the given value at a particular position in the vector. isSet indicates
+ * whether the value is NULL or not.
+ *
+ * @param index position of the new value
+ * @param isSet 0 for NULL value, 1 otherwise
+ * @param days days component of interval
+ * @param milliseconds millisecond component of interval
+ */
+ public void set(int index, int isSet, int days, int milliseconds) {
+ if (isSet > 0) {
+ set(index, days, milliseconds);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Same as {@link #set(int, int, int, int)} except that it handles the case
+ * when index is greater than or equal to current value capacity of the
+ * vector.
+ *
+ * @param index position of the new value
+ * @param isSet 0 for NULL value, 1 otherwise
+ * @param days days component of interval
+ * @param milliseconds millisecond component of interval
+ */
+ public void setSafe(int index, int isSet, int days, int milliseconds) {
+ handleSafe(index);
+ set(index, isSet, days, milliseconds);
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector transfer |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Construct a TransferPair comprising of this and a target vector of
+ * the same type.
+ *
+ * @param ref name of the target vector
+ * @param allocator allocator for the target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
+ return new TransferImpl(ref, allocator);
+ }
+
+ /**
+ * Construct a TransferPair with a desired target vector of the same type.
+ *
+ * @param to target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair makeTransferPair(ValueVector to) {
+ return new TransferImpl((IntervalDayVector) to);
+ }
+
+ private class TransferImpl implements TransferPair {
+ IntervalDayVector to;
+
+ public TransferImpl(String ref, BufferAllocator allocator) {
+ to = new IntervalDayVector(ref, field.getFieldType(), allocator);
+ }
+
+ public TransferImpl(IntervalDayVector to) {
+ this.to = to;
+ }
+
+ @Override
+ public IntervalDayVector getTo() {
+ return to;
+ }
+
+ @Override
+ public void transfer() {
+ transferTo(to);
+ }
+
+ @Override
+ public void splitAndTransfer(int startIndex, int length) {
+ splitAndTransferTo(startIndex, length, to);
+ }
+
+ @Override
+ public void copyValueSafe(int fromIndex, int toIndex) {
+ to.copyFromSafe(fromIndex, toIndex, IntervalDayVector.this);
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/IntervalMonthDayNanoVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/IntervalMonthDayNanoVector.java
new file mode 100644
index 000000000..ba3a26a89
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/IntervalMonthDayNanoVector.java
@@ -0,0 +1,442 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED;
+
+import java.time.Duration;
+import java.time.Period;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.complex.impl.IntervalMonthDayNanoReaderImpl;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.holders.IntervalMonthDayNanoHolder;
+import org.apache.arrow.vector.holders.NullableIntervalMonthDayNanoHolder;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.TransferPair;
+
+/**
+ * IntervalMonthDayNanoVector implements a fixed width vector (16 bytes) of
+ * interval (month, days and nanoseconds) values which could be null.
+ * A validity buffer (bit vector) is maintained to track which elements in the
+ * vector are null.
+ *
+ * Month, day and nanoseconds are indepndent from one another and there
+ * is no specific limits imposed on their values.
+ */
+public final class IntervalMonthDayNanoVector extends BaseFixedWidthVector {
+ public static final byte TYPE_WIDTH = 16;
+ private static final byte DAY_OFFSET = 4;
+ private static final byte NANOSECOND_OFFSET = 8;
+ private final FieldReader reader;
+
+
+ /**
+ * Instantiate a IntervalMonthDayNanoVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param allocator allocator for memory management.
+ */
+ public IntervalMonthDayNanoVector(String name, BufferAllocator allocator) {
+ this(name, FieldType.nullable(MinorType.INTERVALDAY.getType()), allocator);
+ }
+
+ /**
+ * Instantiate a IntervalMonthDayNanoVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param fieldType type of Field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public IntervalMonthDayNanoVector(String name, FieldType fieldType, BufferAllocator allocator) {
+ this(new Field(name, fieldType, null), allocator);
+ }
+
+ /**
+ * Instantiate a IntervalMonthDayNanoVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param field field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public IntervalMonthDayNanoVector(Field field, BufferAllocator allocator) {
+ super(field, allocator, TYPE_WIDTH);
+ reader = new IntervalMonthDayNanoReaderImpl(IntervalMonthDayNanoVector.this);
+ }
+
+ /**
+ * Get a reader that supports reading values from this vector.
+ *
+ * @return Field Reader for this vector
+ */
+ @Override
+ public FieldReader getReader() {
+ return reader;
+ }
+
+ /**
+ * Get minor type for this vector. The vector holds values belonging
+ * to a particular type.
+ *
+ * @return {@link org.apache.arrow.vector.types.Types.MinorType}
+ */
+ @Override
+ public MinorType getMinorType() {
+ return MinorType.INTERVALMONTHDAYNANO;
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value retrieval methods |
+ | |
+ *----------------------------------------------------------------*/
+
+ /**
+ * Given a data buffer, get the number of months stored at a particular position
+ * in the vector.
+ *
+ * <p>This method should not be used externally.
+ *
+ * @param buffer data buffer
+ * @param index position of the element.
+ * @return day value stored at the index.
+ */
+ public static int getMonths(final ArrowBuf buffer, final int index) {
+ return buffer.getInt((long) index * TYPE_WIDTH);
+ }
+
+
+ /**
+ * Given a data buffer, get the number of days stored at a particular position
+ * in the vector.
+ *
+ * <p>This method should not be used externally.
+ *
+ * @param buffer data buffer
+ * @param index position of the element.
+ * @return day value stored at the index.
+ */
+ public static int getDays(final ArrowBuf buffer, final int index) {
+ return buffer.getInt((long) index * TYPE_WIDTH + DAY_OFFSET);
+ }
+
+ /**
+ * Given a data buffer, get the get the number of nanoseconds stored at a particular position
+ * in the vector.
+ *
+ * <p>This method should not be used externally.
+ *
+ * @param buffer data buffer
+ * @param index position of the element.
+ * @return nanoseconds value stored at the index.
+ */
+ public static long getNanoseconds(final ArrowBuf buffer, final int index) {
+ return buffer.getLong((long) index * TYPE_WIDTH + NANOSECOND_OFFSET);
+ }
+
+ /**
+ * Get the element at the given index from the vector.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public ArrowBuf get(int index) throws IllegalStateException {
+ if (NULL_CHECKING_ENABLED && isSet(index) == 0) {
+ return null;
+ }
+ return valueBuffer.slice((long) index * TYPE_WIDTH, TYPE_WIDTH);
+ }
+
+ /**
+ * Get the element at the given index from the vector and
+ * sets the state in holder. If element at given index
+ * is null, holder.isSet will be zero.
+ *
+ * @param index position of element
+ */
+ public void get(int index, NullableIntervalMonthDayNanoHolder holder) {
+ if (isSet(index) == 0) {
+ holder.isSet = 0;
+ return;
+ }
+ final long startIndex = (long) index * TYPE_WIDTH;
+ holder.isSet = 1;
+ holder.months = valueBuffer.getInt(startIndex);
+ holder.days = valueBuffer.getInt(startIndex + DAY_OFFSET);
+ holder.nanoseconds = valueBuffer.getLong(startIndex + NANOSECOND_OFFSET);
+ }
+
+ /**
+ * Same as {@link #get(int)}.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public PeriodDuration getObject(int index) {
+ if (isSet(index) == 0) {
+ return null;
+ } else {
+ final long startIndex = (long) index * TYPE_WIDTH;
+ final int months = valueBuffer.getInt(startIndex);
+ final int days = valueBuffer.getInt(startIndex + DAY_OFFSET);
+ final long nanoseconds = valueBuffer.getLong(startIndex + NANOSECOND_OFFSET);
+
+ return new PeriodDuration(Period.ofMonths(months).plusDays(days),
+ Duration.ofNanos(nanoseconds));
+ }
+ }
+
+ /**
+ * Get the Interval value at a given index as a {@link StringBuilder} object.
+ *
+ * @param index position of the element
+ * @return String Builder object with Interval value as
+ */
+ public StringBuilder getAsStringBuilder(int index) {
+ if (isSet(index) == 0) {
+ return null;
+ } else {
+ return getAsStringBuilderHelper(index);
+ }
+ }
+
+ private StringBuilder getAsStringBuilderHelper(int index) {
+ return new StringBuilder().append(getObject(index).toString()).append(" ");
+ }
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value setter methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Set the element at the given index to the given value.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void set(int index, ArrowBuf value) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ valueBuffer.setBytes((long) index * TYPE_WIDTH, value, 0, TYPE_WIDTH);
+ }
+
+ /**
+ * Set the element at the given index to the given value.
+ *
+ * @param index position of element
+ * @param months months component of interval
+ * @param days days component of interval
+ * @param nanoseconds nanosecond component of interval
+ */
+ public void set(int index, int months, int days, long nanoseconds) {
+ final long offsetIndex = (long) index * TYPE_WIDTH;
+ BitVectorHelper.setBit(validityBuffer, index);
+ valueBuffer.setInt(offsetIndex, months);
+ valueBuffer.setInt(offsetIndex + DAY_OFFSET, days);
+ valueBuffer.setLong((offsetIndex + NANOSECOND_OFFSET), nanoseconds);
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ * If the value in holder is not indicated as set, element
+ * at the given index will be null.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void set(int index, NullableIntervalMonthDayNanoHolder holder) throws IllegalArgumentException {
+ if (holder.isSet < 0) {
+ throw new IllegalArgumentException();
+ } else if (holder.isSet > 0) {
+ set(index, holder.months, holder.days, holder.nanoseconds);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void set(int index, IntervalMonthDayNanoHolder holder) {
+ set(index, holder.months, holder.days, holder.nanoseconds);
+ }
+
+ /**
+ * Same as {@link #set(int, ArrowBuf)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void setSafe(int index, ArrowBuf value) {
+ handleSafe(index);
+ set(index, value);
+ }
+
+ /**
+ * Same as {@link #set(int, int, int, long)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param months months for the interval
+ * @param days days for the interval
+ * @param nanoseconds nanoseconds for the interval
+ */
+ public void setSafe(int index, int months, int days, long nanoseconds) {
+ handleSafe(index);
+ set(index, months, days, nanoseconds);
+ }
+
+ /**
+ * Same as {@link #set(int, NullableIntervalMonthDayNanoHolder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void setSafe(int index, NullableIntervalMonthDayNanoHolder holder) throws IllegalArgumentException {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Same as {@link #set(int, IntervalMonthDayNanoHolder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void setSafe(int index, IntervalMonthDayNanoHolder holder) {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Store the given value at a particular position in the vector. isSet indicates
+ * whether the value is NULL or not.
+ *
+ * @param index position of the new value
+ * @param isSet 0 for NULL value, 1 otherwise
+ * @param months months component of interval
+ * @param days days component of interval
+ * @param nanoseconds nanosecond component of interval
+ */
+ public void set(int index, int isSet, int months, int days, long nanoseconds) {
+ if (isSet > 0) {
+ set(index, months, days, nanoseconds);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Same as {@link #set(int, int, int, int, long)} except that it handles the case
+ * when index is greater than or equal to current value capacity of the
+ * vector.
+ *
+ * @param index position of the new value
+ * @param isSet 0 for NULL value, 1 otherwise
+ * @param months months component of interval
+ * @param days days component of interval
+ * @param nanoseconds nanosecond component of interval
+ */
+ public void setSafe(int index, int isSet, int months, int days,
+ long nanoseconds) {
+ handleSafe(index);
+ set(index, isSet, months, days, nanoseconds);
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector transfer |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Construct a TransferPair comprising of this and a target vector of
+ * the same type.
+ *
+ * @param ref name of the target vector
+ * @param allocator allocator for the target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
+ return new TransferImpl(ref, allocator);
+ }
+
+ /**
+ * Construct a TransferPair with a desired target vector of the same type.
+ *
+ * @param to target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair makeTransferPair(ValueVector to) {
+ return new TransferImpl((IntervalMonthDayNanoVector) to);
+ }
+
+ private class TransferImpl implements TransferPair {
+ IntervalMonthDayNanoVector to;
+
+ public TransferImpl(String ref, BufferAllocator allocator) {
+ to = new IntervalMonthDayNanoVector(ref, field.getFieldType(), allocator);
+ }
+
+ public TransferImpl(IntervalMonthDayNanoVector to) {
+ this.to = to;
+ }
+
+ @Override
+ public IntervalMonthDayNanoVector getTo() {
+ return to;
+ }
+
+ @Override
+ public void transfer() {
+ transferTo(to);
+ }
+
+ @Override
+ public void splitAndTransfer(int startIndex, int length) {
+ splitAndTransferTo(startIndex, length, to);
+ }
+
+ @Override
+ public void copyValueSafe(int fromIndex, int toIndex) {
+ to.copyFromSafe(fromIndex, toIndex, IntervalMonthDayNanoVector.this);
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/IntervalYearVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/IntervalYearVector.java
new file mode 100644
index 000000000..7ddfe6b78
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/IntervalYearVector.java
@@ -0,0 +1,382 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED;
+
+import java.time.Period;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.complex.impl.IntervalYearReaderImpl;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.holders.IntervalYearHolder;
+import org.apache.arrow.vector.holders.NullableIntervalYearHolder;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.TransferPair;
+
+/**
+ * IntervalYearVector implements a fixed width (4 bytes) vector of
+ * interval (years and months) values which could be null. A validity buffer
+ * (bit vector) is maintained to track which elements in the vector are null.
+ */
+public final class IntervalYearVector extends BaseFixedWidthVector {
+ public static final byte TYPE_WIDTH = 4;
+ private final FieldReader reader;
+
+ /**
+ * Instantiate a IntervalYearVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param allocator allocator for memory management.
+ */
+ public IntervalYearVector(String name, BufferAllocator allocator) {
+ this(name, FieldType.nullable(MinorType.INTERVALYEAR.getType()), allocator);
+ }
+
+ /**
+ * Instantiate a IntervalYearVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param fieldType type of Field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public IntervalYearVector(String name, FieldType fieldType, BufferAllocator allocator) {
+ this(new Field(name, fieldType, null), allocator);
+ }
+
+ /**
+ * Instantiate a IntervalYearVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param field field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public IntervalYearVector(Field field, BufferAllocator allocator) {
+ super(field, allocator, TYPE_WIDTH);
+ reader = new IntervalYearReaderImpl(IntervalYearVector.this);
+ }
+
+ /**
+ * Get a reader that supports reading values from this vector.
+ *
+ * @return Field Reader for this vector
+ */
+ @Override
+ public FieldReader getReader() {
+ return reader;
+ }
+
+ /**
+ * Get minor type for this vector. The vector holds values belonging
+ * to a particular type.
+ *
+ * @return {@link org.apache.arrow.vector.types.Types.MinorType}
+ */
+ @Override
+ public MinorType getMinorType() {
+ return MinorType.INTERVALYEAR;
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value retrieval methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Given a data buffer, get the value stored at a particular position
+ * in the vector.
+ *
+ * <p>This method should not be used externally.
+ *
+ * @param buffer data buffer
+ * @param index position of the element.
+ * @return value stored at the index.
+ */
+ public static int getTotalMonths(final ArrowBuf buffer, final int index) {
+ return buffer.getInt((long) index * TYPE_WIDTH);
+ }
+
+ /**
+ * Get the element at the given index from the vector.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public int get(int index) throws IllegalStateException {
+ if (NULL_CHECKING_ENABLED && isSet(index) == 0) {
+ throw new IllegalStateException("Value at index is null");
+ }
+ return valueBuffer.getInt((long) index * TYPE_WIDTH);
+ }
+
+ /**
+ * Get the element at the given index from the vector and
+ * sets the state in holder. If element at given index
+ * is null, holder.isSet will be zero.
+ *
+ * @param index position of element
+ */
+ public void get(int index, NullableIntervalYearHolder holder) {
+ if (isSet(index) == 0) {
+ holder.isSet = 0;
+ return;
+ }
+ holder.isSet = 1;
+ holder.value = valueBuffer.getInt((long) index * TYPE_WIDTH);
+ }
+
+ /**
+ * Same as {@link #get(int)}.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public Period getObject(int index) {
+ if (isSet(index) == 0) {
+ return null;
+ } else {
+ final int interval = valueBuffer.getInt((long) index * TYPE_WIDTH);
+ // TODO: verify interval is in months
+ return Period.ofMonths(interval);
+ }
+ }
+
+ /**
+ * Get the Interval value at a given index as a {@link StringBuilder} object.
+ *
+ * @param index position of the element
+ * @return String Builder object with Interval value as
+ * [years, months]
+ */
+ public StringBuilder getAsStringBuilder(int index) {
+ if (isSet(index) == 0) {
+ return null;
+ } else {
+ return getAsStringBuilderHelper(index);
+ }
+ }
+
+ private StringBuilder getAsStringBuilderHelper(int index) {
+ int value = valueBuffer.getInt((long) index * TYPE_WIDTH);
+
+ final int years = (value / org.apache.arrow.vector.util.DateUtility.yearsToMonths);
+ final int months = (value % org.apache.arrow.vector.util.DateUtility.yearsToMonths);
+
+ final String yearString = (Math.abs(years) == 1) ? " year " : " years ";
+ final String monthString = (Math.abs(months) == 1) ? " month " : " months ";
+
+ return (new StringBuilder()
+ .append(years)
+ .append(yearString)
+ .append(months)
+ .append(monthString));
+ }
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value setter methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ private void setValue(int index, int value) {
+ valueBuffer.setInt((long) index * TYPE_WIDTH, value);
+ }
+
+ /**
+ * Set the element at the given index to the given value.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void set(int index, int value) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, value);
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ * If the value in holder is not indicated as set, element in the
+ * at the given index will be null.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void set(int index, NullableIntervalYearHolder holder) throws IllegalArgumentException {
+ if (holder.isSet < 0) {
+ throw new IllegalArgumentException();
+ } else if (holder.isSet > 0) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, holder.value);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void set(int index, IntervalYearHolder holder) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, holder.value);
+ }
+
+ /**
+ * Same as {@link #set(int, int)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void setSafe(int index, int value) {
+ handleSafe(index);
+ set(index, value);
+ }
+
+ /**
+ * Same as {@link #set(int, NullableIntervalYearHolder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void setSafe(int index, NullableIntervalYearHolder holder) throws IllegalArgumentException {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Same as {@link #set(int, IntervalYearHolder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void setSafe(int index, IntervalYearHolder holder) {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Store the given value at a particular position in the vector. isSet indicates
+ * whether the value is NULL or not.
+ *
+ * @param index position of the new value
+ * @param isSet 0 for NULL value, 1 otherwise
+ * @param value element value
+ */
+ public void set(int index, int isSet, int value) {
+ if (isSet > 0) {
+ set(index, value);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Same as {@link #set(int, int, int)} except that it handles the case
+ * when index is greater than or equal to current value capacity of the
+ * vector.
+ *
+ * @param index position of the new value
+ * @param isSet 0 for NULL value, 1 otherwise
+ * @param value element value
+ */
+ public void setSafe(int index, int isSet, int value) {
+ handleSafe(index);
+ set(index, isSet, value);
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector transfer |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Construct a TransferPair comprising of this and a target vector of
+ * the same type.
+ *
+ * @param ref name of the target vector
+ * @param allocator allocator for the target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
+ return new TransferImpl(ref, allocator);
+ }
+
+ /**
+ * Construct a TransferPair with a desired target vector of the same type.
+ *
+ * @param to target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair makeTransferPair(ValueVector to) {
+ return new TransferImpl((IntervalYearVector) to);
+ }
+
+ private class TransferImpl implements TransferPair {
+ IntervalYearVector to;
+
+ public TransferImpl(String ref, BufferAllocator allocator) {
+ to = new IntervalYearVector(ref, field.getFieldType(), allocator);
+ }
+
+ public TransferImpl(IntervalYearVector to) {
+ this.to = to;
+ }
+
+ @Override
+ public IntervalYearVector getTo() {
+ return to;
+ }
+
+ @Override
+ public void transfer() {
+ transferTo(to);
+ }
+
+ @Override
+ public void splitAndTransfer(int startIndex, int length) {
+ splitAndTransferTo(startIndex, length, to);
+ }
+
+ @Override
+ public void copyValueSafe(int fromIndex, int toIndex) {
+ to.copyFromSafe(fromIndex, toIndex, IntervalYearVector.this);
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/LargeVarBinaryVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/LargeVarBinaryVector.java
new file mode 100644
index 000000000..e9d60b38e
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/LargeVarBinaryVector.java
@@ -0,0 +1,305 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.complex.impl.LargeVarBinaryReaderImpl;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.holders.LargeVarBinaryHolder;
+import org.apache.arrow.vector.holders.NullableLargeVarBinaryHolder;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.TransferPair;
+
+/**
+ * LargeVarBinaryVector implements a large variable width vector of binary
+ * values which could be NULL. A validity buffer (bit vector) is maintained
+ * to track which elements in the vector are null.
+ * The size of the underlying buffer can be over 2GB.
+ */
+public final class LargeVarBinaryVector extends BaseLargeVariableWidthVector {
+ private final FieldReader reader;
+
+ /**
+ * Instantiate a LargeVarBinaryVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param allocator allocator for memory management.
+ */
+ public LargeVarBinaryVector(String name, BufferAllocator allocator) {
+ this(name, FieldType.nullable(MinorType.LARGEVARBINARY.getType()), allocator);
+ }
+
+ /**
+ * Instantiate a LargeVarBinaryVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param fieldType type of Field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public LargeVarBinaryVector(String name, FieldType fieldType, BufferAllocator allocator) {
+ this(new Field(name, fieldType, null), allocator);
+ }
+
+ /**
+ * Instantiate a LargeVarBinaryVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param field field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public LargeVarBinaryVector(Field field, BufferAllocator allocator) {
+ super(field, allocator);
+ reader = new LargeVarBinaryReaderImpl(LargeVarBinaryVector.this);
+ }
+
+ /**
+ * Get a reader that supports reading values from this vector.
+ *
+ * @return Field Reader for this vector
+ */
+ @Override
+ public FieldReader getReader() {
+ return reader;
+ }
+
+ /**
+ * Get minor type for this vector. The vector holds values belonging
+ * to a particular type.
+ *
+ * @return {@link org.apache.arrow.vector.types.Types.MinorType}
+ */
+ @Override
+ public MinorType getMinorType() {
+ return MinorType.LARGEVARBINARY;
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value retrieval methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Get the variable length element at specified index as byte array.
+ *
+ * @param index position of element to get
+ * @return array of bytes for non-null element, null otherwise
+ */
+ public byte[] get(int index) {
+ assert index >= 0;
+ if (isSet(index) == 0) {
+ return null;
+ }
+ final long startOffset = getStartOffset(index);
+ final int dataLength =
+ (int) (offsetBuffer.getLong((long) (index + 1) * OFFSET_WIDTH) - startOffset);
+ final byte[] result = new byte[dataLength];
+ valueBuffer.getBytes(startOffset, result, 0, dataLength);
+ return result;
+ }
+
+ /**
+ * Get the variable length element at specified index as Text.
+ *
+ * @param index position of element to get
+ * @return byte array for non-null element, null otherwise
+ */
+ public byte[] getObject(int index) {
+ return get(index);
+ }
+
+ /**
+ * Get the variable length element at specified index and sets the state
+ * in provided holder.
+ *
+ * @param index position of element to get
+ * @param holder data holder to be populated by this function
+ */
+ public void get(int index, NullableLargeVarBinaryHolder holder) {
+ assert index >= 0;
+ if (isSet(index) == 0) {
+ holder.isSet = 0;
+ return;
+ }
+ holder.isSet = 1;
+ holder.start = getStartOffset(index);
+ holder.end = offsetBuffer.getLong((long) (index + 1) * OFFSET_WIDTH);
+ holder.buffer = valueBuffer;
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value setter methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Set the variable length element at the specified index to the data
+ * buffer supplied in the holder.
+ *
+ * @param index position of the element to set
+ * @param holder holder that carries data buffer.
+ */
+ public void set(int index, LargeVarBinaryHolder holder) {
+ assert index >= 0;
+ fillHoles(index);
+ BitVectorHelper.setBit(validityBuffer, index);
+ final int dataLength = (int) (holder.end - holder.start);
+ final long startOffset = getStartOffset(index);
+ offsetBuffer.setLong((long) (index + 1) * OFFSET_WIDTH, startOffset + dataLength);
+ valueBuffer.setBytes(startOffset, holder.buffer, holder.start, dataLength);
+ lastSet = index;
+ }
+
+ /**
+ * Same as {@link #set(int, LargeVarBinaryHolder)} except that it handles the
+ * case where index and length of new element are beyond the existing
+ * capacity of the vector.
+ *
+ * @param index position of the element to set
+ * @param holder holder that carries data buffer.
+ */
+ public void setSafe(int index, LargeVarBinaryHolder holder) {
+ assert index >= 0;
+ final int dataLength = (int) (holder.end - holder.start);
+ handleSafe(index, dataLength);
+ fillHoles(index);
+ BitVectorHelper.setBit(validityBuffer, index);
+ final long startOffset = getStartOffset(index);
+ offsetBuffer.setLong((long) (index + 1) * OFFSET_WIDTH, startOffset + dataLength);
+ valueBuffer.setBytes(startOffset, holder.buffer, holder.start, dataLength);
+ lastSet = index;
+ }
+
+ /**
+ * Set the variable length element at the specified index to the data
+ * buffer supplied in the holder.
+ *
+ * @param index position of the element to set
+ * @param holder holder that carries data buffer.
+ */
+ public void set(int index, NullableLargeVarBinaryHolder holder) {
+ assert index >= 0;
+ fillHoles(index);
+ BitVectorHelper.setValidityBit(validityBuffer, index, holder.isSet);
+ final long startOffset = getStartOffset(index);
+ if (holder.isSet != 0) {
+ final int dataLength = (int) (holder.end - holder.start);
+ offsetBuffer.setLong((long) (index + 1) * OFFSET_WIDTH, startOffset + dataLength);
+ valueBuffer.setBytes(startOffset, holder.buffer, holder.start, dataLength);
+ } else {
+ offsetBuffer.setLong((long) (index + 1) * OFFSET_WIDTH, startOffset);
+ }
+ lastSet = index;
+ }
+
+ /**
+ * Same as {@link #set(int, NullableLargeVarBinaryHolder)} except that it handles the
+ * case where index and length of new element are beyond the existing
+ * capacity of the vector.
+ *
+ * @param index position of the element to set
+ * @param holder holder that carries data buffer.
+ */
+ public void setSafe(int index, NullableLargeVarBinaryHolder holder) {
+ assert index >= 0;
+ if (holder.isSet != 0) {
+ final int dataLength = (int) (holder.end - holder.start);
+ handleSafe(index, dataLength);
+ fillHoles(index);
+ final long startOffset = getStartOffset(index);
+ offsetBuffer.setLong((long) (index + 1) * OFFSET_WIDTH, startOffset + dataLength);
+ valueBuffer.setBytes(startOffset, holder.buffer, holder.start, dataLength);
+ } else {
+ fillEmpties(index + 1);
+ }
+ BitVectorHelper.setValidityBit(validityBuffer, index, holder.isSet);
+ lastSet = index;
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector transfer |
+ | |
+ *----------------------------------------------------------------*/
+
+ /**
+ * Construct a TransferPair comprising of this and a target vector of
+ * the same type.
+ *
+ * @param ref name of the target vector
+ * @param allocator allocator for the target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
+ return new TransferImpl(ref, allocator);
+ }
+
+ /**
+ * Construct a TransferPair with a desired target vector of the same type.
+ *
+ * @param to target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair makeTransferPair(ValueVector to) {
+ return new TransferImpl((LargeVarBinaryVector) to);
+ }
+
+ private class TransferImpl implements TransferPair {
+ LargeVarBinaryVector to;
+
+ public TransferImpl(String ref, BufferAllocator allocator) {
+ to = new LargeVarBinaryVector(ref, field.getFieldType(), allocator);
+ }
+
+ public TransferImpl(LargeVarBinaryVector to) {
+ this.to = to;
+ }
+
+ @Override
+ public LargeVarBinaryVector getTo() {
+ return to;
+ }
+
+ @Override
+ public void transfer() {
+ transferTo(to);
+ }
+
+ @Override
+ public void splitAndTransfer(int startIndex, int length) {
+ splitAndTransferTo(startIndex, length, to);
+ }
+
+ @Override
+ public void copyValueSafe(int fromIndex, int toIndex) {
+ to.copyFromSafe(fromIndex, toIndex, LargeVarBinaryVector.this);
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/LargeVarCharVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/LargeVarCharVector.java
new file mode 100644
index 000000000..fd2057260
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/LargeVarCharVector.java
@@ -0,0 +1,331 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.complex.impl.LargeVarCharReaderImpl;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.holders.LargeVarCharHolder;
+import org.apache.arrow.vector.holders.NullableLargeVarCharHolder;
+import org.apache.arrow.vector.types.Types;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.Text;
+import org.apache.arrow.vector.util.TransferPair;
+
+/**
+ * LargeVarCharVector implements a variable width vector of VARCHAR
+ * values which could be NULL. A validity buffer (bit vector) is maintained
+ * to track which elements in the vector are null.
+ * <p>
+ * The offset width of this vector is 8, so the underlying buffer can be larger than 2GB.
+ * </p>
+ */
+public final class LargeVarCharVector extends BaseLargeVariableWidthVector {
+ private final FieldReader reader;
+
+ /**
+ * Instantiate a LargeVarCharVector. This doesn't allocate any memory for
+ * the data in vector.
+ * @param name name of the vector
+ * @param allocator allocator for memory management.
+ */
+ public LargeVarCharVector(String name, BufferAllocator allocator) {
+ this(name, FieldType.nullable(Types.MinorType.LARGEVARCHAR.getType()), allocator);
+ }
+
+ /**
+ * Instantiate a LargeVarCharVector. This doesn't allocate any memory for
+ * the data in vector.
+ * @param name name of the vector
+ * @param fieldType type of Field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public LargeVarCharVector(String name, FieldType fieldType, BufferAllocator allocator) {
+ this(new Field(name, fieldType, null), allocator);
+ }
+
+ /**
+ * Instantiate a LargeVarCharVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param field field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public LargeVarCharVector(Field field, BufferAllocator allocator) {
+ super(field, allocator);
+ reader = new LargeVarCharReaderImpl(LargeVarCharVector.this);
+ }
+
+ /**
+ * Get a reader that supports reading values from this vector.
+ * @return Field Reader for this vector
+ */
+ @Override
+ public FieldReader getReader() {
+ return reader;
+ }
+
+ /**
+ * Get minor type for this vector. The vector holds values belonging
+ * to a particular type.
+ * @return {@link org.apache.arrow.vector.types.Types.MinorType}
+ */
+ @Override
+ public Types.MinorType getMinorType() {
+ return Types.MinorType.LARGEVARCHAR;
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value retrieval methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Get the variable length element at specified index as byte array.
+ *
+ * @param index position of element to get
+ * @return array of bytes for non-null element, null otherwise
+ */
+ public byte[] get(int index) {
+ assert index >= 0;
+ if (isSet(index) == 0) {
+ return null;
+ }
+ final long startOffset = getStartOffset(index);
+ final int dataLength =
+ (int) (offsetBuffer.getLong((long) (index + 1) * OFFSET_WIDTH) - startOffset);
+ final byte[] result = new byte[dataLength];
+ valueBuffer.getBytes(startOffset, result, 0, dataLength);
+ return result;
+ }
+
+ /**
+ * Get the variable length element at specified index as Text.
+ *
+ * @param index position of element to get
+ * @return Text object for non-null element, null otherwise
+ */
+ public Text getObject(int index) {
+ byte[] b = get(index);
+ if (b == null) {
+ return null;
+ } else {
+ return new Text(b);
+ }
+ }
+
+ /**
+ * Get the variable length element at specified index and sets the state
+ * in provided holder.
+ *
+ * @param index position of element to get
+ * @param holder data holder to be populated by this function
+ */
+ public void get(int index, NullableLargeVarCharHolder holder) {
+ assert index >= 0;
+ if (isSet(index) == 0) {
+ holder.isSet = 0;
+ return;
+ }
+ holder.isSet = 1;
+ holder.start = getStartOffset(index);
+ holder.end = offsetBuffer.getLong((long) (index + 1) * OFFSET_WIDTH);
+ holder.buffer = valueBuffer;
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value setter methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Set the variable length element at the specified index to the data
+ * buffer supplied in the holder.
+ *
+ * @param index position of the element to set
+ * @param holder holder that carries data buffer.
+ */
+ public void set(int index, LargeVarCharHolder holder) {
+ assert index >= 0;
+ fillHoles(index);
+ BitVectorHelper.setBit(validityBuffer, index);
+ final int dataLength = (int) (holder.end - holder.start);
+ final long startOffset = getStartOffset(index);
+ offsetBuffer.setLong((long) (index + 1) * OFFSET_WIDTH, startOffset + dataLength);
+ valueBuffer.setBytes(startOffset, holder.buffer, holder.start, dataLength);
+ lastSet = index;
+ }
+
+ /**
+ * Same as {@link #set(int, LargeVarCharHolder)} except that it handles the
+ * case where index and length of new element are beyond the existing
+ * capacity of the vector.
+ *
+ * @param index position of the element to set
+ * @param holder holder that carries data buffer.
+ */
+ public void setSafe(int index, LargeVarCharHolder holder) {
+ assert index >= 0;
+ final int dataLength = (int) (holder.end - holder.start);
+ handleSafe(index, dataLength);
+ fillHoles(index);
+ BitVectorHelper.setBit(validityBuffer, index);
+ final long startOffset = getStartOffset(index);
+ offsetBuffer.setLong((long) (index + 1) * OFFSET_WIDTH, startOffset + dataLength);
+ valueBuffer.setBytes(startOffset, holder.buffer, holder.start, dataLength);
+ lastSet = index;
+ }
+
+ /**
+ * Set the variable length element at the specified index to the data
+ * buffer supplied in the holder.
+ *
+ * @param index position of the element to set
+ * @param holder holder that carries data buffer.
+ */
+ public void set(int index, NullableLargeVarCharHolder holder) {
+ assert index >= 0;
+ fillHoles(index);
+ BitVectorHelper.setValidityBit(validityBuffer, index, holder.isSet);
+ final long startOffset = getStartOffset(index);
+ if (holder.isSet != 0) {
+ final int dataLength = (int) (holder.end - holder.start);
+ offsetBuffer.setLong((long) (index + 1) * OFFSET_WIDTH, startOffset + dataLength);
+ valueBuffer.setBytes(startOffset, holder.buffer, holder.start, dataLength);
+ } else {
+ offsetBuffer.setLong((long) (index + 1) * OFFSET_WIDTH, startOffset);
+ }
+ lastSet = index;
+ }
+
+ /**
+ * Same as {@link #set(int, NullableLargeVarCharHolder)} except that it handles the
+ * case where index and length of new element are beyond the existing
+ * capacity of the vector.
+ *
+ * @param index position of the element to set
+ * @param holder holder that carries data buffer.
+ */
+ public void setSafe(int index, NullableLargeVarCharHolder holder) {
+ assert index >= 0;
+ if (holder.isSet != 0) {
+ final int dataLength = (int) (holder.end - holder.start);
+ handleSafe(index, dataLength);
+ fillHoles(index);
+ final long startOffset = getStartOffset(index);
+ offsetBuffer.setLong((long) (index + 1) * OFFSET_WIDTH, startOffset + dataLength);
+ valueBuffer.setBytes(startOffset, holder.buffer, holder.start, dataLength);
+ } else {
+ fillHoles(index + 1);
+ }
+ BitVectorHelper.setValidityBit(validityBuffer, index, holder.isSet);
+ lastSet = index;
+ }
+
+ /**
+ * Set the variable length element at the specified index to the
+ * content in supplied Text.
+ *
+ * @param index position of the element to set
+ * @param text Text object with data
+ */
+ public void set(int index, Text text) {
+ set(index, text.getBytes(), 0, text.getLength());
+ }
+
+ /**
+ * Same as {@link #set(int, NullableLargeVarCharHolder)} except that it handles the
+ * case where index and length of new element are beyond the existing
+ * capacity of the vector.
+ *
+ * @param index position of the element to set.
+ * @param text Text object with data
+ */
+ public void setSafe(int index, Text text) {
+ setSafe(index, text.getBytes(), 0, text.getLength());
+ }
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector transfer |
+ | |
+ *----------------------------------------------------------------*/
+
+ /**
+ * Construct a TransferPair comprising of this and a target vector of
+ * the same type.
+ *
+ * @param ref name of the target vector
+ * @param allocator allocator for the target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
+ return new LargeVarCharVector.TransferImpl(ref, allocator);
+ }
+
+ /**
+ * Construct a TransferPair with a desired target vector of the same type.
+ *
+ * @param to target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair makeTransferPair(ValueVector to) {
+ return new LargeVarCharVector.TransferImpl((LargeVarCharVector) to);
+ }
+
+ private class TransferImpl implements TransferPair {
+ LargeVarCharVector to;
+
+ public TransferImpl(String ref, BufferAllocator allocator) {
+ to = new LargeVarCharVector(ref, field.getFieldType(), allocator);
+ }
+
+ public TransferImpl(LargeVarCharVector to) {
+ this.to = to;
+ }
+
+ @Override
+ public LargeVarCharVector getTo() {
+ return to;
+ }
+
+ @Override
+ public void transfer() {
+ transferTo(to);
+ }
+
+ @Override
+ public void splitAndTransfer(int startIndex, int length) {
+ splitAndTransferTo(startIndex, length, to);
+ }
+
+ @Override
+ public void copyValueSafe(int fromIndex, int toIndex) {
+ to.copyFromSafe(fromIndex, toIndex, LargeVarCharVector.this);
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/NullCheckingForGet.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/NullCheckingForGet.java
new file mode 100644
index 000000000..9961c72a4
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/NullCheckingForGet.java
@@ -0,0 +1,84 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+/**
+ * Configuration class to determine if null checking should be enabled or disabled for the "get" methods.
+ * For example, the get method of class org.apache.arrow.vector.Float8Vector first checks if the value
+ * at the given index is null, before retrieving the value. This configuration will turn on and off such checks.
+ *
+ * <p>Null checking is on by default. You can disable it by setting either the system property or the
+ * environmental variable to "false". The system property is named "arrow.enable_null_check_for_get" and
+ * the environmental variable is named "ARROW_ENABLE_NULL_CHECK_FOR_GET".
+ * When both the system property and the environmental variable are set, the system property takes precedence.
+ * </p>
+ * <p>
+ * Disabling null-checking in the "get" methods may lead to performance improvements.
+ * For example, suppose we have the following micro-benchmark:
+ * </p>
+ * <p>
+ * <pre>{@code
+ *
+ * Float8Vector vector = ...
+ *
+ * public void test() {
+ * sum = 0;
+ * for (int i = 0; i < 1024; i++) {
+ * vector.set(i, i + 10.0);
+ * safeSum += vector.get(i);
+ * }
+ * }
+ *
+ * }</pre>
+ * </p>
+ * <p>
+ * Performance evaluations of the micro-benchmark with the JMH framework reveal that, disabling null checking
+ * has the following effects:
+ * 1. The amounts of byte code and assembly code generated by JIT are both smaller.
+ * 2. The performance improves by about 30% (2.819 ± 0.005 us/op vs. 4.069 ± 0.004 us/op).
+ * </p>
+ * <p>
+ * Therefore, for scenarios where the user can be sure that the null-checking is unnecessary,
+ * it is beneficial to disable it with this configuration.
+ * </p>
+ */
+public class NullCheckingForGet {
+
+ /**
+ * The flag to indicate if null checking is enabled for "get" methods.
+ */
+ public static final boolean NULL_CHECKING_ENABLED;
+
+ static {
+ String envProperty = System.getenv("ARROW_ENABLE_NULL_CHECK_FOR_GET");
+ String sysProperty = System.getProperty("arrow.enable_null_check_for_get");
+
+ // The system property has a higher priority than the environmental variable.
+ String flagValue = sysProperty;
+ if (flagValue == null) {
+ flagValue = envProperty;
+ }
+
+ // The flag is set to false only if the system property/environmental
+ // variable is explicitly set to "false".
+ NULL_CHECKING_ENABLED = !"false".equals(flagValue);
+ }
+
+ private NullCheckingForGet() {
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/NullVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/NullVector.java
new file mode 100644
index 000000000..1010d8d47
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/NullVector.java
@@ -0,0 +1,338 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.apache.arrow.vector.complex.BaseRepeatedValueVector.DATA_VECTOR_NAME;
+
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.List;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.OutOfMemoryException;
+import org.apache.arrow.memory.util.hash.ArrowBufHasher;
+import org.apache.arrow.util.Preconditions;
+import org.apache.arrow.vector.compare.VectorVisitor;
+import org.apache.arrow.vector.complex.impl.NullReader;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.ipc.message.ArrowFieldNode;
+import org.apache.arrow.vector.types.Types;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.CallBack;
+import org.apache.arrow.vector.util.TransferPair;
+
+/**
+ * A null type vector.
+ */
+public class NullVector implements FieldVector {
+
+ private int valueCount;
+
+ protected Field field;
+
+ /**
+ * Instantiate a NullVector.
+ *
+ * @param name name of the vector
+ */
+ public NullVector(String name) {
+ this(name, FieldType.nullable(Types.MinorType.NULL.getType()));
+ }
+
+ /**
+ * Instantiate a NullVector.
+ *
+ * @param name name of the vector
+ * @param fieldType type of Field materialized by this vector.
+ */
+ public NullVector(String name, FieldType fieldType) {
+ this(new Field(name, fieldType, null));
+ }
+
+ /**
+ * Instantiate a NullVector.
+ *
+ * @param field field materialized by this vector.
+ */
+ public NullVector(Field field) {
+ this.valueCount = 0;
+ this.field = field;
+ }
+
+ @Deprecated
+ public NullVector() {
+ this(new Field(DATA_VECTOR_NAME, FieldType.nullable(new ArrowType.Null()), null));
+ }
+
+ @Override
+ public void close() {
+ }
+
+ @Override
+ public void clear() {
+ }
+
+ @Override
+ public void reset() {
+ }
+
+ @Override
+ public Field getField() {
+ return field;
+ }
+
+ @Override
+ public Types.MinorType getMinorType() {
+ return Types.MinorType.NULL;
+ }
+
+ @Override
+ public TransferPair getTransferPair(BufferAllocator allocator) {
+ return getTransferPair(null, allocator);
+ }
+
+ @Override
+ public Iterator<ValueVector> iterator() {
+ return Collections.emptyIterator();
+ }
+
+ @Override
+ public int getBufferSize() {
+ return 0;
+ }
+
+ @Override
+ public int getBufferSizeFor(final int valueCount) {
+ return 0;
+ }
+
+ @Override
+ public ArrowBuf[] getBuffers(boolean clear) {
+ return new ArrowBuf[0];
+ }
+
+ @Override
+ public void allocateNew() throws OutOfMemoryException {
+ allocateNewSafe();
+ }
+
+ @Override
+ public boolean allocateNewSafe() {
+ return true;
+ }
+
+ @Override
+ public void reAlloc() {
+ }
+
+ @Override
+ public BufferAllocator getAllocator() {
+ throw new UnsupportedOperationException("Tried to get allocator from NullVector");
+ }
+
+ @Override
+ public void setInitialCapacity(int numRecords) {
+ }
+
+ @Override
+ public int getValueCapacity() {
+ return this.valueCount;
+ }
+
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
+ return new TransferImpl();
+ }
+
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator, CallBack callBack) {
+ return getTransferPair(ref, allocator);
+ }
+
+ @Override
+ public TransferPair makeTransferPair(ValueVector target) {
+ return new TransferImpl((NullVector) target);
+ }
+
+ @Override
+ public FieldReader getReader() {
+ return NullReader.INSTANCE;
+ }
+
+ @Override
+ public void initializeChildrenFromFields(List<Field> children) {
+ if (!children.isEmpty()) {
+ throw new IllegalArgumentException("Null vector has no children");
+ }
+ }
+
+ @Override
+ public List<FieldVector> getChildrenFromFields() {
+ return Collections.emptyList();
+ }
+
+ @Override
+ public void loadFieldBuffers(ArrowFieldNode fieldNode, List<ArrowBuf> ownBuffers) {
+ Preconditions.checkArgument(ownBuffers.isEmpty(), "Null vector has no buffers");
+ }
+
+ @Override
+ public List<ArrowBuf> getFieldBuffers() {
+ return Collections.emptyList();
+ }
+
+ /**
+ * Get the inner vectors.
+ *
+ * @deprecated This API will be removed as the current implementations no longer support inner vectors.
+ *
+ * @return the inner vectors for this field as defined by the TypeLayout
+ */
+ @Deprecated
+ @Override
+ public List<BufferBacked> getFieldInnerVectors() {
+ return Collections.emptyList();
+ }
+
+ @Override
+ public long getValidityBufferAddress() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public long getDataBufferAddress() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public long getOffsetBufferAddress() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public ArrowBuf getValidityBuffer() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public ArrowBuf getDataBuffer() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public ArrowBuf getOffsetBuffer() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public int getValueCount() {
+ return this.valueCount;
+ }
+
+ @Override
+ public void setValueCount(int valueCount) {
+ this.valueCount = valueCount;
+ }
+
+ @Override
+ public Object getObject(int index) {
+ return null;
+ }
+
+ @Override
+ public int getNullCount() {
+ return this.valueCount;
+ }
+
+ @Override
+ public boolean isNull(int index) {
+ return true;
+ }
+
+ @Override
+ public int hashCode(int index) {
+ return 31 * valueCount;
+ }
+
+ @Override
+ public int hashCode(int index, ArrowBufHasher hasher) {
+ return 31 * valueCount;
+ }
+
+ @Override
+ public <OUT, IN> OUT accept(VectorVisitor<OUT, IN> visitor, IN value) {
+ return visitor.visit(this, value);
+ }
+
+ @Override
+ public void copyFrom(int fromIndex, int thisIndex, ValueVector from) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public void copyFromSafe(int fromIndex, int thisIndex, ValueVector from) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public String getName() {
+ return this.getField().getName();
+ }
+
+ private class TransferImpl implements TransferPair {
+ NullVector to;
+
+ public TransferImpl(String ref) {
+ to = new NullVector(ref);
+ }
+
+ @Deprecated
+ public TransferImpl() {
+ to = new NullVector();
+ }
+
+ public TransferImpl(NullVector to) {
+ this.to = to;
+ }
+
+ @Override
+ public NullVector getTo() {
+ return to;
+ }
+
+ @Override
+ public void transfer() {
+ to.valueCount = valueCount;
+ }
+
+ @Override
+ public void splitAndTransfer(int startIndex, int length) {
+ to.valueCount = length;
+ }
+
+ @Override
+ public void copyValueSafe(int fromIndex, int toIndex) {
+ if (toIndex > to.valueCount) {
+ to.valueCount = toIndex;
+ }
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/PeriodDuration.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/PeriodDuration.java
new file mode 100644
index 000000000..ee48fe797
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/PeriodDuration.java
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import java.time.Duration;
+import java.time.Period;
+
+import org.apache.arrow.util.Preconditions;
+
+/**
+ * Combination of Period and Duration for representing this interval type
+ * as a POJO.
+ */
+public class PeriodDuration {
+ private final Period period;
+ private final Duration duration;
+
+ public PeriodDuration(Period period, Duration duration) {
+ this.period = Preconditions.checkNotNull(period);
+ this.duration = Preconditions.checkNotNull(duration);
+ }
+
+ public Period getPeriod() {
+ return period;
+ }
+
+ public Duration getDuration() {
+ return duration;
+ }
+
+ @Override
+ public String toString() {
+ return period.toString() + " " + duration.toString();
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (!(o instanceof PeriodDuration)) {
+ return false;
+ }
+ PeriodDuration other = (PeriodDuration) o;
+ return this.period.equals(other.period) && this.duration.equals(other.duration);
+ }
+
+ @Override
+ public int hashCode() {
+ return this.period.hashCode() * 31 + this.duration.hashCode();
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/SchemaChangeCallBack.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/SchemaChangeCallBack.java
new file mode 100644
index 000000000..b61e4a160
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/SchemaChangeCallBack.java
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import org.apache.arrow.vector.util.CallBack;
+
+
+/**
+ * Callback for when the Schema for the Vector changes (generally happens when a vector is promoted to a union type
+ * from a single value type).
+ */
+public class SchemaChangeCallBack implements CallBack {
+ private boolean schemaChanged = false;
+
+ /**
+ * Constructs a schema-change callback with the schema-changed state set to
+ * {@code false}.
+ */
+ public SchemaChangeCallBack() {
+ }
+
+ /**
+ * Sets the schema-changed state to {@code true}.
+ */
+ @Override
+ public void doWork() {
+ schemaChanged = true;
+ }
+
+ /**
+ * Returns the value of schema-changed state, <strong>resetting</strong> the
+ * schema-changed state to {@code false}.
+ *
+ * @return the previous schema-changed state
+ */
+ public boolean getSchemaChangedAndReset() {
+ final boolean current = schemaChanged;
+ schemaChanged = false;
+ return current;
+ }
+}
+
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/SmallIntVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/SmallIntVector.java
new file mode 100644
index 000000000..1de6dea90
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/SmallIntVector.java
@@ -0,0 +1,389 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.complex.impl.SmallIntReaderImpl;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.holders.NullableSmallIntHolder;
+import org.apache.arrow.vector.holders.SmallIntHolder;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.TransferPair;
+
+/**
+ * SmallIntVector implements a fixed width (2 bytes) vector of
+ * short values which could be null. A validity buffer (bit vector) is
+ * maintained to track which elements in the vector are null.
+ */
+public final class SmallIntVector extends BaseFixedWidthVector implements BaseIntVector {
+ public static final byte TYPE_WIDTH = 2;
+ private final FieldReader reader;
+
+ /**
+ * Instantiate a SmallIntVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param allocator allocator for memory management.
+ */
+ public SmallIntVector(String name, BufferAllocator allocator) {
+ this(name, FieldType.nullable(MinorType.SMALLINT.getType()), allocator);
+ }
+
+ /**
+ * Instantiate a SmallIntVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param fieldType type of Field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public SmallIntVector(String name, FieldType fieldType, BufferAllocator allocator) {
+ this(new Field(name, fieldType, null), allocator);
+ }
+
+ /**
+ * Instantiate a SmallIntVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param field field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public SmallIntVector(Field field, BufferAllocator allocator) {
+ super(field, allocator, TYPE_WIDTH);
+ reader = new SmallIntReaderImpl(SmallIntVector.this);
+ }
+
+ /**
+ * Get a reader that supports reading values from this vector.
+ *
+ * @return Field Reader for this vector
+ */
+ @Override
+ public FieldReader getReader() {
+ return reader;
+ }
+
+ /**
+ * Get minor type for this vector. The vector holds values belonging
+ * to a particular type.
+ *
+ * @return {@link org.apache.arrow.vector.types.Types.MinorType}
+ */
+ @Override
+ public MinorType getMinorType() {
+ return MinorType.SMALLINT;
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value retrieval methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Get the element at the given index from the vector.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public short get(int index) throws IllegalStateException {
+ if (NULL_CHECKING_ENABLED && isSet(index) == 0) {
+ throw new IllegalStateException("Value at index is null");
+ }
+ return valueBuffer.getShort((long) index * TYPE_WIDTH);
+ }
+
+ /**
+ * Get the element at the given index from the vector and
+ * sets the state in holder. If element at given index
+ * is null, holder.isSet will be zero.
+ *
+ * @param index position of element
+ */
+ public void get(int index, NullableSmallIntHolder holder) {
+ if (isSet(index) == 0) {
+ holder.isSet = 0;
+ return;
+ }
+ holder.isSet = 1;
+ holder.value = valueBuffer.getShort((long) index * TYPE_WIDTH);
+ }
+
+ /**
+ * Same as {@link #get(int)}.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public Short getObject(int index) {
+ if (isSet(index) == 0) {
+ return null;
+ } else {
+ return valueBuffer.getShort((long) index * TYPE_WIDTH);
+ }
+ }
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value setter methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ private void setValue(int index, int value) {
+ valueBuffer.setShort((long) index * TYPE_WIDTH, value);
+ }
+
+ private void setValue(int index, short value) {
+ valueBuffer.setShort((long) index * TYPE_WIDTH, value);
+ }
+
+ /**
+ * Set the element at the given index to the given value.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void set(int index, int value) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, value);
+ }
+
+ /**
+ * Set the element at the given index to the given value.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void set(int index, short value) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, value);
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ * If the value in holder is not indicated as set, element in the
+ * at the given index will be null.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void set(int index, NullableSmallIntHolder holder) throws IllegalArgumentException {
+ if (holder.isSet < 0) {
+ throw new IllegalArgumentException();
+ } else if (holder.isSet > 0) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, holder.value);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void set(int index, SmallIntHolder holder) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, holder.value);
+ }
+
+ /**
+ * Same as {@link #set(int, int)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void setSafe(int index, int value) {
+ handleSafe(index);
+ set(index, value);
+ }
+
+ /**
+ * Same as {@link #set(int, short)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void setSafe(int index, short value) {
+ handleSafe(index);
+ set(index, value);
+ }
+
+ /**
+ * Same as {@link #set(int, NullableSmallIntHolder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void setSafe(int index, NullableSmallIntHolder holder) throws IllegalArgumentException {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Same as {@link #set(int, SmallIntHolder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void setSafe(int index, SmallIntHolder holder) {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Store the given value at a particular position in the vector. isSet indicates
+ * whether the value is NULL or not.
+ *
+ * @param index position of the new value
+ * @param isSet 0 for NULL value, 1 otherwise
+ * @param value element value
+ */
+ public void set(int index, int isSet, short value) {
+ if (isSet > 0) {
+ set(index, value);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Same as {@link #set(int, int, short)} except that it handles the case
+ * when index is greater than or equal to current value capacity of the
+ * vector.
+ *
+ * @param index position of the new value
+ * @param isSet 0 for NULL value, 1 otherwise
+ * @param value element value
+ */
+ public void setSafe(int index, int isSet, short value) {
+ handleSafe(index);
+ set(index, isSet, value);
+ }
+
+ /**
+ * Given a data buffer, get the value stored at a particular position
+ * in the vector.
+ *
+ * <p>This method should not be used externally.
+ *
+ * @param buffer data buffer
+ * @param index position of the element.
+ * @return value stored at the index.
+ */
+ public static short get(final ArrowBuf buffer, final int index) {
+ return buffer.getShort((long) index * TYPE_WIDTH);
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector transfer |
+ | |
+ *----------------------------------------------------------------*/
+
+ /**
+ * Construct a TransferPair comprising of this and a target vector of
+ * the same type.
+ *
+ * @param ref name of the target vector
+ * @param allocator allocator for the target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
+ return new TransferImpl(ref, allocator);
+ }
+
+ /**
+ * Construct a TransferPair with a desired target vector of the same type.
+ *
+ * @param to target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair makeTransferPair(ValueVector to) {
+ return new TransferImpl((SmallIntVector) to);
+ }
+
+ @Override
+ public void setWithPossibleTruncate(int index, long value) {
+ this.setSafe(index, (int) value);
+ }
+
+ @Override
+ public void setUnsafeWithPossibleTruncate(int index, long value) {
+ this.set(index, (int) value);
+ }
+
+ @Override
+ public long getValueAsLong(int index) {
+ return this.get(index);
+ }
+
+ private class TransferImpl implements TransferPair {
+ SmallIntVector to;
+
+ public TransferImpl(String ref, BufferAllocator allocator) {
+ to = new SmallIntVector(ref, field.getFieldType(), allocator);
+ }
+
+ public TransferImpl(SmallIntVector to) {
+ this.to = to;
+ }
+
+ @Override
+ public SmallIntVector getTo() {
+ return to;
+ }
+
+ @Override
+ public void transfer() {
+ transferTo(to);
+ }
+
+ @Override
+ public void splitAndTransfer(int startIndex, int length) {
+ splitAndTransferTo(startIndex, length, to);
+ }
+
+ @Override
+ public void copyValueSafe(int fromIndex, int toIndex) {
+ to.copyFromSafe(fromIndex, toIndex, SmallIntVector.this);
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TimeMicroVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TimeMicroVector.java
new file mode 100644
index 000000000..cf128859e
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TimeMicroVector.java
@@ -0,0 +1,347 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.complex.impl.TimeMicroReaderImpl;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.holders.NullableTimeMicroHolder;
+import org.apache.arrow.vector.holders.TimeMicroHolder;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.TransferPair;
+
+/**
+ * TimeMicroVector implements a fixed width vector (8 bytes) of
+ * time (microsecond resolution) values which could be null.
+ * A validity buffer (bit vector) is maintained to track which elements in the
+ * vector are null.
+ */
+public final class TimeMicroVector extends BaseFixedWidthVector {
+ public static final byte TYPE_WIDTH = 8;
+ private final FieldReader reader;
+
+ /**
+ * Instantiate a TimeMicroVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param allocator allocator for memory management.
+ */
+ public TimeMicroVector(String name, BufferAllocator allocator) {
+ this(name, FieldType.nullable(MinorType.TIMEMICRO.getType()), allocator);
+ }
+
+ /**
+ * Instantiate a TimeMicroVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param fieldType type of Field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public TimeMicroVector(String name, FieldType fieldType, BufferAllocator allocator) {
+ this(new Field(name, fieldType, null), allocator);
+ }
+
+ /**
+ * Instantiate a TimeMicroVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param field Field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public TimeMicroVector(Field field, BufferAllocator allocator) {
+ super(field, allocator, TYPE_WIDTH);
+ reader = new TimeMicroReaderImpl(TimeMicroVector.this);
+ }
+
+ /**
+ * Get a reader that supports reading values from this vector.
+ *
+ * @return Field Reader for this vector
+ */
+ @Override
+ public FieldReader getReader() {
+ return reader;
+ }
+
+ /**
+ * Get minor type for this vector. The vector holds values belonging
+ * to a particular type.
+ *
+ * @return {@link org.apache.arrow.vector.types.Types.MinorType}
+ */
+ @Override
+ public MinorType getMinorType() {
+ return MinorType.TIMEMICRO;
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value retrieval methods |
+ | |
+ *----------------------------------------------------------------*/
+
+ /**
+ * Get the element at the given index from the vector.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public long get(int index) throws IllegalStateException {
+ if (NULL_CHECKING_ENABLED && isSet(index) == 0) {
+ throw new IllegalStateException("Value at index is null");
+ }
+ return valueBuffer.getLong((long) index * TYPE_WIDTH);
+ }
+
+ /**
+ * Get the element at the given index from the vector and
+ * sets the state in holder. If element at given index
+ * is null, holder.isSet will be zero.
+ *
+ * @param index position of element
+ */
+ public void get(int index, NullableTimeMicroHolder holder) {
+ if (isSet(index) == 0) {
+ holder.isSet = 0;
+ return;
+ }
+ holder.isSet = 1;
+ holder.value = valueBuffer.getLong((long) index * TYPE_WIDTH);
+ }
+
+ /**
+ * Same as {@link #get(int)}.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public Long getObject(int index) {
+ if (isSet(index) == 0) {
+ return null;
+ } else {
+ return valueBuffer.getLong((long) index * TYPE_WIDTH);
+ }
+ }
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value setter methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ private void setValue(int index, long value) {
+ valueBuffer.setLong((long) index * TYPE_WIDTH, value);
+ }
+
+ /**
+ * Set the element at the given index to the given value.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void set(int index, long value) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, value);
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ * If the value in holder is not indicated as set, element in the
+ * at the given index will be null.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void set(int index, NullableTimeMicroHolder holder) throws IllegalArgumentException {
+ if (holder.isSet < 0) {
+ throw new IllegalArgumentException();
+ } else if (holder.isSet > 0) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, holder.value);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void set(int index, TimeMicroHolder holder) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, holder.value);
+ }
+
+ /**
+ * Same as {@link #set(int, long)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void setSafe(int index, long value) {
+ handleSafe(index);
+ set(index, value);
+ }
+
+ /**
+ * Same as {@link #set(int, NullableTimeMicroHolder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void setSafe(int index, NullableTimeMicroHolder holder) throws IllegalArgumentException {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Same as {@link #set(int, TimeMicroHolder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void setSafe(int index, TimeMicroHolder holder) {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Store the given value at a particular position in the vector. isSet indicates
+ * whether the value is NULL or not.
+ *
+ * @param index position of the new value
+ * @param isSet 0 for NULL value, 1 otherwise
+ * @param value element value
+ */
+ public void set(int index, int isSet, long value) {
+ if (isSet > 0) {
+ set(index, value);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Same as {@link #set(int, int, long)} except that it handles the case
+ * when index is greater than or equal to current value capacity of the
+ * vector.
+ *
+ * @param index position of the new value
+ * @param isSet 0 for NULL value, 1 otherwise
+ * @param value element value
+ */
+ public void setSafe(int index, int isSet, long value) {
+ handleSafe(index);
+ set(index, isSet, value);
+ }
+
+ /**
+ * Given a data buffer, get the value stored at a particular position
+ * in the vector.
+ *
+ * <p>This method should not be used externally.
+ *
+ * @param buffer data buffer
+ * @param index position of the element.
+ * @return value stored at the index.
+ */
+ public static long get(final ArrowBuf buffer, int index) {
+ return buffer.getLong((long) index * TYPE_WIDTH);
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector transfer |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Construct a TransferPair comprising of this and a target vector of
+ * the same type.
+ *
+ * @param ref name of the target vector
+ * @param allocator allocator for the target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
+ return new TransferImpl(ref, allocator);
+ }
+
+ /**
+ * Construct a TransferPair with a desired target vector of the same type.
+ *
+ * @param to target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair makeTransferPair(ValueVector to) {
+ return new TransferImpl((TimeMicroVector) to);
+ }
+
+ private class TransferImpl implements TransferPair {
+ TimeMicroVector to;
+
+ public TransferImpl(String ref, BufferAllocator allocator) {
+ to = new TimeMicroVector(ref, field.getFieldType(), allocator);
+ }
+
+ public TransferImpl(TimeMicroVector to) {
+ this.to = to;
+ }
+
+ @Override
+ public TimeMicroVector getTo() {
+ return to;
+ }
+
+ @Override
+ public void transfer() {
+ transferTo(to);
+ }
+
+ @Override
+ public void splitAndTransfer(int startIndex, int length) {
+ splitAndTransferTo(startIndex, length, to);
+ }
+
+ @Override
+ public void copyValueSafe(int fromIndex, int toIndex) {
+ to.copyFromSafe(fromIndex, toIndex, TimeMicroVector.this);
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TimeMilliVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TimeMilliVector.java
new file mode 100644
index 000000000..b96990b10
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TimeMilliVector.java
@@ -0,0 +1,351 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED;
+
+import java.time.LocalDateTime;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.complex.impl.TimeMilliReaderImpl;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.holders.NullableTimeMilliHolder;
+import org.apache.arrow.vector.holders.TimeMilliHolder;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.DateUtility;
+import org.apache.arrow.vector.util.TransferPair;
+
+/**
+ * TimeMilliVector implements a fixed width (4 bytes) vector of
+ * time (millisecond resolution) values which could be null. A validity buffer
+ * (bit vector) is maintained to track which elements in the vector are null.
+ */
+public final class TimeMilliVector extends BaseFixedWidthVector {
+ public static final byte TYPE_WIDTH = 4;
+ private final FieldReader reader;
+
+ /**
+ * Instantiate a TimeMilliVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param allocator allocator for memory management.
+ */
+ public TimeMilliVector(String name, BufferAllocator allocator) {
+ this(name, FieldType.nullable(MinorType.TIMEMILLI.getType()), allocator);
+ }
+
+ /**
+ * Instantiate a TimeMilliVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param fieldType type of Field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public TimeMilliVector(String name, FieldType fieldType, BufferAllocator allocator) {
+ this(new Field(name, fieldType, null), allocator);
+ }
+
+ /**
+ * Instantiate a TimeMilliVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param field field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public TimeMilliVector(Field field, BufferAllocator allocator) {
+ super(field, allocator, TYPE_WIDTH);
+ reader = new TimeMilliReaderImpl(TimeMilliVector.this);
+ }
+
+ /**
+ * Get a reader that supports reading values from this vector.
+ *
+ * @return Field Reader for this vector
+ */
+ @Override
+ public FieldReader getReader() {
+ return reader;
+ }
+
+ /**
+ * Get minor type for this vector. The vector holds values belonging
+ * to a particular type.
+ *
+ * @return {@link org.apache.arrow.vector.types.Types.MinorType}
+ */
+ @Override
+ public MinorType getMinorType() {
+ return MinorType.TIMEMILLI;
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value retrieval methods |
+ | |
+ *----------------------------------------------------------------*/
+
+ /**
+ * Get the element at the given index from the vector.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public int get(int index) throws IllegalStateException {
+ if (NULL_CHECKING_ENABLED && isSet(index) == 0) {
+ throw new IllegalStateException("Value at index is null");
+ }
+ return valueBuffer.getInt((long) index * TYPE_WIDTH);
+ }
+
+ /**
+ * Get the element at the given index from the vector and
+ * sets the state in holder. If element at given index
+ * is null, holder.isSet will be zero.
+ *
+ * @param index position of element
+ */
+ public void get(int index, NullableTimeMilliHolder holder) {
+ if (isSet(index) == 0) {
+ holder.isSet = 0;
+ return;
+ }
+ holder.isSet = 1;
+ holder.value = valueBuffer.getInt((long) index * TYPE_WIDTH);
+ }
+
+ /**
+ * Same as {@link #get(int)}.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public LocalDateTime getObject(int index) {
+ if (isSet(index) == 0) {
+ return null;
+ }
+ final int millis = valueBuffer.getInt((long) index * TYPE_WIDTH);
+ // TODO: this doesn't seem right, time not from epoch
+ return DateUtility.getLocalDateTimeFromEpochMilli(millis);
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value setter methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ private void setValue(int index, int value) {
+ valueBuffer.setInt((long) index * TYPE_WIDTH, value);
+ }
+
+ /**
+ * Set the element at the given index to the given value.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void set(int index, int value) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, value);
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ * If the value in holder is not indicated as set, element in the
+ * at the given index will be null.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void set(int index, NullableTimeMilliHolder holder) throws IllegalArgumentException {
+ if (holder.isSet < 0) {
+ throw new IllegalArgumentException();
+ } else if (holder.isSet > 0) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, holder.value);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void set(int index, TimeMilliHolder holder) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, holder.value);
+ }
+
+ /**
+ * Same as {@link #set(int, int)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void setSafe(int index, int value) {
+ handleSafe(index);
+ set(index, value);
+ }
+
+ /**
+ * Same as {@link #set(int, NullableTimeMilliHolder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void setSafe(int index, NullableTimeMilliHolder holder) throws IllegalArgumentException {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Same as {@link #set(int, TimeMilliHolder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void setSafe(int index, TimeMilliHolder holder) {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Store the given value at a particular position in the vector. isSet indicates
+ * whether the value is NULL or not.
+ *
+ * @param index position of the new value
+ * @param isSet 0 for NULL value, 1 otherwise
+ * @param value element value
+ */
+ public void set(int index, int isSet, int value) {
+ if (isSet > 0) {
+ set(index, value);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Same as {@link #set(int, int, int)} except that it handles the case
+ * when index is greater than or equal to current value capacity of the
+ * vector.
+ *
+ * @param index position of the new value
+ * @param isSet 0 for NULL value, 1 otherwise
+ * @param value element value
+ */
+ public void setSafe(int index, int isSet, int value) {
+ handleSafe(index);
+ set(index, isSet, value);
+ }
+
+
+ /**
+ * Given a data buffer, get the value stored at a particular position
+ * in the vector.
+ *
+ * <p>This method should not be used externally.
+ *
+ * @param buffer data buffer
+ * @param index position of the element.
+ * @return value stored at the index.
+ */
+ public static int get(final ArrowBuf buffer, final int index) {
+ return buffer.getInt((long) index * TYPE_WIDTH);
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector transfer |
+ | |
+ *----------------------------------------------------------------*/
+
+ /**
+ * Construct a TransferPair comprising of this and a target vector of
+ * the same type.
+ *
+ * @param ref name of the target vector
+ * @param allocator allocator for the target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
+ return new TransferImpl(ref, allocator);
+ }
+
+ /**
+ * Construct a TransferPair with a desired target vector of the same type.
+ *
+ * @param to target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair makeTransferPair(ValueVector to) {
+ return new TransferImpl((TimeMilliVector) to);
+ }
+
+ private class TransferImpl implements TransferPair {
+ TimeMilliVector to;
+
+ public TransferImpl(String ref, BufferAllocator allocator) {
+ to = new TimeMilliVector(ref, field.getFieldType(), allocator);
+ }
+
+ public TransferImpl(TimeMilliVector to) {
+ this.to = to;
+ }
+
+ @Override
+ public TimeMilliVector getTo() {
+ return to;
+ }
+
+ @Override
+ public void transfer() {
+ transferTo(to);
+ }
+
+ @Override
+ public void splitAndTransfer(int startIndex, int length) {
+ splitAndTransferTo(startIndex, length, to);
+ }
+
+ @Override
+ public void copyValueSafe(int fromIndex, int toIndex) {
+ to.copyFromSafe(fromIndex, toIndex, TimeMilliVector.this);
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TimeNanoVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TimeNanoVector.java
new file mode 100644
index 000000000..bc78a0264
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TimeNanoVector.java
@@ -0,0 +1,347 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.complex.impl.TimeNanoReaderImpl;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.holders.NullableTimeNanoHolder;
+import org.apache.arrow.vector.holders.TimeNanoHolder;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.TransferPair;
+
+/**
+ * TimeNanoVector implements a fixed width vector (8 bytes) of
+ * time (nanosecond resolution) values which could be null. A validity buffer
+ * (bit vector) is maintained to track which elements in the vector are null.
+ */
+public final class TimeNanoVector extends BaseFixedWidthVector {
+ public static final byte TYPE_WIDTH = 8;
+ private final FieldReader reader;
+
+ /**
+ * Instantiate a TimeNanoVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param allocator allocator for memory management.
+ */
+ public TimeNanoVector(String name, BufferAllocator allocator) {
+ this(name, FieldType.nullable(MinorType.TIMENANO.getType()), allocator);
+ }
+
+ /**
+ * Instantiate a TimeNanoVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param fieldType type of Field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public TimeNanoVector(String name, FieldType fieldType, BufferAllocator allocator) {
+ this(new Field(name, fieldType, null), allocator);
+ }
+
+ /**
+ * Instantiate a TimeNanoVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param field Field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public TimeNanoVector(Field field, BufferAllocator allocator) {
+ super(field, allocator, TYPE_WIDTH);
+ reader = new TimeNanoReaderImpl(TimeNanoVector.this);
+ }
+
+ /**
+ * Get a reader that supports reading values from this vector.
+ *
+ * @return Field Reader for this vector
+ */
+ @Override
+ public FieldReader getReader() {
+ return reader;
+ }
+
+ /**
+ * Get minor type for this vector. The vector holds values belonging
+ * to a particular type.
+ *
+ * @return {@link org.apache.arrow.vector.types.Types.MinorType}
+ */
+ @Override
+ public MinorType getMinorType() {
+ return MinorType.TIMENANO;
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value retrieval methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Get the element at the given index from the vector.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public long get(int index) throws IllegalStateException {
+ if (NULL_CHECKING_ENABLED && isSet(index) == 0) {
+ throw new IllegalStateException("Value at index is null");
+ }
+ return valueBuffer.getLong((long) index * TYPE_WIDTH);
+ }
+
+ /**
+ * Get the element at the given index from the vector and
+ * sets the state in holder. If element at given index
+ * is null, holder.isSet will be zero.
+ *
+ * @param index position of element
+ */
+ public void get(int index, NullableTimeNanoHolder holder) {
+ if (isSet(index) == 0) {
+ holder.isSet = 0;
+ return;
+ }
+ holder.isSet = 1;
+ holder.value = valueBuffer.getLong((long) index * TYPE_WIDTH);
+ }
+
+ /**
+ * Same as {@link #get(int)}.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public Long getObject(int index) {
+ if (isSet(index) == 0) {
+ return null;
+ } else {
+ return valueBuffer.getLong((long) index * TYPE_WIDTH);
+ }
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value setter methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ private void setValue(int index, long value) {
+ valueBuffer.setLong((long) index * TYPE_WIDTH, value);
+ }
+
+ /**
+ * Set the element at the given index to the given value.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void set(int index, long value) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, value);
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ * If the value in holder is not indicated as set, element in the
+ * at the given index will be null.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void set(int index, NullableTimeNanoHolder holder) throws IllegalArgumentException {
+ if (holder.isSet < 0) {
+ throw new IllegalArgumentException();
+ } else if (holder.isSet > 0) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, holder.value);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void set(int index, TimeNanoHolder holder) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, holder.value);
+ }
+
+ /**
+ * Same as {@link #set(int, long)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void setSafe(int index, long value) {
+ handleSafe(index);
+ set(index, value);
+ }
+
+ /**
+ * Same as {@link #set(int, NullableTimeNanoHolder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void setSafe(int index, NullableTimeNanoHolder holder) throws IllegalArgumentException {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Same as {@link #set(int, TimeNanoHolder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void setSafe(int index, TimeNanoHolder holder) {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Store the given value at a particular position in the vector. isSet indicates
+ * whether the value is NULL or not.
+ *
+ * @param index position of the new value
+ * @param isSet 0 for NULL value, 1 otherwise
+ * @param value element value
+ */
+ public void set(int index, int isSet, long value) {
+ if (isSet > 0) {
+ set(index, value);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Same as {@link #set(int, int, long)} except that it handles the case
+ * when index is greater than or equal to current value capacity of the
+ * vector.
+ *
+ * @param index position of the new value
+ * @param isSet 0 for NULL value, 1 otherwise
+ * @param value element value
+ */
+ public void setSafe(int index, int isSet, long value) {
+ handleSafe(index);
+ set(index, isSet, value);
+ }
+
+ /**
+ * Given a data buffer, get the value stored at a particular position
+ * in the vector.
+ *
+ * <p>This method should not be used externally.
+ *
+ * @param buffer data buffer
+ * @param index position of the element.
+ * @return value stored at the index.
+ */
+ public static long get(final ArrowBuf buffer, final int index) {
+ return buffer.getLong((long) index * TYPE_WIDTH);
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector transfer |
+ | |
+ *----------------------------------------------------------------*/
+
+ /**
+ * Construct a TransferPair comprising of this and a target vector of
+ * the same type.
+ *
+ * @param ref name of the target vector
+ * @param allocator allocator for the target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
+ return new TransferImpl(ref, allocator);
+ }
+
+ /**
+ * Construct a TransferPair with a desired target vector of the same type.
+ *
+ * @param to target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair makeTransferPair(ValueVector to) {
+ return new TransferImpl((TimeNanoVector) to);
+ }
+
+ private class TransferImpl implements TransferPair {
+ TimeNanoVector to;
+
+ public TransferImpl(String ref, BufferAllocator allocator) {
+ to = new TimeNanoVector(ref, field.getFieldType(), allocator);
+ }
+
+ public TransferImpl(TimeNanoVector to) {
+ this.to = to;
+ }
+
+ @Override
+ public TimeNanoVector getTo() {
+ return to;
+ }
+
+ @Override
+ public void transfer() {
+ transferTo(to);
+ }
+
+ @Override
+ public void splitAndTransfer(int startIndex, int length) {
+ splitAndTransferTo(startIndex, length, to);
+ }
+
+ @Override
+ public void copyValueSafe(int fromIndex, int toIndex) {
+ to.copyFromSafe(fromIndex, toIndex, TimeNanoVector.this);
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TimeSecVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TimeSecVector.java
new file mode 100644
index 000000000..29b7381be
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TimeSecVector.java
@@ -0,0 +1,348 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.complex.impl.TimeSecReaderImpl;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.holders.NullableTimeSecHolder;
+import org.apache.arrow.vector.holders.TimeSecHolder;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.TransferPair;
+
+/**
+ * TimeSecVector implements a fixed width (4 bytes) vector of
+ * time (seconds resolution) values which could be null. A validity buffer (bit vector) is
+ * maintained to track which elements in the vector are null.
+ */
+public final class TimeSecVector extends BaseFixedWidthVector {
+ public static final byte TYPE_WIDTH = 4;
+ private final FieldReader reader;
+
+ /**
+ * Instantiate a TimeSecVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param allocator allocator for memory management.
+ */
+ public TimeSecVector(String name, BufferAllocator allocator) {
+ this(name, FieldType.nullable(MinorType.TIMESEC.getType()), allocator);
+ }
+
+ /**
+ * Instantiate a TimeSecVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param fieldType type of Field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public TimeSecVector(String name, FieldType fieldType, BufferAllocator allocator) {
+ this(new Field(name, fieldType, null), allocator);
+ }
+
+ /**
+ * Instantiate a TimeSecVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param field Field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public TimeSecVector(Field field, BufferAllocator allocator) {
+ super(field, allocator, TYPE_WIDTH);
+ reader = new TimeSecReaderImpl(TimeSecVector.this);
+ }
+
+ /**
+ * Get a reader that supports reading values from this vector.
+ *
+ * @return Field Reader for this vector
+ */
+ @Override
+ public FieldReader getReader() {
+ return reader;
+ }
+
+ /**
+ * Get minor type for this vector. The vector holds values belonging
+ * to a particular type.
+ *
+ * @return {@link org.apache.arrow.vector.types.Types.MinorType}
+ */
+ @Override
+ public MinorType getMinorType() {
+ return MinorType.TIMESEC;
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value retrieval methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Get the element at the given index from the vector.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public int get(int index) throws IllegalStateException {
+ if (NULL_CHECKING_ENABLED && isSet(index) == 0) {
+ throw new IllegalStateException("Value at index is null");
+ }
+ return valueBuffer.getInt((long) index * TYPE_WIDTH);
+ }
+
+ /**
+ * Get the element at the given index from the vector and
+ * sets the state in holder. If element at given index
+ * is null, holder.isSet will be zero.
+ *
+ * @param index position of element
+ */
+ public void get(int index, NullableTimeSecHolder holder) {
+ if (isSet(index) == 0) {
+ holder.isSet = 0;
+ return;
+ }
+ holder.isSet = 1;
+ holder.value = valueBuffer.getInt((long) index * TYPE_WIDTH);
+ }
+
+ /**
+ * Same as {@link #get(int)}.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public Integer getObject(int index) {
+ if (isSet(index) == 0) {
+ return null;
+ } else {
+ return valueBuffer.getInt((long) index * TYPE_WIDTH);
+ }
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value setter methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ private void setValue(int index, int value) {
+ valueBuffer.setInt((long) index * TYPE_WIDTH, value);
+ }
+
+ /**
+ * Set the element at the given index to the given value.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void set(int index, int value) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, value);
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ * If the value in holder is not indicated as set, element in the
+ * at the given index will be null.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void set(int index, NullableTimeSecHolder holder) throws IllegalArgumentException {
+ if (holder.isSet < 0) {
+ throw new IllegalArgumentException();
+ } else if (holder.isSet > 0) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, holder.value);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void set(int index, TimeSecHolder holder) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, holder.value);
+ }
+
+ /**
+ * Same as {@link #set(int, int)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void setSafe(int index, int value) {
+ handleSafe(index);
+ set(index, value);
+ }
+
+ /**
+ * Same as {@link #set(int, NullableTimeSecHolder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void setSafe(int index, NullableTimeSecHolder holder) throws IllegalArgumentException {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Same as {@link #set(int, TimeSecHolder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void setSafe(int index, TimeSecHolder holder) {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Store the given value at a particular position in the vector. isSet indicates
+ * whether the value is NULL or not.
+ *
+ * @param index position of the new value
+ * @param isSet 0 for NULL value, 1 otherwise
+ * @param value element value
+ */
+ public void set(int index, int isSet, int value) {
+ if (isSet > 0) {
+ set(index, value);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Same as {@link #set(int, int, int)} except that it handles the case
+ * when index is greater than or equal to current value capacity of the
+ * vector.
+ *
+ * @param index position of the new value
+ * @param isSet 0 for NULL value, 1 otherwise
+ * @param value element value
+ */
+ public void setSafe(int index, int isSet, int value) {
+ handleSafe(index);
+ set(index, isSet, value);
+ }
+
+ /**
+ * Given a data buffer, get the value stored at a particular position
+ * in the vector.
+ *
+ * <p>This method should not be used externally.
+ *
+ * @param buffer data buffer
+ * @param index position of the element.
+ * @return value stored at the index.
+ */
+ public static int get(final ArrowBuf buffer, final int index) {
+ return buffer.getInt((long) index * TYPE_WIDTH);
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector transfer |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Construct a TransferPair comprising of this and a target vector of
+ * the same type.
+ *
+ * @param ref name of the target vector
+ * @param allocator allocator for the target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
+ return new TransferImpl(ref, allocator);
+ }
+
+ /**
+ * Construct a TransferPair with a desired target vector of the same type.
+ *
+ * @param to target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair makeTransferPair(ValueVector to) {
+ return new TransferImpl((TimeSecVector) to);
+ }
+
+ private class TransferImpl implements TransferPair {
+ TimeSecVector to;
+
+ public TransferImpl(String ref, BufferAllocator allocator) {
+ to = new TimeSecVector(ref, field.getFieldType(), allocator);
+ }
+
+ public TransferImpl(TimeSecVector to) {
+ this.to = to;
+ }
+
+ @Override
+ public TimeSecVector getTo() {
+ return to;
+ }
+
+ @Override
+ public void transfer() {
+ transferTo(to);
+ }
+
+ @Override
+ public void splitAndTransfer(int startIndex, int length) {
+ splitAndTransferTo(startIndex, length, to);
+ }
+
+ @Override
+ public void copyValueSafe(int fromIndex, int toIndex) {
+ to.copyFromSafe(fromIndex, toIndex, TimeSecVector.this);
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TimeStampMicroTZVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TimeStampMicroTZVector.java
new file mode 100644
index 000000000..17715780e
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TimeStampMicroTZVector.java
@@ -0,0 +1,239 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.complex.impl.TimeStampMicroTZReaderImpl;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.holders.NullableTimeStampMicroTZHolder;
+import org.apache.arrow.vector.holders.TimeStampMicroTZHolder;
+import org.apache.arrow.vector.types.TimeUnit;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.TransferPair;
+
+/**
+ * TimeStampMicroTZVector implements a fixed width vector (8 bytes) of
+ * timestamp (microsecond resolution) values which could be null. A validity buffer
+ * (bit vector) is maintained to track which elements in the vector are null.
+ */
+public final class TimeStampMicroTZVector extends TimeStampVector {
+ private final FieldReader reader;
+ private final String timeZone;
+
+ /**
+ * Instantiate a TimeStampMicroTZVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param allocator allocator for memory management.
+ */
+ public TimeStampMicroTZVector(String name, BufferAllocator allocator, String timeZone) {
+ this(name, FieldType.nullable(new ArrowType.Timestamp(TimeUnit.MICROSECOND, timeZone)), allocator);
+ }
+
+ /**
+ * Instantiate a TimeStampMicroTZVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param fieldType type of Field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public TimeStampMicroTZVector(String name, FieldType fieldType, BufferAllocator allocator) {
+ super(name, fieldType, allocator);
+ ArrowType.Timestamp arrowType = (ArrowType.Timestamp) fieldType.getType();
+ timeZone = arrowType.getTimezone();
+ reader = new TimeStampMicroTZReaderImpl(TimeStampMicroTZVector.this);
+ }
+
+ /**
+ * Instantiate a TimeStampMicroTZVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param field Field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public TimeStampMicroTZVector(Field field, BufferAllocator allocator) {
+ super(field, allocator);
+ ArrowType.Timestamp arrowType = (ArrowType.Timestamp) field.getFieldType().getType();
+ timeZone = arrowType.getTimezone();
+ reader = new TimeStampMicroTZReaderImpl(TimeStampMicroTZVector.this);
+ }
+
+ /**
+ * Get a reader that supports reading values from this vector.
+ *
+ * @return Field Reader for this vector
+ */
+ @Override
+ public FieldReader getReader() {
+ return reader;
+ }
+
+ /**
+ * Get minor type for this vector. The vector holds values belonging
+ * to a particular type.
+ *
+ * @return {@link org.apache.arrow.vector.types.Types.MinorType}
+ */
+ @Override
+ public MinorType getMinorType() {
+ return MinorType.TIMESTAMPMICROTZ;
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value retrieval methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Get the element at the given index from the vector and
+ * sets the state in holder. If element at given index
+ * is null, holder.isSet will be zero.
+ *
+ * @param index position of element
+ */
+ public void get(int index, NullableTimeStampMicroTZHolder holder) {
+ if (NULL_CHECKING_ENABLED && isSet(index) == 0) {
+ holder.isSet = 0;
+ return;
+ }
+ holder.isSet = 1;
+ holder.value = valueBuffer.getLong((long) index * TYPE_WIDTH);
+ }
+
+ /**
+ * Same as {@link #get(int)}.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public Long getObject(int index) {
+ if (isSet(index) == 0) {
+ return null;
+ } else {
+ return valueBuffer.getLong((long) index * TYPE_WIDTH);
+ }
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value setter methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ * If the value in holder is not indicated as set, element in the
+ * at the given index will be null.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void set(int index, NullableTimeStampMicroTZHolder holder) throws IllegalArgumentException {
+ if (holder.isSet < 0) {
+ throw new IllegalArgumentException();
+ } else if (holder.isSet > 0) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, holder.value);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void set(int index, TimeStampMicroTZHolder holder) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, holder.value);
+ }
+
+ /**
+ * Same as {@link #set(int, NullableTimeStampMicroTZHolder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void setSafe(int index, NullableTimeStampMicroTZHolder holder) throws IllegalArgumentException {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Same as {@link #set(int, TimeStampMicroTZHolder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void setSafe(int index, TimeStampMicroTZHolder holder) {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector transfer |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Construct a TransferPair comprising of this and a target vector of
+ * the same type.
+ *
+ * @param ref name of the target vector
+ * @param allocator allocator for the target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
+ TimeStampMicroTZVector to = new TimeStampMicroTZVector(ref,
+ field.getFieldType(), allocator);
+ return new TransferImpl(to);
+ }
+
+ /**
+ * Construct a TransferPair with a desired target vector of the same type.
+ *
+ * @param to target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair makeTransferPair(ValueVector to) {
+ return new TransferImpl((TimeStampMicroTZVector) to);
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TimeStampMicroVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TimeStampMicroVector.java
new file mode 100644
index 000000000..5cbef8962
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TimeStampMicroVector.java
@@ -0,0 +1,236 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED;
+
+import java.time.LocalDateTime;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.complex.impl.TimeStampMicroReaderImpl;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.holders.NullableTimeStampMicroHolder;
+import org.apache.arrow.vector.holders.TimeStampMicroHolder;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.DateUtility;
+import org.apache.arrow.vector.util.TransferPair;
+
+/**
+ * TimeStampMicroVector implements a fixed width vector (8 bytes) of
+ * timestamp (microsecond resolution) values which could be null. A validity buffer
+ * (bit vector) is maintained to track which elements in the vector are null.
+ */
+public final class TimeStampMicroVector extends TimeStampVector {
+ private final FieldReader reader;
+
+ /**
+ * Instantiate a TimeStampMicroVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param allocator allocator for memory management.
+ */
+ public TimeStampMicroVector(String name, BufferAllocator allocator) {
+ this(name, FieldType.nullable(MinorType.TIMESTAMPMICRO.getType()), allocator);
+ }
+
+ /**
+ * Instantiate a TimeStampMicroVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param fieldType type of Field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public TimeStampMicroVector(String name, FieldType fieldType, BufferAllocator allocator) {
+ super(name, fieldType, allocator);
+ reader = new TimeStampMicroReaderImpl(TimeStampMicroVector.this);
+ }
+
+ /**
+ * Instantiate a TimeStampMicroVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param field Field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public TimeStampMicroVector(Field field, BufferAllocator allocator) {
+ super(field, allocator);
+ reader = new TimeStampMicroReaderImpl(TimeStampMicroVector.this);
+ }
+
+ /**
+ * Get a reader that supports reading values from this vector.
+ *
+ * @return Field Reader for this vector
+ */
+ @Override
+ public FieldReader getReader() {
+ return reader;
+ }
+
+ /**
+ * Get minor type for this vector. The vector holds values belonging
+ * to a particular type.
+ *
+ * @return {@link org.apache.arrow.vector.types.Types.MinorType}
+ */
+ @Override
+ public MinorType getMinorType() {
+ return MinorType.TIMESTAMPMICRO;
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value retrieval methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Get the element at the given index from the vector and
+ * sets the state in holder. If element at given index
+ * is null, holder.isSet will be zero.
+ *
+ * @param index position of element
+ */
+ public void get(int index, NullableTimeStampMicroHolder holder) {
+ if (NULL_CHECKING_ENABLED && isSet(index) == 0) {
+ holder.isSet = 0;
+ return;
+ }
+ holder.isSet = 1;
+ holder.value = valueBuffer.getLong((long) index * TYPE_WIDTH);
+ }
+
+ /**
+ * Same as {@link #get(int)}.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public LocalDateTime getObject(int index) {
+ if (isSet(index) == 0) {
+ return null;
+ } else {
+ final long micros = valueBuffer.getLong((long) index * TYPE_WIDTH);
+ return DateUtility.getLocalDateTimeFromEpochMicro(micros);
+ }
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value setter methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ * If the value in holder is not indicated as set, element in the
+ * at the given index will be null.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void set(int index, NullableTimeStampMicroHolder holder) throws IllegalArgumentException {
+ if (holder.isSet < 0) {
+ throw new IllegalArgumentException();
+ } else if (holder.isSet > 0) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, holder.value);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void set(int index, TimeStampMicroHolder holder) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, holder.value);
+ }
+
+ /**
+ * Same as {@link #set(int, NullableTimeStampMicroHolder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void setSafe(int index, NullableTimeStampMicroHolder holder) throws IllegalArgumentException {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Same as {@link #set(int, TimeStampMicroHolder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void setSafe(int index, TimeStampMicroHolder holder) {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector transfer |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Construct a TransferPair comprising of this and a target vector of
+ * the same type.
+ *
+ * @param ref name of the target vector
+ * @param allocator allocator for the target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
+ TimeStampMicroVector to = new TimeStampMicroVector(ref,
+ field.getFieldType(), allocator);
+ return new TransferImpl(to);
+ }
+
+ /**
+ * Construct a TransferPair with a desired target vector of the same type.
+ *
+ * @param to target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair makeTransferPair(ValueVector to) {
+ return new TransferImpl((TimeStampMicroVector) to);
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TimeStampMilliTZVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TimeStampMilliTZVector.java
new file mode 100644
index 000000000..e66bbf450
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TimeStampMilliTZVector.java
@@ -0,0 +1,238 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.complex.impl.TimeStampMilliTZReaderImpl;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.holders.NullableTimeStampMilliTZHolder;
+import org.apache.arrow.vector.holders.TimeStampMilliTZHolder;
+import org.apache.arrow.vector.types.TimeUnit;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.TransferPair;
+
+/**
+ * TimeStampMilliTZVector implements a fixed width vector (8 bytes) of
+ * timestamp (millisecond resolution) values which could be null. A validity buffer
+ * (bit vector) is maintained to track which elements in the vector are null.
+ */
+public final class TimeStampMilliTZVector extends TimeStampVector {
+ private final FieldReader reader;
+ private final String timeZone;
+
+ /**
+ * Instantiate a TimeStampMilliTZVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param allocator allocator for memory management.
+ */
+ public TimeStampMilliTZVector(String name, BufferAllocator allocator, String timeZone) {
+ this(name, FieldType.nullable(new ArrowType.Timestamp(TimeUnit.MILLISECOND, timeZone)), allocator);
+ }
+
+ /**
+ * Instantiate a TimeStampMilliTZVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param fieldType type of Field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public TimeStampMilliTZVector(String name, FieldType fieldType, BufferAllocator allocator) {
+ super(name, fieldType, allocator);
+ ArrowType.Timestamp arrowType = (ArrowType.Timestamp) fieldType.getType();
+ timeZone = arrowType.getTimezone();
+ reader = new TimeStampMilliTZReaderImpl(TimeStampMilliTZVector.this);
+ }
+
+ /**
+ * Instantiate a TimeStampMilliTZVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param field Field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public TimeStampMilliTZVector(Field field, BufferAllocator allocator) {
+ super(field, allocator);
+ ArrowType.Timestamp arrowType = (ArrowType.Timestamp) field.getFieldType().getType();
+ timeZone = arrowType.getTimezone();
+ reader = new TimeStampMilliTZReaderImpl(TimeStampMilliTZVector.this);
+ }
+
+ /**
+ * Get a reader that supports reading values from this vector.
+ *
+ * @return Field Reader for this vector
+ */
+ @Override
+ public FieldReader getReader() {
+ return reader;
+ }
+
+ /**
+ * Get minor type for this vector. The vector holds values belonging
+ * to a particular type.
+ *
+ * @return {@link org.apache.arrow.vector.types.Types.MinorType}
+ */
+ @Override
+ public MinorType getMinorType() {
+ return MinorType.TIMESTAMPMILLITZ;
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value retrieval methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Get the element at the given index from the vector and
+ * sets the state in holder. If element at given index
+ * is null, holder.isSet will be zero.
+ *
+ * @param index position of element
+ */
+ public void get(int index, NullableTimeStampMilliTZHolder holder) {
+ if (NULL_CHECKING_ENABLED && isSet(index) == 0) {
+ holder.isSet = 0;
+ return;
+ }
+ holder.isSet = 1;
+ holder.value = valueBuffer.getLong((long) index * TYPE_WIDTH);
+ }
+
+ /**
+ * Same as {@link #get(int)}.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public Long getObject(int index) {
+ if (isSet(index) == 0) {
+ return null;
+ } else {
+ return valueBuffer.getLong((long) index * TYPE_WIDTH);
+ }
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value setter methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ * If the value in holder is not indicated as set, element in the
+ * at the given index will be null.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void set(int index, NullableTimeStampMilliTZHolder holder) throws IllegalArgumentException {
+ if (holder.isSet < 0) {
+ throw new IllegalArgumentException();
+ } else if (holder.isSet > 0) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, holder.value);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void set(int index, TimeStampMilliTZHolder holder) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, holder.value);
+ }
+
+ /**
+ * Same as {@link #set(int, NullableTimeStampMilliTZHolder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void setSafe(int index, NullableTimeStampMilliTZHolder holder) throws IllegalArgumentException {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Same as {@link #set(int, TimeStampMilliTZHolder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void setSafe(int index, TimeStampMilliTZHolder holder) {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector transfer |
+ | |
+ *----------------------------------------------------------------*/
+
+ /**
+ * Construct a TransferPair comprising of this and a target vector of
+ * the same type.
+ *
+ * @param ref name of the target vector
+ * @param allocator allocator for the target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
+ TimeStampMilliTZVector to = new TimeStampMilliTZVector(ref,
+ field.getFieldType(), allocator);
+ return new TransferImpl(to);
+ }
+
+ /**
+ * Construct a TransferPair with a desired target vector of the same type.
+ *
+ * @param to target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair makeTransferPair(ValueVector to) {
+ return new TransferImpl((TimeStampMilliTZVector) to);
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TimeStampMilliVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TimeStampMilliVector.java
new file mode 100644
index 000000000..8f46f5606
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TimeStampMilliVector.java
@@ -0,0 +1,236 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED;
+
+import java.time.LocalDateTime;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.complex.impl.TimeStampMilliReaderImpl;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.holders.NullableTimeStampMilliHolder;
+import org.apache.arrow.vector.holders.TimeStampMilliHolder;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.DateUtility;
+import org.apache.arrow.vector.util.TransferPair;
+
+/**
+ * TimeStampMilliVector implements a fixed width vector (8 bytes) of
+ * timestamp (millisecond resolution) values which could be null. A validity buffer
+ * (bit vector) is maintained to track which elements in the vector are null.
+ */
+public final class TimeStampMilliVector extends TimeStampVector {
+ private final FieldReader reader;
+
+ /**
+ * Instantiate a TimeStampMilliVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param allocator allocator for memory management.
+ */
+ public TimeStampMilliVector(String name, BufferAllocator allocator) {
+ this(name, FieldType.nullable(MinorType.TIMESTAMPMILLI.getType()), allocator);
+ }
+
+ /**
+ * Instantiate a TimeStampMilliVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param fieldType type of Field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public TimeStampMilliVector(String name, FieldType fieldType, BufferAllocator allocator) {
+ super(name, fieldType, allocator);
+ reader = new TimeStampMilliReaderImpl(TimeStampMilliVector.this);
+ }
+
+ /**
+ * Instantiate a TimeStampMilliVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param field field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public TimeStampMilliVector(Field field, BufferAllocator allocator) {
+ super(field, allocator);
+ reader = new TimeStampMilliReaderImpl(TimeStampMilliVector.this);
+ }
+
+ /**
+ * Get a reader that supports reading values from this vector.
+ *
+ * @return Field Reader for this vector
+ */
+ @Override
+ public FieldReader getReader() {
+ return reader;
+ }
+
+ /**
+ * Get minor type for this vector. The vector holds values belonging
+ * to a particular type.
+ *
+ * @return {@link org.apache.arrow.vector.types.Types.MinorType}
+ */
+ @Override
+ public MinorType getMinorType() {
+ return MinorType.TIMESTAMPMILLI;
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value retrieval methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Get the element at the given index from the vector and
+ * sets the state in holder. If element at given index
+ * is null, holder.isSet will be zero.
+ *
+ * @param index position of element
+ */
+ public void get(int index, NullableTimeStampMilliHolder holder) {
+ if (NULL_CHECKING_ENABLED && isSet(index) == 0) {
+ holder.isSet = 0;
+ return;
+ }
+ holder.isSet = 1;
+ holder.value = valueBuffer.getLong((long) index * TYPE_WIDTH);
+ }
+
+ /**
+ * Same as {@link #get(int)}.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public LocalDateTime getObject(int index) {
+ if (isSet(index) == 0) {
+ return null;
+ } else {
+ final long millis = valueBuffer.getLong((long) index * TYPE_WIDTH);
+ return DateUtility.getLocalDateTimeFromEpochMilli(millis);
+ }
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value setter methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ * If the value in holder is not indicated as set, element in the
+ * at the given index will be null.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void set(int index, NullableTimeStampMilliHolder holder) throws IllegalArgumentException {
+ if (holder.isSet < 0) {
+ throw new IllegalArgumentException();
+ } else if (holder.isSet > 0) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, holder.value);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void set(int index, TimeStampMilliHolder holder) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, holder.value);
+ }
+
+ /**
+ * Same as {@link #set(int, NullableTimeStampMilliHolder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void setSafe(int index, NullableTimeStampMilliHolder holder) throws IllegalArgumentException {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Same as {@link #set(int, TimeStampMilliHolder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void setSafe(int index, TimeStampMilliHolder holder) {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector transfer |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Construct a TransferPair comprising of this and a target vector of
+ * the same type.
+ *
+ * @param ref name of the target vector
+ * @param allocator allocator for the target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
+ TimeStampMilliVector to = new TimeStampMilliVector(ref,
+ field.getFieldType(), allocator);
+ return new TransferImpl(to);
+ }
+
+ /**
+ * Construct a TransferPair with a desired target vector of the same type.
+ *
+ * @param to target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair makeTransferPair(ValueVector to) {
+ return new TransferImpl((TimeStampMilliVector) to);
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TimeStampNanoTZVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TimeStampNanoTZVector.java
new file mode 100644
index 000000000..a3e582a7c
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TimeStampNanoTZVector.java
@@ -0,0 +1,241 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.complex.impl.TimeStampNanoTZReaderImpl;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.holders.NullableTimeStampNanoTZHolder;
+import org.apache.arrow.vector.holders.TimeStampNanoTZHolder;
+import org.apache.arrow.vector.types.TimeUnit;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.TransferPair;
+
+/**
+ * TimeStampNanoTZVector implements a fixed width vector (8 bytes) of
+ * timestamp (nanosecond resolution) values which could be null. A validity buffer
+ * (bit vector) is maintained to track which elements in the vector are null.
+ */
+public final class TimeStampNanoTZVector extends TimeStampVector {
+ private final FieldReader reader;
+ private final String timeZone;
+
+ /**
+ * Instantiate a TimeStampNanoTZVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param allocator allocator for memory management.
+ */
+ public TimeStampNanoTZVector(String name, BufferAllocator allocator, String timeZone) {
+ this(name, FieldType.nullable(new ArrowType.Timestamp(TimeUnit.NANOSECOND, timeZone)), allocator);
+ }
+
+ /**
+ * Instantiate a TimeStampNanoTZVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param fieldType type of Field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public TimeStampNanoTZVector(String name, FieldType fieldType, BufferAllocator allocator) {
+ super(name, fieldType, allocator);
+ ArrowType.Timestamp arrowType = (ArrowType.Timestamp) fieldType.getType();
+ timeZone = arrowType.getTimezone();
+ reader = new TimeStampNanoTZReaderImpl(TimeStampNanoTZVector.this);
+ }
+
+ /**
+ * Instantiate a TimeStampNanoTZVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param field Field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public TimeStampNanoTZVector(Field field, BufferAllocator allocator) {
+ super(field, allocator);
+ ArrowType.Timestamp arrowType = (ArrowType.Timestamp) field.getFieldType().getType();
+ timeZone = arrowType.getTimezone();
+ reader = new TimeStampNanoTZReaderImpl(TimeStampNanoTZVector.this);
+ }
+
+ /**
+ * Get a reader that supports reading values from this vector.
+ *
+ * @return Field Reader for this vector
+ */
+ @Override
+ public FieldReader getReader() {
+ return reader;
+ }
+
+ /**
+ * Get minor type for this vector. The vector holds values belonging
+ * to a particular type.
+ *
+ * @return {@link org.apache.arrow.vector.types.Types.MinorType}
+ */
+ @Override
+ public MinorType getMinorType() {
+ return MinorType.TIMESTAMPNANOTZ;
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value retrieval methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Get the element at the given index from the vector and
+ * sets the state in holder. If element at given index
+ * is null, holder.isSet will be zero.
+ *
+ * @param index position of element
+ */
+ public void get(int index, NullableTimeStampNanoTZHolder holder) {
+ if (NULL_CHECKING_ENABLED && isSet(index) == 0) {
+ holder.isSet = 0;
+ return;
+ }
+ holder.isSet = 1;
+ holder.value = valueBuffer.getLong((long) index * TYPE_WIDTH);
+ }
+
+ /**
+ * Same as {@link #get(int)}.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public Long getObject(int index) {
+ if (isSet(index) == 0) {
+ return null;
+ } else {
+ return valueBuffer.getLong((long) index * TYPE_WIDTH);
+ }
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value setter methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ * If the value in holder is not indicated as set, element in the
+ * at the given index will be null.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void set(int index, NullableTimeStampNanoTZHolder holder) throws IllegalArgumentException {
+ if (holder.isSet < 0) {
+ throw new IllegalArgumentException();
+ } else if (holder.isSet > 0) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, holder.value);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void set(int index, TimeStampNanoTZHolder holder) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, holder.value);
+ }
+
+ /**
+ * Same as {@link #set(int, NullableTimeStampNanoTZHolder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void setSafe(
+ int index,
+ NullableTimeStampNanoTZHolder holder) throws IllegalArgumentException {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Same as {@link #set(int, TimeStampNanoTZHolder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void setSafe(int index, TimeStampNanoTZHolder holder) {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector transfer |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Construct a TransferPair comprising of this and a target vector of
+ * the same type.
+ *
+ * @param ref name of the target vector
+ * @param allocator allocator for the target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
+ TimeStampNanoTZVector to = new TimeStampNanoTZVector(ref,
+ field.getFieldType(), allocator);
+ return new TransferImpl(to);
+ }
+
+ /**
+ * Construct a TransferPair with a desired target vector of the same type.
+ *
+ * @param to target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair makeTransferPair(ValueVector to) {
+ return new TransferImpl((TimeStampNanoTZVector) to);
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TimeStampNanoVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TimeStampNanoVector.java
new file mode 100644
index 000000000..7b87dac43
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TimeStampNanoVector.java
@@ -0,0 +1,236 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED;
+
+import java.time.LocalDateTime;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.complex.impl.TimeStampNanoReaderImpl;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.holders.NullableTimeStampNanoHolder;
+import org.apache.arrow.vector.holders.TimeStampNanoHolder;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.DateUtility;
+import org.apache.arrow.vector.util.TransferPair;
+
+/**
+ * TimeStampNanoVector implements a fixed width vector (8 bytes) of
+ * timestamp (nanosecond resolution) values which could be null. A validity buffer
+ * (bit vector) is maintained to track which elements in the vector are null.
+ */
+public final class TimeStampNanoVector extends TimeStampVector {
+ private final FieldReader reader;
+
+ /**
+ * Instantiate a TimeStampNanoVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param allocator allocator for memory management.
+ */
+ public TimeStampNanoVector(String name, BufferAllocator allocator) {
+ this(name, FieldType.nullable(MinorType.TIMESTAMPNANO.getType()), allocator);
+ }
+
+ /**
+ * Instantiate a TimeStampNanoVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param fieldType type of Field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public TimeStampNanoVector(String name, FieldType fieldType, BufferAllocator allocator) {
+ super(name, fieldType, allocator);
+ reader = new TimeStampNanoReaderImpl(TimeStampNanoVector.this);
+ }
+
+ /**
+ * Instantiate a TimeStampNanoVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param field Field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public TimeStampNanoVector(Field field, BufferAllocator allocator) {
+ super(field, allocator);
+ reader = new TimeStampNanoReaderImpl(TimeStampNanoVector.this);
+ }
+
+ /**
+ * Get a reader that supports reading values from this vector.
+ *
+ * @return Field Reader for this vector
+ */
+ @Override
+ public FieldReader getReader() {
+ return reader;
+ }
+
+ /**
+ * Get minor type for this vector. The vector holds values belonging
+ * to a particular type.
+ *
+ * @return {@link org.apache.arrow.vector.types.Types.MinorType}
+ */
+ @Override
+ public MinorType getMinorType() {
+ return MinorType.TIMESTAMPNANO;
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value retrieval methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Get the element at the given index from the vector and
+ * sets the state in holder. If element at given index
+ * is null, holder.isSet will be zero.
+ *
+ * @param index position of element
+ */
+ public void get(int index, NullableTimeStampNanoHolder holder) {
+ if (NULL_CHECKING_ENABLED && isSet(index) == 0) {
+ holder.isSet = 0;
+ return;
+ }
+ holder.isSet = 1;
+ holder.value = valueBuffer.getLong((long) index * TYPE_WIDTH);
+ }
+
+ /**
+ * Same as {@link #get(int)}.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public LocalDateTime getObject(int index) {
+ if (isSet(index) == 0) {
+ return null;
+ } else {
+ final long nanos = valueBuffer.getLong((long) index * TYPE_WIDTH);
+ return DateUtility.getLocalDateTimeFromEpochNano(nanos);
+ }
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value setter methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ * If the value in holder is not indicated as set, element in the
+ * at the given index will be null.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void set(int index, NullableTimeStampNanoHolder holder) throws IllegalArgumentException {
+ if (holder.isSet < 0) {
+ throw new IllegalArgumentException();
+ } else if (holder.isSet > 0) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, holder.value);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void set(int index, TimeStampNanoHolder holder) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, holder.value);
+ }
+
+ /**
+ * Same as {@link #set(int, NullableTimeStampNanoHolder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void setSafe(int index, NullableTimeStampNanoHolder holder) throws IllegalArgumentException {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Same as {@link #set(int, TimeStampNanoHolder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void setSafe(int index, TimeStampNanoHolder holder) {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector transfer |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Construct a TransferPair comprising of this and a target vector of
+ * the same type.
+ *
+ * @param ref name of the target vector
+ * @param allocator allocator for the target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
+ TimeStampNanoVector to = new TimeStampNanoVector(ref,
+ field.getFieldType(), allocator);
+ return new TransferImpl(to);
+ }
+
+ /**
+ * Construct a TransferPair with a desired target vector of the same type.
+ *
+ * @param to target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair makeTransferPair(ValueVector to) {
+ return new TransferImpl((TimeStampNanoVector) to);
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TimeStampSecTZVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TimeStampSecTZVector.java
new file mode 100644
index 000000000..f5a0498fe
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TimeStampSecTZVector.java
@@ -0,0 +1,238 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.complex.impl.TimeStampSecTZReaderImpl;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.holders.NullableTimeStampSecTZHolder;
+import org.apache.arrow.vector.holders.TimeStampSecTZHolder;
+import org.apache.arrow.vector.types.TimeUnit;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.TransferPair;
+
+/**
+ * TimeStampSecTZVector implements a fixed width vector (8 bytes) of
+ * timestamp (seconds resolution) values which could be null. A validity buffer
+ * (bit vector) is maintained to track which elements in the vector are null.
+ */
+public final class TimeStampSecTZVector extends TimeStampVector {
+ private final FieldReader reader;
+ private final String timeZone;
+
+ /**
+ * Instantiate a TimeStampSecTZVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param allocator allocator for memory management.
+ */
+ public TimeStampSecTZVector(String name, BufferAllocator allocator, String timeZone) {
+ this(name, FieldType.nullable(new ArrowType.Timestamp(TimeUnit.SECOND, timeZone)), allocator);
+ }
+
+ /**
+ * Instantiate a TimeStampSecTZVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param fieldType type of Field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public TimeStampSecTZVector(String name, FieldType fieldType, BufferAllocator allocator) {
+ super(name, fieldType, allocator);
+ ArrowType.Timestamp arrowType = (ArrowType.Timestamp) fieldType.getType();
+ timeZone = arrowType.getTimezone();
+ reader = new TimeStampSecTZReaderImpl(TimeStampSecTZVector.this);
+ }
+
+ /**
+ * Instantiate a TimeStampSecTZVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param field Field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public TimeStampSecTZVector(Field field, BufferAllocator allocator) {
+ super(field, allocator);
+ ArrowType.Timestamp arrowType = (ArrowType.Timestamp) field.getFieldType().getType();
+ timeZone = arrowType.getTimezone();
+ reader = new TimeStampSecTZReaderImpl(TimeStampSecTZVector.this);
+ }
+
+ /**
+ * Get a reader that supports reading values from this vector.
+ *
+ * @return Field Reader for this vector
+ */
+ @Override
+ public FieldReader getReader() {
+ return reader;
+ }
+
+ /**
+ * Get minor type for this vector. The vector holds values belonging
+ * to a particular type.
+ *
+ * @return {@link org.apache.arrow.vector.types.Types.MinorType}
+ */
+ @Override
+ public MinorType getMinorType() {
+ return MinorType.TIMESTAMPSECTZ;
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value retrieval methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Get the element at the given index from the vector and
+ * sets the state in holder. If element at given index
+ * is null, holder.isSet will be zero.
+ *
+ * @param index position of element
+ */
+ public void get(int index, NullableTimeStampSecTZHolder holder) {
+ if (NULL_CHECKING_ENABLED && isSet(index) == 0) {
+ holder.isSet = 0;
+ return;
+ }
+ holder.isSet = 1;
+ holder.value = valueBuffer.getLong((long) index * TYPE_WIDTH);
+ }
+
+ /**
+ * Same as {@link #get(int)}.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public Long getObject(int index) {
+ if (isSet(index) == 0) {
+ return null;
+ } else {
+ return valueBuffer.getLong((long) index * TYPE_WIDTH);
+ }
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value setter methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ * If the value in holder is not indicated as set, element in the
+ * at the given index will be null.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void set(int index, NullableTimeStampSecTZHolder holder) throws IllegalArgumentException {
+ if (holder.isSet < 0) {
+ throw new IllegalArgumentException();
+ } else if (holder.isSet > 0) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, holder.value);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void set(int index, TimeStampSecTZHolder holder) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, holder.value);
+ }
+
+ /**
+ * Same as {@link #set(int, NullableTimeStampSecTZHolder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void setSafe(int index, NullableTimeStampSecTZHolder holder) throws IllegalArgumentException {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Same as {@link #set(int, TimeStampSecTZHolder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void setSafe(int index, TimeStampSecTZHolder holder) {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector transfer |
+ | |
+ *----------------------------------------------------------------*/
+
+ /**
+ * Construct a TransferPair comprising of this and a target vector of
+ * the same type.
+ *
+ * @param ref name of the target vector
+ * @param allocator allocator for the target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
+ TimeStampSecTZVector to = new TimeStampSecTZVector(ref,
+ field.getFieldType(), allocator);
+ return new TransferImpl(to);
+ }
+
+ /**
+ * Construct a TransferPair with a desired target vector of the same type.
+ *
+ * @param to target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair makeTransferPair(ValueVector to) {
+ return new TransferImpl((TimeStampSecTZVector) to);
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TimeStampSecVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TimeStampSecVector.java
new file mode 100644
index 000000000..f12e19684
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TimeStampSecVector.java
@@ -0,0 +1,237 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED;
+
+import java.time.LocalDateTime;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.complex.impl.TimeStampSecReaderImpl;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.holders.NullableTimeStampSecHolder;
+import org.apache.arrow.vector.holders.TimeStampSecHolder;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.DateUtility;
+import org.apache.arrow.vector.util.TransferPair;
+
+/**
+ * TimeStampSecVector implements a fixed width vector (8 bytes) of
+ * timestamp (seconds resolution) values which could be null. A validity buffer (bit vector) is
+ * maintained to track which elements in the vector are null.
+ */
+public final class TimeStampSecVector extends TimeStampVector {
+ private final FieldReader reader;
+
+ /**
+ * Instantiate a TimeStampSecVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param allocator allocator for memory management.
+ */
+ public TimeStampSecVector(String name, BufferAllocator allocator) {
+ this(name, FieldType.nullable(MinorType.TIMESTAMPSEC.getType()), allocator);
+ }
+
+ /**
+ * Instantiate a TimeStampSecVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param fieldType type of Field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public TimeStampSecVector(String name, FieldType fieldType, BufferAllocator allocator) {
+ super(name, fieldType, allocator);
+ reader = new TimeStampSecReaderImpl(TimeStampSecVector.this);
+ }
+
+ /**
+ * Instantiate a TimeStampSecVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param field Field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public TimeStampSecVector(Field field, BufferAllocator allocator) {
+ super(field, allocator);
+ reader = new TimeStampSecReaderImpl(TimeStampSecVector.this);
+ }
+
+ /**
+ * Get a reader that supports reading values from this vector.
+ *
+ * @return Field Reader for this vector
+ */
+ @Override
+ public FieldReader getReader() {
+ return reader;
+ }
+
+ /**
+ * Get minor type for this vector. The vector holds values belonging
+ * to a particular type.
+ *
+ * @return {@link org.apache.arrow.vector.types.Types.MinorType}
+ */
+ @Override
+ public MinorType getMinorType() {
+ return MinorType.TIMESTAMPSEC;
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value retrieval methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Get the element at the given index from the vector and
+ * sets the state in holder. If element at given index
+ * is null, holder.isSet will be zero.
+ *
+ * @param index position of element
+ */
+ public void get(int index, NullableTimeStampSecHolder holder) {
+ if (NULL_CHECKING_ENABLED && isSet(index) == 0) {
+ holder.isSet = 0;
+ return;
+ }
+ holder.isSet = 1;
+ holder.value = valueBuffer.getLong((long) index * TYPE_WIDTH);
+ }
+
+ /**
+ * Same as {@link #get(int)}.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public LocalDateTime getObject(int index) {
+ if (isSet(index) == 0) {
+ return null;
+ } else {
+ final long secs = valueBuffer.getLong((long) index * TYPE_WIDTH);
+ final long millis = java.util.concurrent.TimeUnit.SECONDS.toMillis(secs);
+ return DateUtility.getLocalDateTimeFromEpochMilli(millis);
+ }
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value setter methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ * If the value in holder is not indicated as set, element in the
+ * at the given index will be null.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void set(int index, NullableTimeStampSecHolder holder) throws IllegalArgumentException {
+ if (holder.isSet < 0) {
+ throw new IllegalArgumentException();
+ } else if (holder.isSet > 0) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, holder.value);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void set(int index, TimeStampSecHolder holder) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, holder.value);
+ }
+
+ /**
+ * Same as {@link #set(int, NullableTimeStampSecHolder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void setSafe(int index, NullableTimeStampSecHolder holder) throws IllegalArgumentException {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Same as {@link #set(int, TimeStampSecHolder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void setSafe(int index, TimeStampSecHolder holder) {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector transfer |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Construct a TransferPair comprising of this and a target vector of
+ * the same type.
+ *
+ * @param ref name of the target vector
+ * @param allocator allocator for the target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
+ TimeStampSecVector to = new TimeStampSecVector(ref,
+ field.getFieldType(), allocator);
+ return new TransferImpl(to);
+ }
+
+ /**
+ * Construct a TransferPair with a desired target vector of the same type.
+ *
+ * @param to target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair makeTransferPair(ValueVector to) {
+ return new TransferImpl((TimeStampSecVector) to);
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TimeStampVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TimeStampVector.java
new file mode 100644
index 000000000..d85a793fb
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TimeStampVector.java
@@ -0,0 +1,197 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.TransferPair;
+
+/**
+ * TimeStampVector is an abstract interface for fixed width vector (8 bytes)
+ * of timestamp values which could be null. A validity buffer (bit vector) is
+ * maintained to track which elements in the vector are null.
+ */
+public abstract class TimeStampVector extends BaseFixedWidthVector {
+ public static final byte TYPE_WIDTH = 8;
+
+ /**
+ * Instantiate a TimeStampVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param fieldType type of Field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public TimeStampVector(String name, FieldType fieldType, BufferAllocator allocator) {
+ this(new Field(name, fieldType, null), allocator);
+ }
+
+ /**
+ * Instantiate a TimeStampVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param field field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public TimeStampVector(Field field, BufferAllocator allocator) {
+ super(field, allocator, TYPE_WIDTH);
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value retrieval methods |
+ | |
+ *----------------------------------------------------------------*/
+
+ /**
+ * Get the element at the given index from the vector.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public long get(int index) throws IllegalStateException {
+ if (NULL_CHECKING_ENABLED && isSet(index) == 0) {
+ throw new IllegalStateException("Value at index is null");
+ }
+ return valueBuffer.getLong((long) index * TYPE_WIDTH);
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value setter methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ protected void setValue(int index, long value) {
+ valueBuffer.setLong((long) index * TYPE_WIDTH, value);
+ }
+
+ /**
+ * Set the element at the given index to the given value.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void set(int index, long value) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, value);
+ }
+
+ /**
+ * Same as {@link #set(int, long)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void setSafe(int index, long value) {
+ handleSafe(index);
+ set(index, value);
+ }
+
+ /**
+ * Store the given value at a particular position in the vector. isSet indicates
+ * whether the value is NULL or not.
+ *
+ * @param index position of the new value
+ * @param isSet 0 for NULL value, 1 otherwise
+ * @param value element value
+ */
+ public void set(int index, int isSet, long value) {
+ if (isSet > 0) {
+ set(index, value);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Same as {@link #set(int, int, long)} except that it handles the case
+ * when index is greater than or equal to current value capacity of the
+ * vector.
+ *
+ * @param index position of the new value
+ * @param isSet 0 for NULL value, 1 otherwise
+ * @param value element value
+ */
+ public void setSafe(int index, int isSet, long value) {
+ handleSafe(index);
+ set(index, isSet, value);
+ }
+
+ /**
+ * Given a data buffer, get the value stored at a particular position
+ * in the vector.
+ *
+ * <p>This method should not be used externally.
+ *
+ * @param buffer data buffer
+ * @param index position of the element.
+ * @return value stored at the index.
+ */
+ public static long get(final ArrowBuf buffer, final int index) {
+ return buffer.getLong((long) index * TYPE_WIDTH);
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector transfer |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * {@link TransferPair} for {@link TimeStampVector}.
+ */
+ public class TransferImpl implements TransferPair {
+ TimeStampVector to;
+
+ public TransferImpl(TimeStampVector to) {
+ this.to = to;
+ }
+
+ @Override
+ public TimeStampVector getTo() {
+ return to;
+ }
+
+ @Override
+ public void transfer() {
+ transferTo(to);
+ }
+
+ @Override
+ public void splitAndTransfer(int startIndex, int length) {
+ splitAndTransferTo(startIndex, length, to);
+ }
+
+ @Override
+ public void copyValueSafe(int fromIndex, int toIndex) {
+ to.copyFromSafe(fromIndex, toIndex, TimeStampVector.this);
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TinyIntVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TinyIntVector.java
new file mode 100644
index 000000000..f08b0e02f
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TinyIntVector.java
@@ -0,0 +1,390 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.complex.impl.TinyIntReaderImpl;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.holders.NullableTinyIntHolder;
+import org.apache.arrow.vector.holders.TinyIntHolder;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.TransferPair;
+
+/**
+ * TinyIntVector implements a fixed width (1 bytes) vector of
+ * byte values which could be null. A validity buffer (bit vector) is
+ * maintained to track which elements in the vector are null.
+ */
+public final class TinyIntVector extends BaseFixedWidthVector implements BaseIntVector {
+ public static final byte TYPE_WIDTH = 1;
+ private final FieldReader reader;
+
+ /**
+ * Instantiate a TinyIntVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param allocator allocator for memory management.
+ */
+ public TinyIntVector(String name, BufferAllocator allocator) {
+ this(name, FieldType.nullable(MinorType.TINYINT.getType()), allocator);
+ }
+
+ /**
+ * Instantiate a TinyIntVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param fieldType type of Field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public TinyIntVector(String name, FieldType fieldType, BufferAllocator allocator) {
+ this(new Field(name, fieldType, null), allocator);
+ }
+
+ /**
+ * Instantiate a TinyIntVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param field field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public TinyIntVector(Field field, BufferAllocator allocator) {
+ super(field, allocator, TYPE_WIDTH);
+ reader = new TinyIntReaderImpl(TinyIntVector.this);
+ }
+
+ /**
+ * Get a reader that supports reading values from this vector.
+ *
+ * @return Field Reader for this vector
+ */
+ @Override
+ public FieldReader getReader() {
+ return reader;
+ }
+
+ /**
+ * Get minor type for this vector. The vector holds values belonging
+ * to a particular type.
+ *
+ * @return {@link org.apache.arrow.vector.types.Types.MinorType}
+ */
+ @Override
+ public MinorType getMinorType() {
+ return MinorType.TINYINT;
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value retrieval methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Get the element at the given index from the vector.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public byte get(int index) throws IllegalStateException {
+ if (NULL_CHECKING_ENABLED && isSet(index) == 0) {
+ throw new IllegalStateException("Value at index is null");
+ }
+ return valueBuffer.getByte(index * TYPE_WIDTH);
+ }
+
+ /**
+ * Get the element at the given index from the vector and
+ * sets the state in holder. If element at given index
+ * is null, holder.isSet will be zero.
+ *
+ * @param index position of element
+ */
+ public void get(int index, NullableTinyIntHolder holder) {
+ if (isSet(index) == 0) {
+ holder.isSet = 0;
+ return;
+ }
+ holder.isSet = 1;
+ holder.value = valueBuffer.getByte(index * TYPE_WIDTH);
+ }
+
+ /**
+ * Same as {@link #get(int)}.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public Byte getObject(int index) {
+ if (isSet(index) == 0) {
+ return null;
+ } else {
+ return valueBuffer.getByte(index * TYPE_WIDTH);
+ }
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value setter methods |
+ | |
+ *----------------------------------------------------------------*/
+
+ private void setValue(int index, int value) {
+ valueBuffer.setByte(index * TYPE_WIDTH, value);
+ }
+
+ private void setValue(int index, byte value) {
+ valueBuffer.setByte(index * TYPE_WIDTH, value);
+ }
+
+ /**
+ * Set the element at the given index to the given value.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void set(int index, int value) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, value);
+ }
+
+ /**
+ * Set the element at the given index to the given value.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void set(int index, byte value) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, value);
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ * If the value in holder is not indicated as set, element in the
+ * at the given index will be null.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void set(int index, NullableTinyIntHolder holder) throws IllegalArgumentException {
+ if (holder.isSet < 0) {
+ throw new IllegalArgumentException();
+ } else if (holder.isSet > 0) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, holder.value);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void set(int index, TinyIntHolder holder) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, holder.value);
+ }
+
+ /**
+ * Same as {@link #set(int, int)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void setSafe(int index, int value) {
+ handleSafe(index);
+ set(index, value);
+ }
+
+ /**
+ * Same as {@link #set(int, byte)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void setSafe(int index, byte value) {
+ handleSafe(index);
+ set(index, value);
+ }
+
+ /**
+ * Same as {@link #set(int, NullableTinyIntHolder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void setSafe(int index, NullableTinyIntHolder holder) throws IllegalArgumentException {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Same as {@link #set(int, TinyIntHolder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void setSafe(int index, TinyIntHolder holder) {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Store the given value at a particular position in the vector. isSet indicates
+ * whether the value is NULL or not.
+ *
+ * @param index position of the new value
+ * @param isSet 0 for NULL value, 1 otherwise
+ * @param value element value
+ */
+ public void set(int index, int isSet, byte value) {
+ if (isSet > 0) {
+ set(index, value);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Same as {@link #set(int, int, byte)} except that it handles the case
+ * when index is greater than or equal to current value capacity of the
+ * vector.
+ *
+ * @param index position of the new value
+ * @param isSet 0 for NULL value, 1 otherwise
+ * @param value element value
+ */
+ public void setSafe(int index, int isSet, byte value) {
+ handleSafe(index);
+ set(index, isSet, value);
+ }
+
+ /**
+ * Given a data buffer, get the value stored at a particular position
+ * in the vector.
+ *
+ * <p>This method should not be used externally.
+ *
+ * @param buffer data buffer
+ * @param index position of the element.
+ * @return value stored at the index.
+ */
+ public static byte get(final ArrowBuf buffer, final int index) {
+ return buffer.getByte(index * TYPE_WIDTH);
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector transfer |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Construct a TransferPair comprising of this and a target vector of
+ * the same type.
+ *
+ * @param ref name of the target vector
+ * @param allocator allocator for the target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
+ return new TransferImpl(ref, allocator);
+ }
+
+ /**
+ * Construct a TransferPair with a desired target vector of the same type.
+ *
+ * @param to target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair makeTransferPair(ValueVector to) {
+ return new TransferImpl((TinyIntVector) to);
+ }
+
+ @Override
+ public void setWithPossibleTruncate(int index, long value) {
+ this.setSafe(index, (int) value);
+ }
+
+ @Override
+ public void setUnsafeWithPossibleTruncate(int index, long value) {
+ this.set(index, (int) value);
+ }
+
+ @Override
+ public long getValueAsLong(int index) {
+ return this.get(index);
+ }
+
+ private class TransferImpl implements TransferPair {
+ TinyIntVector to;
+
+ public TransferImpl(String ref, BufferAllocator allocator) {
+ to = new TinyIntVector(ref, field.getFieldType(), allocator);
+ }
+
+ public TransferImpl(TinyIntVector to) {
+ this.to = to;
+ }
+
+ @Override
+ public TinyIntVector getTo() {
+ return to;
+ }
+
+ @Override
+ public void transfer() {
+ transferTo(to);
+ }
+
+ @Override
+ public void splitAndTransfer(int startIndex, int length) {
+ splitAndTransferTo(startIndex, length, to);
+ }
+
+ @Override
+ public void copyValueSafe(int fromIndex, int toIndex) {
+ to.copyFromSafe(fromIndex, toIndex, TinyIntVector.this);
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TypeLayout.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TypeLayout.java
new file mode 100644
index 000000000..60fe2a6a6
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/TypeLayout.java
@@ -0,0 +1,448 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static java.util.Arrays.asList;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+import org.apache.arrow.util.Preconditions;
+import org.apache.arrow.vector.BufferLayout.BufferType;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.ArrowType.ArrowTypeVisitor;
+import org.apache.arrow.vector.types.pojo.ArrowType.Binary;
+import org.apache.arrow.vector.types.pojo.ArrowType.Bool;
+import org.apache.arrow.vector.types.pojo.ArrowType.Date;
+import org.apache.arrow.vector.types.pojo.ArrowType.Decimal;
+import org.apache.arrow.vector.types.pojo.ArrowType.Duration;
+import org.apache.arrow.vector.types.pojo.ArrowType.FixedSizeBinary;
+import org.apache.arrow.vector.types.pojo.ArrowType.FixedSizeList;
+import org.apache.arrow.vector.types.pojo.ArrowType.FloatingPoint;
+import org.apache.arrow.vector.types.pojo.ArrowType.Int;
+import org.apache.arrow.vector.types.pojo.ArrowType.Interval;
+import org.apache.arrow.vector.types.pojo.ArrowType.LargeBinary;
+import org.apache.arrow.vector.types.pojo.ArrowType.LargeUtf8;
+import org.apache.arrow.vector.types.pojo.ArrowType.Map;
+import org.apache.arrow.vector.types.pojo.ArrowType.Null;
+import org.apache.arrow.vector.types.pojo.ArrowType.Struct;
+import org.apache.arrow.vector.types.pojo.ArrowType.Time;
+import org.apache.arrow.vector.types.pojo.ArrowType.Timestamp;
+import org.apache.arrow.vector.types.pojo.ArrowType.Union;
+import org.apache.arrow.vector.types.pojo.ArrowType.Utf8;
+
+/**
+ * The buffer layout of vectors for a given type.
+ * It defines its own buffers followed by the buffers for the children
+ * if it is a nested type (Struct_, List, Union)
+ */
+public class TypeLayout {
+
+ /**
+ * Constructs a new {@TypeLayout} for the given <code>arrowType</code>.
+ */
+ public static TypeLayout getTypeLayout(final ArrowType arrowType) {
+ TypeLayout layout = arrowType.accept(new ArrowTypeVisitor<TypeLayout>() {
+
+ @Override
+ public TypeLayout visit(Int type) {
+ return newFixedWidthTypeLayout(BufferLayout.dataBuffer(type.getBitWidth()));
+ }
+
+ @Override
+ public TypeLayout visit(Union type) {
+ List<BufferLayout> vectors;
+ switch (type.getMode()) {
+ case Dense:
+ vectors = asList(
+ BufferLayout.typeBuffer(),
+ BufferLayout.offsetBuffer() // offset to find the vector
+ );
+ break;
+ case Sparse:
+ vectors = asList(
+ BufferLayout.typeBuffer() // type of the value at the index or 0 if null
+ );
+ break;
+ default:
+ throw new UnsupportedOperationException("Unsupported Union Mode: " + type.getMode());
+ }
+ return new TypeLayout(vectors);
+ }
+
+ @Override
+ public TypeLayout visit(Struct type) {
+ List<BufferLayout> vectors = asList(
+ BufferLayout.validityVector()
+ );
+ return new TypeLayout(vectors);
+ }
+
+ @Override
+ public TypeLayout visit(Timestamp type) {
+ return newFixedWidthTypeLayout(BufferLayout.dataBuffer(64));
+ }
+
+ @Override
+ public TypeLayout visit(org.apache.arrow.vector.types.pojo.ArrowType.List type) {
+ List<BufferLayout> vectors = asList(
+ BufferLayout.validityVector(),
+ BufferLayout.offsetBuffer()
+ );
+ return new TypeLayout(vectors);
+ }
+
+ @Override
+ public TypeLayout visit(ArrowType.LargeList type) {
+ List<BufferLayout> vectors = asList(
+ BufferLayout.validityVector(),
+ BufferLayout.largeOffsetBuffer()
+ );
+ return new TypeLayout(vectors);
+ }
+
+ @Override
+ public TypeLayout visit(FixedSizeList type) {
+ List<BufferLayout> vectors = asList(
+ BufferLayout.validityVector()
+ );
+ return new TypeLayout(vectors);
+ }
+
+ @Override
+ public TypeLayout visit(Map type) {
+ List<BufferLayout> vectors = asList(
+ BufferLayout.validityVector(),
+ BufferLayout.offsetBuffer()
+ );
+ return new TypeLayout(vectors);
+ }
+
+ @Override
+ public TypeLayout visit(FloatingPoint type) {
+ int bitWidth;
+ switch (type.getPrecision()) {
+ case HALF:
+ bitWidth = 16;
+ break;
+ case SINGLE:
+ bitWidth = 32;
+ break;
+ case DOUBLE:
+ bitWidth = 64;
+ break;
+ default:
+ throw new UnsupportedOperationException("Unsupported Precision: " + type.getPrecision());
+ }
+ return newFixedWidthTypeLayout(BufferLayout.dataBuffer(bitWidth));
+ }
+
+ @Override
+ public TypeLayout visit(Decimal type) {
+ return newFixedWidthTypeLayout(BufferLayout.dataBuffer(type.getBitWidth()));
+ }
+
+ @Override
+ public TypeLayout visit(FixedSizeBinary type) {
+ return newFixedWidthTypeLayout(new BufferLayout(BufferType.DATA, type.getByteWidth() * 8));
+ }
+
+ @Override
+ public TypeLayout visit(Bool type) {
+ return newFixedWidthTypeLayout(BufferLayout.booleanVector());
+ }
+
+ @Override
+ public TypeLayout visit(Binary type) {
+ return newVariableWidthTypeLayout();
+ }
+
+ @Override
+ public TypeLayout visit(Utf8 type) {
+ return newVariableWidthTypeLayout();
+ }
+
+ @Override
+ public TypeLayout visit(LargeUtf8 type) {
+ return newLargeVariableWidthTypeLayout();
+ }
+
+ @Override
+ public TypeLayout visit(LargeBinary type) {
+ return newLargeVariableWidthTypeLayout();
+ }
+
+ private TypeLayout newVariableWidthTypeLayout() {
+ return newPrimitiveTypeLayout(BufferLayout.validityVector(), BufferLayout.offsetBuffer(),
+ BufferLayout.byteVector());
+ }
+
+ private TypeLayout newLargeVariableWidthTypeLayout() {
+ return newPrimitiveTypeLayout(BufferLayout.validityVector(), BufferLayout.largeOffsetBuffer(),
+ BufferLayout.byteVector());
+ }
+
+ private TypeLayout newPrimitiveTypeLayout(BufferLayout... vectors) {
+ return new TypeLayout(asList(vectors));
+ }
+
+ public TypeLayout newFixedWidthTypeLayout(BufferLayout dataVector) {
+ return newPrimitiveTypeLayout(BufferLayout.validityVector(), dataVector);
+ }
+
+ @Override
+ public TypeLayout visit(Null type) {
+ return new TypeLayout(Collections.<BufferLayout>emptyList());
+ }
+
+ @Override
+ public TypeLayout visit(Date type) {
+ switch (type.getUnit()) {
+ case DAY:
+ return newFixedWidthTypeLayout(BufferLayout.dataBuffer(32));
+ case MILLISECOND:
+ return newFixedWidthTypeLayout(BufferLayout.dataBuffer(64));
+ default:
+ throw new UnsupportedOperationException("Unknown unit " + type.getUnit());
+ }
+ }
+
+ @Override
+ public TypeLayout visit(Time type) {
+ return newFixedWidthTypeLayout(BufferLayout.dataBuffer(type.getBitWidth()));
+ }
+
+ @Override
+ public TypeLayout visit(Interval type) {
+ switch (type.getUnit()) {
+ case DAY_TIME:
+ return newFixedWidthTypeLayout(BufferLayout.dataBuffer(64));
+ case YEAR_MONTH:
+ return newFixedWidthTypeLayout(BufferLayout.dataBuffer(32));
+ case MONTH_DAY_NANO:
+ return newFixedWidthTypeLayout(BufferLayout.dataBuffer(128));
+ default:
+ throw new UnsupportedOperationException("Unknown unit " + type.getUnit());
+ }
+ }
+
+ @Override
+ public TypeLayout visit(Duration type) {
+ return newFixedWidthTypeLayout(BufferLayout.dataBuffer(64));
+ }
+
+ });
+ return layout;
+ }
+
+ /**
+ * Gets the number of {@link BufferLayout}s for the given <code>arrowType</code>.
+ */
+ public static int getTypeBufferCount(final ArrowType arrowType) {
+ return arrowType.accept(new ArrowTypeVisitor<Integer>() {
+
+ /**
+ * All fixed width vectors have a common number of buffers 2: one validity buffer, plus a data buffer.
+ */
+ static final int FIXED_WIDTH_BUFFER_COUNT = 2;
+
+ /**
+ * All variable width vectors have a common number of buffers 3: a validity buffer,
+ * an offset buffer, and a data buffer.
+ */
+ static final int VARIABLE_WIDTH_BUFFER_COUNT = 3;
+
+ @Override
+ public Integer visit(Int type) {
+ return FIXED_WIDTH_BUFFER_COUNT;
+ }
+
+ @Override
+ public Integer visit(Union type) {
+ switch (type.getMode()) {
+ case Dense:
+ // TODO: validate this
+ return 2;
+ case Sparse:
+ // type buffer
+ return 1;
+ default:
+ throw new UnsupportedOperationException("Unsupported Union Mode: " + type.getMode());
+ }
+ }
+
+ @Override
+ public Integer visit(Struct type) {
+ // validity buffer
+ return 1;
+ }
+
+ @Override
+ public Integer visit(Timestamp type) {
+ return FIXED_WIDTH_BUFFER_COUNT;
+ }
+
+ @Override
+ public Integer visit(org.apache.arrow.vector.types.pojo.ArrowType.List type) {
+ // validity buffer + offset buffer
+ return 2;
+ }
+
+ @Override
+ public Integer visit(ArrowType.LargeList type) {
+ // validity buffer + offset buffer
+ return 2;
+ }
+
+ @Override
+ public Integer visit(FixedSizeList type) {
+ // validity buffer
+ return 1;
+ }
+
+ @Override
+ public Integer visit(Map type) {
+ // validity buffer + offset buffer
+ return 2;
+ }
+
+ @Override
+ public Integer visit(FloatingPoint type) {
+ return FIXED_WIDTH_BUFFER_COUNT;
+ }
+
+ @Override
+ public Integer visit(Decimal type) {
+ return FIXED_WIDTH_BUFFER_COUNT;
+ }
+
+ @Override
+ public Integer visit(FixedSizeBinary type) {
+ return FIXED_WIDTH_BUFFER_COUNT;
+ }
+
+ @Override
+ public Integer visit(Bool type) {
+ return FIXED_WIDTH_BUFFER_COUNT;
+ }
+
+ @Override
+ public Integer visit(Binary type) {
+ return VARIABLE_WIDTH_BUFFER_COUNT;
+ }
+
+ @Override
+ public Integer visit(Utf8 type) {
+ return VARIABLE_WIDTH_BUFFER_COUNT;
+ }
+
+ @Override
+ public Integer visit(LargeUtf8 type) {
+ return VARIABLE_WIDTH_BUFFER_COUNT;
+ }
+
+ @Override
+ public Integer visit(LargeBinary type) {
+ return VARIABLE_WIDTH_BUFFER_COUNT;
+ }
+
+ @Override
+ public Integer visit(Null type) {
+ return 0;
+ }
+
+ @Override
+ public Integer visit(Date type) {
+ return FIXED_WIDTH_BUFFER_COUNT;
+ }
+
+ @Override
+ public Integer visit(Time type) {
+ return FIXED_WIDTH_BUFFER_COUNT;
+ }
+
+ @Override
+ public Integer visit(Interval type) {
+ return FIXED_WIDTH_BUFFER_COUNT;
+ }
+
+ @Override
+ public Integer visit(Duration type) {
+ return FIXED_WIDTH_BUFFER_COUNT;
+ }
+
+ });
+ }
+
+ private final List<BufferLayout> bufferLayouts;
+
+ public TypeLayout(List<BufferLayout> bufferLayouts) {
+ super();
+ this.bufferLayouts = Preconditions.checkNotNull(bufferLayouts);
+ }
+
+ public TypeLayout(BufferLayout... bufferLayouts) {
+ this(asList(bufferLayouts));
+ }
+
+ /**
+ * Returns the individual {@linkplain BufferLayout}s for the given type.
+ */
+ public List<BufferLayout> getBufferLayouts() {
+ return bufferLayouts;
+ }
+
+ /**
+ * Returns the types of each buffer for this layout. A layout can consist
+ * of multiple buffers for example a validity bitmap buffer, a value buffer or
+ * an offset buffer.
+ */
+ public List<BufferType> getBufferTypes() {
+ List<BufferType> types = new ArrayList<>(bufferLayouts.size());
+ for (BufferLayout vector : bufferLayouts) {
+ types.add(vector.getType());
+ }
+ return types;
+ }
+
+ public String toString() {
+ return bufferLayouts.toString();
+ }
+
+ @Override
+ public int hashCode() {
+ return bufferLayouts.hashCode();
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ if (this == obj) {
+ return true;
+ }
+ if (obj == null) {
+ return false;
+ }
+ if (getClass() != obj.getClass()) {
+ return false;
+ }
+ TypeLayout other = (TypeLayout) obj;
+ return bufferLayouts.equals(other.bufferLayouts);
+ }
+
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/UInt1Vector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/UInt1Vector.java
new file mode 100644
index 000000000..bd9a732c1
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/UInt1Vector.java
@@ -0,0 +1,368 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.complex.impl.UInt1ReaderImpl;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.holders.NullableUInt1Holder;
+import org.apache.arrow.vector.holders.UInt1Holder;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.TransferPair;
+import org.apache.arrow.vector.util.ValueVectorUtility;
+
+/**
+ * UInt1Vector implements a fixed width (1 bytes) vector of
+ * integer values which could be null. A validity buffer (bit vector) is
+ * maintained to track which elements in the vector are null.
+ */
+public final class UInt1Vector extends BaseFixedWidthVector implements BaseIntVector {
+ /**
+ * The mask to use when promoting the unsigned byte value to an integer.
+ */
+ public static final int PROMOTION_MASK = 0xFF;
+
+ /**
+ * The maximum 8-bit unsigned integer.
+ */
+ public static final byte MAX_UINT1 = (byte) 0XFF;
+
+ public static final byte TYPE_WIDTH = 1;
+ private final FieldReader reader;
+
+ public UInt1Vector(String name, BufferAllocator allocator) {
+ this(name, FieldType.nullable(MinorType.UINT1.getType()), allocator);
+ }
+
+ public UInt1Vector(String name, FieldType fieldType, BufferAllocator allocator) {
+ this(new Field(name, fieldType, null), allocator);
+ }
+
+ public UInt1Vector(Field field, BufferAllocator allocator) {
+ super(field, allocator, TYPE_WIDTH);
+ reader = new UInt1ReaderImpl(UInt1Vector.this);
+ }
+
+ @Override
+ public FieldReader getReader() {
+ return reader;
+ }
+
+ @Override
+ public MinorType getMinorType() {
+ return MinorType.UINT1;
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value retrieval methods |
+ | |
+ *----------------------------------------------------------------*/
+ /**
+ * Given a data buffer, get the value stored at a particular position
+ * in the vector.
+ *
+ * <p>To avoid overflow, the returned type is one step up from the signed
+ * type.
+ *
+ * <p>This method is mainly meant for integration tests.
+ *
+ * @param buffer data buffer
+ * @param index position of the element.
+ * @return value stored at the index.
+ */
+ public static short getNoOverflow(final ArrowBuf buffer, final int index) {
+ byte b = buffer.getByte(index * TYPE_WIDTH);
+ return (short) (PROMOTION_MASK & b);
+ }
+
+
+ /**
+ * Get the element at the given index from the vector.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public byte get(int index) throws IllegalStateException {
+ if (NULL_CHECKING_ENABLED && isSet(index) == 0) {
+ throw new IllegalStateException("Value at index is null");
+ }
+ return valueBuffer.getByte(index * TYPE_WIDTH);
+ }
+
+ /**
+ * Get the element at the given index from the vector and
+ * sets the state in holder. If element at given index
+ * is null, holder.isSet will be zero.
+ *
+ * @param index position of element
+ */
+ public void get(int index, NullableUInt1Holder holder) {
+ if (isSet(index) == 0) {
+ holder.isSet = 0;
+ return;
+ }
+ holder.isSet = 1;
+ holder.value = valueBuffer.getByte(index * TYPE_WIDTH);
+ }
+
+ /**
+ * Same as {@link #get(int)}.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public Byte getObject(int index) {
+ if (isSet(index) == 0) {
+ return null;
+ } else {
+ return valueBuffer.getByte(index * TYPE_WIDTH);
+ }
+ }
+
+ /**
+ * Returns the value stored at index without the potential for overflow.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public Short getObjectNoOverflow(int index) {
+ if (isSet(index) == 0) {
+ return null;
+ } else {
+ return getNoOverflow(valueBuffer, index);
+ }
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value setter methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ private void setValue(int index, int value) {
+ valueBuffer.setByte(index * TYPE_WIDTH, value);
+ }
+
+ private void setValue(int index, byte value) {
+ valueBuffer.setByte(index * TYPE_WIDTH, value);
+ }
+
+ /**
+ * Set the element at the given index to the given value.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void set(int index, int value) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, value);
+ }
+
+ /**
+ * Set the element at the given index to the given value.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void set(int index, byte value) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, value);
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ * If the value in holder is not indicated as set, element in the
+ * at the given index will be null.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void set(int index, NullableUInt1Holder holder) throws IllegalArgumentException {
+ if (holder.isSet < 0) {
+ throw new IllegalArgumentException();
+ } else if (holder.isSet > 0) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, holder.value);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void set(int index, UInt1Holder holder) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, holder.value);
+ }
+
+ /**
+ * Same as {@link #set(int, int)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void setSafe(int index, int value) {
+ handleSafe(index);
+ set(index, value);
+ }
+
+ /**
+ * Same as {@link #set(int, byte)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void setSafe(int index, byte value) {
+ handleSafe(index);
+ set(index, value);
+ }
+
+ /**
+ * Same as {@link #set(int, NullableUInt1Holder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void setSafe(int index, NullableUInt1Holder holder) throws IllegalArgumentException {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Same as {@link #set(int, UInt1Holder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void setSafe(int index, UInt1Holder holder) {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Sets the value at index to value isSet > 0, otherwise sets the index position
+ * to invalid/null.
+ */
+ public void set(int index, int isSet, byte value) {
+ if (isSet > 0) {
+ set(index, value);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Same as {@link #set(int, int, byte)} but will reallocate the buffer if index
+ * is larger than current capacity.
+ */
+ public void setSafe(int index, int isSet, byte value) {
+ handleSafe(index);
+ set(index, isSet, value);
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector transfer |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
+ return new TransferImpl(ref, allocator);
+ }
+
+ @Override
+ public TransferPair makeTransferPair(ValueVector to) {
+ return new TransferImpl((UInt1Vector) to);
+ }
+
+ @Override
+ public void setWithPossibleTruncate(int index, long value) {
+ this.setSafe(index, (int) value);
+ }
+
+ @Override
+ public void setUnsafeWithPossibleTruncate(int index, long value) {
+ this.set(index, (int) value);
+ }
+
+ @Override
+ public long getValueAsLong(int index) {
+ return this.get(index) & PROMOTION_MASK;
+ }
+
+ @Override
+ public String toString() {
+ return ValueVectorUtility.getToString(this, 0, getValueCount(), (v, i) -> v.getObjectNoOverflow(i));
+ }
+
+ private class TransferImpl implements TransferPair {
+ UInt1Vector to;
+
+ public TransferImpl(String ref, BufferAllocator allocator) {
+ to = new UInt1Vector(ref, field.getFieldType(), allocator);
+ }
+
+ public TransferImpl(UInt1Vector to) {
+ this.to = to;
+ }
+
+ @Override
+ public UInt1Vector getTo() {
+ return to;
+ }
+
+ @Override
+ public void transfer() {
+ transferTo(to);
+ }
+
+ @Override
+ public void splitAndTransfer(int startIndex, int length) {
+ splitAndTransferTo(startIndex, length, to);
+ }
+
+ @Override
+ public void copyValueSafe(int fromIndex, int toIndex) {
+ to.copyFromSafe(fromIndex, toIndex, UInt1Vector.this);
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/UInt2Vector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/UInt2Vector.java
new file mode 100644
index 000000000..5c29ab6b3
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/UInt2Vector.java
@@ -0,0 +1,346 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.complex.impl.UInt2ReaderImpl;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.holders.NullableUInt2Holder;
+import org.apache.arrow.vector.holders.UInt2Holder;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.TransferPair;
+import org.apache.arrow.vector.util.ValueVectorUtility;
+
+/**
+ * UInt2Vector implements a fixed width (2 bytes) vector of
+ * integer values which could be null. A validity buffer (bit vector) is
+ * maintained to track which elements in the vector are null.
+ */
+public final class UInt2Vector extends BaseFixedWidthVector implements BaseIntVector {
+
+ /**
+ * The maximum 16-bit unsigned integer.
+ */
+ public static final char MAX_UINT2 = (char) 0XFFFF;
+
+ public static final byte TYPE_WIDTH = 2;
+ private final FieldReader reader;
+
+ public UInt2Vector(String name, BufferAllocator allocator) {
+ this(name, FieldType.nullable(MinorType.UINT2.getType()), allocator);
+ }
+
+ public UInt2Vector(String name, FieldType fieldType, BufferAllocator allocator) {
+ this(new Field(name, fieldType, null), allocator);
+ }
+
+ public UInt2Vector(Field field, BufferAllocator allocator) {
+ super(field, allocator, TYPE_WIDTH);
+ reader = new UInt2ReaderImpl(UInt2Vector.this);
+ }
+
+ @Override
+ public FieldReader getReader() {
+ return reader;
+ }
+
+ @Override
+ public MinorType getMinorType() {
+ return MinorType.UINT2;
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value retrieval methods |
+ | |
+ *----------------------------------------------------------------*/
+ /**
+ * Given a data buffer, get the value stored at a particular position
+ * in the vector.
+ *
+ * <p>This method is mainly meant for integration tests.
+ *
+ * @param buffer data buffer
+ * @param index position of the element.
+ * @return value stored at the index.
+ */
+ public static char get(final ArrowBuf buffer, final int index) {
+ return buffer.getChar((long) index * TYPE_WIDTH);
+ }
+
+ /**
+ * Get the element at the given index from the vector.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public char get(int index) throws IllegalStateException {
+ if (NULL_CHECKING_ENABLED && isSet(index) == 0) {
+ throw new IllegalStateException("Value at index is null");
+ }
+ return valueBuffer.getChar((long) index * TYPE_WIDTH);
+ }
+
+ /**
+ * Get the element at the given index from the vector and
+ * sets the state in holder. If element at given index
+ * is null, holder.isSet will be zero.
+ *
+ * @param index position of element
+ */
+ public void get(int index, NullableUInt2Holder holder) {
+ if (isSet(index) == 0) {
+ holder.isSet = 0;
+ return;
+ }
+ holder.isSet = 1;
+ holder.value = valueBuffer.getChar((long) index * TYPE_WIDTH);
+ }
+
+ /**
+ * Same as {@link #get(int)}.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public Character getObject(int index) {
+ if (isSet(index) == 0) {
+ return null;
+ } else {
+ return valueBuffer.getChar((long) index * TYPE_WIDTH);
+ }
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value setter methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ private void setValue(int index, int value) {
+ valueBuffer.setChar((long) index * TYPE_WIDTH, value);
+ }
+
+ private void setValue(int index, char value) {
+ valueBuffer.setChar((long) index * TYPE_WIDTH, value);
+ }
+
+ /**
+ * Set the element at the given index to the given value.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void set(int index, int value) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, value);
+ }
+
+ /**
+ * Set the element at the given index to the given value.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void set(int index, char value) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, value);
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ * If the value in holder is not indicated as set, element in the
+ * at the given index will be null.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void set(int index, NullableUInt2Holder holder) throws IllegalArgumentException {
+ if (holder.isSet < 0) {
+ throw new IllegalArgumentException();
+ } else if (holder.isSet > 0) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, holder.value);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void set(int index, UInt2Holder holder) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, holder.value);
+ }
+
+ /**
+ * Same as {@link #set(int, int)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void setSafe(int index, int value) {
+ handleSafe(index);
+ set(index, value);
+ }
+
+ /**
+ * Same as {@link #set(int, char)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void setSafe(int index, char value) {
+ handleSafe(index);
+ set(index, value);
+ }
+
+ /**
+ * Same as {@link #set(int, NullableUInt2Holder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void setSafe(int index, NullableUInt2Holder holder) throws IllegalArgumentException {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Same as {@link #set(int, UInt2Holder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void setSafe(int index, UInt2Holder holder) {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Sets the given index to value is isSet is positive, otherwise sets
+ * the position as invalid/null.
+ */
+ public void set(int index, int isSet, char value) {
+ if (isSet > 0) {
+ set(index, value);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Same as {@link #set(int, int, char)} but will reallocate the buffer if index
+ * is larger than current capacity.
+ */
+ public void setSafe(int index, int isSet, char value) {
+ handleSafe(index);
+ set(index, isSet, value);
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector transfer |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
+ return new TransferImpl(ref, allocator);
+ }
+
+ @Override
+ public TransferPair makeTransferPair(ValueVector to) {
+ return new TransferImpl((UInt2Vector) to);
+ }
+
+ @Override
+ public void setWithPossibleTruncate(int index, long value) {
+ this.setSafe(index, (int) value);
+ }
+
+ @Override
+ public void setUnsafeWithPossibleTruncate(int index, long value) {
+ this.set(index, (int) value);
+ }
+
+ @Override
+ public long getValueAsLong(int index) {
+ return this.get(index);
+ }
+
+ @Override
+ public String toString() {
+ return ValueVectorUtility.getToString(this, 0, getValueCount(), (v, i) ->
+ v.isNull(i) ? "null" : Integer.toString(v.get(i) & 0x0000ffff));
+ }
+
+ private class TransferImpl implements TransferPair {
+ UInt2Vector to;
+
+ public TransferImpl(String ref, BufferAllocator allocator) {
+ to = new UInt2Vector(ref, field.getFieldType(), allocator);
+ }
+
+ public TransferImpl(UInt2Vector to) {
+ this.to = to;
+ }
+
+ @Override
+ public UInt2Vector getTo() {
+ return to;
+ }
+
+ @Override
+ public void transfer() {
+ transferTo(to);
+ }
+
+ @Override
+ public void splitAndTransfer(int startIndex, int length) {
+ splitAndTransferTo(startIndex, length, to);
+ }
+
+ @Override
+ public void copyValueSafe(int fromIndex, int toIndex) {
+ to.copyFromSafe(fromIndex, toIndex, UInt2Vector.this);
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/UInt4Vector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/UInt4Vector.java
new file mode 100644
index 000000000..cc954d67d
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/UInt4Vector.java
@@ -0,0 +1,340 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.complex.impl.UInt4ReaderImpl;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.holders.NullableUInt4Holder;
+import org.apache.arrow.vector.holders.UInt4Holder;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.TransferPair;
+import org.apache.arrow.vector.util.ValueVectorUtility;
+
+/**
+ * UInt4Vector implements a fixed width (4 bytes) vector of
+ * integer values which could be null. A validity buffer (bit vector) is
+ * maintained to track which elements in the vector are null.
+ */
+public final class UInt4Vector extends BaseFixedWidthVector implements BaseIntVector {
+
+ /**
+ * The mask to use when promoting the unsigned int value to a long int.
+ */
+ public static final long PROMOTION_MASK = 0x00000000FFFFFFFFL;
+
+ /**
+ * The maximum 32-bit unsigned integer.
+ */
+ public static final int MAX_UINT4 = 0XFFFFFFFF;
+
+ public static final byte TYPE_WIDTH = 4;
+ private final FieldReader reader;
+
+ public UInt4Vector(String name, BufferAllocator allocator) {
+ this(name, FieldType.nullable(MinorType.UINT4.getType()), allocator);
+ }
+
+ public UInt4Vector(String name, FieldType fieldType, BufferAllocator allocator) {
+ this(new Field(name, fieldType, null), allocator);
+ }
+
+ public UInt4Vector(Field field, BufferAllocator allocator) {
+ super(field, allocator, TYPE_WIDTH);
+ reader = new UInt4ReaderImpl(UInt4Vector.this);
+ }
+
+ @Override
+ public FieldReader getReader() {
+ return reader;
+ }
+
+ @Override
+ public MinorType getMinorType() {
+ return MinorType.UINT4;
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value retrieval methods |
+ | |
+ *----------------------------------------------------------------*/
+ /**
+ * Given a data buffer, get the value stored at a particular position
+ * in the vector.
+ *
+ * <p>To avoid overflow, the returned type is one step up from the signed
+ * type.
+ *
+ * <p>This method is mainly meant for integration tests.
+ *
+ * @param buffer data buffer
+ * @param index position of the element.
+ * @return value stored at the index.
+ */
+ public static long getNoOverflow(final ArrowBuf buffer, final int index) {
+ long l = buffer.getInt((long) index * TYPE_WIDTH);
+ return PROMOTION_MASK & l;
+ }
+
+ /**
+ * Get the element at the given index from the vector.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public int get(int index) throws IllegalStateException {
+ if (NULL_CHECKING_ENABLED && isSet(index) == 0) {
+ throw new IllegalStateException("Value at index is null");
+ }
+ return valueBuffer.getInt((long) index * TYPE_WIDTH);
+ }
+
+ /**
+ * Get the element at the given index from the vector and
+ * sets the state in holder. If element at given index
+ * is null, holder.isSet will be zero.
+ *
+ * @param index position of element
+ */
+ public void get(int index, NullableUInt4Holder holder) {
+ if (isSet(index) == 0) {
+ holder.isSet = 0;
+ return;
+ }
+ holder.isSet = 1;
+ holder.value = valueBuffer.getInt((long) index * TYPE_WIDTH);
+ }
+
+ /**
+ * Same as {@link #get(int)}.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public Integer getObject(int index) {
+ if (isSet(index) == 0) {
+ return null;
+ } else {
+ return valueBuffer.getInt((long) index * TYPE_WIDTH);
+ }
+ }
+
+ /**
+ * Same as {@link #get(int)}.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public Long getObjectNoOverflow(int index) {
+ if (isSet(index) == 0) {
+ return null;
+ } else {
+ return getNoOverflow(valueBuffer, index);
+ }
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value setter methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ private void setValue(int index, int value) {
+ valueBuffer.setInt((long) index * TYPE_WIDTH, value);
+ }
+
+ /**
+ * Set the element at the given index to the given value.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void set(int index, int value) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, value);
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ * If the value in holder is not indicated as set, element in the
+ * at the given index will be null.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void set(int index, NullableUInt4Holder holder) throws IllegalArgumentException {
+ if (holder.isSet < 0) {
+ throw new IllegalArgumentException();
+ } else if (holder.isSet > 0) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, holder.value);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void set(int index, UInt4Holder holder) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, holder.value);
+ }
+
+ /**
+ * Same as {@link #set(int, int)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void setSafe(int index, int value) {
+ handleSafe(index);
+ set(index, value);
+ }
+
+ /**
+ * Same as {@link #set(int, NullableUInt4Holder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void setSafe(int index, NullableUInt4Holder holder) throws IllegalArgumentException {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Same as {@link #set(int, UInt4Holder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void setSafe(int index, UInt4Holder holder) {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Sets the value at index to value isSet > 0, otherwise sets the index position
+ * to invalid/null.
+ */
+ public void set(int index, int isSet, int value) {
+ if (isSet > 0) {
+ set(index, value);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Same as {@link #set(int, int, int)} but will reallocate if the buffer if index
+ * is larger than the current capacity.
+ */
+ public void setSafe(int index, int isSet, int value) {
+ handleSafe(index);
+ set(index, isSet, value);
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector transfer |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
+ return new TransferImpl(ref, allocator);
+ }
+
+ @Override
+ public TransferPair makeTransferPair(ValueVector to) {
+ return new TransferImpl((UInt4Vector) to);
+ }
+
+ @Override
+ public void setWithPossibleTruncate(int index, long value) {
+ this.setSafe(index, (int) value);
+ }
+
+ @Override
+ public void setUnsafeWithPossibleTruncate(int index, long value) {
+ this.set(index, (int) value);
+ }
+
+ @Override
+ public long getValueAsLong(int index) {
+ return this.get(index) & PROMOTION_MASK;
+ }
+
+ @Override
+ public String toString() {
+ return ValueVectorUtility.getToString(this, 0, getValueCount(), (v, i) -> v.getObjectNoOverflow(i));
+ }
+
+ private class TransferImpl implements TransferPair {
+ UInt4Vector to;
+
+ public TransferImpl(String ref, BufferAllocator allocator) {
+ to = new UInt4Vector(ref, field.getFieldType(), allocator);
+ }
+
+ public TransferImpl(UInt4Vector to) {
+ this.to = to;
+ }
+
+ @Override
+ public UInt4Vector getTo() {
+ return to;
+ }
+
+ @Override
+ public void transfer() {
+ transferTo(to);
+ }
+
+ @Override
+ public void splitAndTransfer(int startIndex, int length) {
+ splitAndTransferTo(startIndex, length, to);
+ }
+
+ @Override
+ public void copyValueSafe(int fromIndex, int toIndex) {
+ to.copyFromSafe(fromIndex, toIndex, UInt4Vector.this);
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/UInt8Vector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/UInt8Vector.java
new file mode 100644
index 000000000..98eaf25a6
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/UInt8Vector.java
@@ -0,0 +1,336 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED;
+
+import java.math.BigInteger;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.complex.impl.UInt8ReaderImpl;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.holders.NullableUInt8Holder;
+import org.apache.arrow.vector.holders.UInt8Holder;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.TransferPair;
+import org.apache.arrow.vector.util.ValueVectorUtility;
+
+/**
+ * UInt8Vector implements a fixed width vector (8 bytes) of
+ * integer values which could be null. A validity buffer (bit vector) is
+ * maintained to track which elements in the vector are null.
+ */
+public final class UInt8Vector extends BaseFixedWidthVector implements BaseIntVector {
+
+ /**
+ * The maximum 64-bit unsigned long integer.
+ */
+ public static final long MAX_UINT8 = 0XFFFFFFFFFFFFFFFFL;
+
+ public static final byte TYPE_WIDTH = 8;
+ private final FieldReader reader;
+
+ public UInt8Vector(String name, BufferAllocator allocator) {
+ this(name, FieldType.nullable(MinorType.UINT8.getType()), allocator);
+ }
+
+ public UInt8Vector(String name, FieldType fieldType, BufferAllocator allocator) {
+ this(new Field(name, fieldType, null), allocator);
+ }
+
+ public UInt8Vector(Field field, BufferAllocator allocator) {
+ super(field, allocator, TYPE_WIDTH);
+ reader = new UInt8ReaderImpl(UInt8Vector.this);
+ }
+
+ @Override
+ public FieldReader getReader() {
+ return reader;
+ }
+
+ @Override
+ public MinorType getMinorType() {
+ return MinorType.UINT8;
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value retrieval methods |
+ | |
+ *----------------------------------------------------------------*/
+ private static final BigInteger SAFE_CONVERSION_MASK = new BigInteger("ffffffffffffffff", 16);
+
+ /**
+ * Given a data buffer, get the value stored at a particular position
+ * in the vector.
+ *
+ * <p>To avoid overflow, the returned type is one step up from the signed
+ * type.
+ *
+ * <p>This method is mainly meant for integration tests.
+ *
+ * @param buffer data buffer
+ * @param index position of the element.
+ * @return value stored at the index.
+ */
+ public static BigInteger getNoOverflow(final ArrowBuf buffer, final int index) {
+ BigInteger l = BigInteger.valueOf(buffer.getLong((long) index * TYPE_WIDTH));
+ return SAFE_CONVERSION_MASK.and(l);
+ }
+
+
+ /**
+ * Get the element at the given index from the vector.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public long get(int index) throws IllegalStateException {
+ if (NULL_CHECKING_ENABLED && isSet(index) == 0) {
+ throw new IllegalStateException("Value at index is null");
+ }
+ return valueBuffer.getLong((long) index * TYPE_WIDTH);
+ }
+
+ /**
+ * Get the element at the given index from the vector and
+ * sets the state in holder. If element at given index
+ * is null, holder.isSet will be zero.
+ *
+ * @param index position of element
+ */
+ public void get(int index, NullableUInt8Holder holder) {
+ if (isSet(index) == 0) {
+ holder.isSet = 0;
+ return;
+ }
+ holder.isSet = 1;
+ holder.value = valueBuffer.getLong((long) index * TYPE_WIDTH);
+ }
+
+ /**
+ * Same as {@link #get(int)}.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public Long getObject(int index) {
+ if (isSet(index) == 0) {
+ return null;
+ } else {
+ return valueBuffer.getLong((long) index * TYPE_WIDTH);
+ }
+ }
+
+ /**
+ * Returns the value stored at index without the potential for overflow.
+ *
+ * @param index position of element
+ * @return element at given index
+ */
+ public BigInteger getObjectNoOverflow(int index) {
+ if (isSet(index) == 0) {
+ return null;
+ } else {
+ return getNoOverflow(valueBuffer, index);
+ }
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value setter methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ private void setValue(int index, long value) {
+ valueBuffer.setLong((long) index * TYPE_WIDTH, value);
+ }
+
+ /**
+ * Set the element at the given index to the given value.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void set(int index, long value) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, value);
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ * If the value in holder is not indicated as set, element in the
+ * at the given index will be null.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void set(int index, NullableUInt8Holder holder) throws IllegalArgumentException {
+ if (holder.isSet < 0) {
+ throw new IllegalArgumentException();
+ } else if (holder.isSet > 0) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, holder.value);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Set the element at the given index to the value set in data holder.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void set(int index, UInt8Holder holder) {
+ BitVectorHelper.setBit(validityBuffer, index);
+ setValue(index, holder.value);
+ }
+
+ /**
+ * Same as {@link #set(int, long)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param value value of element
+ */
+ public void setSafe(int index, long value) {
+ handleSafe(index);
+ set(index, value);
+ }
+
+ /**
+ * Same as {@link #set(int, NullableUInt8Holder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder nullable data holder for value of element
+ */
+ public void setSafe(int index, NullableUInt8Holder holder) throws IllegalArgumentException {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /**
+ * Same as {@link #set(int, UInt8Holder)} except that it handles the
+ * case when index is greater than or equal to existing
+ * value capacity {@link #getValueCapacity()}.
+ *
+ * @param index position of element
+ * @param holder data holder for value of element
+ */
+ public void setSafe(int index, UInt8Holder holder) {
+ handleSafe(index);
+ set(index, holder);
+ }
+
+ /** Sets value at index is isSet is positive otherwise sets the index to invalid/null. */
+ public void set(int index, int isSet, long value) {
+ if (isSet > 0) {
+ set(index, value);
+ } else {
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+ }
+
+ /**
+ * Same as {@link #set(int, int, long)} but will reallocate if index is greater than current capacity.
+ */
+ public void setSafe(int index, int isSet, long value) {
+ handleSafe(index);
+ set(index, isSet, value);
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector transfer |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
+ return new TransferImpl(ref, allocator);
+ }
+
+ @Override
+ public TransferPair makeTransferPair(ValueVector to) {
+ return new TransferImpl((UInt8Vector) to);
+ }
+
+ @Override
+ public void setWithPossibleTruncate(int index, long value) {
+ this.setSafe(index, value);
+ }
+
+ @Override
+ public void setUnsafeWithPossibleTruncate(int index, long value) {
+ this.set(index, value);
+ }
+
+ @Override
+ public long getValueAsLong(int index) {
+ return this.get(index);
+ }
+
+ @Override
+ public String toString() {
+ return ValueVectorUtility.getToString(this, 0, getValueCount(), (v, i) -> v.getObjectNoOverflow(i));
+ }
+
+ private class TransferImpl implements TransferPair {
+ UInt8Vector to;
+
+ public TransferImpl(String ref, BufferAllocator allocator) {
+ to = new UInt8Vector(ref, field.getFieldType(), allocator);
+ }
+
+ public TransferImpl(UInt8Vector to) {
+ this.to = to;
+ }
+
+ @Override
+ public UInt8Vector getTo() {
+ return to;
+ }
+
+ @Override
+ public void transfer() {
+ transferTo(to);
+ }
+
+ @Override
+ public void splitAndTransfer(int startIndex, int length) {
+ splitAndTransferTo(startIndex, length, to);
+ }
+
+ @Override
+ public void copyValueSafe(int fromIndex, int toIndex) {
+ to.copyFromSafe(fromIndex, toIndex, UInt8Vector.this);
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ValueVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ValueVector.java
new file mode 100644
index 000000000..aa29c2931
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ValueVector.java
@@ -0,0 +1,285 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import java.io.Closeable;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.OutOfMemoryException;
+import org.apache.arrow.memory.util.hash.ArrowBufHasher;
+import org.apache.arrow.vector.compare.VectorVisitor;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.util.CallBack;
+import org.apache.arrow.vector.util.TransferPair;
+
+/**
+ * An abstraction that is used to store a sequence of values in an individual column.
+ *
+ * <p>A {@link ValueVector value vector} stores underlying data in-memory in a columnar fashion that is compact and
+ * efficient. The column whose data is stored, is referred by {@link #getField()}.
+ *
+ * <p>It is important that vector is allocated before attempting to read or write.
+ *
+ * <p>There are a few "rules" around vectors:
+ *
+ * <ul>
+ * <li>values need to be written in order (e.g. index 0, 1, 2, 5)</li>
+ * <li>null vectors start with all values as null before writing anything</li>
+ * <li>for variable width types, the offset vector should be all zeros before writing</li>
+ * <li>you must call setValueCount before a vector can be read</li>
+ * <li>you should never write to a vector once it has been read.</li>
+ * </ul>
+ *
+ * <p>Please note that the current implementation doesn't enforce those rules, hence we may find few places that
+ * deviate from these rules (e.g. offset vectors in Variable Length and Repeated vector)
+ *
+ * <p>This interface "should" strive to guarantee this order of operation:
+ * <blockquote>
+ * allocate &gt; mutate &gt; setvaluecount &gt; access &gt; clear (or allocate to start the process over).
+ * </blockquote>
+ */
+public interface ValueVector extends Closeable, Iterable<ValueVector> {
+ /**
+ * Allocate new buffers. ValueVector implements logic to determine how much to allocate.
+ *
+ * @throws OutOfMemoryException Thrown if no memory can be allocated.
+ */
+ void allocateNew() throws OutOfMemoryException;
+
+ /**
+ * Allocates new buffers. ValueVector implements logic to determine how much to allocate.
+ *
+ * @return Returns true if allocation was successful.
+ */
+ boolean allocateNewSafe();
+
+ /**
+ * Allocate new buffer with double capacity, and copy data into the new buffer.
+ * Replace vector's buffer with new buffer, and release old one
+ */
+ void reAlloc();
+
+ BufferAllocator getAllocator();
+
+ /**
+ * Set the initial record capacity.
+ *
+ * @param numRecords the initial record capacity.
+ */
+ void setInitialCapacity(int numRecords);
+
+ /**
+ * Returns the maximum number of values that can be stored in this vector instance.
+ *
+ * @return the maximum number of values that can be stored in this vector instance.
+ */
+ int getValueCapacity();
+
+ /**
+ * Alternative to clear(). Allows use as an AutoCloseable in try-with-resources.
+ */
+ @Override
+ void close();
+
+ /**
+ * Release any owned ArrowBuf and reset the ValueVector to the initial state. If the
+ * vector has any child vectors, they will also be cleared.
+ */
+ void clear();
+
+ /**
+ * Reset the ValueVector to the initial state without releasing any owned ArrowBuf.
+ * Buffer capacities will remain unchanged and any previous data will be zeroed out.
+ * This includes buffers for data, validity, offset, etc. If the vector has any
+ * child vectors, they will also be reset.
+ */
+ void reset();
+
+ /**
+ * Get information about how this field is materialized.
+ *
+ * @return the field corresponding to this vector
+ */
+ Field getField();
+
+ MinorType getMinorType();
+
+ /**
+ * To transfer quota responsibility.
+ *
+ * @param allocator the target allocator
+ * @return a {@link org.apache.arrow.vector.util.TransferPair transfer pair}, creating a new target vector of
+ * the same type.
+ */
+ TransferPair getTransferPair(BufferAllocator allocator);
+
+ TransferPair getTransferPair(String ref, BufferAllocator allocator);
+
+ TransferPair getTransferPair(String ref, BufferAllocator allocator, CallBack callBack);
+
+ /**
+ * Makes a new transfer pair used to transfer underlying buffers.
+ *
+ * @param target the target for the transfer
+ * @return a new {@link org.apache.arrow.vector.util.TransferPair transfer pair} that is used to transfer underlying
+ * buffers into the target vector.
+ */
+ TransferPair makeTransferPair(ValueVector target);
+
+ /**
+ * Get a reader for this vector.
+ *
+ * @return a {@link org.apache.arrow.vector.complex.reader.FieldReader field reader} that supports reading values
+ * from this vector.
+ */
+ FieldReader getReader();
+
+ /**
+ * Get the number of bytes used by this vector.
+ *
+ * @return the number of bytes that is used by this vector instance.
+ */
+ int getBufferSize();
+
+ /**
+ * Returns the number of bytes that is used by this vector if it holds the given number
+ * of values. The result will be the same as if setValueCount() were called, followed
+ * by calling getBufferSize(), but without any of the closing side-effects that setValueCount()
+ * implies wrt finishing off the population of a vector. Some operations might wish to use
+ * this to determine how much memory has been used by a vector so far, even though it is
+ * not finished being populated.
+ *
+ * @param valueCount the number of values to assume this vector contains
+ * @return the buffer size if this vector is holding valueCount values
+ */
+ int getBufferSizeFor(int valueCount);
+
+ /**
+ * Return the underlying buffers associated with this vector. Note that this doesn't impact the reference counts for
+ * this buffer so it only should be used for in-context access. Also note that this buffer changes regularly thus
+ * external classes shouldn't hold a reference to it (unless they change it).
+ *
+ * @param clear Whether to clear vector before returning; the buffers will still be refcounted;
+ * but the returned array will be the only reference to them
+ * @return The underlying {@link ArrowBuf buffers} that is used by this vector instance.
+ */
+ ArrowBuf[] getBuffers(boolean clear);
+
+ /**
+ * Gets the underlying buffer associated with validity vector.
+ *
+ * @return buffer
+ */
+ ArrowBuf getValidityBuffer();
+
+ /**
+ * Gets the underlying buffer associated with data vector.
+ *
+ * @return buffer
+ */
+ ArrowBuf getDataBuffer();
+
+ /**
+ * Gets the underlying buffer associated with offset vector.
+ *
+ * @return buffer
+ */
+ ArrowBuf getOffsetBuffer();
+
+ /**
+ * Gets the number of values.
+ *
+ * @return number of values in the vector
+ */
+ int getValueCount();
+
+ /**
+ * Set number of values in the vector.
+ */
+ void setValueCount(int valueCount);
+
+ /**
+ * Get friendly type object from the vector.
+ *
+ * @param index index of object to get
+ * @return friendly type object
+ */
+ Object getObject(int index);
+
+ /**
+ * Returns number of null elements in the vector.
+ *
+ * @return number of null elements
+ */
+ int getNullCount();
+
+ /**
+ * Check whether an element in the vector is null.
+ *
+ * @param index index to check for null
+ * @return true if element is null
+ */
+ boolean isNull(int index);
+
+ /**
+ * Returns hashCode of element in index with the default hasher.
+ */
+ int hashCode(int index);
+
+ /**
+ * Returns hashCode of element in index with the given hasher.
+ */
+ int hashCode(int index, ArrowBufHasher hasher);
+
+ /**
+ * Copy a cell value from a particular index in source vector to a particular
+ * position in this vector.
+ *
+ * @param fromIndex position to copy from in source vector
+ * @param thisIndex position to copy to in this vector
+ * @param from source vector
+ */
+ void copyFrom(int fromIndex, int thisIndex, ValueVector from);
+
+ /**
+ * Same as {@link #copyFrom(int, int, ValueVector)} except that
+ * it handles the case when the capacity of the vector needs to be expanded
+ * before copy.
+ *
+ * @param fromIndex position to copy from in source vector
+ * @param thisIndex position to copy to in this vector
+ * @param from source vector
+ */
+ void copyFromSafe(int fromIndex, int thisIndex, ValueVector from);
+
+ /**
+ * Accept a generic {@link VectorVisitor} and return the result.
+ * @param <OUT> the output result type.
+ * @param <IN> the input data together with visitor.
+ */
+ <OUT, IN> OUT accept(VectorVisitor<OUT, IN> visitor, IN value);
+
+ /**
+ * Gets the name of the vector.
+ * @return the name of the vector.
+ */
+ String getName();
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/VarBinaryVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/VarBinaryVector.java
new file mode 100644
index 000000000..798d30fe4
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/VarBinaryVector.java
@@ -0,0 +1,306 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.complex.impl.VarBinaryReaderImpl;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.holders.NullableVarBinaryHolder;
+import org.apache.arrow.vector.holders.VarBinaryHolder;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.TransferPair;
+
+/**
+ * VarBinaryVector implements a variable width vector of binary
+ * values which could be NULL. A validity buffer (bit vector) is maintained
+ * to track which elements in the vector are null.
+ */
+public final class VarBinaryVector extends BaseVariableWidthVector {
+ private final FieldReader reader;
+
+ /**
+ * Instantiate a VarBinaryVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param allocator allocator for memory management.
+ */
+ public VarBinaryVector(String name, BufferAllocator allocator) {
+ this(name, FieldType.nullable(MinorType.VARBINARY.getType()), allocator);
+ }
+
+ /**
+ * Instantiate a VarBinaryVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param name name of the vector
+ * @param fieldType type of Field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public VarBinaryVector(String name, FieldType fieldType, BufferAllocator allocator) {
+ this(new Field(name, fieldType, null), allocator);
+ }
+
+ /**
+ * Instantiate a VarBinaryVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param field field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public VarBinaryVector(Field field, BufferAllocator allocator) {
+ super(field, allocator);
+ reader = new VarBinaryReaderImpl(VarBinaryVector.this);
+ }
+
+ /**
+ * Get a reader that supports reading values from this vector.
+ *
+ * @return Field Reader for this vector
+ */
+ @Override
+ public FieldReader getReader() {
+ return reader;
+ }
+
+ /**
+ * Get minor type for this vector. The vector holds values belonging
+ * to a particular type.
+ *
+ * @return {@link org.apache.arrow.vector.types.Types.MinorType}
+ */
+ @Override
+ public MinorType getMinorType() {
+ return MinorType.VARBINARY;
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value retrieval methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Get the variable length element at specified index as byte array.
+ *
+ * @param index position of element to get
+ * @return array of bytes for non-null element, null otherwise
+ */
+ public byte[] get(int index) {
+ assert index >= 0;
+ if (NULL_CHECKING_ENABLED && isSet(index) == 0) {
+ return null;
+ }
+ final int startOffset = getStartOffset(index);
+ final int dataLength =
+ offsetBuffer.getInt((long) (index + 1) * OFFSET_WIDTH) - startOffset;
+ final byte[] result = new byte[dataLength];
+ valueBuffer.getBytes(startOffset, result, 0, dataLength);
+ return result;
+ }
+
+ /**
+ * Get the variable length element at specified index as Text.
+ *
+ * @param index position of element to get
+ * @return byte array for non-null element, null otherwise
+ */
+ public byte[] getObject(int index) {
+ return get(index);
+ }
+
+ /**
+ * Get the variable length element at specified index and sets the state
+ * in provided holder.
+ *
+ * @param index position of element to get
+ * @param holder data holder to be populated by this function
+ */
+ public void get(int index, NullableVarBinaryHolder holder) {
+ assert index >= 0;
+ if (isSet(index) == 0) {
+ holder.isSet = 0;
+ return;
+ }
+ holder.isSet = 1;
+ holder.start = getStartOffset(index);
+ holder.end = offsetBuffer.getInt((index + 1) * OFFSET_WIDTH);
+ holder.buffer = valueBuffer;
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value setter methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Set the variable length element at the specified index to the data
+ * buffer supplied in the holder.
+ *
+ * @param index position of the element to set
+ * @param holder holder that carries data buffer.
+ */
+ public void set(int index, VarBinaryHolder holder) {
+ assert index >= 0;
+ fillHoles(index);
+ BitVectorHelper.setBit(validityBuffer, index);
+ final int dataLength = holder.end - holder.start;
+ final int startOffset = getStartOffset(index);
+ offsetBuffer.setInt((index + 1) * OFFSET_WIDTH, startOffset + dataLength);
+ valueBuffer.setBytes(startOffset, holder.buffer, holder.start, dataLength);
+ lastSet = index;
+ }
+
+ /**
+ * Same as {@link #set(int, VarBinaryHolder)} except that it handles the
+ * case where index and length of new element are beyond the existing
+ * capacity of the vector.
+ *
+ * @param index position of the element to set
+ * @param holder holder that carries data buffer.
+ */
+ public void setSafe(int index, VarBinaryHolder holder) {
+ assert index >= 0;
+ final int dataLength = holder.end - holder.start;
+ handleSafe(index, dataLength);
+ fillHoles(index);
+ BitVectorHelper.setBit(validityBuffer, index);
+ final int startOffset = getStartOffset(index);
+ offsetBuffer.setInt((index + 1) * OFFSET_WIDTH, startOffset + dataLength);
+ valueBuffer.setBytes(startOffset, holder.buffer, holder.start, dataLength);
+ lastSet = index;
+ }
+
+ /**
+ * Set the variable length element at the specified index to the data
+ * buffer supplied in the holder.
+ *
+ * @param index position of the element to set
+ * @param holder holder that carries data buffer.
+ */
+ public void set(int index, NullableVarBinaryHolder holder) {
+ assert index >= 0;
+ fillHoles(index);
+ BitVectorHelper.setValidityBit(validityBuffer, index, holder.isSet);
+ final int startOffset = getStartOffset(index);
+ if (holder.isSet != 0) {
+ final int dataLength = holder.end - holder.start;
+ offsetBuffer.setInt((index + 1) * OFFSET_WIDTH, startOffset + dataLength);
+ valueBuffer.setBytes(startOffset, holder.buffer, holder.start, dataLength);
+ } else {
+ offsetBuffer.setInt((index + 1) * OFFSET_WIDTH, startOffset);
+ }
+ lastSet = index;
+ }
+
+ /**
+ * Same as {@link #set(int, NullableVarBinaryHolder)} except that it handles the
+ * case where index and length of new element are beyond the existing
+ * capacity of the vector.
+ *
+ * @param index position of the element to set
+ * @param holder holder that carries data buffer.
+ */
+ public void setSafe(int index, NullableVarBinaryHolder holder) {
+ assert index >= 0;
+ if (holder.isSet != 0) {
+ final int dataLength = holder.end - holder.start;
+ handleSafe(index, dataLength);
+ fillHoles(index);
+ final int startOffset = getStartOffset(index);
+ offsetBuffer.setInt((index + 1) * OFFSET_WIDTH, startOffset + dataLength);
+ valueBuffer.setBytes(startOffset, holder.buffer, holder.start, dataLength);
+ } else {
+ fillEmpties(index + 1);
+ }
+ BitVectorHelper.setValidityBit(validityBuffer, index, holder.isSet);
+ lastSet = index;
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector transfer |
+ | |
+ *----------------------------------------------------------------*/
+
+ /**
+ * Construct a TransferPair comprising of this and a target vector of
+ * the same type.
+ *
+ * @param ref name of the target vector
+ * @param allocator allocator for the target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
+ return new TransferImpl(ref, allocator);
+ }
+
+ /**
+ * Construct a TransferPair with a desired target vector of the same type.
+ *
+ * @param to target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair makeTransferPair(ValueVector to) {
+ return new TransferImpl((VarBinaryVector) to);
+ }
+
+ private class TransferImpl implements TransferPair {
+ VarBinaryVector to;
+
+ public TransferImpl(String ref, BufferAllocator allocator) {
+ to = new VarBinaryVector(ref, field.getFieldType(), allocator);
+ }
+
+ public TransferImpl(VarBinaryVector to) {
+ this.to = to;
+ }
+
+ @Override
+ public VarBinaryVector getTo() {
+ return to;
+ }
+
+ @Override
+ public void transfer() {
+ transferTo(to);
+ }
+
+ @Override
+ public void splitAndTransfer(int startIndex, int length) {
+ splitAndTransferTo(startIndex, length, to);
+ }
+
+ @Override
+ public void copyValueSafe(int fromIndex, int toIndex) {
+ to.copyFromSafe(fromIndex, toIndex, VarBinaryVector.this);
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/VarCharVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/VarCharVector.java
new file mode 100644
index 000000000..e725e2d28
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/VarCharVector.java
@@ -0,0 +1,331 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.complex.impl.VarCharReaderImpl;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.holders.NullableVarCharHolder;
+import org.apache.arrow.vector.holders.VarCharHolder;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.Text;
+import org.apache.arrow.vector.util.TransferPair;
+
+/**
+ * VarCharVector implements a variable width vector of VARCHAR
+ * values which could be NULL. A validity buffer (bit vector) is maintained
+ * to track which elements in the vector are null.
+ */
+public final class VarCharVector extends BaseVariableWidthVector {
+ private final FieldReader reader;
+
+ /**
+ * Instantiate a VarCharVector. This doesn't allocate any memory for
+ * the data in vector.
+ * @param name name of the vector
+ * @param allocator allocator for memory management.
+ */
+ public VarCharVector(String name, BufferAllocator allocator) {
+ this(name, FieldType.nullable(MinorType.VARCHAR.getType()), allocator);
+ }
+
+ /**
+ * Instantiate a VarCharVector. This doesn't allocate any memory for
+ * the data in vector.
+ * @param name name of the vector
+ * @param fieldType type of Field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public VarCharVector(String name, FieldType fieldType, BufferAllocator allocator) {
+ this(new Field(name, fieldType, null), allocator);
+ }
+
+ /**
+ * Instantiate a VarCharVector. This doesn't allocate any memory for
+ * the data in vector.
+ *
+ * @param field field materialized by this vector
+ * @param allocator allocator for memory management.
+ */
+ public VarCharVector(Field field, BufferAllocator allocator) {
+ super(field, allocator);
+ reader = new VarCharReaderImpl(VarCharVector.this);
+ }
+
+ /**
+ * Get a reader that supports reading values from this vector.
+ * @return Field Reader for this vector
+ */
+ @Override
+ public FieldReader getReader() {
+ return reader;
+ }
+
+ /**
+ * Get minor type for this vector. The vector holds values belonging
+ * to a particular type.
+ * @return {@link org.apache.arrow.vector.types.Types.MinorType}
+ */
+ @Override
+ public MinorType getMinorType() {
+ return MinorType.VARCHAR;
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value retrieval methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Get the variable length element at specified index as byte array.
+ *
+ * @param index position of element to get
+ * @return array of bytes for non-null element, null otherwise
+ */
+ public byte[] get(int index) {
+ assert index >= 0;
+ if (NULL_CHECKING_ENABLED && isSet(index) == 0) {
+ return null;
+ }
+ final int startOffset = getStartOffset(index);
+ final int dataLength =
+ offsetBuffer.getInt((long) (index + 1) * OFFSET_WIDTH) - startOffset;
+ final byte[] result = new byte[dataLength];
+ valueBuffer.getBytes(startOffset, result, 0, dataLength);
+ return result;
+ }
+
+ /**
+ * Get the variable length element at specified index as Text.
+ *
+ * @param index position of element to get
+ * @return Text object for non-null element, null otherwise
+ */
+ public Text getObject(int index) {
+ byte[] b = get(index);
+ if (b == null) {
+ return null;
+ } else {
+ return new Text(b);
+ }
+ }
+
+ /**
+ * Get the variable length element at specified index and sets the state
+ * in provided holder.
+ *
+ * @param index position of element to get
+ * @param holder data holder to be populated by this function
+ */
+ public void get(int index, NullableVarCharHolder holder) {
+ assert index >= 0;
+ if (isSet(index) == 0) {
+ holder.isSet = 0;
+ return;
+ }
+ holder.isSet = 1;
+ holder.start = getStartOffset(index);
+ holder.end = offsetBuffer.getInt((index + 1) * OFFSET_WIDTH);
+ holder.buffer = valueBuffer;
+ }
+
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector value setter methods |
+ | |
+ *----------------------------------------------------------------*/
+
+
+ /**
+ * Set the variable length element at the specified index to the data
+ * buffer supplied in the holder.
+ *
+ * @param index position of the element to set
+ * @param holder holder that carries data buffer.
+ */
+ public void set(int index, VarCharHolder holder) {
+ assert index >= 0;
+ fillHoles(index);
+ BitVectorHelper.setBit(validityBuffer, index);
+ final int dataLength = holder.end - holder.start;
+ final int startOffset = getStartOffset(index);
+ offsetBuffer.setInt((index + 1) * OFFSET_WIDTH, startOffset + dataLength);
+ valueBuffer.setBytes(startOffset, holder.buffer, holder.start, dataLength);
+ lastSet = index;
+ }
+
+ /**
+ * Same as {@link #set(int, VarCharHolder)} except that it handles the
+ * case where index and length of new element are beyond the existing
+ * capacity of the vector.
+ *
+ * @param index position of the element to set
+ * @param holder holder that carries data buffer.
+ */
+ public void setSafe(int index, VarCharHolder holder) {
+ assert index >= 0;
+ final int dataLength = holder.end - holder.start;
+ handleSafe(index, dataLength);
+ fillHoles(index);
+
+ BitVectorHelper.setBit(validityBuffer, index);
+ final int startOffset = getStartOffset(index);
+ offsetBuffer.setInt((index + 1) * OFFSET_WIDTH, startOffset + dataLength);
+ valueBuffer.setBytes(startOffset, holder.buffer, holder.start, dataLength);
+ lastSet = index;
+ }
+
+ /**
+ * Set the variable length element at the specified index to the data
+ * buffer supplied in the holder.
+ *
+ * @param index position of the element to set
+ * @param holder holder that carries data buffer.
+ */
+ public void set(int index, NullableVarCharHolder holder) {
+ assert index >= 0;
+ fillHoles(index);
+ BitVectorHelper.setValidityBit(validityBuffer, index, holder.isSet);
+ final int startOffset = getStartOffset(index);
+ if (holder.isSet != 0) {
+ final int dataLength = holder.end - holder.start;
+ offsetBuffer.setInt((index + 1) * OFFSET_WIDTH, startOffset + dataLength);
+ valueBuffer.setBytes(startOffset, holder.buffer, holder.start, dataLength);
+ } else {
+ offsetBuffer.setInt((index + 1) * OFFSET_WIDTH, startOffset);
+ }
+ lastSet = index;
+ }
+
+ /**
+ * Same as {@link #set(int, NullableVarCharHolder)} except that it handles the
+ * case where index and length of new element are beyond the existing
+ * capacity of the vector.
+ *
+ * @param index position of the element to set
+ * @param holder holder that carries data buffer.
+ */
+ public void setSafe(int index, NullableVarCharHolder holder) {
+ assert index >= 0;
+ if (holder.isSet != 0) {
+ final int dataLength = holder.end - holder.start;
+ handleSafe(index, dataLength);
+ fillHoles(index);
+ final int startOffset = getStartOffset(index);
+ offsetBuffer.setInt((index + 1) * OFFSET_WIDTH, startOffset + dataLength);
+ valueBuffer.setBytes(startOffset, holder.buffer, holder.start, dataLength);
+ } else {
+ fillEmpties(index + 1);
+ }
+ BitVectorHelper.setValidityBit(validityBuffer, index, holder.isSet);
+ lastSet = index;
+ }
+
+ /**
+ * Set the variable length element at the specified index to the
+ * content in supplied Text.
+ *
+ * @param index position of the element to set
+ * @param text Text object with data
+ */
+ public void set(int index, Text text) {
+ set(index, text.getBytes(), 0, text.getLength());
+ }
+
+ /**
+ * Same as {@link #set(int, NullableVarCharHolder)} except that it handles the
+ * case where index and length of new element are beyond the existing
+ * capacity of the vector.
+ *
+ * @param index position of the element to set.
+ * @param text Text object with data
+ */
+ public void setSafe(int index, Text text) {
+ setSafe(index, text.getBytes(), 0, text.getLength());
+ }
+
+ /*----------------------------------------------------------------*
+ | |
+ | vector transfer |
+ | |
+ *----------------------------------------------------------------*/
+
+ /**
+ * Construct a TransferPair comprising of this and a target vector of
+ * the same type.
+ *
+ * @param ref name of the target vector
+ * @param allocator allocator for the target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
+ return new TransferImpl(ref, allocator);
+ }
+
+ /**
+ * Construct a TransferPair with a desired target vector of the same type.
+ *
+ * @param to target vector
+ * @return {@link TransferPair}
+ */
+ @Override
+ public TransferPair makeTransferPair(ValueVector to) {
+ return new TransferImpl((VarCharVector) to);
+ }
+
+ private class TransferImpl implements TransferPair {
+ VarCharVector to;
+
+ public TransferImpl(String ref, BufferAllocator allocator) {
+ to = new VarCharVector(ref, field.getFieldType(), allocator);
+ }
+
+ public TransferImpl(VarCharVector to) {
+ this.to = to;
+ }
+
+ @Override
+ public VarCharVector getTo() {
+ return to;
+ }
+
+ @Override
+ public void transfer() {
+ transferTo(to);
+ }
+
+ @Override
+ public void splitAndTransfer(int startIndex, int length) {
+ splitAndTransferTo(startIndex, length, to);
+ }
+
+ @Override
+ public void copyValueSafe(int fromIndex, int toIndex) {
+ to.copyFromSafe(fromIndex, toIndex, VarCharVector.this);
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/VariableWidthVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/VariableWidthVector.java
new file mode 100644
index 000000000..f6b8364e3
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/VariableWidthVector.java
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+/**
+ * Interface vectors that contain variable width members (e.g. Strings, Lists, etc).
+ */
+public interface VariableWidthVector extends ElementAddressableVector, DensityAwareVector {
+
+ /**
+ * Allocate a new memory space for this vector. Must be called prior to using the ValueVector.
+ *
+ * @param totalBytes Desired size of the underlying data buffer.
+ * @param valueCount Number of values in the vector.
+ */
+ void allocateNew(long totalBytes, int valueCount);
+
+ /**
+ * Allocate a new memory space for this vector. Must be called prior to using the ValueVector.
+ * The initial size in bytes is either default (or) reused from previous allocation
+ *
+ * @param valueCount Number of values in the vector.
+ */
+ void allocateNew(int valueCount);
+
+ /**
+ * Provide the maximum amount of variable width bytes that can be stored in this vector.
+ *
+ * @return the byte capacity of this vector
+ */
+ int getByteCapacity();
+
+ /**
+ * Provide the number of bytes contained in the valueBuffer.
+ * @return the number of bytes in valueBuffer.
+ */
+ int sizeOfValueBuffer();
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/VectorDefinitionSetter.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/VectorDefinitionSetter.java
new file mode 100644
index 000000000..39804ee41
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/VectorDefinitionSetter.java
@@ -0,0 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+/**
+ * Interface for setting a specific index values as defined/valid on a vector.
+ */
+public interface VectorDefinitionSetter {
+
+ void setIndexDefined(int index);
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java
new file mode 100644
index 000000000..ed5f3aef1
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java
@@ -0,0 +1,137 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.apache.arrow.util.Preconditions.checkArgument;
+
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.util.Collections2;
+import org.apache.arrow.vector.compression.CompressionCodec;
+import org.apache.arrow.vector.compression.CompressionUtil;
+import org.apache.arrow.vector.compression.NoCompressionCodec;
+import org.apache.arrow.vector.ipc.message.ArrowFieldNode;
+import org.apache.arrow.vector.ipc.message.ArrowRecordBatch;
+import org.apache.arrow.vector.types.pojo.Field;
+
+/**
+ * Loads buffers into vectors.
+ */
+public class VectorLoader {
+
+ private final VectorSchemaRoot root;
+
+ private final CompressionCodec.Factory factory;
+
+ /**
+ * A flag indicating if decompression is needed.
+ * This will affect the behavior of releasing buffers.
+ */
+ private boolean decompressionNeeded;
+
+ /**
+ * Construct with a root to load and will create children in root based on schema.
+ *
+ * @param root the root to add vectors to based on schema
+ */
+ public VectorLoader(VectorSchemaRoot root) {
+ this(root, NoCompressionCodec.Factory.INSTANCE);
+ }
+
+ /**
+ * Construct with a root to load and will create children in root based on schema.
+ *
+ * @param root the root to add vectors to based on schema.
+ * @param factory the factory to create codec.
+ */
+ public VectorLoader(VectorSchemaRoot root, CompressionCodec.Factory factory) {
+ this.root = root;
+ this.factory = factory;
+ }
+
+ /**
+ * Loads the record batch in the vectors.
+ * will not close the record batch
+ *
+ * @param recordBatch the batch to load
+ */
+ public void load(ArrowRecordBatch recordBatch) {
+ Iterator<ArrowBuf> buffers = recordBatch.getBuffers().iterator();
+ Iterator<ArrowFieldNode> nodes = recordBatch.getNodes().iterator();
+ CompressionUtil.CodecType codecType =
+ CompressionUtil.CodecType.fromCompressionType(recordBatch.getBodyCompression().getCodec());
+ decompressionNeeded = codecType != CompressionUtil.CodecType.NO_COMPRESSION;
+ CompressionCodec codec = decompressionNeeded ? factory.createCodec(codecType) : NoCompressionCodec.INSTANCE;
+ for (FieldVector fieldVector : root.getFieldVectors()) {
+ loadBuffers(fieldVector, fieldVector.getField(), buffers, nodes, codec);
+ }
+ root.setRowCount(recordBatch.getLength());
+ if (nodes.hasNext() || buffers.hasNext()) {
+ throw new IllegalArgumentException("not all nodes and buffers were consumed. nodes: " +
+ Collections2.toString(nodes) + " buffers: " + Collections2.toString(buffers));
+ }
+ }
+
+ private void loadBuffers(
+ FieldVector vector,
+ Field field,
+ Iterator<ArrowBuf> buffers,
+ Iterator<ArrowFieldNode> nodes,
+ CompressionCodec codec) {
+ checkArgument(nodes.hasNext(), "no more field nodes for for field %s and vector %s", field, vector);
+ ArrowFieldNode fieldNode = nodes.next();
+ int bufferLayoutCount = TypeLayout.getTypeBufferCount(field.getType());
+ List<ArrowBuf> ownBuffers = new ArrayList<>(bufferLayoutCount);
+ for (int j = 0; j < bufferLayoutCount; j++) {
+ ArrowBuf nextBuf = buffers.next();
+ // for vectors without nulls, the buffer is empty, so there is no need to decompress it.
+ ArrowBuf bufferToAdd = nextBuf.writerIndex() > 0 ? codec.decompress(vector.getAllocator(), nextBuf) : nextBuf;
+ ownBuffers.add(bufferToAdd);
+ if (decompressionNeeded) {
+ // decompression performed
+ nextBuf.getReferenceManager().retain();
+ }
+ }
+ try {
+ vector.loadFieldBuffers(fieldNode, ownBuffers);
+ if (decompressionNeeded) {
+ for (ArrowBuf buf : ownBuffers) {
+ buf.close();
+ }
+ }
+ } catch (RuntimeException e) {
+ throw new IllegalArgumentException("Could not load buffers for field " +
+ field + ". error message: " + e.getMessage(), e);
+ }
+ List<Field> children = field.getChildren();
+ if (children.size() > 0) {
+ List<FieldVector> childrenFromFields = vector.getChildrenFromFields();
+ checkArgument(children.size() == childrenFromFields.size(),
+ "should have as many children as in the schema: found %s expected %s",
+ childrenFromFields.size(), children.size());
+ for (int i = 0; i < childrenFromFields.size(); i++) {
+ Field child = children.get(i);
+ FieldVector fieldVector = childrenFromFields.get(i);
+ loadBuffers(fieldVector, child, buffers, nodes, codec);
+ }
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/VectorSchemaRoot.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/VectorSchemaRoot.java
new file mode 100644
index 000000000..623c77317
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/VectorSchemaRoot.java
@@ -0,0 +1,429 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+import java.util.stream.StreamSupport;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.util.AutoCloseables;
+import org.apache.arrow.util.Preconditions;
+import org.apache.arrow.vector.compare.ApproxEqualsVisitor;
+import org.apache.arrow.vector.compare.Range;
+import org.apache.arrow.vector.compare.VectorEqualsVisitor;
+import org.apache.arrow.vector.compare.VectorValueEqualizer;
+import org.apache.arrow.vector.compare.util.ValueEpsilonEqualizers;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.apache.arrow.vector.util.TransferPair;
+
+/**
+ * Holder for a set of vectors to be loaded/unloaded.
+ * A VectorSchemaRoot is a container that can hold batches, batches flow through VectorSchemaRoot
+ * as part of a pipeline. Note this is different from other implementations (i.e. in C++ and Python,
+ * a RecordBatch is a collection of equal-length vector instances and was created each time for a new batch).
+
+ * The recommended usage for VectorSchemaRoot is creating a single VectorSchemaRoot based on the known
+ * schema and populated data over and over into the same VectorSchemaRoot in a stream of batches rather
+ * than create a new VectorSchemaRoot instance each time (see Flight or ArrowFileWriter for better understanding).
+ * Thus at any one point a VectorSchemaRoot may have data or may have no data (say it was transferred downstream
+ * or not yet populated).
+ */
+public class VectorSchemaRoot implements AutoCloseable {
+
+ private Schema schema;
+ private int rowCount;
+ private final List<FieldVector> fieldVectors;
+ private final Map<Field, FieldVector> fieldVectorsMap = new LinkedHashMap<>();
+
+
+ /**
+ * Constructs new instance containing each of the vectors.
+ */
+ public VectorSchemaRoot(Iterable<FieldVector> vectors) {
+ this(
+ StreamSupport.stream(vectors.spliterator(), false).map(t -> t.getField()).collect(Collectors.toList()),
+ StreamSupport.stream(vectors.spliterator(), false).collect(Collectors.toList())
+ );
+ }
+
+ /**
+ * Constructs a new instance containing the children of parent but not the parent itself.
+ */
+ public VectorSchemaRoot(FieldVector parent) {
+ this(parent.getField().getChildren(), parent.getChildrenFromFields(), parent.getValueCount());
+ }
+
+ /**
+ * Constructs a new instance.
+ *
+ * @param fields The types of each vector.
+ * @param fieldVectors The data vectors (must be equal in size to <code>fields</code>.
+ */
+ public VectorSchemaRoot(List<Field> fields, List<FieldVector> fieldVectors) {
+ this(new Schema(fields), fieldVectors, fieldVectors.size() == 0 ? 0 : fieldVectors.get(0).getValueCount());
+ }
+
+ /**
+ * Constructs a new instance.
+ *
+ * @param fields The types of each vector.
+ * @param fieldVectors The data vectors (must be equal in size to <code>fields</code>.
+ * @param rowCount The number of rows contained.
+ */
+ public VectorSchemaRoot(List<Field> fields, List<FieldVector> fieldVectors, int rowCount) {
+ this(new Schema(fields), fieldVectors, rowCount);
+ }
+
+ /**
+ * Constructs a new instance.
+ *
+ * @param schema The schema for the vectors.
+ * @param fieldVectors The data vectors.
+ * @param rowCount The number of rows
+ */
+ public VectorSchemaRoot(Schema schema, List<FieldVector> fieldVectors, int rowCount) {
+ if (schema.getFields().size() != fieldVectors.size()) {
+ throw new IllegalArgumentException("Fields must match field vectors. Found " +
+ fieldVectors.size() + " vectors and " + schema.getFields().size() + " fields");
+ }
+ this.schema = schema;
+ this.rowCount = rowCount;
+ this.fieldVectors = fieldVectors;
+ for (int i = 0; i < schema.getFields().size(); ++i) {
+ Field field = schema.getFields().get(i);
+ FieldVector vector = fieldVectors.get(i);
+ fieldVectorsMap.put(field, vector);
+ }
+ }
+
+ /**
+ * Creates a new set of empty vectors corresponding to the given schema.
+ */
+ public static VectorSchemaRoot create(Schema schema, BufferAllocator allocator) {
+ List<FieldVector> fieldVectors = new ArrayList<>();
+ for (Field field : schema.getFields()) {
+ FieldVector vector = field.createVector(allocator);
+ fieldVectors.add(vector);
+ }
+ if (fieldVectors.size() != schema.getFields().size()) {
+ throw new IllegalArgumentException("The root vector did not create the right number of children. found " +
+ fieldVectors.size() + " expected " + schema.getFields().size());
+ }
+ return new VectorSchemaRoot(schema, fieldVectors, 0);
+ }
+
+ /** Constructs a new instance from vectors. */
+ public static VectorSchemaRoot of(FieldVector... vectors) {
+ return new VectorSchemaRoot(Arrays.stream(vectors).collect(Collectors.toList()));
+ }
+
+ /**
+ * Do an adaptive allocation of each vector for memory purposes. Sizes will be based on previously
+ * defined initial allocation for each vector (and subsequent size learned).
+ */
+ public void allocateNew() {
+ for (FieldVector v : fieldVectors) {
+ v.allocateNew();
+ }
+ rowCount = 0;
+ }
+
+ /**
+ * Release all the memory for each vector held in this root. This DOES NOT remove vectors from the container.
+ */
+ public void clear() {
+ for (FieldVector v : fieldVectors) {
+ v.clear();
+ }
+ rowCount = 0;
+ }
+
+ public List<FieldVector> getFieldVectors() {
+ return fieldVectors.stream().collect(Collectors.toList());
+ }
+
+ /**
+ * gets a vector by name.
+ *
+ * if name occurs multiple times this returns the first inserted entry for name
+ */
+ public FieldVector getVector(String name) {
+ for (Map.Entry<Field, FieldVector> entry: fieldVectorsMap.entrySet()) {
+ if (entry.getKey().getName().equals(name)) {
+ return entry.getValue();
+ }
+ }
+ return null;
+ }
+
+ public FieldVector getVector(Field field) {
+ return fieldVectorsMap.get(field);
+ }
+
+ public FieldVector getVector(int index) {
+ Preconditions.checkArgument(index >= 0 && index < fieldVectors.size());
+ return fieldVectors.get(index);
+ }
+
+ /**
+ * Add vector to the record batch, producing a new VectorSchemaRoot.
+ * @param index field index
+ * @param vector vector to be added.
+ * @return out VectorSchemaRoot with vector added
+ */
+ public VectorSchemaRoot addVector(int index, FieldVector vector) {
+ Preconditions.checkNotNull(vector);
+ Preconditions.checkArgument(index >= 0 && index < fieldVectors.size());
+ List<FieldVector> newVectors = new ArrayList<>();
+ for (int i = 0; i < fieldVectors.size(); i++) {
+ if (i == index) {
+ newVectors.add(vector);
+ }
+ newVectors.add(fieldVectors.get(i));
+ }
+ return new VectorSchemaRoot(newVectors);
+ }
+
+ /**
+ * Remove vector from the record batch, producing a new VectorSchemaRoot.
+ * @param index field index
+ * @return out VectorSchemaRoot with vector removed
+ */
+ public VectorSchemaRoot removeVector(int index) {
+ Preconditions.checkArgument(index >= 0 && index < fieldVectors.size());
+ List<FieldVector> newVectors = new ArrayList<>();
+ for (int i = 0; i < fieldVectors.size(); i++) {
+ if (i != index) {
+ newVectors.add(fieldVectors.get(i));
+ }
+ }
+ return new VectorSchemaRoot(newVectors);
+ }
+
+ public Schema getSchema() {
+ return schema;
+ }
+
+ public int getRowCount() {
+ return rowCount;
+ }
+
+ /**
+ * Set the row count of all the vectors in this container. Also sets the value
+ * count for each root level contained FieldVector.
+ * @param rowCount Number of records.
+ */
+ public void setRowCount(int rowCount) {
+ this.rowCount = rowCount;
+ for (FieldVector v : getFieldVectors()) {
+ v.setValueCount(rowCount);
+ }
+ }
+
+ @Override
+ public void close() {
+ try {
+ AutoCloseables.close(fieldVectors);
+ } catch (RuntimeException ex) {
+ throw ex;
+ } catch (Exception ex) {
+ // should never happen since FieldVector.close() doesn't throw IOException
+ throw new RuntimeException(ex);
+ }
+ }
+
+ private void printRow(StringBuilder sb, List<Object> row) {
+ boolean first = true;
+ for (Object v : row) {
+ if (first) {
+ first = false;
+ } else {
+ sb.append("\t");
+ }
+ sb.append(v);
+ }
+ sb.append("\n");
+ }
+
+ /**
+ * Returns a tab separated value of vectors (based on their java object representation).
+ */
+ public String contentToTSVString() {
+ StringBuilder sb = new StringBuilder();
+ List<Object> row = new ArrayList<>(schema.getFields().size());
+ for (Field field : schema.getFields()) {
+ row.add(field.getName());
+ }
+ printRow(sb, row);
+ for (int i = 0; i < rowCount; i++) {
+ row.clear();
+ for (FieldVector v : fieldVectors) {
+ row.add(v.getObject(i));
+ }
+ printRow(sb, row);
+ }
+ return sb.toString();
+ }
+
+ /**
+ * Synchronizes the schema from the current vectors.
+ * In some cases, the schema and the actual vector structure may be different.
+ * This can be caused by a promoted writer (For details, please see
+ * {@link org.apache.arrow.vector.complex.impl.PromotableWriter}).
+ * For example, when writing different types of data to a {@link org.apache.arrow.vector.complex.ListVector}
+ * may lead to such a case.
+ * When this happens, this method should be called to bring the schema and vector structure in a synchronized state.
+ * @return true if the schema is updated, false otherwise.
+ */
+ public boolean syncSchema() {
+ List<Field> oldFields = this.schema.getFields();
+ List<Field> newFields = this.fieldVectors.stream().map(ValueVector::getField).collect(Collectors.toList());
+ if (!oldFields.equals(newFields)) {
+ this.schema = new Schema(newFields);
+ return true;
+ }
+ return false;
+ }
+
+ /**
+ * Slice this root from desired index.
+ * @param index start position of the slice
+ * @return the sliced root
+ */
+ public VectorSchemaRoot slice(int index) {
+ return slice(index, this.rowCount - index);
+ }
+
+ /**
+ * Slice this root at desired index and length.
+ * @param index start position of the slice
+ * @param length length of the slice
+ * @return the sliced root
+ */
+ public VectorSchemaRoot slice(int index, int length) {
+ Preconditions.checkArgument(index >= 0, "expecting non-negative index");
+ Preconditions.checkArgument(length >= 0, "expecting non-negative length");
+ Preconditions.checkArgument(index + length <= rowCount,
+ "index + length should <= rowCount");
+
+ if (index == 0 && length == rowCount) {
+ return this;
+ }
+
+ List<FieldVector> sliceVectors = fieldVectors.stream().map(v -> {
+ TransferPair transferPair = v.getTransferPair(v.getAllocator());
+ transferPair.splitAndTransfer(index, length);
+ return (FieldVector) transferPair.getTo();
+ }).collect(Collectors.toList());
+
+ return new VectorSchemaRoot(sliceVectors);
+ }
+
+ /**
+ * Determine if two VectorSchemaRoots are exactly equal.
+ */
+ public boolean equals(VectorSchemaRoot other) {
+ if (other == null) {
+ return false;
+ }
+
+ if (!this.schema.equals(other.schema)) {
+ return false;
+ }
+
+ if (this.rowCount != other.rowCount) {
+ return false;
+ }
+
+ for (int i = 0; i < fieldVectors.size(); i++) {
+ FieldVector vector = fieldVectors.get(i);
+ FieldVector otherVector = other.fieldVectors.get(i);
+ if (!VectorEqualsVisitor.vectorEquals(vector, otherVector)) {
+ return false;
+ }
+ }
+
+ return true;
+ }
+
+ /**
+ * Determine if two VectorSchemaRoots are approximately equal using the given functions to
+ * calculate difference between float/double values.
+ * Note that approx equals are in regards to floating point values, other values are comparing
+ * to exactly equals.
+ *
+ * @param floatDiffFunction function to calculate difference between float values.
+ * @param doubleDiffFunction function to calculate difference between double values.
+ */
+ public boolean approxEquals(
+ VectorSchemaRoot other,
+ VectorValueEqualizer<Float4Vector> floatDiffFunction,
+ VectorValueEqualizer<Float8Vector> doubleDiffFunction) {
+
+ Preconditions.checkNotNull(floatDiffFunction);
+ Preconditions.checkNotNull(doubleDiffFunction);
+
+ if (other == null) {
+ return false;
+ }
+
+ if (!this.schema.equals(other.schema)) {
+ return false;
+ }
+
+ if (this.rowCount != other.rowCount) {
+ return false;
+ }
+
+ Range range = new Range(0, 0, 0);
+ for (int i = 0; i < fieldVectors.size(); i++) {
+ FieldVector vector = fieldVectors.get(i);
+ FieldVector otherVector = other.fieldVectors.get(i);
+ if (vector.getValueCount() != otherVector.getValueCount()) {
+ return false;
+ }
+ ApproxEqualsVisitor visitor =
+ new ApproxEqualsVisitor(vector, otherVector, floatDiffFunction, doubleDiffFunction);
+ range.setLength(vector.getValueCount());
+ if (!visitor.rangeEquals(range)) {
+ return false;
+ }
+ }
+
+ return true;
+ }
+
+ /**
+ * Determine if two VectorSchemaRoots are approximately equal using default functions to
+ * calculate difference between float/double values.
+ */
+ public boolean approxEquals(VectorSchemaRoot other) {
+ VectorValueEqualizer<Float4Vector> floatDiffFunction =
+ new ValueEpsilonEqualizers.Float4EpsilonEqualizer(ApproxEqualsVisitor.DEFAULT_FLOAT_EPSILON);
+ VectorValueEqualizer<Float8Vector> doubleDiffFunction =
+ new ValueEpsilonEqualizers.Float8EpsilonEqualizer(ApproxEqualsVisitor.DEFAULT_DOUBLE_EPSILON);
+ return approxEquals(other, floatDiffFunction, doubleDiffFunction);
+ }
+}
+
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/VectorUnloader.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/VectorUnloader.java
new file mode 100644
index 000000000..e2cbf3ec1
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/VectorUnloader.java
@@ -0,0 +1,107 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.vector.compression.CompressionCodec;
+import org.apache.arrow.vector.compression.CompressionUtil;
+import org.apache.arrow.vector.compression.NoCompressionCodec;
+import org.apache.arrow.vector.ipc.message.ArrowFieldNode;
+import org.apache.arrow.vector.ipc.message.ArrowRecordBatch;
+
+/**
+ * Helper class that handles converting a {@link VectorSchemaRoot}
+ * to a {@link ArrowRecordBatch}.
+ */
+public class VectorUnloader {
+
+ private final VectorSchemaRoot root;
+ private final boolean includeNullCount;
+ private final CompressionCodec codec;
+ private final boolean alignBuffers;
+
+ /**
+ * Constructs a new instance of the given set of vectors.
+ */
+ public VectorUnloader(VectorSchemaRoot root) {
+ this(root, true, NoCompressionCodec.INSTANCE, true);
+ }
+
+ /**
+ * Constructs a new instance.
+ *
+ * @param root The set of vectors to serialize to an {@link ArrowRecordBatch}.
+ * @param includeNullCount Controls whether null count is copied to the {@link ArrowRecordBatch}
+ * @param alignBuffers Controls if buffers get aligned to 8-byte boundaries.
+ */
+ public VectorUnloader(
+ VectorSchemaRoot root, boolean includeNullCount, boolean alignBuffers) {
+ this(root, includeNullCount, NoCompressionCodec.INSTANCE, alignBuffers);
+ }
+
+ /**
+ * Constructs a new instance.
+ *
+ * @param root The set of vectors to serialize to an {@link ArrowRecordBatch}.
+ * @param includeNullCount Controls whether null count is copied to the {@link ArrowRecordBatch}
+ * @param codec the codec for compressing data. If it is null, then no compression is needed.
+ * @param alignBuffers Controls if buffers get aligned to 8-byte boundaries.
+ */
+ public VectorUnloader(
+ VectorSchemaRoot root, boolean includeNullCount, CompressionCodec codec, boolean alignBuffers) {
+ this.root = root;
+ this.includeNullCount = includeNullCount;
+ this.codec = codec;
+ this.alignBuffers = alignBuffers;
+ }
+
+ /**
+ * Performs the depth first traversal of the Vectors to create an {@link ArrowRecordBatch} suitable
+ * for serialization.
+ */
+ public ArrowRecordBatch getRecordBatch() {
+ List<ArrowFieldNode> nodes = new ArrayList<>();
+ List<ArrowBuf> buffers = new ArrayList<>();
+ for (FieldVector vector : root.getFieldVectors()) {
+ appendNodes(vector, nodes, buffers);
+ }
+ return new ArrowRecordBatch(
+ root.getRowCount(), nodes, buffers, CompressionUtil.createBodyCompression(codec), alignBuffers);
+ }
+
+ private void appendNodes(FieldVector vector, List<ArrowFieldNode> nodes, List<ArrowBuf> buffers) {
+ nodes.add(new ArrowFieldNode(vector.getValueCount(), includeNullCount ? vector.getNullCount() : -1));
+ List<ArrowBuf> fieldBuffers = vector.getFieldBuffers();
+ int expectedBufferCount = TypeLayout.getTypeBufferCount(vector.getField().getType());
+ if (fieldBuffers.size() != expectedBufferCount) {
+ throw new IllegalArgumentException(String.format(
+ "wrong number of buffers for field %s in vector %s. found: %s",
+ vector.getField(), vector.getClass().getSimpleName(), fieldBuffers));
+ }
+ for (ArrowBuf buf : fieldBuffers) {
+ buffers.add(codec.compress(vector.getAllocator(), buf));
+ }
+ for (FieldVector child : vector.getChildrenFromFields()) {
+ appendNodes(child, nodes, buffers);
+ }
+ }
+
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ZeroVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ZeroVector.java
new file mode 100644
index 000000000..079b5c103
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ZeroVector.java
@@ -0,0 +1,138 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.util.ArrowBufPointer;
+import org.apache.arrow.memory.util.hash.ArrowBufHasher;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.CallBack;
+import org.apache.arrow.vector.util.TransferPair;
+
+/**
+ * A zero length vector of any type.
+ */
+public final class ZeroVector extends NullVector {
+ public static final ZeroVector INSTANCE = new ZeroVector();
+
+ /**
+ * Instantiate a ZeroVector.
+ *
+ * @param name name of the vector
+ */
+ public ZeroVector(String name) {
+ super(name);
+ }
+
+ /**
+ * Instantiate a ZeroVector.
+ *
+ * @param name name of the vector
+ * @param fieldType type of Field materialized by this vector.
+ */
+ public ZeroVector(String name, FieldType fieldType) {
+ super(name, fieldType);
+ }
+
+ /**
+ * Instantiate a ZeroVector.
+ *
+ * @param field field materialized by this vector.
+ */
+ public ZeroVector(Field field) {
+ super(field);
+ }
+
+ @Deprecated
+ public ZeroVector() {
+ }
+
+ @Override
+ public int getValueCount() {
+ return 0;
+ }
+
+ @Override
+ public void setValueCount(int valueCount) {
+ }
+
+ @Override
+ public int getNullCount() {
+ return 0;
+ }
+
+ @Override
+ public boolean isNull(int index) {
+ throw new IndexOutOfBoundsException();
+ }
+
+ @Override
+ public int hashCode(int index) {
+ return 0;
+ }
+
+ @Override
+ public int hashCode(int index, ArrowBufHasher hasher) {
+ return ArrowBufPointer.NULL_HASH_CODE;
+ }
+
+ @Override
+ public int getValueCapacity() {
+ return 0;
+ }
+
+ @Override
+ public TransferPair getTransferPair(BufferAllocator allocator) {
+ return defaultPair;
+ }
+
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
+ return defaultPair;
+ }
+
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator, CallBack callBack) {
+ return defaultPair;
+ }
+
+ @Override
+ public TransferPair makeTransferPair(ValueVector target) {
+ return defaultPair;
+ }
+
+ private final TransferPair defaultPair = new TransferPair() {
+ @Override
+ public void transfer() {
+ }
+
+ @Override
+ public void splitAndTransfer(int startIndex, int length) {
+ }
+
+ @Override
+ public ValueVector getTo() {
+ return ZeroVector.this;
+ }
+
+ @Override
+ public void copyValueSafe(int from, int to) {
+ }
+ };
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/compare/ApproxEqualsVisitor.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/compare/ApproxEqualsVisitor.java
new file mode 100644
index 000000000..bcf8c64e0
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/compare/ApproxEqualsVisitor.java
@@ -0,0 +1,147 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.compare;
+
+import java.util.function.BiFunction;
+
+import org.apache.arrow.vector.BaseFixedWidthVector;
+import org.apache.arrow.vector.Float4Vector;
+import org.apache.arrow.vector.Float8Vector;
+import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.compare.util.ValueEpsilonEqualizers;
+
+/**
+ * Visitor to compare floating point vectors approximately.
+ */
+public class ApproxEqualsVisitor extends RangeEqualsVisitor {
+
+ /**
+ * Functions to calculate difference between float/double values.
+ */
+ private final VectorValueEqualizer<Float4Vector> floatDiffFunction;
+ private final VectorValueEqualizer<Float8Vector> doubleDiffFunction;
+
+ /**
+ * Default epsilons for diff functions.
+ */
+ public static final float DEFAULT_FLOAT_EPSILON = 1.0E-6f;
+ public static final double DEFAULT_DOUBLE_EPSILON = 1.0E-6;
+
+ /**
+ * Constructs a new instance with default tolerances.
+ * @param left left vector
+ * @param right right vector
+ */
+ public ApproxEqualsVisitor(ValueVector left, ValueVector right) {
+ this (left, right, DEFAULT_FLOAT_EPSILON, DEFAULT_DOUBLE_EPSILON);
+ }
+
+ /**
+ * Constructs a new instance.
+ *
+ * @param left left vector
+ * @param right right vector
+ * @param floatEpsilon difference for float values
+ * @param doubleEpsilon difference for double values
+ */
+ public ApproxEqualsVisitor(ValueVector left, ValueVector right, float floatEpsilon, double doubleEpsilon) {
+ this (left, right,
+ new ValueEpsilonEqualizers.Float4EpsilonEqualizer(floatEpsilon),
+ new ValueEpsilonEqualizers.Float8EpsilonEqualizer(doubleEpsilon));
+ }
+
+ /**
+ * Constructs a new instance.
+ */
+ public ApproxEqualsVisitor(ValueVector left, ValueVector right,
+ VectorValueEqualizer<Float4Vector> floatDiffFunction,
+ VectorValueEqualizer<Float8Vector> doubleDiffFunction) {
+ this (left, right, floatDiffFunction, doubleDiffFunction, DEFAULT_TYPE_COMPARATOR);
+ }
+
+ /**
+ * Constructs a new instance.
+ * @param left the left vector.
+ * @param right the right vector.
+ * @param floatDiffFunction the equalizer for float values.
+ * @param doubleDiffFunction the equalizer for double values.
+ * @param typeComparator type comparator to compare vector type.
+ */
+ public ApproxEqualsVisitor(ValueVector left, ValueVector right,
+ VectorValueEqualizer<Float4Vector> floatDiffFunction,
+ VectorValueEqualizer<Float8Vector> doubleDiffFunction,
+ BiFunction<ValueVector, ValueVector, Boolean> typeComparator) {
+ super(left, right, typeComparator);
+ this.floatDiffFunction = floatDiffFunction;
+ this.doubleDiffFunction = doubleDiffFunction;
+ }
+
+ @Override
+ public Boolean visit(BaseFixedWidthVector left, Range range) {
+ if (left instanceof Float4Vector) {
+ if (!validate(left)) {
+ return false;
+ }
+ return float4ApproxEquals(range);
+ } else if (left instanceof Float8Vector) {
+ if (!validate(left)) {
+ return false;
+ }
+ return float8ApproxEquals(range);
+ } else {
+ return super.visit(left, range);
+ }
+ }
+
+ @Override
+ protected ApproxEqualsVisitor createInnerVisitor(
+ ValueVector left, ValueVector right,
+ BiFunction<ValueVector, ValueVector, Boolean> typeComparator) {
+ return new ApproxEqualsVisitor(left, right, floatDiffFunction.clone(), doubleDiffFunction.clone(), typeComparator);
+ }
+
+ private boolean float4ApproxEquals(Range range) {
+ Float4Vector leftVector = (Float4Vector) getLeft();
+ Float4Vector rightVector = (Float4Vector) getRight();
+
+ for (int i = 0; i < range.getLength(); i++) {
+ int leftIndex = range.getLeftStart() + i;
+ int rightIndex = range.getRightStart() + i;
+
+ if (!floatDiffFunction.valuesEqual(leftVector, leftIndex, rightVector, rightIndex)) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ private boolean float8ApproxEquals(Range range) {
+ Float8Vector leftVector = (Float8Vector) getLeft();
+ Float8Vector rightVector = (Float8Vector) getRight();
+
+ for (int i = 0; i < range.getLength(); i++) {
+ int leftIndex = range.getLeftStart() + i;
+ int rightIndex = range.getRightStart() + i;
+
+ if (!doubleDiffFunction.valuesEqual(leftVector, leftIndex, rightVector, rightIndex)) {
+ return false;
+ }
+ }
+ return true;
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/compare/Range.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/compare/Range.java
new file mode 100644
index 000000000..0de99ab01
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/compare/Range.java
@@ -0,0 +1,85 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.compare;
+
+/**
+ * Wrapper for the parameters of comparing a range of values in two vectors.
+ */
+public class Range {
+
+ /**
+ * Start position in the left vector.
+ */
+ private int leftStart = -1;
+
+ /**
+ * Start position in the right vector.
+ */
+ private int rightStart = -1;
+
+ /**
+ * Length of the range.
+ */
+ private int length = -1;
+
+
+ /**
+ * Constructs a new instance.
+ */
+ public Range() {}
+
+ /**
+ * Constructs a new instance.
+ *
+ * @param leftStart start index in left vector
+ * @param rightStart start index in right vector
+ * @param length length of range
+ */
+ public Range(int leftStart, int rightStart, int length) {
+ this.leftStart = leftStart;
+ this.rightStart = rightStart;
+ this.length = length;
+ }
+
+ public int getLeftStart() {
+ return leftStart;
+ }
+
+ public int getRightStart() {
+ return rightStart;
+ }
+
+ public int getLength() {
+ return length;
+ }
+
+ public Range setLeftStart(int leftStart) {
+ this.leftStart = leftStart;
+ return this;
+ }
+
+ public Range setRightStart(int rightStart) {
+ this.rightStart = rightStart;
+ return this;
+ }
+
+ public Range setLength(int length) {
+ this.length = length;
+ return this;
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/compare/RangeEqualsVisitor.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/compare/RangeEqualsVisitor.java
new file mode 100644
index 000000000..35b4936e3
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/compare/RangeEqualsVisitor.java
@@ -0,0 +1,563 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.compare;
+
+import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt;
+
+import java.util.List;
+import java.util.function.BiFunction;
+
+import org.apache.arrow.memory.util.ByteFunctionHelpers;
+import org.apache.arrow.util.Preconditions;
+import org.apache.arrow.vector.BaseFixedWidthVector;
+import org.apache.arrow.vector.BaseLargeVariableWidthVector;
+import org.apache.arrow.vector.BaseVariableWidthVector;
+import org.apache.arrow.vector.ExtensionTypeVector;
+import org.apache.arrow.vector.NullVector;
+import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.complex.BaseRepeatedValueVector;
+import org.apache.arrow.vector.complex.DenseUnionVector;
+import org.apache.arrow.vector.complex.FixedSizeListVector;
+import org.apache.arrow.vector.complex.LargeListVector;
+import org.apache.arrow.vector.complex.ListVector;
+import org.apache.arrow.vector.complex.NonNullableStructVector;
+import org.apache.arrow.vector.complex.UnionVector;
+
+/**
+ * Visitor to compare a range of values for vectors.
+ */
+public class RangeEqualsVisitor implements VectorVisitor<Boolean, Range> {
+ private ValueVector left;
+ private ValueVector right;
+
+ private BiFunction<ValueVector, ValueVector, Boolean> typeComparator;
+ private boolean typeCompareResult;
+
+ /**
+ * Default type comparator.
+ */
+ public static final BiFunction<ValueVector, ValueVector, Boolean> DEFAULT_TYPE_COMPARATOR =
+ (v1, v2) -> new TypeEqualsVisitor(v2).equals(v1);
+
+ /**
+ * Constructs a new instance with default type comparator.
+ * @param left left vector
+ * @param right right vector
+ */
+ public RangeEqualsVisitor(ValueVector left, ValueVector right) {
+ this (left, right, DEFAULT_TYPE_COMPARATOR);
+ }
+
+ /**
+ * Constructs a new instance.
+ *
+ * @param left left vector
+ * @param right right vector
+ * @param typeComparator type comparator to compare vector type.
+ */
+ public RangeEqualsVisitor(
+ ValueVector left,
+ ValueVector right,
+ BiFunction<ValueVector, ValueVector, Boolean> typeComparator) {
+ this.left = left;
+ this.right = right;
+ this.typeComparator = typeComparator;
+
+ Preconditions.checkArgument(left != null,
+ "left vector cannot be null");
+ Preconditions.checkArgument(right != null,
+ "right vector cannot be null");
+
+ // type usually checks only once unless the left vector is changed.
+ checkType();
+ }
+
+ private void checkType() {
+ if (typeComparator == null || left == right) {
+ typeCompareResult = true;
+ } else {
+ typeCompareResult = typeComparator.apply(left, right);
+ }
+ }
+
+ /**
+ * Validate the passed left vector, if it is changed, reset and check type.
+ */
+ protected boolean validate(ValueVector left) {
+ if (left != this.left) {
+ this.left = left;
+ checkType();
+ }
+ return typeCompareResult;
+ }
+
+ /**
+ * Check range equals.
+ */
+ public boolean rangeEquals(Range range) {
+ if (!typeCompareResult) {
+ return false;
+ }
+
+ Preconditions.checkArgument(range.getLeftStart() >= 0,
+ "leftStart %s must be non negative.", range.getLeftStart());
+ Preconditions.checkArgument(range.getRightStart() >= 0,
+ "rightStart %s must be non negative.", range.getRightStart());
+
+ Preconditions.checkArgument(range.getRightStart() + range.getLength() <= right.getValueCount(),
+ "(rightStart + length) %s out of range[0, %s].", 0, right.getValueCount());
+ Preconditions.checkArgument(range.getLeftStart() + range.getLength() <= left.getValueCount(),
+ "(leftStart + length) %s out of range[0, %s].", 0, left.getValueCount());
+
+ return left.accept(this, range);
+ }
+
+ public ValueVector getLeft() {
+ return left;
+ }
+
+ public ValueVector getRight() {
+ return right;
+ }
+
+ @Override
+ public Boolean visit(BaseFixedWidthVector left, Range range) {
+ if (!validate(left)) {
+ return false;
+ }
+ return compareBaseFixedWidthVectors(range);
+ }
+
+ @Override
+ public Boolean visit(BaseVariableWidthVector left, Range range) {
+ if (!validate(left)) {
+ return false;
+ }
+ return compareBaseVariableWidthVectors(range);
+ }
+
+ @Override
+ public Boolean visit(BaseLargeVariableWidthVector left, Range range) {
+ if (!validate(left)) {
+ return false;
+ }
+ return compareBaseLargeVariableWidthVectors(range);
+ }
+
+ @Override
+ public Boolean visit(ListVector left, Range range) {
+ if (!validate(left)) {
+ return false;
+ }
+ return compareListVectors(range);
+ }
+
+ @Override
+ public Boolean visit(FixedSizeListVector left, Range range) {
+ if (!validate(left)) {
+ return false;
+ }
+ return compareFixedSizeListVectors(range);
+ }
+
+ @Override
+ public Boolean visit(LargeListVector left, Range range) {
+ if (!validate(left)) {
+ return false;
+ }
+ return compareLargeListVectors(range);
+ }
+
+ @Override
+ public Boolean visit(NonNullableStructVector left, Range range) {
+ if (!validate(left)) {
+ return false;
+ }
+ return compareStructVectors(range);
+ }
+
+ @Override
+ public Boolean visit(UnionVector left, Range range) {
+ if (!validate(left)) {
+ return false;
+ }
+ return compareUnionVectors(range);
+ }
+
+ @Override
+ public Boolean visit(DenseUnionVector left, Range range) {
+ if (!validate(left)) {
+ return false;
+ }
+ return compareDenseUnionVectors(range);
+ }
+
+ @Override
+ public Boolean visit(NullVector left, Range range) {
+ if (!validate(left)) {
+ return false;
+ }
+ return true;
+ }
+
+ @Override
+ public Boolean visit(ExtensionTypeVector<?> left, Range range) {
+ if (!(right instanceof ExtensionTypeVector<?>) || !validate(left)) {
+ return false;
+ }
+ ValueVector rightUnderlying = ((ExtensionTypeVector<?>) right).getUnderlyingVector();
+ TypeEqualsVisitor typeVisitor = new TypeEqualsVisitor(rightUnderlying);
+ RangeEqualsVisitor underlyingVisitor =
+ createInnerVisitor(left.getUnderlyingVector(), rightUnderlying, (l, r) -> typeVisitor.equals(l));
+ return underlyingVisitor.rangeEquals(range);
+ }
+
+ protected RangeEqualsVisitor createInnerVisitor(
+ ValueVector leftInner, ValueVector rightInner,
+ BiFunction<ValueVector, ValueVector, Boolean> typeComparator) {
+ return new RangeEqualsVisitor(leftInner, rightInner, typeComparator);
+ }
+
+ protected boolean compareUnionVectors(Range range) {
+ UnionVector leftVector = (UnionVector) left;
+ UnionVector rightVector = (UnionVector) right;
+
+ Range subRange = new Range(0, 0, 1);
+ for (int i = 0; i < range.getLength(); i++) {
+ subRange.setLeftStart(range.getLeftStart() + i).setRightStart(range.getRightStart() + i);
+ ValueVector leftSubVector = leftVector.getVector(range.getLeftStart() + i);
+ ValueVector rightSubVector = rightVector.getVector(range.getRightStart() + i);
+
+ if (leftSubVector == null || rightSubVector == null) {
+ if (leftSubVector == rightSubVector) {
+ continue;
+ } else {
+ return false;
+ }
+ }
+ TypeEqualsVisitor typeVisitor = new TypeEqualsVisitor(rightSubVector);
+ RangeEqualsVisitor visitor =
+ createInnerVisitor(leftSubVector, rightSubVector, (left, right) -> typeVisitor.equals(left));
+ if (!visitor.rangeEquals(subRange)) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ protected boolean compareDenseUnionVectors(Range range) {
+ DenseUnionVector leftVector = (DenseUnionVector) left;
+ DenseUnionVector rightVector = (DenseUnionVector) right;
+
+ Range subRange = new Range(0, 0, 1);
+ for (int i = 0; i < range.getLength(); i++) {
+ boolean isLeftNull = leftVector.isNull(range.getLeftStart() + i);
+ boolean isRightNull = rightVector.isNull(range.getRightStart() + i);
+
+ // compare nullabilities
+ if (isLeftNull || isRightNull) {
+ if (isLeftNull != isRightNull) {
+ // exactly one slot is null, unequal
+ return false;
+ } else {
+ // both slots are null, pass this iteration
+ continue;
+ }
+ }
+
+ // compare type ids
+ byte leftTypeId = leftVector.getTypeId(range.getLeftStart() + i);
+ byte rightTypeId = rightVector.getTypeId(range.getRightStart() + i);
+
+ if (leftTypeId != rightTypeId) {
+ return false;
+ }
+
+ ValueVector leftSubVector = leftVector.getVectorByType(leftTypeId);
+ ValueVector rightSubVector = rightVector.getVectorByType(rightTypeId);
+
+ if (leftSubVector == null || rightSubVector == null) {
+ if (leftSubVector != rightSubVector) {
+ // exactly one of the sub-vectors is null, unequal
+ return false;
+ } else {
+ // both sub-vectors are null, pass this iteration
+ continue;
+ }
+ }
+
+ // compare values
+ int leftOffset = leftVector.getOffset(range.getLeftStart() + i);
+ int rightOffset = rightVector.getOffset(range.getRightStart() + i);
+ subRange.setLeftStart(leftOffset).setRightStart(rightOffset);
+ TypeEqualsVisitor typeVisitor = new TypeEqualsVisitor(rightSubVector);
+ RangeEqualsVisitor visitor =
+ createInnerVisitor(leftSubVector, rightSubVector, (left, right) -> typeVisitor.equals(left));
+ if (!visitor.rangeEquals(subRange)) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ protected boolean compareStructVectors(Range range) {
+ NonNullableStructVector leftVector = (NonNullableStructVector) left;
+ NonNullableStructVector rightVector = (NonNullableStructVector) right;
+
+ List<String> leftChildNames = leftVector.getChildFieldNames();
+ if (!leftChildNames.equals(rightVector.getChildFieldNames())) {
+ return false;
+ }
+
+ for (String name : leftChildNames) {
+ RangeEqualsVisitor visitor =
+ createInnerVisitor(leftVector.getChild(name), rightVector.getChild(name), /*type comparator*/ null);
+ if (!visitor.rangeEquals(range)) {
+ return false;
+ }
+ }
+
+ return true;
+ }
+
+ protected boolean compareBaseFixedWidthVectors(Range range) {
+ BaseFixedWidthVector leftVector = (BaseFixedWidthVector) left;
+ BaseFixedWidthVector rightVector = (BaseFixedWidthVector) right;
+
+ for (int i = 0; i < range.getLength(); i++) {
+ int leftIndex = range.getLeftStart() + i;
+ int rightIndex = range.getRightStart() + i;
+
+ boolean isNull = leftVector.isNull(leftIndex);
+
+ if (isNull != rightVector.isNull(rightIndex)) {
+ return false;
+ }
+
+ int typeWidth = leftVector.getTypeWidth();
+ if (!isNull) {
+ int startIndexLeft = typeWidth * leftIndex;
+ int endIndexLeft = typeWidth * (leftIndex + 1);
+
+ int startIndexRight = typeWidth * rightIndex;
+ int endIndexRight = typeWidth * (rightIndex + 1);
+
+ int ret = ByteFunctionHelpers.equal(leftVector.getDataBuffer(), startIndexLeft, endIndexLeft,
+ rightVector.getDataBuffer(), startIndexRight, endIndexRight);
+
+ if (ret == 0) {
+ return false;
+ }
+ }
+ }
+ return true;
+ }
+
+ protected boolean compareBaseVariableWidthVectors(Range range) {
+ BaseVariableWidthVector leftVector = (BaseVariableWidthVector) left;
+ BaseVariableWidthVector rightVector = (BaseVariableWidthVector) right;
+
+ for (int i = 0; i < range.getLength(); i++) {
+ int leftIndex = range.getLeftStart() + i;
+ int rightIndex = range.getRightStart() + i;
+
+ boolean isNull = leftVector.isNull(leftIndex);
+ if (isNull != rightVector.isNull(rightIndex)) {
+ return false;
+ }
+
+ int offsetWidth = BaseVariableWidthVector.OFFSET_WIDTH;
+
+ if (!isNull) {
+ final int startIndexLeft = leftVector.getOffsetBuffer().getInt(leftIndex * offsetWidth);
+ final int endIndexLeft = leftVector.getOffsetBuffer().getInt((leftIndex + 1) * offsetWidth);
+
+ final int startIndexRight = rightVector.getOffsetBuffer().getInt(rightIndex * offsetWidth);
+ final int endIndexRight = rightVector.getOffsetBuffer().getInt((rightIndex + 1) * offsetWidth);
+
+ int ret = ByteFunctionHelpers.equal(leftVector.getDataBuffer(), startIndexLeft, endIndexLeft,
+ rightVector.getDataBuffer(), startIndexRight, endIndexRight);
+
+ if (ret == 0) {
+ return false;
+ }
+ }
+ }
+ return true;
+ }
+
+ protected boolean compareBaseLargeVariableWidthVectors(Range range) {
+ BaseLargeVariableWidthVector leftVector = (BaseLargeVariableWidthVector) left;
+ BaseLargeVariableWidthVector rightVector = (BaseLargeVariableWidthVector) right;
+
+ for (int i = 0; i < range.getLength(); i++) {
+ int leftIndex = range.getLeftStart() + i;
+ int rightIndex = range.getRightStart() + i;
+
+ boolean isNull = leftVector.isNull(leftIndex);
+ if (isNull != rightVector.isNull(rightIndex)) {
+ return false;
+ }
+
+ int offsetWidth = BaseLargeVariableWidthVector.OFFSET_WIDTH;
+
+ if (!isNull) {
+ final long startIndexLeft = leftVector.getOffsetBuffer().getLong((long) leftIndex * offsetWidth);
+ final long endIndexLeft = leftVector.getOffsetBuffer().getLong((long) (leftIndex + 1) * offsetWidth);
+
+ final long startIndexRight = rightVector.getOffsetBuffer().getLong((long) rightIndex * offsetWidth);
+ final long endIndexRight = rightVector.getOffsetBuffer().getLong((long) (rightIndex + 1) * offsetWidth);
+
+ int ret = ByteFunctionHelpers.equal(leftVector.getDataBuffer(), startIndexLeft, endIndexLeft,
+ rightVector.getDataBuffer(), startIndexRight, endIndexRight);
+
+ if (ret == 0) {
+ return false;
+ }
+ }
+ }
+ return true;
+ }
+
+ protected boolean compareListVectors(Range range) {
+ ListVector leftVector = (ListVector) left;
+ ListVector rightVector = (ListVector) right;
+
+ RangeEqualsVisitor innerVisitor =
+ createInnerVisitor(leftVector.getDataVector(), rightVector.getDataVector(), /*type comparator*/ null);
+ Range innerRange = new Range();
+
+ for (int i = 0; i < range.getLength(); i++) {
+ int leftIndex = range.getLeftStart() + i;
+ int rightIndex = range.getRightStart() + i;
+
+ boolean isNull = leftVector.isNull(leftIndex);
+ if (isNull != rightVector.isNull(rightIndex)) {
+ return false;
+ }
+
+ int offsetWidth = BaseRepeatedValueVector.OFFSET_WIDTH;
+
+ if (!isNull) {
+ final int startIndexLeft = leftVector.getOffsetBuffer().getInt(leftIndex * offsetWidth);
+ final int endIndexLeft = leftVector.getOffsetBuffer().getInt((leftIndex + 1) * offsetWidth);
+
+ final int startIndexRight = rightVector.getOffsetBuffer().getInt(rightIndex * offsetWidth);
+ final int endIndexRight = rightVector.getOffsetBuffer().getInt((rightIndex + 1) * offsetWidth);
+
+ if ((endIndexLeft - startIndexLeft) != (endIndexRight - startIndexRight)) {
+ return false;
+ }
+
+ innerRange = innerRange
+ .setRightStart(startIndexRight)
+ .setLeftStart(startIndexLeft)
+ .setLength(endIndexLeft - startIndexLeft);
+ if (!innerVisitor.rangeEquals(innerRange)) {
+ return false;
+ }
+ }
+ }
+ return true;
+ }
+
+ protected boolean compareFixedSizeListVectors(Range range) {
+ FixedSizeListVector leftVector = (FixedSizeListVector) left;
+ FixedSizeListVector rightVector = (FixedSizeListVector) right;
+
+ if (leftVector.getListSize() != rightVector.getListSize()) {
+ return false;
+ }
+
+ int listSize = leftVector.getListSize();
+ RangeEqualsVisitor innerVisitor =
+ createInnerVisitor(leftVector.getDataVector(), rightVector.getDataVector(), /*type comparator*/ null);
+ Range innerRange = new Range(0, 0, listSize);
+
+ for (int i = 0; i < range.getLength(); i++) {
+ int leftIndex = range.getLeftStart() + i;
+ int rightIndex = range.getRightStart() + i;
+
+ boolean isNull = leftVector.isNull(leftIndex);
+ if (isNull != rightVector.isNull(rightIndex)) {
+ return false;
+ }
+
+ if (!isNull) {
+ final int startIndexLeft = leftIndex * listSize;
+ final int endIndexLeft = (leftIndex + 1) * listSize;
+
+ final int startIndexRight = rightIndex * listSize;
+ final int endIndexRight = (rightIndex + 1) * listSize;
+
+ if ((endIndexLeft - startIndexLeft) != (endIndexRight - startIndexRight)) {
+ return false;
+ }
+
+ innerRange = innerRange.setLeftStart(startIndexLeft)
+ .setRightStart(startIndexRight);
+ if (!innerVisitor.rangeEquals(innerRange)) {
+ return false;
+ }
+ }
+ }
+ return true;
+ }
+
+ protected boolean compareLargeListVectors(Range range) {
+ LargeListVector leftVector = (LargeListVector) left;
+ LargeListVector rightVector = (LargeListVector) right;
+
+ RangeEqualsVisitor innerVisitor =
+ createInnerVisitor(leftVector.getDataVector(), rightVector.getDataVector(), /*type comparator*/ null);
+ Range innerRange = new Range();
+
+ for (int i = 0; i < range.getLength(); i++) {
+ int leftIndex = range.getLeftStart() + i;
+ int rightIndex = range.getRightStart() + i;
+
+ boolean isNull = leftVector.isNull(leftIndex);
+ if (isNull != rightVector.isNull(rightIndex)) {
+ return false;
+ }
+
+ long offsetWidth = LargeListVector.OFFSET_WIDTH;
+
+ if (!isNull) {
+ final long startIndexLeft = leftVector.getOffsetBuffer().getLong((long) leftIndex * offsetWidth);
+ final long endIndexLeft = leftVector.getOffsetBuffer().getLong((long) (leftIndex + 1) * offsetWidth);
+
+ final long startIndexRight = rightVector.getOffsetBuffer().getLong((long) rightIndex * offsetWidth);
+ final long endIndexRight = rightVector.getOffsetBuffer().getLong((long) (rightIndex + 1) * offsetWidth);
+
+ if ((endIndexLeft - startIndexLeft) != (endIndexRight - startIndexRight)) {
+ return false;
+ }
+
+ innerRange = innerRange // TODO revisit these casts when long indexing is finished
+ .setRightStart(checkedCastToInt(startIndexRight))
+ .setLeftStart(checkedCastToInt(startIndexLeft))
+ .setLength(checkedCastToInt(endIndexLeft - startIndexLeft));
+ if (!innerVisitor.rangeEquals(innerRange)) {
+ return false;
+ }
+ }
+ }
+ return true;
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/compare/TypeEqualsVisitor.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/compare/TypeEqualsVisitor.java
new file mode 100644
index 000000000..443ee1f96
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/compare/TypeEqualsVisitor.java
@@ -0,0 +1,154 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.compare;
+
+import java.util.List;
+import java.util.Objects;
+
+import org.apache.arrow.vector.BaseFixedWidthVector;
+import org.apache.arrow.vector.BaseLargeVariableWidthVector;
+import org.apache.arrow.vector.BaseVariableWidthVector;
+import org.apache.arrow.vector.ExtensionTypeVector;
+import org.apache.arrow.vector.NullVector;
+import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.complex.DenseUnionVector;
+import org.apache.arrow.vector.complex.FixedSizeListVector;
+import org.apache.arrow.vector.complex.LargeListVector;
+import org.apache.arrow.vector.complex.ListVector;
+import org.apache.arrow.vector.complex.NonNullableStructVector;
+import org.apache.arrow.vector.complex.UnionVector;
+import org.apache.arrow.vector.types.pojo.Field;
+
+/**
+ * Visitor to compare type equals for vectors.
+ */
+public class TypeEqualsVisitor implements VectorVisitor<Boolean, Void> {
+
+ private final ValueVector right;
+
+ private final boolean checkName;
+ private final boolean checkMetadata;
+
+ /**
+ * Construct an instance.
+ */
+ public TypeEqualsVisitor(ValueVector right) {
+ this (right, true, true);
+ }
+
+ /**
+ * Construct an instance.
+ * @param right right vector
+ * @param checkName whether checks names
+ * @param checkMetadata whether checks metadata
+ */
+ public TypeEqualsVisitor(ValueVector right, boolean checkName, boolean checkMetadata) {
+ this.right = right;
+ this.checkName = checkName;
+ this.checkMetadata = checkMetadata;
+ }
+
+ /**
+ * Check type equals without passing IN param in VectorVisitor.
+ */
+ public boolean equals(ValueVector left) {
+ return left.accept(this, null);
+ }
+
+ @Override
+ public Boolean visit(BaseFixedWidthVector left, Void value) {
+ return compareField(left.getField(), right.getField());
+ }
+
+ @Override
+ public Boolean visit(BaseVariableWidthVector left, Void value) {
+ return compareField(left.getField(), right.getField());
+ }
+
+ @Override
+ public Boolean visit(BaseLargeVariableWidthVector left, Void value) {
+ return compareField(left.getField(), right.getField());
+ }
+
+ @Override
+ public Boolean visit(ListVector left, Void value) {
+ return compareField(left.getField(), right.getField());
+ }
+
+ @Override
+ public Boolean visit(FixedSizeListVector left, Void value) {
+ return compareField(left.getField(), right.getField());
+ }
+
+ @Override
+ public Boolean visit(LargeListVector left, Void value) {
+ return compareField(left.getField(), right.getField());
+ }
+
+ @Override
+ public Boolean visit(NonNullableStructVector left, Void value) {
+ return compareField(left.getField(), right.getField());
+ }
+
+ @Override
+ public Boolean visit(UnionVector left, Void value) {
+ return compareField(left.getField(), right.getField());
+ }
+
+ @Override
+ public Boolean visit(DenseUnionVector left, Void value) {
+ return compareField(left.getField(), right.getField());
+ }
+
+ @Override
+ public Boolean visit(NullVector left, Void value) {
+ return compareField(left.getField(), right.getField());
+ }
+
+ @Override
+ public Boolean visit(ExtensionTypeVector<?> left, Void value) {
+ return compareField(left.getField(), right.getField());
+ }
+
+ private boolean compareField(Field leftField, Field rightField) {
+
+ if (leftField == rightField) {
+ return true;
+ }
+
+ return (!checkName || Objects.equals(leftField.getName(), rightField.getName())) &&
+ Objects.equals(leftField.isNullable(), rightField.isNullable()) &&
+ Objects.equals(leftField.getType(), rightField.getType()) &&
+ Objects.equals(leftField.getDictionary(), rightField.getDictionary()) &&
+ (!checkMetadata || Objects.equals(leftField.getMetadata(), rightField.getMetadata())) &&
+ compareChildren(leftField.getChildren(), rightField.getChildren());
+ }
+
+ private boolean compareChildren(List<Field> leftChildren, List<Field> rightChildren) {
+ if (leftChildren.size() != rightChildren.size()) {
+ return false;
+ }
+
+ for (int i = 0; i < leftChildren.size(); i++) {
+ if (!compareField(leftChildren.get(i), rightChildren.get(i))) {
+ return false;
+ }
+ }
+ return true;
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/compare/VectorEqualsVisitor.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/compare/VectorEqualsVisitor.java
new file mode 100644
index 000000000..390d13854
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/compare/VectorEqualsVisitor.java
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.compare;
+
+import static org.apache.arrow.vector.compare.RangeEqualsVisitor.DEFAULT_TYPE_COMPARATOR;
+
+import java.util.function.BiFunction;
+
+import org.apache.arrow.vector.ValueVector;
+
+/**
+ * Visitor to compare vectors equal.
+ */
+public class VectorEqualsVisitor {
+
+ /**
+ * Checks if two vectors are equals with default type comparator.
+ * @param left the left vector to compare.
+ * @param right the right vector to compare.
+ * @return true if the vectors are equal, and false otherwise.
+ */
+ public static boolean vectorEquals(ValueVector left, ValueVector right) {
+ return vectorEquals(left, right, DEFAULT_TYPE_COMPARATOR);
+ }
+
+ /**
+ * Checks if two vectors are equals.
+ * @param left the left vector to compare.
+ * @param right the right vector to compare.
+ * @param typeComparator type comparator to compare vector type.
+ * @return true if the vectors are equal, and false otherwise.
+ */
+ public static boolean vectorEquals(
+ ValueVector left,
+ ValueVector right,
+ BiFunction<ValueVector, ValueVector, Boolean> typeComparator) {
+
+ if (left.getValueCount() != right.getValueCount()) {
+ return false;
+ }
+
+ RangeEqualsVisitor visitor = new RangeEqualsVisitor(left, right, typeComparator);
+ return visitor.rangeEquals(new Range(0, 0, left.getValueCount()));
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/compare/VectorValueEqualizer.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/compare/VectorValueEqualizer.java
new file mode 100644
index 000000000..4f9c1a95e
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/compare/VectorValueEqualizer.java
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.compare;
+
+import org.apache.arrow.vector.ValueVector;
+
+/**
+ * A function to determine if two vectors are equal at specified positions.
+ * @param <V> the vector type.
+ */
+public interface VectorValueEqualizer<V extends ValueVector> extends Cloneable {
+
+ /**
+ * Checks if the vectors are equal at the given positions, given that the values
+ * at both positions are non-null.
+ * @param vector1 the first vector.
+ * @param index1 index in the first vector.
+ * @param vector2 the second vector.
+ * @param index2 index in the second vector.
+ * @return true if the two values are considered to be equal, and false otherwise.
+ */
+ boolean valuesEqual(V vector1, int index1, V vector2, int index2);
+
+ /**
+ * Creates a equalizer of the same type.
+ * @return the newly created equalizer.
+ */
+ VectorValueEqualizer<V> clone();
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/compare/VectorVisitor.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/compare/VectorVisitor.java
new file mode 100644
index 000000000..aee090706
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/compare/VectorVisitor.java
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.compare;
+
+import org.apache.arrow.vector.BaseFixedWidthVector;
+import org.apache.arrow.vector.BaseLargeVariableWidthVector;
+import org.apache.arrow.vector.BaseVariableWidthVector;
+import org.apache.arrow.vector.ExtensionTypeVector;
+import org.apache.arrow.vector.NullVector;
+import org.apache.arrow.vector.complex.DenseUnionVector;
+import org.apache.arrow.vector.complex.FixedSizeListVector;
+import org.apache.arrow.vector.complex.LargeListVector;
+import org.apache.arrow.vector.complex.ListVector;
+import org.apache.arrow.vector.complex.NonNullableStructVector;
+import org.apache.arrow.vector.complex.UnionVector;
+
+/**
+ * Generic visitor to visit a {@link org.apache.arrow.vector.ValueVector}.
+ * @param <OUT> the output result type.
+ * @param <IN> the input data together with visitor.
+ */
+public interface VectorVisitor<OUT, IN> {
+
+ OUT visit(BaseFixedWidthVector left, IN value);
+
+ OUT visit(BaseVariableWidthVector left, IN value);
+
+ OUT visit(BaseLargeVariableWidthVector left, IN value);
+
+ OUT visit(ListVector left, IN value);
+
+ OUT visit(FixedSizeListVector left, IN value);
+
+ OUT visit(LargeListVector left, IN value);
+
+ OUT visit(NonNullableStructVector left, IN value);
+
+ OUT visit(UnionVector left, IN value);
+
+ OUT visit(DenseUnionVector left, IN value);
+
+ OUT visit(NullVector left, IN value);
+
+ OUT visit(ExtensionTypeVector<?> left, IN value);
+}
+
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/compare/util/ValueEpsilonEqualizers.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/compare/util/ValueEpsilonEqualizers.java
new file mode 100644
index 000000000..a7b6a8ca4
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/compare/util/ValueEpsilonEqualizers.java
@@ -0,0 +1,149 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.compare.util;
+
+import org.apache.arrow.vector.Float4Vector;
+import org.apache.arrow.vector.Float8Vector;
+import org.apache.arrow.vector.FloatingPointVector;
+import org.apache.arrow.vector.compare.VectorValueEqualizer;
+
+/**
+ * Vector value equalizers that regard values as equal if their difference
+ * is within a small threshold (epsilon).
+ */
+public class ValueEpsilonEqualizers {
+
+ private ValueEpsilonEqualizers() {
+
+ }
+
+ /**
+ * Difference function for floating point values.
+ */
+ public static class FloatingPointEpsilonEqualizer implements VectorValueEqualizer<FloatingPointVector> {
+ private final double epsilon;
+
+ public FloatingPointEpsilonEqualizer(double epsilon) {
+ this.epsilon = epsilon;
+ }
+
+ @Override
+ public final boolean valuesEqual(
+ FloatingPointVector vector1, int index1, FloatingPointVector vector2, int index2) {
+ boolean isNull1 = vector1.isNull(index1);
+ boolean isNull2 = vector2.isNull(index2);
+
+ if (isNull1 || isNull2) {
+ return isNull1 == isNull2;
+ }
+
+ double d1 = vector1.getValueAsDouble(index1);
+ double d2 = vector2.getValueAsDouble(index2);
+
+ if (Double.isNaN(d1)) {
+ return Double.isNaN(d2);
+ }
+ if (Double.isInfinite(d1)) {
+ return Double.isInfinite(d2) && Math.signum(d1) == Math.signum(d2);
+ }
+
+ return Math.abs(d1 - d2) <= epsilon;
+ }
+
+ @Override
+ public VectorValueEqualizer<FloatingPointVector> clone() {
+ return new FloatingPointEpsilonEqualizer(epsilon);
+ }
+ }
+
+ /**
+ * Difference function for float values.
+ */
+ public static class Float4EpsilonEqualizer implements VectorValueEqualizer<Float4Vector> {
+ private final float epsilon;
+
+ public Float4EpsilonEqualizer(float epsilon) {
+ this.epsilon = epsilon;
+ }
+
+ @Override
+ public final boolean valuesEqual(Float4Vector vector1, int index1, Float4Vector vector2, int index2) {
+ boolean isNull1 = vector1.isNull(index1);
+ boolean isNull2 = vector2.isNull(index2);
+
+ if (isNull1 || isNull2) {
+ return isNull1 == isNull2;
+ }
+
+ float f1 = vector1.get(index1);
+ float f2 = vector2.get(index2);
+
+ if (Float.isNaN(f1)) {
+ return Float.isNaN(f2);
+ }
+ if (Float.isInfinite(f1)) {
+ return Float.isInfinite(f2) && Math.signum(f1) == Math.signum(f2);
+ }
+
+ return Math.abs(f1 - f2) <= epsilon;
+ }
+
+ @Override
+ public VectorValueEqualizer<Float4Vector> clone() {
+ return new Float4EpsilonEqualizer(epsilon);
+ }
+ }
+
+ /**
+ * Difference function for double values.
+ */
+ public static class Float8EpsilonEqualizer implements VectorValueEqualizer<Float8Vector> {
+ private final double epsilon;
+
+ public Float8EpsilonEqualizer(double epsilon) {
+ this.epsilon = epsilon;
+ }
+
+ @Override
+ public final boolean valuesEqual(Float8Vector vector1, int index1, Float8Vector vector2, int index2) {
+ boolean isNull1 = vector1.isNull(index1);
+ boolean isNull2 = vector2.isNull(index2);
+
+ if (isNull1 || isNull2) {
+ return isNull1 == isNull2;
+ }
+
+ double d1 = vector1.get(index1);
+ double d2 = vector2.get(index2);
+
+ if (Double.isNaN(d1)) {
+ return Double.isNaN(d2);
+ }
+ if (Double.isInfinite(d1)) {
+ return Double.isInfinite(d2) && Math.signum(d1) == Math.signum(d2);
+ }
+
+ return Math.abs(d1 - d2) <= epsilon;
+ }
+
+ @Override
+ public VectorValueEqualizer<Float8Vector> clone() {
+ return new Float8EpsilonEqualizer(epsilon);
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/AbstractContainerVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/AbstractContainerVector.java
new file mode 100644
index 000000000..898bfe3d3
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/AbstractContainerVector.java
@@ -0,0 +1,140 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.complex;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.OutOfMemoryException;
+import org.apache.arrow.vector.DensityAwareVector;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.ArrowType.FixedSizeList;
+import org.apache.arrow.vector.types.pojo.ArrowType.List;
+import org.apache.arrow.vector.types.pojo.ArrowType.Struct;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.CallBack;
+
+/**
+ * Base class for composite vectors.
+ *
+ * <p>This class implements common functionality of composite vectors.
+ */
+public abstract class AbstractContainerVector implements ValueVector, DensityAwareVector {
+ static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(AbstractContainerVector.class);
+
+ protected final String name;
+ protected final BufferAllocator allocator;
+ protected final CallBack callBack;
+
+ protected AbstractContainerVector(String name, BufferAllocator allocator, CallBack callBack) {
+ this.name = name;
+ this.allocator = allocator;
+ this.callBack = callBack;
+ }
+
+ @Override
+ public void allocateNew() throws OutOfMemoryException {
+ if (!allocateNewSafe()) {
+ throw new OutOfMemoryException();
+ }
+ }
+
+ public BufferAllocator getAllocator() {
+ return allocator;
+ }
+
+ /**
+ * Returns a {@link org.apache.arrow.vector.ValueVector} corresponding to the given field name if exists or null.
+ *
+ * @param name the name of the child to return
+ * @return the corresponding FieldVector
+ */
+ public FieldVector getChild(String name) {
+ return getChild(name, FieldVector.class);
+ }
+
+ /**
+ * Clears out all underlying child vectors.
+ */
+ @Override
+ public void close() {
+ for (ValueVector vector : (Iterable<ValueVector>) this) {
+ vector.close();
+ }
+ }
+
+ protected <T extends ValueVector> T typeify(ValueVector v, Class<T> clazz) {
+ if (clazz.isAssignableFrom(v.getClass())) {
+ return clazz.cast(v);
+ }
+ throw new IllegalStateException(String.format("Vector requested [%s] was different than type stored [%s]. Arrow " +
+ "doesn't yet support heterogeneous types.", clazz.getSimpleName(), v.getClass().getSimpleName()));
+ }
+
+ protected boolean supportsDirectRead() {
+ return false;
+ }
+
+ // return the number of child vectors
+ public abstract int size();
+
+ // add a new vector with the input FieldType or return the existing vector if we already added one with the same name
+ public abstract <T extends FieldVector> T addOrGet(String name, FieldType fieldType, Class<T> clazz);
+
+ // return the child vector with the input name
+ public abstract <T extends FieldVector> T getChild(String name, Class<T> clazz);
+
+ // return the child vector's ordinal in the composite container
+ public abstract VectorWithOrdinal getChildVectorWithOrdinal(String name);
+
+ public StructVector addOrGetStruct(String name) {
+ return addOrGet(name, FieldType.nullable(new Struct()), StructVector.class);
+ }
+
+ public ListVector addOrGetList(String name) {
+ return addOrGet(name, FieldType.nullable(new List()), ListVector.class);
+ }
+
+ public UnionVector addOrGetUnion(String name) {
+ return addOrGet(name, FieldType.nullable(MinorType.UNION.getType()), UnionVector.class);
+ }
+
+ public FixedSizeListVector addOrGetFixedSizeList(String name, int listSize) {
+ return addOrGet(name, FieldType.nullable(new FixedSizeList(listSize)), FixedSizeListVector.class);
+ }
+
+ public MapVector addOrGetMap(String name, boolean keysSorted) {
+ return addOrGet(name, FieldType.nullable(new ArrowType.Map(keysSorted)), MapVector.class);
+ }
+
+ @Override
+ public void copyFrom(int fromIndex, int thisIndex, ValueVector from) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public void copyFromSafe(int fromIndex, int thisIndex, ValueVector from) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public String getName() {
+ return name;
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/AbstractStructVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/AbstractStructVector.java
new file mode 100644
index 000000000..be6d99233
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/AbstractStructVector.java
@@ -0,0 +1,425 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.complex;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.List;
+import java.util.stream.Collectors;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.util.Preconditions;
+import org.apache.arrow.vector.BitVectorHelper;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.CallBack;
+import org.apache.arrow.vector.util.PromotableMultiMapWithOrdinal;
+import org.apache.arrow.vector.util.ValueVectorUtility;
+
+/**
+ * Base class for StructVectors. Currently used by NonNullableStructVector
+ */
+public abstract class AbstractStructVector extends AbstractContainerVector {
+ private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(AbstractContainerVector.class);
+ private static final String STRUCT_CONFLICT_POLICY_ENV = "ARROW_STRUCT_CONFLICT_POLICY";
+ private static final String STRUCT_CONFLICT_POLICY_JVM = "arrow.struct.conflict.policy";
+ private static final ConflictPolicy DEFAULT_CONFLICT_POLICY;
+ // Maintains a map with key as field name and value is the vector itself
+ private final PromotableMultiMapWithOrdinal<String, FieldVector> vectors;
+ protected final boolean allowConflictPolicyChanges;
+ private ConflictPolicy conflictPolicy;
+
+
+ static {
+ String conflictPolicyStr = System.getProperty(STRUCT_CONFLICT_POLICY_JVM,
+ ConflictPolicy.CONFLICT_REPLACE.toString());
+ if (conflictPolicyStr == null) {
+ conflictPolicyStr = System.getenv(STRUCT_CONFLICT_POLICY_ENV);
+ }
+ ConflictPolicy conflictPolicy;
+ try {
+ conflictPolicy = ConflictPolicy.valueOf(conflictPolicyStr.toUpperCase());
+ } catch (Exception e) {
+ conflictPolicy = ConflictPolicy.CONFLICT_REPLACE;
+ }
+ DEFAULT_CONFLICT_POLICY = conflictPolicy;
+ }
+
+ /**
+ * Policy to determine how to react when duplicate columns are encountered.
+ */
+ public enum ConflictPolicy {
+ // Ignore the conflict and append the field. This is the default behaviour
+ CONFLICT_APPEND,
+ // Keep the existing field and ignore the newer one.
+ CONFLICT_IGNORE,
+ // Replace the existing field with the newer one.
+ CONFLICT_REPLACE,
+ // Refuse the new field and error out.
+ CONFLICT_ERROR
+ }
+
+ /**
+ * Base coonstructor that sets default conflict policy to APPEND.
+ */
+ protected AbstractStructVector(String name,
+ BufferAllocator allocator,
+ CallBack callBack,
+ ConflictPolicy conflictPolicy,
+ boolean allowConflictPolicyChanges) {
+ super(name, allocator, callBack);
+ this.conflictPolicy = conflictPolicy == null ? DEFAULT_CONFLICT_POLICY : conflictPolicy;
+ this.vectors = new PromotableMultiMapWithOrdinal<>(allowConflictPolicyChanges, this.conflictPolicy);
+ this.allowConflictPolicyChanges = allowConflictPolicyChanges;
+ }
+
+ /**
+ * Set conflict policy and return last conflict policy state.
+ */
+ public ConflictPolicy setConflictPolicy(ConflictPolicy conflictPolicy) {
+ ConflictPolicy tmp = this.conflictPolicy;
+ this.conflictPolicy = conflictPolicy;
+ this.vectors.setConflictPolicy(conflictPolicy);
+ return tmp;
+ }
+
+ public ConflictPolicy getConflictPolicy() {
+ return conflictPolicy;
+ }
+
+ @Override
+ public void close() {
+ for (final ValueVector valueVector : vectors.values()) {
+ valueVector.close();
+ }
+ vectors.clear();
+
+ super.close();
+ }
+
+ @Override
+ public boolean allocateNewSafe() {
+ /* boolean to keep track if all the memory allocation were successful
+ * Used in the case of composite vectors when we need to allocate multiple
+ * buffers for multiple vectors. If one of the allocations failed we need to
+ * clear all the memory that we allocated
+ */
+ boolean success = false;
+ try {
+ for (final ValueVector v : vectors.values()) {
+ if (!v.allocateNewSafe()) {
+ return false;
+ }
+ }
+ success = true;
+ } finally {
+ if (!success) {
+ clear();
+ }
+ }
+ return true;
+ }
+
+ @Override
+ public void reAlloc() {
+ for (final ValueVector v : vectors.values()) {
+ v.reAlloc();
+ }
+ }
+
+ /**
+ * Adds a new field with the given parameters or replaces the existing one and consequently returns the resultant
+ * {@link org.apache.arrow.vector.ValueVector}.
+ *
+ * <p>Execution takes place in the following order:
+ * <ul>
+ * <li>
+ * if field is new, create and insert a new vector of desired type.
+ * </li>
+ * <li>
+ * if field exists and existing vector is of desired vector type, return the vector.
+ * </li>
+ * <li>
+ * if field exists and null filled, clear the existing vector; create and insert a new vector of desired type.
+ * </li>
+ * <li>
+ * otherwise, throw an {@link java.lang.IllegalStateException}
+ * </li>
+ * </ul>
+ *
+ * @param childName the name of the field
+ * @param fieldType the type for the vector
+ * @param clazz class of expected vector type
+ * @param <T> class type of expected vector type
+ * @return resultant {@link org.apache.arrow.vector.ValueVector}
+ * @throws java.lang.IllegalStateException raised if there is a hard schema change
+ */
+ public <T extends FieldVector> T addOrGet(String childName, FieldType fieldType, Class<T> clazz) {
+ final ValueVector existing = getChild(childName);
+ boolean create = false;
+ if (existing == null) {
+ create = true;
+ } else if (clazz.isAssignableFrom(existing.getClass())) {
+ return clazz.cast(existing);
+ } else if (nullFilled(existing)) {
+ existing.clear();
+ create = true;
+ }
+ if (create) {
+ final T vector = clazz.cast(fieldType.createNewSingleVector(childName, allocator, callBack));
+ putChild(childName, vector);
+ if (callBack != null) {
+ callBack.doWork();
+ }
+ return vector;
+ }
+ final String message = "Arrow does not support schema change yet. Existing[%s] and desired[%s] vector types " +
+ "mismatch";
+ throw new IllegalStateException(String.format(message, existing.getClass().getSimpleName(), clazz.getSimpleName()));
+ }
+
+ private boolean nullFilled(ValueVector vector) {
+ return BitVectorHelper.checkAllBitsEqualTo(vector.getValidityBuffer(), vector.getValueCount(), false);
+ }
+
+ /**
+ * Returns a {@link org.apache.arrow.vector.ValueVector} corresponding to the given ordinal identifier.
+ *
+ * @param id the ordinal of the child to return
+ * @return the corresponding child
+ */
+ public ValueVector getChildByOrdinal(int id) {
+ return vectors.getByOrdinal(id);
+ }
+
+ /**
+ * Returns a {@link org.apache.arrow.vector.ValueVector} instance of subtype of T corresponding to the given
+ * field name if exists or null.
+ *
+ * If there is more than one element for name this will return the first inserted.
+ *
+ * @param name the name of the child to return
+ * @param clazz the expected type of the child
+ * @return the child corresponding to this name
+ */
+ @Override
+ public <T extends FieldVector> T getChild(String name, Class<T> clazz) {
+ final FieldVector f = vectors.get(name);
+ if (f == null) {
+ return null;
+ }
+ return typeify(f, clazz);
+ }
+
+ protected ValueVector add(String childName, FieldType fieldType) {
+ FieldVector vector = fieldType.createNewSingleVector(childName, allocator, callBack);
+ putChild(childName, vector);
+ if (callBack != null) {
+ callBack.doWork();
+ }
+ return vector;
+ }
+
+ /**
+ * Inserts the vector with the given name if it does not exist else replaces it with the new value.
+ *
+ * <p>Note that this method does not enforce any vector type check nor throws a schema change exception.
+ *
+ * @param name the name of the child to add
+ * @param vector the vector to add as a child
+ */
+ protected void putChild(String name, FieldVector vector) {
+ putVector(name, vector);
+ }
+
+ private void put(String name, FieldVector vector, boolean overwrite) {
+ final boolean old = vectors.put(
+ Preconditions.checkNotNull(name, "field name cannot be null"),
+ Preconditions.checkNotNull(vector, "vector cannot be null"),
+ overwrite
+ );
+ if (old) {
+ logger.debug("Field [{}] mutated to [{}] ", name,
+ vector.getClass().getSimpleName());
+ }
+ }
+
+ /**
+ * Inserts the input vector into the map if it does not exist.
+ *
+ * <p>
+ * If the field name already exists the conflict is handled according to the currently set ConflictPolicy
+ * </p>
+ *
+ * @param name field name
+ * @param vector vector to be inserted
+ */
+ protected void putVector(String name, FieldVector vector) {
+ switch (conflictPolicy) {
+ case CONFLICT_APPEND:
+ put(name, vector, false);
+ break;
+ case CONFLICT_IGNORE:
+ if (!vectors.containsKey(name)) {
+ put(name, vector, false);
+ }
+ break;
+ case CONFLICT_REPLACE:
+ if (vectors.containsKey(name)) {
+ vectors.removeAll(name);
+ }
+ put(name, vector, true);
+ break;
+ case CONFLICT_ERROR:
+ if (vectors.containsKey(name)) {
+ throw new IllegalStateException(String.format("Vector already exists: Existing[%s], Requested[%s] ",
+ vector.getClass().getSimpleName(), vector.getField().getFieldType()));
+ }
+ put(name, vector, false);
+ break;
+ default:
+ throw new IllegalStateException(String.format("%s type not a valid conflict state", conflictPolicy));
+ }
+
+ }
+
+ /**
+ * Get child vectors.
+ * @return a sequence of underlying child vectors.
+ */
+ protected List<FieldVector> getChildren() {
+ int size = vectors.size();
+ List<FieldVector> children = new ArrayList<>();
+ for (int i = 0; i < size; i++) {
+ children.add(vectors.getByOrdinal(i));
+ }
+ return children;
+ }
+
+ /**
+ * Get child field names.
+ */
+ public List<String> getChildFieldNames() {
+ return getChildren().stream()
+ .map(child -> child.getField().getName())
+ .collect(Collectors.toList());
+ }
+
+ /**
+ * Get the number of child vectors.
+ * @return the number of underlying child vectors.
+ */
+ @Override
+ public int size() {
+ return vectors.size();
+ }
+
+ @Override
+ public Iterator<ValueVector> iterator() {
+ return Collections.<ValueVector>unmodifiableCollection(vectors.values()).iterator();
+ }
+
+ /**
+ * Get primitive child vectors.
+ * @return a list of scalar child vectors recursing the entire vector hierarchy.
+ */
+ public List<ValueVector> getPrimitiveVectors() {
+ final List<ValueVector> primitiveVectors = new ArrayList<>();
+ for (final FieldVector v : vectors.values()) {
+ primitiveVectors.addAll(getPrimitiveVectors(v));
+ }
+ return primitiveVectors;
+ }
+
+ private List<ValueVector> getPrimitiveVectors(FieldVector v) {
+ final List<ValueVector> primitives = new ArrayList<>();
+ if (v instanceof AbstractStructVector) {
+ AbstractStructVector structVector = (AbstractStructVector) v;
+ primitives.addAll(structVector.getPrimitiveVectors());
+ } else if (v instanceof ListVector) {
+ ListVector listVector = (ListVector) v;
+ primitives.addAll(getPrimitiveVectors(listVector.getDataVector()));
+ } else if (v instanceof FixedSizeListVector) {
+ FixedSizeListVector listVector = (FixedSizeListVector) v;
+ primitives.addAll(getPrimitiveVectors(listVector.getDataVector()));
+ } else if (v instanceof UnionVector) {
+ UnionVector unionVector = (UnionVector) v;
+ for (final FieldVector vector : unionVector.getChildrenFromFields()) {
+ primitives.addAll(getPrimitiveVectors(vector));
+ }
+ } else {
+ primitives.add(v);
+ }
+ return primitives;
+ }
+
+ /**
+ * Get a child vector by name. If duplicate names this returns the first inserted.
+ * @param name the name of the child to return
+ * @return a vector with its corresponding ordinal mapping if field exists or null.
+ */
+ @Override
+ public VectorWithOrdinal getChildVectorWithOrdinal(String name) {
+ final int ordinal = vectors.getOrdinal(name);
+ if (ordinal < 0) {
+ return null;
+ }
+ final ValueVector vector = vectors.getByOrdinal(ordinal);
+ return new VectorWithOrdinal(vector, ordinal);
+ }
+
+ @Override
+ public ArrowBuf[] getBuffers(boolean clear) {
+ final List<ArrowBuf> buffers = new ArrayList<>();
+
+ for (final ValueVector vector : vectors.values()) {
+ for (final ArrowBuf buf : vector.getBuffers(false)) {
+ buffers.add(buf);
+ if (clear) {
+ buf.getReferenceManager().retain(1);
+ }
+ }
+ if (clear) {
+ vector.clear();
+ }
+ }
+
+ return buffers.toArray(new ArrowBuf[buffers.size()]);
+ }
+
+ @Override
+ public int getBufferSize() {
+ int actualBufSize = 0;
+
+ for (final ValueVector v : vectors.values()) {
+ for (final ArrowBuf buf : v.getBuffers(false)) {
+ actualBufSize += buf.writerIndex();
+ }
+ }
+ return actualBufSize;
+ }
+
+ @Override
+ public String toString() {
+ return ValueVectorUtility.getToString(this, 0 , getValueCount());
+ }
+
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseListVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseListVector.java
new file mode 100644
index 000000000..5f547b901
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseListVector.java
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.complex;
+
+import org.apache.arrow.vector.FieldVector;
+
+/**
+ * Abstraction for all list type vectors.
+ */
+public interface BaseListVector extends FieldVector {
+
+ /**
+ * Get data vector start index with the given list index.
+ */
+ int getElementStartIndex(int index);
+
+ /**
+ * Get data vector end index with the given list index.
+ */
+ int getElementEndIndex(int index);
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueVector.java
new file mode 100644
index 000000000..62d4a1299
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueVector.java
@@ -0,0 +1,367 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.complex;
+
+import static org.apache.arrow.memory.util.LargeMemoryUtil.capAtMaxInt;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.List;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.util.CommonUtil;
+import org.apache.arrow.util.Preconditions;
+import org.apache.arrow.vector.AddOrGetResult;
+import org.apache.arrow.vector.BaseFixedWidthVector;
+import org.apache.arrow.vector.BaseValueVector;
+import org.apache.arrow.vector.BaseVariableWidthVector;
+import org.apache.arrow.vector.DensityAwareVector;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.NullVector;
+import org.apache.arrow.vector.UInt4Vector;
+import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.ZeroVector;
+import org.apache.arrow.vector.types.pojo.ArrowType.ArrowTypeID;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.CallBack;
+import org.apache.arrow.vector.util.OversizedAllocationException;
+import org.apache.arrow.vector.util.SchemaChangeRuntimeException;
+
+/** Base class for Vectors that contain repeated values. */
+public abstract class BaseRepeatedValueVector extends BaseValueVector implements RepeatedValueVector, BaseListVector {
+
+ public static final FieldVector DEFAULT_DATA_VECTOR = ZeroVector.INSTANCE;
+ public static final String DATA_VECTOR_NAME = "$data$";
+
+ public static final byte OFFSET_WIDTH = 4;
+ protected ArrowBuf offsetBuffer;
+ protected FieldVector vector;
+ protected final CallBack callBack;
+ protected int valueCount;
+ protected long offsetAllocationSizeInBytes = INITIAL_VALUE_ALLOCATION * OFFSET_WIDTH;
+ private final String name;
+
+ protected String defaultDataVectorName = DATA_VECTOR_NAME;
+
+ protected BaseRepeatedValueVector(String name, BufferAllocator allocator, CallBack callBack) {
+ this(name, allocator, DEFAULT_DATA_VECTOR, callBack);
+ }
+
+ protected BaseRepeatedValueVector(String name, BufferAllocator allocator, FieldVector vector, CallBack callBack) {
+ super(allocator);
+ this.name = name;
+ this.offsetBuffer = allocator.getEmpty();
+ this.vector = Preconditions.checkNotNull(vector, "data vector cannot be null");
+ this.callBack = callBack;
+ this.valueCount = 0;
+ }
+
+ @Override
+ public String getName() {
+ return name;
+ }
+
+ @Override
+ public boolean allocateNewSafe() {
+ boolean dataAlloc = false;
+ try {
+ allocateOffsetBuffer(offsetAllocationSizeInBytes);
+ dataAlloc = vector.allocateNewSafe();
+ } catch (Exception e) {
+ e.printStackTrace();
+ clear();
+ return false;
+ } finally {
+ if (!dataAlloc) {
+ clear();
+ }
+ }
+ return dataAlloc;
+ }
+
+ protected void allocateOffsetBuffer(final long size) {
+ final int curSize = (int) size;
+ offsetBuffer = allocator.buffer(curSize);
+ offsetBuffer.readerIndex(0);
+ offsetAllocationSizeInBytes = curSize;
+ offsetBuffer.setZero(0, offsetBuffer.capacity());
+ }
+
+ @Override
+ public void reAlloc() {
+ reallocOffsetBuffer();
+ vector.reAlloc();
+ }
+
+ protected void reallocOffsetBuffer() {
+ final long currentBufferCapacity = offsetBuffer.capacity();
+ long newAllocationSize = currentBufferCapacity * 2;
+ if (newAllocationSize == 0) {
+ if (offsetAllocationSizeInBytes > 0) {
+ newAllocationSize = offsetAllocationSizeInBytes;
+ } else {
+ newAllocationSize = INITIAL_VALUE_ALLOCATION * OFFSET_WIDTH * 2;
+ }
+ }
+
+ newAllocationSize = CommonUtil.nextPowerOfTwo(newAllocationSize);
+ newAllocationSize = Math.min(newAllocationSize, (long) (OFFSET_WIDTH) * Integer.MAX_VALUE);
+ assert newAllocationSize >= 1;
+
+ if (newAllocationSize > MAX_ALLOCATION_SIZE || newAllocationSize <= offsetBuffer.capacity()) {
+ throw new OversizedAllocationException("Unable to expand the buffer");
+ }
+
+ final ArrowBuf newBuf = allocator.buffer(newAllocationSize);
+ newBuf.setBytes(0, offsetBuffer, 0, currentBufferCapacity);
+ newBuf.setZero(currentBufferCapacity, newBuf.capacity() - currentBufferCapacity);
+ offsetBuffer.getReferenceManager().release(1);
+ offsetBuffer = newBuf;
+ offsetAllocationSizeInBytes = newAllocationSize;
+ }
+
+ /**
+ * Get the offset vector.
+ * @deprecated This API will be removed, as the current implementations no longer hold inner offset vectors.
+ *
+ * @return the underlying offset vector or null if none exists.
+ */
+ @Override
+ @Deprecated
+ public UInt4Vector getOffsetVector() {
+ throw new UnsupportedOperationException("There is no inner offset vector");
+ }
+
+ @Override
+ public FieldVector getDataVector() {
+ return vector;
+ }
+
+ @Override
+ public void setInitialCapacity(int numRecords) {
+ offsetAllocationSizeInBytes = (numRecords + 1) * OFFSET_WIDTH;
+ if (vector instanceof BaseFixedWidthVector || vector instanceof BaseVariableWidthVector) {
+ vector.setInitialCapacity(numRecords * RepeatedValueVector.DEFAULT_REPEAT_PER_RECORD);
+ } else {
+ vector.setInitialCapacity(numRecords);
+ }
+ }
+
+ /**
+ * Specialized version of setInitialCapacity() for ListVector. This is
+ * used by some callers when they want to explicitly control and be
+ * conservative about memory allocated for inner data vector. This is
+ * very useful when we are working with memory constraints for a query
+ * and have a fixed amount of memory reserved for the record batch. In
+ * such cases, we are likely to face OOM or related problems when
+ * we reserve memory for a record batch with value count x and
+ * do setInitialCapacity(x) such that each vector allocates only
+ * what is necessary and not the default amount but the multiplier
+ * forces the memory requirement to go beyond what was needed.
+ *
+ * @param numRecords value count
+ * @param density density of ListVector. Density is the average size of
+ * list per position in the List vector. For example, a
+ * density value of 10 implies each position in the list
+ * vector has a list of 10 values.
+ * A density value of 0.1 implies out of 10 positions in
+ * the list vector, 1 position has a list of size 1 and
+ * remaining positions are null (no lists) or empty lists.
+ * This helps in tightly controlling the memory we provision
+ * for inner data vector.
+ */
+ @Override
+ public void setInitialCapacity(int numRecords, double density) {
+ if ((numRecords * density) >= Integer.MAX_VALUE) {
+ throw new OversizedAllocationException("Requested amount of memory is more than max allowed");
+ }
+
+ offsetAllocationSizeInBytes = (numRecords + 1) * OFFSET_WIDTH;
+
+ int innerValueCapacity = Math.max((int) (numRecords * density), 1);
+
+ if (vector instanceof DensityAwareVector) {
+ ((DensityAwareVector) vector).setInitialCapacity(innerValueCapacity, density);
+ } else {
+ vector.setInitialCapacity(innerValueCapacity);
+ }
+ }
+
+ @Override
+ public int getValueCapacity() {
+ final int offsetValueCapacity = Math.max(getOffsetBufferValueCapacity() - 1, 0);
+ if (vector == DEFAULT_DATA_VECTOR) {
+ return offsetValueCapacity;
+ }
+ return Math.min(vector.getValueCapacity(), offsetValueCapacity);
+ }
+
+ protected int getOffsetBufferValueCapacity() {
+ return capAtMaxInt(offsetBuffer.capacity() / OFFSET_WIDTH);
+ }
+
+ @Override
+ public int getBufferSize() {
+ if (valueCount == 0) {
+ return 0;
+ }
+ return ((valueCount + 1) * OFFSET_WIDTH) + vector.getBufferSize();
+ }
+
+ @Override
+ public int getBufferSizeFor(int valueCount) {
+ if (valueCount == 0) {
+ return 0;
+ }
+
+ int innerVectorValueCount = offsetBuffer.getInt(valueCount * OFFSET_WIDTH);
+
+ return ((valueCount + 1) * OFFSET_WIDTH) + vector.getBufferSizeFor(innerVectorValueCount);
+ }
+
+ @Override
+ public Iterator<ValueVector> iterator() {
+ return Collections.<ValueVector>singleton(getDataVector()).iterator();
+ }
+
+ @Override
+ public void clear() {
+ offsetBuffer = releaseBuffer(offsetBuffer);
+ vector.clear();
+ valueCount = 0;
+ super.clear();
+ }
+
+ @Override
+ public void reset() {
+ offsetBuffer.setZero(0, offsetBuffer.capacity());
+ vector.reset();
+ valueCount = 0;
+ }
+
+ @Override
+ public ArrowBuf[] getBuffers(boolean clear) {
+ final ArrowBuf[] buffers;
+ if (getBufferSize() == 0) {
+ buffers = new ArrowBuf[0];
+ } else {
+ List<ArrowBuf> list = new ArrayList<>();
+ list.add(offsetBuffer);
+ list.addAll(Arrays.asList(vector.getBuffers(false)));
+ buffers = list.toArray(new ArrowBuf[list.size()]);
+ }
+ if (clear) {
+ for (ArrowBuf buffer : buffers) {
+ buffer.getReferenceManager().retain();
+ }
+ clear();
+ }
+ return buffers;
+ }
+
+ /**
+ * Get value indicating if inner vector is set.
+ * @return 1 if inner vector is explicitly set via #addOrGetVector else 0
+ */
+ public int size() {
+ return vector == DEFAULT_DATA_VECTOR ? 0 : 1;
+ }
+
+ /**
+ * Initialize the data vector (and execute callback) if it hasn't already been done,
+ * returns the data vector.
+ */
+ public <T extends ValueVector> AddOrGetResult<T> addOrGetVector(FieldType fieldType) {
+ boolean created = false;
+ if (vector instanceof NullVector) {
+ vector = fieldType.createNewSingleVector(defaultDataVectorName, allocator, callBack);
+ // returned vector must have the same field
+ created = true;
+ if (callBack != null &&
+ // not a schema change if changing from ZeroVector to ZeroVector
+ (fieldType.getType().getTypeID() != ArrowTypeID.Null)) {
+ callBack.doWork();
+ }
+ }
+
+ if (vector.getField().getType().getTypeID() != fieldType.getType().getTypeID()) {
+ final String msg = String.format("Inner vector type mismatch. Requested type: [%s], actual type: [%s]",
+ fieldType.getType().getTypeID(), vector.getField().getType().getTypeID());
+ throw new SchemaChangeRuntimeException(msg);
+ }
+
+ return new AddOrGetResult<>((T) vector, created);
+ }
+
+ protected void replaceDataVector(FieldVector v) {
+ vector.clear();
+ vector = v;
+ }
+
+ @Override
+ public int getValueCount() {
+ return valueCount;
+ }
+
+ /* returns the value count for inner data vector for this list vector */
+ public int getInnerValueCount() {
+ return vector.getValueCount();
+ }
+
+
+ /** Returns the value count for inner data vector at a particular index. */
+ public int getInnerValueCountAt(int index) {
+ return offsetBuffer.getInt((index + 1) * OFFSET_WIDTH) -
+ offsetBuffer.getInt(index * OFFSET_WIDTH);
+ }
+
+ /** Return if value at index is null (this implementation is always false). */
+ public boolean isNull(int index) {
+ return false;
+ }
+
+ /** Return if value at index is empty (this implementation is always false). */
+ public boolean isEmpty(int index) {
+ return false;
+ }
+
+ /** Starts a new repeated value. */
+ public int startNewValue(int index) {
+ while (index >= getOffsetBufferValueCapacity()) {
+ reallocOffsetBuffer();
+ }
+ int offset = offsetBuffer.getInt(index * OFFSET_WIDTH);
+ offsetBuffer.setInt((index + 1) * OFFSET_WIDTH, offset);
+ setValueCount(index + 1);
+ return offset;
+ }
+
+ /** Preallocates the number of repeated values. */
+ public void setValueCount(int valueCount) {
+ this.valueCount = valueCount;
+ while (valueCount > getOffsetBufferValueCapacity()) {
+ reallocOffsetBuffer();
+ }
+ final int childValueCount = valueCount == 0 ? 0 :
+ offsetBuffer.getInt(valueCount * OFFSET_WIDTH);
+ vector.setValueCount(childValueCount);
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/EmptyValuePopulator.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/EmptyValuePopulator.java
new file mode 100644
index 000000000..b32dce367
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/EmptyValuePopulator.java
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.complex;
+
+import org.apache.arrow.util.Preconditions;
+import org.apache.arrow.vector.UInt4Vector;
+
+/**
+ * A helper class that is used to track and populate empty values in repeated value vectors.
+ */
+public class EmptyValuePopulator {
+ private final UInt4Vector offsets;
+
+ public EmptyValuePopulator(UInt4Vector offsets) {
+ this.offsets = Preconditions.checkNotNull(offsets, "offsets cannot be null");
+ }
+
+ /**
+ * Marks all values since the last set as empty. The last set value is obtained from underlying offsets vector.
+ *
+ * @param lastIndex the last index (inclusive) in the offsets vector until which empty population takes place
+ * @throws java.lang.IndexOutOfBoundsException if lastIndex is negative or greater than offsets capacity.
+ */
+ public void populate(int lastIndex) {
+ if (lastIndex < 0) {
+ throw new IndexOutOfBoundsException("index cannot be negative");
+ }
+ final int lastSet = Math.max(offsets.getValueCount() - 1, 0);
+ final int previousEnd = offsets.get(lastSet); //0 ? 0 : accessor.get(lastSet);
+ for (int i = lastSet; i < lastIndex; i++) {
+ offsets.setSafe(i + 1, previousEnd);
+ }
+ offsets.setValueCount(lastIndex + 1);
+ }
+
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/FixedSizeListVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/FixedSizeListVector.java
new file mode 100644
index 000000000..8d23f55fb
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/FixedSizeListVector.java
@@ -0,0 +1,675 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.complex;
+
+import static java.util.Collections.singletonList;
+import static org.apache.arrow.memory.util.LargeMemoryUtil.capAtMaxInt;
+import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt;
+import static org.apache.arrow.vector.complex.BaseRepeatedValueVector.DATA_VECTOR_NAME;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Objects;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.OutOfMemoryException;
+import org.apache.arrow.memory.util.ArrowBufPointer;
+import org.apache.arrow.memory.util.ByteFunctionHelpers;
+import org.apache.arrow.memory.util.CommonUtil;
+import org.apache.arrow.memory.util.hash.ArrowBufHasher;
+import org.apache.arrow.util.Preconditions;
+import org.apache.arrow.vector.AddOrGetResult;
+import org.apache.arrow.vector.BaseValueVector;
+import org.apache.arrow.vector.BitVectorHelper;
+import org.apache.arrow.vector.BufferBacked;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.ZeroVector;
+import org.apache.arrow.vector.compare.VectorVisitor;
+import org.apache.arrow.vector.complex.impl.UnionFixedSizeListReader;
+import org.apache.arrow.vector.complex.impl.UnionFixedSizeListWriter;
+import org.apache.arrow.vector.ipc.message.ArrowFieldNode;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.CallBack;
+import org.apache.arrow.vector.util.JsonStringArrayList;
+import org.apache.arrow.vector.util.OversizedAllocationException;
+import org.apache.arrow.vector.util.SchemaChangeRuntimeException;
+import org.apache.arrow.vector.util.TransferPair;
+
+/** A ListVector where every list value is of the same size. */
+public class FixedSizeListVector extends BaseValueVector implements BaseListVector, PromotableVector {
+
+ public static FixedSizeListVector empty(String name, int size, BufferAllocator allocator) {
+ FieldType fieldType = FieldType.nullable(new ArrowType.FixedSizeList(size));
+ return new FixedSizeListVector(name, allocator, fieldType, null);
+ }
+
+ private FieldVector vector;
+ private ArrowBuf validityBuffer;
+ private final int listSize;
+ private final FieldType fieldType;
+ private final String name;
+
+ private UnionFixedSizeListReader reader;
+ private int valueCount;
+ private int validityAllocationSizeInBytes;
+
+ /**
+ * Creates a new instance.
+ *
+ * @param name The name for the vector.
+ * @param allocator The allocator to use for creating/reallocating buffers for the vector.
+ * @param fieldType The underlying data type of the vector.
+ * @param unusedSchemaChangeCallback Currently unused.
+ */
+ public FixedSizeListVector(String name,
+ BufferAllocator allocator,
+ FieldType fieldType,
+ CallBack unusedSchemaChangeCallback) {
+ super(allocator);
+
+ this.name = name;
+ this.validityBuffer = allocator.getEmpty();
+ this.vector = ZeroVector.INSTANCE;
+ this.fieldType = fieldType;
+ this.listSize = ((ArrowType.FixedSizeList) fieldType.getType()).getListSize();
+ Preconditions.checkArgument(listSize >= 0, "list size must be non-negative");
+ this.valueCount = 0;
+ this.validityAllocationSizeInBytes = getValidityBufferSizeFromCount(INITIAL_VALUE_ALLOCATION);
+ }
+
+ @Override
+ public Field getField() {
+ List<Field> children = Collections.singletonList(getDataVector().getField());
+ return new Field(name, fieldType, children);
+ }
+
+ @Override
+ public MinorType getMinorType() {
+ return MinorType.FIXED_SIZE_LIST;
+ }
+
+ @Override
+ public String getName() {
+ return name;
+ }
+
+ /** Get the fixed size for each list. */
+ public int getListSize() {
+ return listSize;
+ }
+
+ @Override
+ public void initializeChildrenFromFields(List<Field> children) {
+ if (children.size() != 1) {
+ throw new IllegalArgumentException("Lists have only one child. Found: " + children);
+ }
+ Field field = children.get(0);
+ AddOrGetResult<FieldVector> addOrGetVector = addOrGetVector(field.getFieldType());
+ if (!addOrGetVector.isCreated()) {
+ throw new IllegalArgumentException("Child vector already existed: " + addOrGetVector.getVector());
+ }
+ addOrGetVector.getVector().initializeChildrenFromFields(field.getChildren());
+ }
+
+ @Override
+ public List<FieldVector> getChildrenFromFields() {
+ return singletonList(vector);
+ }
+
+ @Override
+ public void loadFieldBuffers(ArrowFieldNode fieldNode, List<ArrowBuf> ownBuffers) {
+ if (ownBuffers.size() != 1) {
+ throw new IllegalArgumentException("Illegal buffer count, expected " + 1 + ", got: " + ownBuffers.size());
+ }
+
+ ArrowBuf bitBuffer = ownBuffers.get(0);
+
+ validityBuffer.getReferenceManager().release();
+ validityBuffer = BitVectorHelper.loadValidityBuffer(fieldNode, bitBuffer, allocator);
+ valueCount = fieldNode.getLength();
+
+ validityAllocationSizeInBytes = checkedCastToInt(validityBuffer.capacity());
+ }
+
+ @Override
+ public List<ArrowBuf> getFieldBuffers() {
+ List<ArrowBuf> result = new ArrayList<>(1);
+ setReaderAndWriterIndex();
+ result.add(validityBuffer);
+
+ return result;
+ }
+
+ private void setReaderAndWriterIndex() {
+ validityBuffer.readerIndex(0);
+ validityBuffer.writerIndex(getValidityBufferSizeFromCount(valueCount));
+ }
+
+ /**
+ * Get the inner vectors.
+ *
+ * @deprecated This API will be removed as the current implementations no longer support inner vectors.
+ *
+ * @return the inner vectors for this field as defined by the TypeLayout
+ */
+ @Deprecated
+ @Override
+ public List<BufferBacked> getFieldInnerVectors() {
+ throw new UnsupportedOperationException("There are no inner vectors. Use getFieldBuffers");
+ }
+
+ @Override
+ public UnionFixedSizeListReader getReader() {
+ if (reader == null) {
+ reader = new UnionFixedSizeListReader(this);
+ }
+ return reader;
+ }
+
+ private void invalidateReader() {
+ reader = null;
+ }
+
+ @Override
+ public void allocateNew() throws OutOfMemoryException {
+ if (!allocateNewSafe()) {
+ throw new OutOfMemoryException("Failure while allocating memory");
+ }
+ }
+
+ @Override
+ public boolean allocateNewSafe() {
+ /* boolean to keep track if all the memory allocation were successful
+ * Used in the case of composite vectors when we need to allocate multiple
+ * buffers for multiple vectors. If one of the allocations failed we need to
+ * clear all the memory that we allocated
+ */
+ boolean success = false;
+ try {
+ /* we are doing a new allocation -- release the current buffers */
+ clear();
+ /* allocate validity buffer */
+ allocateValidityBuffer(validityAllocationSizeInBytes);
+ success = vector.allocateNewSafe();
+ } finally {
+ if (!success) {
+ clear();
+ return false;
+ }
+ }
+
+ return true;
+ }
+
+ private void allocateValidityBuffer(final long size) {
+ final int curSize = (int) size;
+ validityBuffer = allocator.buffer(curSize);
+ validityBuffer.readerIndex(0);
+ validityAllocationSizeInBytes = curSize;
+ validityBuffer.setZero(0, validityBuffer.capacity());
+ }
+
+ @Override
+ public void reAlloc() {
+ reallocValidityBuffer();
+ vector.reAlloc();
+ }
+
+ private void reallocValidityBuffer() {
+ final int currentBufferCapacity = checkedCastToInt(validityBuffer.capacity());
+ long newAllocationSize = currentBufferCapacity * 2;
+ if (newAllocationSize == 0) {
+ if (validityAllocationSizeInBytes > 0) {
+ newAllocationSize = validityAllocationSizeInBytes;
+ } else {
+ newAllocationSize = getValidityBufferSizeFromCount(INITIAL_VALUE_ALLOCATION) * 2;
+ }
+ }
+
+ newAllocationSize = CommonUtil.nextPowerOfTwo(newAllocationSize);
+ assert newAllocationSize >= 1;
+
+ if (newAllocationSize > MAX_ALLOCATION_SIZE) {
+ throw new OversizedAllocationException("Unable to expand the buffer");
+ }
+
+ final ArrowBuf newBuf = allocator.buffer((int) newAllocationSize);
+ newBuf.setBytes(0, validityBuffer, 0, currentBufferCapacity);
+ newBuf.setZero(currentBufferCapacity, newBuf.capacity() - currentBufferCapacity);
+ validityBuffer.getReferenceManager().release(1);
+ validityBuffer = newBuf;
+ validityAllocationSizeInBytes = (int) newAllocationSize;
+ }
+
+ public FieldVector getDataVector() {
+ return vector;
+ }
+
+ /**
+ * Start a new value in the list vector.
+ *
+ * @param index index of the value to start
+ */
+ public int startNewValue(int index) {
+ while (index >= getValidityBufferValueCapacity()) {
+ reallocValidityBuffer();
+ }
+
+ BitVectorHelper.setBit(validityBuffer, index);
+ return index * listSize;
+ }
+
+ public UnionFixedSizeListWriter getWriter() {
+ return new UnionFixedSizeListWriter(this);
+ }
+
+ @Override
+ public void setInitialCapacity(int numRecords) {
+ validityAllocationSizeInBytes = getValidityBufferSizeFromCount(numRecords);
+ vector.setInitialCapacity(numRecords * listSize);
+ }
+
+ @Override
+ public int getValueCapacity() {
+ if (vector == ZeroVector.INSTANCE || listSize == 0) {
+ return 0;
+ }
+ return Math.min(vector.getValueCapacity() / listSize, getValidityBufferValueCapacity());
+ }
+
+ @Override
+ public int getBufferSize() {
+ if (getValueCount() == 0) {
+ return 0;
+ }
+ return getValidityBufferSizeFromCount(valueCount) + vector.getBufferSize();
+ }
+
+ @Override
+ public int getBufferSizeFor(int valueCount) {
+ if (valueCount == 0) {
+ return 0;
+ }
+ return getValidityBufferSizeFromCount(valueCount) +
+ vector.getBufferSizeFor(valueCount * listSize);
+ }
+
+ @Override
+ public Iterator<ValueVector> iterator() {
+ return Collections.<ValueVector>singleton(vector).iterator();
+ }
+
+ @Override
+ public void clear() {
+ validityBuffer = releaseBuffer(validityBuffer);
+ vector.clear();
+ valueCount = 0;
+ super.clear();
+ }
+
+ @Override
+ public void reset() {
+ validityBuffer.setZero(0, validityBuffer.capacity());
+ vector.reset();
+ valueCount = 0;
+ }
+
+ @Override
+ public ArrowBuf[] getBuffers(boolean clear) {
+ setReaderAndWriterIndex();
+ final ArrowBuf[] buffers;
+ if (getBufferSize() == 0) {
+ buffers = new ArrowBuf[0];
+ } else {
+ List<ArrowBuf> list = new ArrayList<>();
+ list.add(validityBuffer);
+ list.addAll(Arrays.asList(vector.getBuffers(false)));
+ buffers = list.toArray(new ArrowBuf[list.size()]);
+ }
+ if (clear) {
+ for (ArrowBuf buffer : buffers) {
+ buffer.getReferenceManager().retain();
+ }
+ clear();
+ }
+ return buffers;
+ }
+
+ /**
+ * Get value indicating if inner vector is set.
+ * @return 1 if inner vector is explicitly set via #addOrGetVector else 0
+ */
+ public int size() {
+ return vector == ZeroVector.INSTANCE ? 0 : 1;
+ }
+
+ @Override
+ @SuppressWarnings("unchecked")
+ public <T extends ValueVector> AddOrGetResult<T> addOrGetVector(FieldType type) {
+ boolean created = false;
+ if (vector == ZeroVector.INSTANCE) {
+ vector = type.createNewSingleVector(DATA_VECTOR_NAME, allocator, null);
+ invalidateReader();
+ created = true;
+ }
+ // returned vector must have the same field
+ if (!Objects.equals(vector.getField().getType(), type.getType())) {
+ final String msg = String.format("Inner vector type mismatch. Requested type: [%s], actual type: [%s]",
+ type.getType(), vector.getField().getType());
+ throw new SchemaChangeRuntimeException(msg);
+ }
+
+ return new AddOrGetResult<>((T) vector, created);
+ }
+
+ @Override
+ public void copyFromSafe(int inIndex, int outIndex, ValueVector from) {
+ copyFrom(inIndex, outIndex, from);
+ }
+
+ @Override
+ public void copyFrom(int fromIndex, int thisIndex, ValueVector from) {
+ Preconditions.checkArgument(this.getMinorType() == from.getMinorType());
+ TransferPair pair = from.makeTransferPair(this);
+ pair.copyValueSafe(fromIndex, thisIndex);
+ }
+
+ @Override
+ public UnionVector promoteToUnion() {
+ UnionVector vector = new UnionVector(name, allocator, /* field type */ null, /* call-back */ null);
+ this.vector.clear();
+ this.vector = vector;
+ invalidateReader();
+ return vector;
+ }
+
+ @Override
+ public long getValidityBufferAddress() {
+ return validityBuffer.memoryAddress();
+ }
+
+ @Override
+ public long getDataBufferAddress() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public long getOffsetBufferAddress() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public ArrowBuf getValidityBuffer() {
+ return validityBuffer;
+ }
+
+ @Override
+ public ArrowBuf getDataBuffer() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public ArrowBuf getOffsetBuffer() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public List<?> getObject(int index) {
+ if (isSet(index) == 0) {
+ return null;
+ }
+ final List<Object> vals = new JsonStringArrayList<>(listSize);
+ for (int i = 0; i < listSize; i++) {
+ vals.add(vector.getObject(index * listSize + i));
+ }
+ return vals;
+ }
+
+ /**
+ * Returns whether the value at index null.
+ */
+ public boolean isNull(int index) {
+ return (isSet(index) == 0);
+ }
+
+ /**
+ * Returns non-zero when the value at index is non-null.
+ */
+ public int isSet(int index) {
+ final int byteIndex = index >> 3;
+ final byte b = validityBuffer.getByte(byteIndex);
+ final int bitIndex = index & 7;
+ return (b >> bitIndex) & 0x01;
+ }
+
+ @Override
+ public int getNullCount() {
+ return BitVectorHelper.getNullCount(validityBuffer, valueCount);
+ }
+
+ @Override
+ public int getValueCount() {
+ return valueCount;
+ }
+
+ /**
+ * Returns the number of elements the validity buffer can represent with its
+ * current capacity.
+ */
+ private int getValidityBufferValueCapacity() {
+ return capAtMaxInt(validityBuffer.capacity() * 8);
+ }
+
+ /**
+ * Sets the value at index to null. Reallocates if index is larger than capacity.
+ */
+ public void setNull(int index) {
+ while (index >= getValidityBufferValueCapacity()) {
+ reallocValidityBuffer();
+ }
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+
+ /** Sets the value at index to not-null. Reallocates if index is larger than capacity. */
+ public void setNotNull(int index) {
+ while (index >= getValidityBufferValueCapacity()) {
+ reallocValidityBuffer();
+ }
+ BitVectorHelper.setBit(validityBuffer, index);
+ }
+
+ @Override
+ public void setValueCount(int valueCount) {
+ this.valueCount = valueCount;
+ while (valueCount > getValidityBufferValueCapacity()) {
+ reallocValidityBuffer();
+ }
+ vector.setValueCount(valueCount * listSize);
+ }
+
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
+ return getTransferPair(ref, allocator, null);
+ }
+
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator, CallBack callBack) {
+ return new TransferImpl(ref, allocator, callBack);
+ }
+
+ @Override
+ public TransferPair makeTransferPair(ValueVector target) {
+ return new TransferImpl((FixedSizeListVector) target);
+ }
+
+ @Override
+ public int hashCode(int index) {
+ return hashCode(index, null);
+ }
+
+ @Override
+ public int hashCode(int index, ArrowBufHasher hasher) {
+ if (isSet(index) == 0) {
+ return ArrowBufPointer.NULL_HASH_CODE;
+ }
+ int hash = 0;
+ for (int i = 0; i < listSize; i++) {
+ hash = ByteFunctionHelpers.combineHash(hash, vector.hashCode(index * listSize + i, hasher));
+ }
+ return hash;
+ }
+
+ @Override
+ public <OUT, IN> OUT accept(VectorVisitor<OUT, IN> visitor, IN value) {
+ return visitor.visit(this, value);
+ }
+
+ @Override
+ public int getElementStartIndex(int index) {
+ return listSize * index;
+ }
+
+ @Override
+ public int getElementEndIndex(int index) {
+ return listSize * (index + 1);
+ }
+
+ private class TransferImpl implements TransferPair {
+
+ FixedSizeListVector to;
+ TransferPair dataPair;
+
+ public TransferImpl(String name, BufferAllocator allocator, CallBack callBack) {
+ this(new FixedSizeListVector(name, allocator, fieldType, callBack));
+ }
+
+ public TransferImpl(FixedSizeListVector to) {
+ this.to = to;
+ to.addOrGetVector(vector.getField().getFieldType());
+ dataPair = vector.makeTransferPair(to.vector);
+ }
+
+ @Override
+ public void transfer() {
+ to.clear();
+ dataPair.transfer();
+ to.validityBuffer = BaseValueVector.transferBuffer(validityBuffer, to.allocator);
+ to.setValueCount(valueCount);
+ clear();
+ }
+
+ @Override
+ public void splitAndTransfer(int startIndex, int length) {
+ Preconditions.checkArgument(startIndex >= 0 && length >= 0 && startIndex + length <= valueCount,
+ "Invalid parameters startIndex: %s, length: %s for valueCount: %s", startIndex, length, valueCount);
+ final int startPoint = listSize * startIndex;
+ final int sliceLength = listSize * length;
+ to.clear();
+
+ /* splitAndTransfer validity buffer */
+ splitAndTransferValidityBuffer(startIndex, length, to);
+ /* splitAndTransfer data buffer */
+ dataPair.splitAndTransfer(startPoint, sliceLength);
+ to.setValueCount(length);
+ }
+
+ /*
+ * transfer the validity.
+ */
+ private void splitAndTransferValidityBuffer(int startIndex, int length, FixedSizeListVector target) {
+ int firstByteSource = BitVectorHelper.byteIndex(startIndex);
+ int lastByteSource = BitVectorHelper.byteIndex(valueCount - 1);
+ int byteSizeTarget = getValidityBufferSizeFromCount(length);
+ int offset = startIndex % 8;
+
+ if (length > 0) {
+ if (offset == 0) {
+ // slice
+ if (target.validityBuffer != null) {
+ target.validityBuffer.getReferenceManager().release();
+ }
+ target.validityBuffer = validityBuffer.slice(firstByteSource, byteSizeTarget);
+ target.validityBuffer.getReferenceManager().retain(1);
+ } else {
+ /* Copy data
+ * When the first bit starts from the middle of a byte (offset != 0),
+ * copy data from src BitVector.
+ * Each byte in the target is composed by a part in i-th byte,
+ * another part in (i+1)-th byte.
+ */
+ target.allocateValidityBuffer(byteSizeTarget);
+
+ for (int i = 0; i < byteSizeTarget - 1; i++) {
+ byte b1 = BitVectorHelper.getBitsFromCurrentByte(validityBuffer, firstByteSource + i, offset);
+ byte b2 = BitVectorHelper.getBitsFromNextByte(validityBuffer, firstByteSource + i + 1, offset);
+
+ target.validityBuffer.setByte(i, (b1 + b2));
+ }
+
+ /* Copying the last piece is done in the following manner:
+ * if the source vector has 1 or more bytes remaining, we copy
+ * the last piece as a byte formed by shifting data
+ * from the current byte and the next byte.
+ *
+ * if the source vector has no more bytes remaining
+ * (we are at the last byte), we copy the last piece as a byte
+ * by shifting data from the current byte.
+ */
+ if ((firstByteSource + byteSizeTarget - 1) < lastByteSource) {
+ byte b1 = BitVectorHelper.getBitsFromCurrentByte(validityBuffer,
+ firstByteSource + byteSizeTarget - 1, offset);
+ byte b2 = BitVectorHelper.getBitsFromNextByte(validityBuffer,
+ firstByteSource + byteSizeTarget, offset);
+
+ target.validityBuffer.setByte(byteSizeTarget - 1, b1 + b2);
+ } else {
+ byte b1 = BitVectorHelper.getBitsFromCurrentByte(validityBuffer,
+ firstByteSource + byteSizeTarget - 1, offset);
+ target.validityBuffer.setByte(byteSizeTarget - 1, b1);
+ }
+ }
+ }
+ }
+
+ @Override
+ public ValueVector getTo() {
+ return to;
+ }
+
+ @Override
+ public void copyValueSafe(int fromIndex, int toIndex) {
+ while (toIndex >= to.getValueCapacity()) {
+ to.reAlloc();
+ }
+ BitVectorHelper.setValidityBit(to.validityBuffer, toIndex, isSet(fromIndex));
+ int fromOffset = fromIndex * listSize;
+ int toOffset = toIndex * listSize;
+ for (int i = 0; i < listSize; i++) {
+ dataPair.copyValueSafe(fromOffset + i, toOffset + i);
+ }
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java
new file mode 100644
index 000000000..6fbdda277
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java
@@ -0,0 +1,1036 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.complex;
+
+import static java.util.Collections.singletonList;
+import static org.apache.arrow.memory.util.LargeMemoryUtil.capAtMaxInt;
+import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt;
+import static org.apache.arrow.util.Preconditions.checkNotNull;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.OutOfMemoryException;
+import org.apache.arrow.memory.util.ArrowBufPointer;
+import org.apache.arrow.memory.util.ByteFunctionHelpers;
+import org.apache.arrow.memory.util.CommonUtil;
+import org.apache.arrow.memory.util.hash.ArrowBufHasher;
+import org.apache.arrow.util.Preconditions;
+import org.apache.arrow.vector.AddOrGetResult;
+import org.apache.arrow.vector.BaseFixedWidthVector;
+import org.apache.arrow.vector.BaseValueVector;
+import org.apache.arrow.vector.BaseVariableWidthVector;
+import org.apache.arrow.vector.BitVectorHelper;
+import org.apache.arrow.vector.BufferBacked;
+import org.apache.arrow.vector.DensityAwareVector;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.NullVector;
+import org.apache.arrow.vector.UInt4Vector;
+import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.ZeroVector;
+import org.apache.arrow.vector.compare.VectorVisitor;
+import org.apache.arrow.vector.complex.impl.ComplexCopier;
+import org.apache.arrow.vector.complex.impl.UnionLargeListReader;
+import org.apache.arrow.vector.complex.impl.UnionLargeListWriter;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.ipc.message.ArrowFieldNode;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.CallBack;
+import org.apache.arrow.vector.util.JsonStringArrayList;
+import org.apache.arrow.vector.util.OversizedAllocationException;
+import org.apache.arrow.vector.util.SchemaChangeRuntimeException;
+import org.apache.arrow.vector.util.TransferPair;
+
+/**
+ * A list vector contains lists of a specific type of elements. Its structure contains 3 elements.
+ * <ol>
+ * <li>A validity buffer.</li>
+ * <li> An offset buffer, that denotes lists boundaries. </li>
+ * <li> A child data vector that contains the elements of lists. </li>
+ * </ol>
+ *
+ * This is the LargeList variant of list, it has a 64-bit wide offset
+ *
+ * <p>
+ * WARNING: Currently Arrow in Java doesn't support 64-bit vectors. This class
+ * follows the expected behaviour of a LargeList but doesn't actually support allocating
+ * a 64-bit vector. It has little use until 64-bit vectors are supported and should be used
+ * with caution.
+ * todo review checkedCastToInt usage in this class.
+ * Once int64 indexed vectors are supported these checks aren't needed.
+ * </p>
+ */
+public class LargeListVector extends BaseValueVector implements RepeatedValueVector, FieldVector, PromotableVector {
+
+ public static LargeListVector empty(String name, BufferAllocator allocator) {
+ return new LargeListVector(name, allocator, FieldType.nullable(ArrowType.LargeList.INSTANCE), null);
+ }
+
+ public static final FieldVector DEFAULT_DATA_VECTOR = ZeroVector.INSTANCE;
+ public static final String DATA_VECTOR_NAME = "$data$";
+
+ public static final byte OFFSET_WIDTH = 8;
+ protected ArrowBuf offsetBuffer;
+ protected FieldVector vector;
+ protected final CallBack callBack;
+ protected int valueCount;
+ protected long offsetAllocationSizeInBytes = INITIAL_VALUE_ALLOCATION * OFFSET_WIDTH;
+ private final String name;
+
+ protected String defaultDataVectorName = DATA_VECTOR_NAME;
+ protected ArrowBuf validityBuffer;
+ protected UnionLargeListReader reader;
+ private final FieldType fieldType;
+ private int validityAllocationSizeInBytes;
+
+ /**
+ * The maximum index that is actually set.
+ */
+ private int lastSet;
+
+ /**
+ * Constructs a new instance.
+ *
+ * @param name The name of the instance.
+ * @param allocator The allocator to use for allocating/reallocating buffers.
+ * @param fieldType The type of this list.
+ * @param callBack A schema change callback.
+ */
+ public LargeListVector(String name, BufferAllocator allocator, FieldType fieldType, CallBack callBack) {
+ super(allocator);
+ this.name = name;
+ this.validityBuffer = allocator.getEmpty();
+ this.fieldType = checkNotNull(fieldType);
+ this.callBack = callBack;
+ this.validityAllocationSizeInBytes = getValidityBufferSizeFromCount(INITIAL_VALUE_ALLOCATION);
+ this.lastSet = -1;
+ this.offsetBuffer = allocator.getEmpty();
+ this.vector = vector == null ? DEFAULT_DATA_VECTOR : vector;
+ this.valueCount = 0;
+ }
+
+ @Override
+ public void initializeChildrenFromFields(List<Field> children) {
+ if (children.size() != 1) {
+ throw new IllegalArgumentException("Lists have only one child. Found: " + children);
+ }
+ Field field = children.get(0);
+ AddOrGetResult<FieldVector> addOrGetVector = addOrGetVector(field.getFieldType());
+ if (!addOrGetVector.isCreated()) {
+ throw new IllegalArgumentException("Child vector already existed: " + addOrGetVector.getVector());
+ }
+
+ addOrGetVector.getVector().initializeChildrenFromFields(field.getChildren());
+ }
+
+ @Override
+ public void setInitialCapacity(int numRecords) {
+ validityAllocationSizeInBytes = getValidityBufferSizeFromCount(numRecords);
+ offsetAllocationSizeInBytes = (long) (numRecords + 1) * OFFSET_WIDTH;
+ if (vector instanceof BaseFixedWidthVector || vector instanceof BaseVariableWidthVector) {
+ vector.setInitialCapacity(numRecords * RepeatedValueVector.DEFAULT_REPEAT_PER_RECORD);
+ } else {
+ vector.setInitialCapacity(numRecords);
+ }
+ }
+
+ /**
+ * Specialized version of setInitialCapacity() for ListVector. This is
+ * used by some callers when they want to explicitly control and be
+ * conservative about memory allocated for inner data vector. This is
+ * very useful when we are working with memory constraints for a query
+ * and have a fixed amount of memory reserved for the record batch. In
+ * such cases, we are likely to face OOM or related problems when
+ * we reserve memory for a record batch with value count x and
+ * do setInitialCapacity(x) such that each vector allocates only
+ * what is necessary and not the default amount but the multiplier
+ * forces the memory requirement to go beyond what was needed.
+ *
+ * @param numRecords value count
+ * @param density density of ListVector. Density is the average size of
+ * list per position in the List vector. For example, a
+ * density value of 10 implies each position in the list
+ * vector has a list of 10 values.
+ * A density value of 0.1 implies out of 10 positions in
+ * the list vector, 1 position has a list of size 1 and
+ * remaining positions are null (no lists) or empty lists.
+ * This helps in tightly controlling the memory we provision
+ * for inner data vector.
+ */
+ @Override
+ public void setInitialCapacity(int numRecords, double density) {
+ validityAllocationSizeInBytes = getValidityBufferSizeFromCount(numRecords);
+ if ((numRecords * density) >= Integer.MAX_VALUE) {
+ throw new OversizedAllocationException("Requested amount of memory is more than max allowed");
+ }
+
+ offsetAllocationSizeInBytes = (numRecords + 1) * OFFSET_WIDTH;
+
+ int innerValueCapacity = Math.max((int) (numRecords * density), 1);
+
+ if (vector instanceof DensityAwareVector) {
+ ((DensityAwareVector) vector).setInitialCapacity(innerValueCapacity, density);
+ } else {
+ vector.setInitialCapacity(innerValueCapacity);
+ }
+ }
+
+ /**
+ * Get the density of this ListVector.
+ * @return density
+ */
+ public double getDensity() {
+ if (valueCount == 0) {
+ return 0.0D;
+ }
+ final long startOffset = offsetBuffer.getLong(0L);
+ final long endOffset = offsetBuffer.getLong((long) valueCount * OFFSET_WIDTH);
+ final double totalListSize = endOffset - startOffset;
+ return totalListSize / valueCount;
+ }
+
+ @Override
+ public List<FieldVector> getChildrenFromFields() {
+ return singletonList(getDataVector());
+ }
+
+ /**
+ * Load the buffers of this vector with provided source buffers.
+ * The caller manages the source buffers and populates them before invoking
+ * this method.
+ * @param fieldNode the fieldNode indicating the value count
+ * @param ownBuffers the buffers for this Field (own buffers only, children not included)
+ */
+ @Override
+ public void loadFieldBuffers(ArrowFieldNode fieldNode, List<ArrowBuf> ownBuffers) {
+ if (ownBuffers.size() != 2) {
+ throw new IllegalArgumentException("Illegal buffer count, expected " + 2 + ", got: " + ownBuffers.size());
+ }
+
+ ArrowBuf bitBuffer = ownBuffers.get(0);
+ ArrowBuf offBuffer = ownBuffers.get(1);
+
+ validityBuffer.getReferenceManager().release();
+ validityBuffer = BitVectorHelper.loadValidityBuffer(fieldNode, bitBuffer, allocator);
+ offsetBuffer.getReferenceManager().release();
+ offsetBuffer = offBuffer.getReferenceManager().retain(offBuffer, allocator);
+
+ validityAllocationSizeInBytes = checkedCastToInt(validityBuffer.capacity());
+ offsetAllocationSizeInBytes = offsetBuffer.capacity();
+
+ lastSet = fieldNode.getLength() - 1;
+ valueCount = fieldNode.getLength();
+ }
+
+ /**
+ * Get the buffers belonging to this vector.
+ * @return the inner buffers.
+ */
+ @Override
+ public List<ArrowBuf> getFieldBuffers() {
+ List<ArrowBuf> result = new ArrayList<>(2);
+ setReaderAndWriterIndex();
+ result.add(validityBuffer);
+ result.add(offsetBuffer);
+
+ return result;
+ }
+
+ /**
+ * Set the reader and writer indexes for the inner buffers.
+ */
+ private void setReaderAndWriterIndex() {
+ validityBuffer.readerIndex(0);
+ offsetBuffer.readerIndex(0);
+ if (valueCount == 0) {
+ validityBuffer.writerIndex(0);
+ offsetBuffer.writerIndex(0);
+ } else {
+ validityBuffer.writerIndex(getValidityBufferSizeFromCount(valueCount));
+ offsetBuffer.writerIndex((valueCount + 1) * OFFSET_WIDTH);
+ }
+ }
+
+ /**
+ * Get the inner vectors.
+ *
+ * @deprecated This API will be removed as the current implementations no longer support inner vectors.
+ *
+ * @return the inner vectors for this field as defined by the TypeLayout
+ */
+ @Deprecated
+ @Override
+ public List<BufferBacked> getFieldInnerVectors() {
+ throw new UnsupportedOperationException("There are no inner vectors. Use getFieldBuffers");
+ }
+
+ /**
+ * Same as {@link #allocateNewSafe()}.
+ */
+ @Override
+ public void allocateNew() throws OutOfMemoryException {
+ if (!allocateNewSafe()) {
+ throw new OutOfMemoryException("Failure while allocating memory");
+ }
+ }
+
+ /**
+ * Allocate memory for the vector. We internally use a default value count
+ * of 4096 to allocate memory for at least these many elements in the
+ * vector.
+ *
+ * @return false if memory allocation fails, true otherwise.
+ */
+ public boolean allocateNewSafe() {
+ boolean success = false;
+ try {
+ /* we are doing a new allocation -- release the current buffers */
+ clear();
+ /* allocate validity buffer */
+ allocateValidityBuffer(validityAllocationSizeInBytes);
+ /* allocate offset and data buffer */
+ boolean dataAlloc = false;
+ try {
+ allocateOffsetBuffer(offsetAllocationSizeInBytes);
+ dataAlloc = vector.allocateNewSafe();
+ } catch (Exception e) {
+ e.printStackTrace();
+ clear();
+ return false;
+ } finally {
+ if (!dataAlloc) {
+ clear();
+ }
+ }
+ success = dataAlloc;
+ } finally {
+ if (!success) {
+ clear();
+ return false;
+ }
+ }
+ return true;
+ }
+
+ private void allocateValidityBuffer(final long size) {
+ final int curSize = (int) size;
+ validityBuffer = allocator.buffer(curSize);
+ validityBuffer.readerIndex(0);
+ validityAllocationSizeInBytes = curSize;
+ validityBuffer.setZero(0, validityBuffer.capacity());
+ }
+
+ protected void allocateOffsetBuffer(final long size) {
+ offsetBuffer = allocator.buffer(size);
+ offsetBuffer.readerIndex(0);
+ offsetAllocationSizeInBytes = size;
+ offsetBuffer.setZero(0, offsetBuffer.capacity());
+ }
+
+ /**
+ * Resize the vector to increase the capacity. The internal behavior is to
+ * double the current value capacity.
+ */
+ @Override
+ public void reAlloc() {
+ /* reallocate the validity buffer */
+ reallocValidityBuffer();
+ /* reallocate the offset and data */
+ reallocOffsetBuffer();
+ vector.reAlloc();
+ }
+
+ private void reallocValidityAndOffsetBuffers() {
+ reallocOffsetBuffer();
+ reallocValidityBuffer();
+ }
+
+ protected void reallocOffsetBuffer() {
+ final long currentBufferCapacity = offsetBuffer.capacity();
+ long newAllocationSize = currentBufferCapacity * 2;
+ if (newAllocationSize == 0) {
+ if (offsetAllocationSizeInBytes > 0) {
+ newAllocationSize = offsetAllocationSizeInBytes;
+ } else {
+ newAllocationSize = INITIAL_VALUE_ALLOCATION * OFFSET_WIDTH * 2;
+ }
+ }
+
+ newAllocationSize = CommonUtil.nextPowerOfTwo(newAllocationSize);
+ newAllocationSize = Math.min(newAllocationSize, (long) (OFFSET_WIDTH) * Integer.MAX_VALUE);
+ assert newAllocationSize >= 1;
+
+ if (newAllocationSize > MAX_ALLOCATION_SIZE || newAllocationSize <= offsetBuffer.capacity()) {
+ throw new OversizedAllocationException("Unable to expand the buffer");
+ }
+
+ final ArrowBuf newBuf = allocator.buffer(newAllocationSize);
+ newBuf.setBytes(0, offsetBuffer, 0, currentBufferCapacity);
+ newBuf.setZero(currentBufferCapacity, newBuf.capacity() - currentBufferCapacity);
+ offsetBuffer.getReferenceManager().release(1);
+ offsetBuffer = newBuf;
+ offsetAllocationSizeInBytes = newAllocationSize;
+ }
+
+ private void reallocValidityBuffer() {
+ final int currentBufferCapacity = checkedCastToInt(validityBuffer.capacity());
+ long newAllocationSize = currentBufferCapacity * 2;
+ if (newAllocationSize == 0) {
+ if (validityAllocationSizeInBytes > 0) {
+ newAllocationSize = validityAllocationSizeInBytes;
+ } else {
+ newAllocationSize = getValidityBufferSizeFromCount(INITIAL_VALUE_ALLOCATION) * 2;
+ }
+ }
+ newAllocationSize = CommonUtil.nextPowerOfTwo(newAllocationSize);
+ assert newAllocationSize >= 1;
+
+ if (newAllocationSize > MAX_ALLOCATION_SIZE) {
+ throw new OversizedAllocationException("Unable to expand the buffer");
+ }
+
+ final ArrowBuf newBuf = allocator.buffer((int) newAllocationSize);
+ newBuf.setBytes(0, validityBuffer, 0, currentBufferCapacity);
+ newBuf.setZero(currentBufferCapacity, newBuf.capacity() - currentBufferCapacity);
+ validityBuffer.getReferenceManager().release(1);
+ validityBuffer = newBuf;
+ validityAllocationSizeInBytes = (int) newAllocationSize;
+ }
+
+ /**
+ * Same as {@link #copyFrom(int, int, ValueVector)} except that
+ * it handles the case when the capacity of the vector needs to be expanded
+ * before copy.
+ * @param inIndex position to copy from in source vector
+ * @param outIndex position to copy to in this vector
+ * @param from source vector
+ */
+ @Override
+ public void copyFromSafe(int inIndex, int outIndex, ValueVector from) {
+ copyFrom(inIndex, outIndex, from);
+ }
+
+ /**
+ * Copy a cell value from a particular index in source vector to a particular
+ * position in this vector.
+ * @param inIndex position to copy from in source vector
+ * @param outIndex position to copy to in this vector
+ * @param from source vector
+ */
+ @Override
+ public void copyFrom(int inIndex, int outIndex, ValueVector from) {
+ Preconditions.checkArgument(this.getMinorType() == from.getMinorType());
+ FieldReader in = from.getReader();
+ in.setPosition(inIndex);
+ UnionLargeListWriter out = getWriter();
+ out.setPosition(outIndex);
+ ComplexCopier.copy(in, out);
+ }
+
+ /**
+ * Get the offset vector.
+ * @deprecated This API will be removed, as the current implementations no longer hold inner offset vectors.
+ *
+ * @return the underlying offset vector or null if none exists.
+ */
+ @Override
+ @Deprecated
+ public UInt4Vector getOffsetVector() {
+ throw new UnsupportedOperationException("There is no inner offset vector");
+ }
+
+ /**
+ * Get the inner data vector for this list vector.
+ * @return data vector
+ */
+ @Override
+ public FieldVector getDataVector() {
+ return vector;
+ }
+
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
+ return getTransferPair(ref, allocator, null);
+ }
+
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator, CallBack callBack) {
+ return new TransferImpl(ref, allocator, callBack);
+ }
+
+ @Override
+ public TransferPair makeTransferPair(ValueVector target) {
+ return new TransferImpl((LargeListVector) target);
+ }
+
+ @Override
+ public long getValidityBufferAddress() {
+ return (validityBuffer.memoryAddress());
+ }
+
+ @Override
+ public long getDataBufferAddress() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public long getOffsetBufferAddress() {
+ return (offsetBuffer.memoryAddress());
+ }
+
+ @Override
+ public ArrowBuf getValidityBuffer() {
+ return validityBuffer;
+ }
+
+ @Override
+ public ArrowBuf getDataBuffer() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public ArrowBuf getOffsetBuffer() {
+ return offsetBuffer;
+ }
+
+ @Override
+ public int getValueCount() {
+ return valueCount;
+ }
+
+ @Override
+ public int hashCode(int index) {
+ return hashCode(index, null);
+ }
+
+ @Override
+ public int hashCode(int index, ArrowBufHasher hasher) {
+ if (isSet(index) == 0) {
+ return ArrowBufPointer.NULL_HASH_CODE;
+ }
+ int hash = 0;
+ final long start = offsetBuffer.getLong((long) index * OFFSET_WIDTH);
+ final long end = offsetBuffer.getLong(((long) index + 1L) * OFFSET_WIDTH);
+ for (long i = start; i < end; i++) {
+ hash = ByteFunctionHelpers.combineHash(hash, vector.hashCode(checkedCastToInt(i), hasher));
+ }
+ return hash;
+ }
+
+ @Override
+ public <OUT, IN> OUT accept(VectorVisitor<OUT, IN> visitor, IN value) {
+ return visitor.visit(this, value);
+ }
+
+ public UnionLargeListWriter getWriter() {
+ return new UnionLargeListWriter(this);
+ }
+
+ protected void replaceDataVector(FieldVector v) {
+ vector.clear();
+ vector = v;
+ }
+
+ @Override
+ public UnionVector promoteToUnion() {
+ UnionVector vector = new UnionVector("$data$", allocator, /* field type */ null, callBack);
+ replaceDataVector(vector);
+ invalidateReader();
+ if (callBack != null) {
+ callBack.doWork();
+ }
+ return vector;
+ }
+
+ private class TransferImpl implements TransferPair {
+
+ LargeListVector to;
+ TransferPair dataTransferPair;
+
+ public TransferImpl(String name, BufferAllocator allocator, CallBack callBack) {
+ this(new LargeListVector(name, allocator, fieldType, callBack));
+ }
+
+ public TransferImpl(LargeListVector to) {
+ this.to = to;
+ to.addOrGetVector(vector.getField().getFieldType());
+ if (to.getDataVector() instanceof ZeroVector) {
+ to.addOrGetVector(vector.getField().getFieldType());
+ }
+ dataTransferPair = getDataVector().makeTransferPair(to.getDataVector());
+ }
+
+ /**
+ * Transfer this vector'data to another vector. The memory associated
+ * with this vector is transferred to the allocator of target vector
+ * for accounting and management purposes.
+ */
+ @Override
+ public void transfer() {
+ to.clear();
+ dataTransferPair.transfer();
+ to.validityBuffer = transferBuffer(validityBuffer, to.allocator);
+ to.offsetBuffer = transferBuffer(offsetBuffer, to.allocator);
+ to.lastSet = lastSet;
+ if (valueCount > 0) {
+ to.setValueCount(valueCount);
+ }
+ clear();
+ }
+
+ /**
+ * Slice this vector at desired index and length and transfer the
+ * corresponding data to the target vector.
+ * @param startIndex start position of the split in source vector.
+ * @param length length of the split.
+ */
+ @Override
+ public void splitAndTransfer(int startIndex, int length) {
+ Preconditions.checkArgument(startIndex >= 0 && length >= 0 && startIndex + length <= valueCount,
+ "Invalid parameters startIndex: %s, length: %s for valueCount: %s", startIndex, length, valueCount);
+ final long startPoint = offsetBuffer.getLong((long) startIndex * OFFSET_WIDTH);
+ final long sliceLength = offsetBuffer.getLong((long) (startIndex + length) * OFFSET_WIDTH) - startPoint;
+ to.clear();
+ to.allocateOffsetBuffer((length + 1) * OFFSET_WIDTH);
+ /* splitAndTransfer offset buffer */
+ for (int i = 0; i < length + 1; i++) {
+ final long relativeOffset = offsetBuffer.getLong((long) (startIndex + i) * OFFSET_WIDTH) - startPoint;
+ to.offsetBuffer.setLong((long) i * OFFSET_WIDTH, relativeOffset);
+ }
+ /* splitAndTransfer validity buffer */
+ splitAndTransferValidityBuffer(startIndex, length, to);
+ /* splitAndTransfer data buffer */
+ dataTransferPair.splitAndTransfer(checkedCastToInt(startPoint), checkedCastToInt(sliceLength));
+ to.lastSet = length - 1;
+ to.setValueCount(length);
+ }
+
+ /*
+ * transfer the validity.
+ */
+ private void splitAndTransferValidityBuffer(int startIndex, int length, LargeListVector target) {
+ int firstByteSource = BitVectorHelper.byteIndex(startIndex);
+ int lastByteSource = BitVectorHelper.byteIndex(valueCount - 1);
+ int byteSizeTarget = getValidityBufferSizeFromCount(length);
+ int offset = startIndex % 8;
+
+ if (length > 0) {
+ if (offset == 0) {
+ // slice
+ if (target.validityBuffer != null) {
+ target.validityBuffer.getReferenceManager().release();
+ }
+ target.validityBuffer = validityBuffer.slice(firstByteSource, byteSizeTarget);
+ target.validityBuffer.getReferenceManager().retain(1);
+ } else {
+ /* Copy data
+ * When the first bit starts from the middle of a byte (offset != 0),
+ * copy data from src BitVector.
+ * Each byte in the target is composed by a part in i-th byte,
+ * another part in (i+1)-th byte.
+ */
+ target.allocateValidityBuffer(byteSizeTarget);
+
+ for (int i = 0; i < byteSizeTarget - 1; i++) {
+ byte b1 = BitVectorHelper.getBitsFromCurrentByte(validityBuffer, firstByteSource + i, offset);
+ byte b2 = BitVectorHelper.getBitsFromNextByte(validityBuffer, firstByteSource + i + 1, offset);
+
+ target.validityBuffer.setByte(i, (b1 + b2));
+ }
+
+ /* Copying the last piece is done in the following manner:
+ * if the source vector has 1 or more bytes remaining, we copy
+ * the last piece as a byte formed by shifting data
+ * from the current byte and the next byte.
+ *
+ * if the source vector has no more bytes remaining
+ * (we are at the last byte), we copy the last piece as a byte
+ * by shifting data from the current byte.
+ */
+ if ((firstByteSource + byteSizeTarget - 1) < lastByteSource) {
+ byte b1 = BitVectorHelper.getBitsFromCurrentByte(validityBuffer,
+ firstByteSource + byteSizeTarget - 1, offset);
+ byte b2 = BitVectorHelper.getBitsFromNextByte(validityBuffer,
+ firstByteSource + byteSizeTarget, offset);
+
+ target.validityBuffer.setByte(byteSizeTarget - 1, b1 + b2);
+ } else {
+ byte b1 = BitVectorHelper.getBitsFromCurrentByte(validityBuffer,
+ firstByteSource + byteSizeTarget - 1, offset);
+ target.validityBuffer.setByte(byteSizeTarget - 1, b1);
+ }
+ }
+ }
+ }
+
+ @Override
+ public ValueVector getTo() {
+ return to;
+ }
+
+ @Override
+ public void copyValueSafe(int from, int to) {
+ this.to.copyFrom(from, to, LargeListVector.this);
+ }
+ }
+
+ @Override
+ public UnionLargeListReader getReader() {
+ if (reader == null) {
+ reader = new UnionLargeListReader(this);
+ }
+ return reader;
+ }
+
+ /**
+ * Initialize the data vector (and execute callback) if it hasn't already been done,
+ * returns the data vector.
+ */
+ public <T extends ValueVector> AddOrGetResult<T> addOrGetVector(FieldType fieldType) {
+ boolean created = false;
+ if (vector instanceof NullVector) {
+ vector = fieldType.createNewSingleVector(defaultDataVectorName, allocator, callBack);
+ // returned vector must have the same field
+ created = true;
+ if (callBack != null &&
+ // not a schema change if changing from ZeroVector to ZeroVector
+ (fieldType.getType().getTypeID() != ArrowType.ArrowTypeID.Null)) {
+ callBack.doWork();
+ }
+ }
+
+ if (vector.getField().getType().getTypeID() != fieldType.getType().getTypeID()) {
+ final String msg = String.format("Inner vector type mismatch. Requested type: [%s], actual type: [%s]",
+ fieldType.getType().getTypeID(), vector.getField().getType().getTypeID());
+ throw new SchemaChangeRuntimeException(msg);
+ }
+
+ invalidateReader();
+ return new AddOrGetResult<>((T) vector, created);
+ }
+
+ /**
+ * Get the size (number of bytes) of underlying buffers used by this
+ * vector.
+ * @return size of underlying buffers.
+ */
+ @Override
+ public int getBufferSize() {
+ if (valueCount == 0) {
+ return 0;
+ }
+ final int offsetBufferSize = (valueCount + 1) * OFFSET_WIDTH;
+ final int validityBufferSize = getValidityBufferSizeFromCount(valueCount);
+ return offsetBufferSize + validityBufferSize + vector.getBufferSize();
+ }
+
+ @Override
+ public int getBufferSizeFor(int valueCount) {
+ if (valueCount == 0) {
+ return 0;
+ }
+ final int validityBufferSize = getValidityBufferSizeFromCount(valueCount);
+ long innerVectorValueCount = offsetBuffer.getLong((long) valueCount * OFFSET_WIDTH);
+
+ return ((valueCount + 1) * OFFSET_WIDTH) +
+ vector.getBufferSizeFor(checkedCastToInt(innerVectorValueCount)) +
+ validityBufferSize;
+ }
+
+ @Override
+ public Field getField() {
+ return new Field(getName(), fieldType, Collections.singletonList(getDataVector().getField()));
+ }
+
+ @Override
+ public MinorType getMinorType() {
+ return MinorType.LARGELIST;
+ }
+
+ @Override
+ public String getName() {
+ return name;
+ }
+
+ @Override
+ public void clear() {
+ offsetBuffer = releaseBuffer(offsetBuffer);
+ vector.clear();
+ valueCount = 0;
+ super.clear();
+ validityBuffer = releaseBuffer(validityBuffer);
+ lastSet = -1;
+ }
+
+ @Override
+ public void reset() {
+ offsetBuffer.setZero(0, offsetBuffer.capacity());
+ vector.reset();
+ valueCount = 0;
+ validityBuffer.setZero(0, validityBuffer.capacity());
+ lastSet = -1;
+ }
+
+ /**
+ * Return the underlying buffers associated with this vector. Note that this doesn't
+ * impact the reference counts for this buffer so it only should be used for in-context
+ * access. Also note that this buffer changes regularly thus
+ * external classes shouldn't hold a reference to it (unless they change it).
+ *
+ * @param clear Whether to clear vector before returning; the buffers will still be refcounted
+ * but the returned array will be the only reference to them
+ * @return The underlying {@link ArrowBuf buffers} that is used by this
+ * vector instance.
+ */
+ @Override
+ public ArrowBuf[] getBuffers(boolean clear) {
+ setReaderAndWriterIndex();
+ final ArrowBuf[] buffers;
+ if (getBufferSize() == 0) {
+ buffers = new ArrowBuf[0];
+ } else {
+ List<ArrowBuf> list = new ArrayList<>();
+ list.add(offsetBuffer);
+ list.add(validityBuffer);
+ list.addAll(Arrays.asList(vector.getBuffers(false)));
+ buffers = list.toArray(new ArrowBuf[list.size()]);
+ }
+ if (clear) {
+ for (ArrowBuf buffer : buffers) {
+ buffer.getReferenceManager().retain();
+ }
+ clear();
+ }
+ return buffers;
+ }
+
+ protected void invalidateReader() {
+ reader = null;
+ }
+
+ /**
+ * Get the element in the list vector at a particular index.
+ * @param index position of the element
+ * @return Object at given position
+ */
+ @Override
+ public List<?> getObject(int index) {
+ if (isSet(index) == 0) {
+ return null;
+ }
+ final List<Object> vals = new JsonStringArrayList<>();
+ final long start = offsetBuffer.getLong((long) index * OFFSET_WIDTH);
+ final long end = offsetBuffer.getLong(((long) index + 1L) * OFFSET_WIDTH);
+ final ValueVector vv = getDataVector();
+ for (long i = start; i < end; i++) {
+ vals.add(vv.getObject(checkedCastToInt(i)));
+ }
+
+ return vals;
+ }
+
+ /**
+ * Check if element at given index is null.
+ *
+ * @param index position of element
+ * @return true if element at given index is null, false otherwise
+ */
+ @Override
+ public boolean isNull(int index) {
+ return (isSet(index) == 0);
+ }
+
+ /**
+ * Check if element at given index is empty list.
+ * @param index position of element
+ * @return true if element at given index is empty list or NULL, false otherwise
+ */
+ public boolean isEmpty(int index) {
+ if (isNull(index)) {
+ return true;
+ } else {
+ final long start = offsetBuffer.getLong((long) index * OFFSET_WIDTH);
+ final long end = offsetBuffer.getLong(((long) index + 1L) * OFFSET_WIDTH);
+ return start == end;
+ }
+ }
+
+ /**
+ * Same as {@link #isNull(int)}.
+ *
+ * @param index position of element
+ * @return 1 if element at given index is not null, 0 otherwise
+ */
+ public int isSet(int index) {
+ final int byteIndex = index >> 3;
+ final byte b = validityBuffer.getByte(byteIndex);
+ final int bitIndex = index & 7;
+ return (b >> bitIndex) & 0x01;
+ }
+
+ /**
+ * Get the number of elements that are null in the vector.
+ *
+ * @return the number of null elements.
+ */
+ @Override
+ public int getNullCount() {
+ return BitVectorHelper.getNullCount(validityBuffer, valueCount);
+ }
+
+ /**
+ * Get the current value capacity for the vector.
+ * @return number of elements that vector can hold.
+ */
+ @Override
+ public int getValueCapacity() {
+ return getValidityAndOffsetValueCapacity();
+ }
+
+ protected int getOffsetBufferValueCapacity() {
+ return checkedCastToInt(offsetBuffer.capacity() / OFFSET_WIDTH);
+ }
+
+ private int getValidityAndOffsetValueCapacity() {
+ final int offsetValueCapacity = Math.max(getOffsetBufferValueCapacity() - 1, 0);
+ return Math.min(offsetValueCapacity, getValidityBufferValueCapacity());
+ }
+
+ private int getValidityBufferValueCapacity() {
+ return capAtMaxInt(validityBuffer.capacity() * 8);
+ }
+
+ /**
+ * Sets the list at index to be not-null. Reallocates validity buffer if index
+ * is larger than current capacity.
+ */
+ public void setNotNull(int index) {
+ while (index >= getValidityAndOffsetValueCapacity()) {
+ reallocValidityAndOffsetBuffers();
+ }
+ BitVectorHelper.setBit(validityBuffer, index);
+ lastSet = index;
+ }
+
+ /**
+ * Sets list at index to be null.
+ * @param index position in vector
+ */
+ public void setNull(int index) {
+ while (index >= getValidityAndOffsetValueCapacity()) {
+ reallocValidityAndOffsetBuffers();
+ }
+ if (lastSet >= index) {
+ lastSet = index - 1;
+ }
+ for (int i = lastSet + 1; i <= index; i++) {
+ final int currentOffset = offsetBuffer.getInt(i * OFFSET_WIDTH);
+ offsetBuffer.setInt((i + 1) * OFFSET_WIDTH, currentOffset);
+ }
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+
+ /**
+ * Start a new value in the list vector.
+ *
+ * @param index index of the value to start
+ */
+ public long startNewValue(long index) {
+ while (index >= getValidityAndOffsetValueCapacity()) {
+ reallocValidityAndOffsetBuffers();
+ }
+ for (int i = lastSet + 1; i <= index; i++) {
+ final long currentOffset = offsetBuffer.getLong((long) i * OFFSET_WIDTH);
+ offsetBuffer.setLong(((long) i + 1L) * OFFSET_WIDTH, currentOffset);
+ }
+ BitVectorHelper.setBit(validityBuffer, index);
+ lastSet = checkedCastToInt(index);
+ return offsetBuffer.getLong(((long) lastSet + 1L) * OFFSET_WIDTH);
+ }
+
+ /**
+ * End the current value.
+ *
+ * @param index index of the value to end
+ * @param size number of elements in the list that was written
+ */
+ public void endValue(int index, long size) {
+ final long currentOffset = offsetBuffer.getLong(((long) index + 1L) * OFFSET_WIDTH);
+ offsetBuffer.setLong(((long) index + 1L) * OFFSET_WIDTH, currentOffset + size);
+ }
+
+ /**
+ * Sets the value count for the vector.
+ *
+ * <p>
+ * Important note: The underlying vector does not support 64-bit
+ * allocations yet. This may throw if attempting to hold larger
+ * than what a 32-bit vector can store.
+ * </p>
+ *
+ * @param valueCount value count
+ */
+ @Override
+ public void setValueCount(int valueCount) {
+ this.valueCount = valueCount;
+ if (valueCount > 0) {
+ while (valueCount > getValidityAndOffsetValueCapacity()) {
+ /* check if validity and offset buffers need to be re-allocated */
+ reallocValidityAndOffsetBuffers();
+ }
+ for (int i = lastSet + 1; i < valueCount; i++) {
+ /* fill the holes with offsets */
+ final long currentOffset = offsetBuffer.getLong((long) i * OFFSET_WIDTH);
+ offsetBuffer.setLong(((long) i + 1L) * OFFSET_WIDTH, currentOffset);
+ }
+ }
+ /* valueCount for the data vector is the current end offset */
+ final long childValueCount = (valueCount == 0) ? 0 :
+ offsetBuffer.getLong(((long) lastSet + 1L) * OFFSET_WIDTH);
+ /* set the value count of data vector and this will take care of
+ * checking whether data buffer needs to be reallocated.
+ * TODO: revisit when 64-bit vectors are supported
+ */
+ Preconditions.checkArgument(childValueCount <= Integer.MAX_VALUE || childValueCount >= Integer.MIN_VALUE,
+ "LargeListVector doesn't yet support 64-bit allocations: %s", childValueCount);
+ vector.setValueCount((int) childValueCount);
+ }
+
+ public void setLastSet(int value) {
+ lastSet = value;
+ }
+
+ public int getLastSet() {
+ return lastSet;
+ }
+
+ public long getElementStartIndex(int index) {
+ return offsetBuffer.getLong((long) index * OFFSET_WIDTH);
+ }
+
+ public long getElementEndIndex(int index) {
+ return offsetBuffer.getLong(((long) index + 1L) * OFFSET_WIDTH);
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java
new file mode 100644
index 000000000..b5b32951a
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java
@@ -0,0 +1,879 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.complex;
+
+import static java.util.Collections.singletonList;
+import static org.apache.arrow.memory.util.LargeMemoryUtil.capAtMaxInt;
+import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt;
+import static org.apache.arrow.util.Preconditions.checkNotNull;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.OutOfMemoryException;
+import org.apache.arrow.memory.util.ArrowBufPointer;
+import org.apache.arrow.memory.util.ByteFunctionHelpers;
+import org.apache.arrow.memory.util.CommonUtil;
+import org.apache.arrow.memory.util.hash.ArrowBufHasher;
+import org.apache.arrow.util.Preconditions;
+import org.apache.arrow.vector.AddOrGetResult;
+import org.apache.arrow.vector.BitVectorHelper;
+import org.apache.arrow.vector.BufferBacked;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.ZeroVector;
+import org.apache.arrow.vector.compare.VectorVisitor;
+import org.apache.arrow.vector.complex.impl.ComplexCopier;
+import org.apache.arrow.vector.complex.impl.UnionListReader;
+import org.apache.arrow.vector.complex.impl.UnionListWriter;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.complex.writer.FieldWriter;
+import org.apache.arrow.vector.ipc.message.ArrowFieldNode;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.CallBack;
+import org.apache.arrow.vector.util.JsonStringArrayList;
+import org.apache.arrow.vector.util.OversizedAllocationException;
+import org.apache.arrow.vector.util.TransferPair;
+
+/**
+ * A list vector contains lists of a specific type of elements. Its structure contains 3 elements.
+ * <ol>
+ * <li>A validity buffer.</li>
+ * <li> An offset buffer, that denotes lists boundaries. </li>
+ * <li> A child data vector that contains the elements of lists. </li>
+ * </ol>
+ * The latter two are managed by its superclass.
+ */
+public class ListVector extends BaseRepeatedValueVector implements PromotableVector {
+
+ public static ListVector empty(String name, BufferAllocator allocator) {
+ return new ListVector(name, allocator, FieldType.nullable(ArrowType.List.INSTANCE), null);
+ }
+
+ protected ArrowBuf validityBuffer;
+ protected UnionListReader reader;
+ private CallBack callBack;
+ private final FieldType fieldType;
+ private int validityAllocationSizeInBytes;
+
+ /**
+ * The maximum index that is actually set.
+ */
+ private int lastSet;
+
+ /**
+ * Constructs a new instance.
+ *
+ * @param name The name of the instance.
+ * @param allocator The allocator to use for allocating/reallocating buffers.
+ * @param fieldType The type of this list.
+ * @param callBack A schema change callback.
+ */
+ public ListVector(String name, BufferAllocator allocator, FieldType fieldType, CallBack callBack) {
+ super(name, allocator, callBack);
+ this.validityBuffer = allocator.getEmpty();
+ this.fieldType = checkNotNull(fieldType);
+ this.callBack = callBack;
+ this.validityAllocationSizeInBytes = getValidityBufferSizeFromCount(INITIAL_VALUE_ALLOCATION);
+ this.lastSet = -1;
+ }
+
+ @Override
+ public void initializeChildrenFromFields(List<Field> children) {
+ if (children.size() != 1) {
+ throw new IllegalArgumentException("Lists have only one child. Found: " + children);
+ }
+ Field field = children.get(0);
+ AddOrGetResult<FieldVector> addOrGetVector = addOrGetVector(field.getFieldType());
+ if (!addOrGetVector.isCreated()) {
+ throw new IllegalArgumentException("Child vector already existed: " + addOrGetVector.getVector());
+ }
+
+ addOrGetVector.getVector().initializeChildrenFromFields(field.getChildren());
+ }
+
+ @Override
+ public void setInitialCapacity(int numRecords) {
+ validityAllocationSizeInBytes = getValidityBufferSizeFromCount(numRecords);
+ super.setInitialCapacity(numRecords);
+ }
+
+ /**
+ * Specialized version of setInitialCapacity() for ListVector. This is
+ * used by some callers when they want to explicitly control and be
+ * conservative about memory allocated for inner data vector. This is
+ * very useful when we are working with memory constraints for a query
+ * and have a fixed amount of memory reserved for the record batch. In
+ * such cases, we are likely to face OOM or related problems when
+ * we reserve memory for a record batch with value count x and
+ * do setInitialCapacity(x) such that each vector allocates only
+ * what is necessary and not the default amount but the multiplier
+ * forces the memory requirement to go beyond what was needed.
+ *
+ * @param numRecords value count
+ * @param density density of ListVector. Density is the average size of
+ * list per position in the List vector. For example, a
+ * density value of 10 implies each position in the list
+ * vector has a list of 10 values.
+ * A density value of 0.1 implies out of 10 positions in
+ * the list vector, 1 position has a list of size 1 and
+ * remaining positions are null (no lists) or empty lists.
+ * This helps in tightly controlling the memory we provision
+ * for inner data vector.
+ */
+ @Override
+ public void setInitialCapacity(int numRecords, double density) {
+ validityAllocationSizeInBytes = getValidityBufferSizeFromCount(numRecords);
+ super.setInitialCapacity(numRecords, density);
+ }
+
+ /**
+ * Get the density of this ListVector.
+ * @return density
+ */
+ public double getDensity() {
+ if (valueCount == 0) {
+ return 0.0D;
+ }
+ final int startOffset = offsetBuffer.getInt(0);
+ final int endOffset = offsetBuffer.getInt(valueCount * OFFSET_WIDTH);
+ final double totalListSize = endOffset - startOffset;
+ return totalListSize / valueCount;
+ }
+
+ @Override
+ public List<FieldVector> getChildrenFromFields() {
+ return singletonList(getDataVector());
+ }
+
+ /**
+ * Load the buffers of this vector with provided source buffers.
+ * The caller manages the source buffers and populates them before invoking
+ * this method.
+ * @param fieldNode the fieldNode indicating the value count
+ * @param ownBuffers the buffers for this Field (own buffers only, children not included)
+ */
+ @Override
+ public void loadFieldBuffers(ArrowFieldNode fieldNode, List<ArrowBuf> ownBuffers) {
+ if (ownBuffers.size() != 2) {
+ throw new IllegalArgumentException("Illegal buffer count, expected " + 2 + ", got: " + ownBuffers.size());
+ }
+
+ ArrowBuf bitBuffer = ownBuffers.get(0);
+ ArrowBuf offBuffer = ownBuffers.get(1);
+
+ validityBuffer.getReferenceManager().release();
+ validityBuffer = BitVectorHelper.loadValidityBuffer(fieldNode, bitBuffer, allocator);
+ offsetBuffer.getReferenceManager().release();
+ offsetBuffer = offBuffer.getReferenceManager().retain(offBuffer, allocator);
+
+ validityAllocationSizeInBytes = checkedCastToInt(validityBuffer.capacity());
+ offsetAllocationSizeInBytes = offsetBuffer.capacity();
+
+ lastSet = fieldNode.getLength() - 1;
+ valueCount = fieldNode.getLength();
+ }
+
+ /**
+ * Get the buffers belonging to this vector.
+ * @return the inner buffers.
+ */
+ @Override
+ public List<ArrowBuf> getFieldBuffers() {
+ List<ArrowBuf> result = new ArrayList<>(2);
+ setReaderAndWriterIndex();
+ result.add(validityBuffer);
+ result.add(offsetBuffer);
+
+ return result;
+ }
+
+ /**
+ * Set the reader and writer indexes for the inner buffers.
+ */
+ private void setReaderAndWriterIndex() {
+ validityBuffer.readerIndex(0);
+ offsetBuffer.readerIndex(0);
+ if (valueCount == 0) {
+ validityBuffer.writerIndex(0);
+ offsetBuffer.writerIndex(0);
+ } else {
+ validityBuffer.writerIndex(getValidityBufferSizeFromCount(valueCount));
+ offsetBuffer.writerIndex((valueCount + 1) * OFFSET_WIDTH);
+ }
+ }
+
+ /**
+ * Get the inner vectors.
+ *
+ * @deprecated This API will be removed as the current implementations no longer support inner vectors.
+ *
+ * @return the inner vectors for this field as defined by the TypeLayout
+ */
+ @Deprecated
+ @Override
+ public List<BufferBacked> getFieldInnerVectors() {
+ throw new UnsupportedOperationException("There are no inner vectors. Use getFieldBuffers");
+ }
+
+ public UnionListWriter getWriter() {
+ return new UnionListWriter(this);
+ }
+
+ /**
+ * Same as {@link #allocateNewSafe()}.
+ */
+ @Override
+ public void allocateNew() throws OutOfMemoryException {
+ if (!allocateNewSafe()) {
+ throw new OutOfMemoryException("Failure while allocating memory");
+ }
+ }
+
+ /**
+ * Allocate memory for the vector. We internally use a default value count
+ * of 4096 to allocate memory for at least these many elements in the
+ * vector.
+ *
+ * @return false if memory allocation fails, true otherwise.
+ */
+ public boolean allocateNewSafe() {
+ boolean success = false;
+ try {
+ /* we are doing a new allocation -- release the current buffers */
+ clear();
+ /* allocate validity buffer */
+ allocateValidityBuffer(validityAllocationSizeInBytes);
+ /* allocate offset and data buffer */
+ success = super.allocateNewSafe();
+ } finally {
+ if (!success) {
+ clear();
+ return false;
+ }
+ }
+ return true;
+ }
+
+ private void allocateValidityBuffer(final long size) {
+ final int curSize = (int) size;
+ validityBuffer = allocator.buffer(curSize);
+ validityBuffer.readerIndex(0);
+ validityAllocationSizeInBytes = curSize;
+ validityBuffer.setZero(0, validityBuffer.capacity());
+ }
+
+ /**
+ * Resize the vector to increase the capacity. The internal behavior is to
+ * double the current value capacity.
+ */
+ @Override
+ public void reAlloc() {
+ /* reallocate the validity buffer */
+ reallocValidityBuffer();
+ /* reallocate the offset and data */
+ super.reAlloc();
+ }
+
+ private void reallocValidityAndOffsetBuffers() {
+ reallocOffsetBuffer();
+ reallocValidityBuffer();
+ }
+
+ private void reallocValidityBuffer() {
+ final int currentBufferCapacity = checkedCastToInt(validityBuffer.capacity());
+ long newAllocationSize = currentBufferCapacity * 2;
+ if (newAllocationSize == 0) {
+ if (validityAllocationSizeInBytes > 0) {
+ newAllocationSize = validityAllocationSizeInBytes;
+ } else {
+ newAllocationSize = getValidityBufferSizeFromCount(INITIAL_VALUE_ALLOCATION) * 2;
+ }
+ }
+ newAllocationSize = CommonUtil.nextPowerOfTwo(newAllocationSize);
+ assert newAllocationSize >= 1;
+
+ if (newAllocationSize > MAX_ALLOCATION_SIZE) {
+ throw new OversizedAllocationException("Unable to expand the buffer");
+ }
+
+ final ArrowBuf newBuf = allocator.buffer((int) newAllocationSize);
+ newBuf.setBytes(0, validityBuffer, 0, currentBufferCapacity);
+ newBuf.setZero(currentBufferCapacity, newBuf.capacity() - currentBufferCapacity);
+ validityBuffer.getReferenceManager().release(1);
+ validityBuffer = newBuf;
+ validityAllocationSizeInBytes = (int) newAllocationSize;
+ }
+
+ /**
+ * Same as {@link #copyFrom(int, int, ValueVector)} except that
+ * it handles the case when the capacity of the vector needs to be expanded
+ * before copy.
+ * @param inIndex position to copy from in source vector
+ * @param outIndex position to copy to in this vector
+ * @param from source vector
+ */
+ @Override
+ public void copyFromSafe(int inIndex, int outIndex, ValueVector from) {
+ copyFrom(inIndex, outIndex, from);
+ }
+
+ /**
+ * Copy a cell value from a particular index in source vector to a particular
+ * position in this vector.
+ * @param inIndex position to copy from in source vector
+ * @param outIndex position to copy to in this vector
+ * @param from source vector
+ */
+ @Override
+ public void copyFrom(int inIndex, int outIndex, ValueVector from) {
+ Preconditions.checkArgument(this.getMinorType() == from.getMinorType());
+ FieldReader in = from.getReader();
+ in.setPosition(inIndex);
+ FieldWriter out = getWriter();
+ out.setPosition(outIndex);
+ ComplexCopier.copy(in, out);
+ }
+
+ /**
+ * Get the inner data vector for this list vector.
+ * @return data vector
+ */
+ @Override
+ public FieldVector getDataVector() {
+ return vector;
+ }
+
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
+ return getTransferPair(ref, allocator, null);
+ }
+
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator, CallBack callBack) {
+ return new TransferImpl(ref, allocator, callBack);
+ }
+
+ @Override
+ public TransferPair makeTransferPair(ValueVector target) {
+ return new TransferImpl((ListVector) target);
+ }
+
+ @Override
+ public long getValidityBufferAddress() {
+ return (validityBuffer.memoryAddress());
+ }
+
+ @Override
+ public long getDataBufferAddress() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public long getOffsetBufferAddress() {
+ return (offsetBuffer.memoryAddress());
+ }
+
+ @Override
+ public ArrowBuf getValidityBuffer() {
+ return validityBuffer;
+ }
+
+ @Override
+ public ArrowBuf getDataBuffer() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public ArrowBuf getOffsetBuffer() {
+ return offsetBuffer;
+ }
+
+ @Override
+ public int hashCode(int index) {
+ return hashCode(index, null);
+ }
+
+ @Override
+ public int hashCode(int index, ArrowBufHasher hasher) {
+ if (isSet(index) == 0) {
+ return ArrowBufPointer.NULL_HASH_CODE;
+ }
+ int hash = 0;
+ final int start = offsetBuffer.getInt(index * OFFSET_WIDTH);
+ final int end = offsetBuffer.getInt((index + 1) * OFFSET_WIDTH);
+ for (int i = start; i < end; i++) {
+ hash = ByteFunctionHelpers.combineHash(hash, vector.hashCode(i, hasher));
+ }
+ return hash;
+ }
+
+ @Override
+ public <OUT, IN> OUT accept(VectorVisitor<OUT, IN> visitor, IN value) {
+ return visitor.visit(this, value);
+ }
+
+ private class TransferImpl implements TransferPair {
+
+ ListVector to;
+ TransferPair dataTransferPair;
+
+ public TransferImpl(String name, BufferAllocator allocator, CallBack callBack) {
+ this(new ListVector(name, allocator, fieldType, callBack));
+ }
+
+ public TransferImpl(ListVector to) {
+ this.to = to;
+ to.addOrGetVector(vector.getField().getFieldType());
+ if (to.getDataVector() instanceof ZeroVector) {
+ to.addOrGetVector(vector.getField().getFieldType());
+ }
+ dataTransferPair = getDataVector().makeTransferPair(to.getDataVector());
+ }
+
+ /**
+ * Transfer this vector'data to another vector. The memory associated
+ * with this vector is transferred to the allocator of target vector
+ * for accounting and management purposes.
+ */
+ @Override
+ public void transfer() {
+ to.clear();
+ dataTransferPair.transfer();
+ to.validityBuffer = transferBuffer(validityBuffer, to.allocator);
+ to.offsetBuffer = transferBuffer(offsetBuffer, to.allocator);
+ to.lastSet = lastSet;
+ if (valueCount > 0) {
+ to.setValueCount(valueCount);
+ }
+ clear();
+ }
+
+ /**
+ * Slice this vector at desired index and length and transfer the
+ * corresponding data to the target vector.
+ * @param startIndex start position of the split in source vector.
+ * @param length length of the split.
+ */
+ @Override
+ public void splitAndTransfer(int startIndex, int length) {
+ Preconditions.checkArgument(startIndex >= 0 && length >= 0 && startIndex + length <= valueCount,
+ "Invalid parameters startIndex: %s, length: %s for valueCount: %s", startIndex, length, valueCount);
+ final int startPoint = offsetBuffer.getInt(startIndex * OFFSET_WIDTH);
+ final int sliceLength = offsetBuffer.getInt((startIndex + length) * OFFSET_WIDTH) - startPoint;
+ to.clear();
+ to.allocateOffsetBuffer((length + 1) * OFFSET_WIDTH);
+ /* splitAndTransfer offset buffer */
+ for (int i = 0; i < length + 1; i++) {
+ final int relativeOffset = offsetBuffer.getInt((startIndex + i) * OFFSET_WIDTH) - startPoint;
+ to.offsetBuffer.setInt(i * OFFSET_WIDTH, relativeOffset);
+ }
+ /* splitAndTransfer validity buffer */
+ splitAndTransferValidityBuffer(startIndex, length, to);
+ /* splitAndTransfer data buffer */
+ dataTransferPair.splitAndTransfer(startPoint, sliceLength);
+ to.lastSet = length - 1;
+ to.setValueCount(length);
+ }
+
+ /*
+ * transfer the validity.
+ */
+ private void splitAndTransferValidityBuffer(int startIndex, int length, ListVector target) {
+ int firstByteSource = BitVectorHelper.byteIndex(startIndex);
+ int lastByteSource = BitVectorHelper.byteIndex(valueCount - 1);
+ int byteSizeTarget = getValidityBufferSizeFromCount(length);
+ int offset = startIndex % 8;
+
+ if (length > 0) {
+ if (offset == 0) {
+ // slice
+ if (target.validityBuffer != null) {
+ target.validityBuffer.getReferenceManager().release();
+ }
+ target.validityBuffer = validityBuffer.slice(firstByteSource, byteSizeTarget);
+ target.validityBuffer.getReferenceManager().retain(1);
+ } else {
+ /* Copy data
+ * When the first bit starts from the middle of a byte (offset != 0),
+ * copy data from src BitVector.
+ * Each byte in the target is composed by a part in i-th byte,
+ * another part in (i+1)-th byte.
+ */
+ target.allocateValidityBuffer(byteSizeTarget);
+
+ for (int i = 0; i < byteSizeTarget - 1; i++) {
+ byte b1 = BitVectorHelper.getBitsFromCurrentByte(validityBuffer, firstByteSource + i, offset);
+ byte b2 = BitVectorHelper.getBitsFromNextByte(validityBuffer, firstByteSource + i + 1, offset);
+
+ target.validityBuffer.setByte(i, (b1 + b2));
+ }
+
+ /* Copying the last piece is done in the following manner:
+ * if the source vector has 1 or more bytes remaining, we copy
+ * the last piece as a byte formed by shifting data
+ * from the current byte and the next byte.
+ *
+ * if the source vector has no more bytes remaining
+ * (we are at the last byte), we copy the last piece as a byte
+ * by shifting data from the current byte.
+ */
+ if ((firstByteSource + byteSizeTarget - 1) < lastByteSource) {
+ byte b1 = BitVectorHelper.getBitsFromCurrentByte(validityBuffer,
+ firstByteSource + byteSizeTarget - 1, offset);
+ byte b2 = BitVectorHelper.getBitsFromNextByte(validityBuffer,
+ firstByteSource + byteSizeTarget, offset);
+
+ target.validityBuffer.setByte(byteSizeTarget - 1, b1 + b2);
+ } else {
+ byte b1 = BitVectorHelper.getBitsFromCurrentByte(validityBuffer,
+ firstByteSource + byteSizeTarget - 1, offset);
+ target.validityBuffer.setByte(byteSizeTarget - 1, b1);
+ }
+ }
+ }
+ }
+
+ @Override
+ public ValueVector getTo() {
+ return to;
+ }
+
+ @Override
+ public void copyValueSafe(int from, int to) {
+ this.to.copyFrom(from, to, ListVector.this);
+ }
+ }
+
+ @Override
+ public UnionListReader getReader() {
+ if (reader == null) {
+ reader = new UnionListReader(this);
+ }
+ return reader;
+ }
+
+ /** Initialize the child data vector to field type. */
+ public <T extends ValueVector> AddOrGetResult<T> addOrGetVector(FieldType fieldType) {
+ AddOrGetResult<T> result = super.addOrGetVector(fieldType);
+ invalidateReader();
+ return result;
+ }
+
+ /**
+ * Get the size (number of bytes) of underlying buffers used by this
+ * vector.
+ * @return size of underlying buffers.
+ */
+ @Override
+ public int getBufferSize() {
+ if (valueCount == 0) {
+ return 0;
+ }
+ final int offsetBufferSize = (valueCount + 1) * OFFSET_WIDTH;
+ final int validityBufferSize = getValidityBufferSizeFromCount(valueCount);
+ return offsetBufferSize + validityBufferSize + vector.getBufferSize();
+ }
+
+ @Override
+ public int getBufferSizeFor(int valueCount) {
+ if (valueCount == 0) {
+ return 0;
+ }
+ final int validityBufferSize = getValidityBufferSizeFromCount(valueCount);
+
+ return super.getBufferSizeFor(valueCount) + validityBufferSize;
+ }
+
+ @Override
+ public Field getField() {
+ return new Field(getName(), fieldType, Collections.singletonList(getDataVector().getField()));
+ }
+
+ @Override
+ public MinorType getMinorType() {
+ return MinorType.LIST;
+ }
+
+ @Override
+ public void clear() {
+ super.clear();
+ validityBuffer = releaseBuffer(validityBuffer);
+ lastSet = -1;
+ }
+
+ @Override
+ public void reset() {
+ super.reset();
+ validityBuffer.setZero(0, validityBuffer.capacity());
+ lastSet = -1;
+ }
+
+ /**
+ * Return the underlying buffers associated with this vector. Note that this doesn't
+ * impact the reference counts for this buffer so it only should be used for in-context
+ * access. Also note that this buffer changes regularly thus
+ * external classes shouldn't hold a reference to it (unless they change it).
+ *
+ * @param clear Whether to clear vector before returning; the buffers will still be refcounted
+ * but the returned array will be the only reference to them
+ * @return The underlying {@link ArrowBuf buffers} that is used by this
+ * vector instance.
+ */
+ @Override
+ public ArrowBuf[] getBuffers(boolean clear) {
+ setReaderAndWriterIndex();
+ final ArrowBuf[] buffers;
+ if (getBufferSize() == 0) {
+ buffers = new ArrowBuf[0];
+ } else {
+ List<ArrowBuf> list = new ArrayList<>();
+ list.add(offsetBuffer);
+ list.add(validityBuffer);
+ list.addAll(Arrays.asList(vector.getBuffers(false)));
+ buffers = list.toArray(new ArrowBuf[list.size()]);
+ }
+ if (clear) {
+ for (ArrowBuf buffer : buffers) {
+ buffer.getReferenceManager().retain();
+ }
+ clear();
+ }
+ return buffers;
+ }
+
+ @Override
+ public UnionVector promoteToUnion() {
+ UnionVector vector = new UnionVector("$data$", allocator, /* field type*/ null, callBack);
+ replaceDataVector(vector);
+ invalidateReader();
+ if (callBack != null) {
+ callBack.doWork();
+ }
+ return vector;
+ }
+
+ protected void invalidateReader() {
+ reader = null;
+ }
+
+ /**
+ * Get the element in the list vector at a particular index.
+ * @param index position of the element
+ * @return Object at given position
+ */
+ @Override
+ public List<?> getObject(int index) {
+ if (isSet(index) == 0) {
+ return null;
+ }
+ final List<Object> vals = new JsonStringArrayList<>();
+ final int start = offsetBuffer.getInt(index * OFFSET_WIDTH);
+ final int end = offsetBuffer.getInt((index + 1) * OFFSET_WIDTH);
+ final ValueVector vv = getDataVector();
+ for (int i = start; i < end; i++) {
+ vals.add(vv.getObject(i));
+ }
+
+ return vals;
+ }
+
+ /**
+ * Check if element at given index is null.
+ *
+ * @param index position of element
+ * @return true if element at given index is null, false otherwise
+ */
+ @Override
+ public boolean isNull(int index) {
+ return (isSet(index) == 0);
+ }
+
+ /**
+ * Check if element at given index is empty list.
+ * @param index position of element
+ * @return true if element at given index is empty list or NULL, false otherwise
+ */
+ @Override
+ public boolean isEmpty(int index) {
+ if (isNull(index)) {
+ return true;
+ } else {
+ final int start = offsetBuffer.getInt(index * OFFSET_WIDTH);
+ final int end = offsetBuffer.getInt((index + 1) * OFFSET_WIDTH);
+ return start == end;
+ }
+ }
+
+ /**
+ * Same as {@link #isNull(int)}.
+ *
+ * @param index position of element
+ * @return 1 if element at given index is not null, 0 otherwise
+ */
+ public int isSet(int index) {
+ final int byteIndex = index >> 3;
+ final byte b = validityBuffer.getByte(byteIndex);
+ final int bitIndex = index & 7;
+ return (b >> bitIndex) & 0x01;
+ }
+
+ /**
+ * Get the number of elements that are null in the vector.
+ *
+ * @return the number of null elements.
+ */
+ @Override
+ public int getNullCount() {
+ return BitVectorHelper.getNullCount(validityBuffer, valueCount);
+ }
+
+ /**
+ * Get the current value capacity for the vector.
+ * @return number of elements that vector can hold.
+ */
+ @Override
+ public int getValueCapacity() {
+ return getValidityAndOffsetValueCapacity();
+ }
+
+ private int getValidityAndOffsetValueCapacity() {
+ final int offsetValueCapacity = Math.max(getOffsetBufferValueCapacity() - 1, 0);
+ return Math.min(offsetValueCapacity, getValidityBufferValueCapacity());
+ }
+
+ private int getValidityBufferValueCapacity() {
+ return capAtMaxInt(validityBuffer.capacity() * 8);
+ }
+
+ /**
+ * Sets the list at index to be not-null. Reallocates validity buffer if index
+ * is larger than current capacity.
+ */
+ public void setNotNull(int index) {
+ while (index >= getValidityAndOffsetValueCapacity()) {
+ reallocValidityAndOffsetBuffers();
+ }
+ BitVectorHelper.setBit(validityBuffer, index);
+ lastSet = index;
+ }
+
+ /**
+ * Sets list at index to be null.
+ * @param index position in vector
+ */
+ public void setNull(int index) {
+ while (index >= getValidityAndOffsetValueCapacity()) {
+ reallocValidityAndOffsetBuffers();
+ }
+ if (lastSet >= index) {
+ lastSet = index - 1;
+ }
+ for (int i = lastSet + 1; i <= index; i++) {
+ final int currentOffset = offsetBuffer.getInt(i * OFFSET_WIDTH);
+ offsetBuffer.setInt((i + 1) * OFFSET_WIDTH, currentOffset);
+ }
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+
+ /**
+ * Start a new value in the list vector.
+ *
+ * @param index index of the value to start
+ */
+ @Override
+ public int startNewValue(int index) {
+ while (index >= getValidityAndOffsetValueCapacity()) {
+ reallocValidityAndOffsetBuffers();
+ }
+ if (lastSet >= index) {
+ lastSet = index - 1;
+ }
+ for (int i = lastSet + 1; i <= index; i++) {
+ final int currentOffset = offsetBuffer.getInt(i * OFFSET_WIDTH);
+ offsetBuffer.setInt((i + 1) * OFFSET_WIDTH, currentOffset);
+ }
+ BitVectorHelper.setBit(validityBuffer, index);
+ lastSet = index;
+ return offsetBuffer.getInt((lastSet + 1) * OFFSET_WIDTH);
+ }
+
+ /**
+ * End the current value.
+ *
+ * @param index index of the value to end
+ * @param size number of elements in the list that was written
+ */
+ public void endValue(int index, int size) {
+ final int currentOffset = offsetBuffer.getInt((index + 1) * OFFSET_WIDTH);
+ offsetBuffer.setInt((index + 1) * OFFSET_WIDTH, currentOffset + size);
+ }
+
+ /**
+ * Sets the value count for the vector.
+ *
+ * @param valueCount value count
+ */
+ @Override
+ public void setValueCount(int valueCount) {
+ this.valueCount = valueCount;
+ if (valueCount > 0) {
+ while (valueCount > getValidityAndOffsetValueCapacity()) {
+ /* check if validity and offset buffers need to be re-allocated */
+ reallocValidityAndOffsetBuffers();
+ }
+ for (int i = lastSet + 1; i < valueCount; i++) {
+ /* fill the holes with offsets */
+ final int currentOffset = offsetBuffer.getInt(i * OFFSET_WIDTH);
+ offsetBuffer.setInt((i + 1) * OFFSET_WIDTH, currentOffset);
+ }
+ }
+ /* valueCount for the data vector is the current end offset */
+ final int childValueCount = (valueCount == 0) ? 0 :
+ offsetBuffer.getInt((lastSet + 1) * OFFSET_WIDTH);
+ /* set the value count of data vector and this will take care of
+ * checking whether data buffer needs to be reallocated.
+ */
+ vector.setValueCount(childValueCount);
+ }
+
+ public void setLastSet(int value) {
+ lastSet = value;
+ }
+
+ public int getLastSet() {
+ return lastSet;
+ }
+
+ @Override
+ public int getElementStartIndex(int index) {
+ return offsetBuffer.getInt(index * OFFSET_WIDTH);
+ }
+
+ @Override
+ public int getElementEndIndex(int index) {
+ return offsetBuffer.getInt((index + 1) * OFFSET_WIDTH);
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java
new file mode 100644
index 000000000..d4275e6fe
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/MapVector.java
@@ -0,0 +1,122 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.complex;
+
+import static org.apache.arrow.util.Preconditions.checkArgument;
+
+import java.util.List;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.AddOrGetResult;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.complex.impl.UnionMapReader;
+import org.apache.arrow.vector.complex.impl.UnionMapWriter;
+import org.apache.arrow.vector.types.Types;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.ArrowType.Map;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.CallBack;
+
+/**
+ * A MapVector is used to store entries of key/value pairs. It is a container vector that is
+ * composed of a list of struct values with "key" and "value" fields. The MapVector is nullable,
+ * but if a map is set at a given index, there must be an entry. In other words, the StructVector
+ * data is non-nullable. Also for a given entry, the "key" is non-nullable, however the "value" can
+ * be null.
+ */
+public class MapVector extends ListVector {
+
+ public static final String KEY_NAME = "key";
+ public static final String VALUE_NAME = "value";
+ public static final String DATA_VECTOR_NAME = "entries";
+
+ /**
+ * Construct an empty MapVector with no data. Child vectors must be added subsequently.
+ *
+ * @param name The name of the vector.
+ * @param allocator The allocator used for allocating/reallocating buffers.
+ * @param keysSorted True if the map keys have been pre-sorted.
+ * @return a new instance of MapVector.
+ */
+ public static MapVector empty(String name, BufferAllocator allocator, boolean keysSorted) {
+ return new MapVector(name, allocator, FieldType.nullable(new Map(keysSorted)), null);
+ }
+
+ /**
+ * Construct a MapVector instance.
+ *
+ * @param name The name of the vector.
+ * @param allocator The allocator used for allocating/reallocating buffers.
+ * @param fieldType The type definition of the MapVector.
+ * @param callBack A schema change callback.
+ */
+ public MapVector(String name, BufferAllocator allocator, FieldType fieldType, CallBack callBack) {
+ super(name, allocator, fieldType, callBack);
+ defaultDataVectorName = DATA_VECTOR_NAME;
+ }
+
+ /**
+ * Initialize child vectors of the map from the given list of fields.
+ *
+ * @param children List of fields that will be children of this MapVector.
+ */
+ @Override
+ public void initializeChildrenFromFields(List<Field> children) {
+ checkArgument(children.size() == 1, "Maps have one List child. Found: %s", children);
+
+ Field structField = children.get(0);
+ MinorType minorType = Types.getMinorTypeForArrowType(structField.getType());
+ checkArgument(minorType == MinorType.STRUCT && !structField.isNullable(),
+ "Map data should be a non-nullable struct type");
+ checkArgument(structField.getChildren().size() == 2,
+ "Map data should be a struct with 2 children. Found: %s", children);
+
+ Field keyField = structField.getChildren().get(0);
+ checkArgument(!keyField.isNullable(), "Map data key type should be a non-nullable");
+
+ AddOrGetResult<FieldVector> addOrGetVector = addOrGetVector(structField.getFieldType());
+ checkArgument(addOrGetVector.isCreated(), "Child vector already existed: %s", addOrGetVector.getVector());
+
+ addOrGetVector.getVector().initializeChildrenFromFields(structField.getChildren());
+ }
+
+ /**
+ * Get the writer for this MapVector instance.
+ */
+ @Override
+ public UnionMapWriter getWriter() {
+ return new UnionMapWriter(this);
+ }
+
+ /**
+ * Get the reader for this MapVector instance.
+ */
+ @Override
+ public UnionMapReader getReader() {
+ if (reader == null) {
+ reader = new UnionMapReader(this);
+ }
+ return (UnionMapReader) reader;
+ }
+
+ @Override
+ public MinorType getMinorType() {
+ return MinorType.MAP;
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/NonNullableStructVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/NonNullableStructVector.java
new file mode 100644
index 000000000..4da266812
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/NonNullableStructVector.java
@@ -0,0 +1,440 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.complex;
+
+import static org.apache.arrow.util.Preconditions.checkNotNull;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.util.ByteFunctionHelpers;
+import org.apache.arrow.memory.util.hash.ArrowBufHasher;
+import org.apache.arrow.util.Preconditions;
+import org.apache.arrow.vector.DensityAwareVector;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.compare.VectorVisitor;
+import org.apache.arrow.vector.complex.impl.SingleStructReaderImpl;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.holders.ComplexHolder;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.CallBack;
+import org.apache.arrow.vector.util.JsonStringHashMap;
+import org.apache.arrow.vector.util.TransferPair;
+
+/**
+ * A struct vector that has no null values (and no validity buffer).
+ * Child Vectors are handled in {@link AbstractStructVector}.
+ */
+public class NonNullableStructVector extends AbstractStructVector {
+
+ public static NonNullableStructVector empty(String name, BufferAllocator allocator) {
+ FieldType fieldType = new FieldType(false, ArrowType.Struct.INSTANCE, null, null);
+ return new NonNullableStructVector(name, allocator, fieldType, null, ConflictPolicy.CONFLICT_REPLACE, false);
+ }
+
+ public static NonNullableStructVector emptyWithDuplicates(String name, BufferAllocator allocator) {
+ FieldType fieldType = new FieldType(false, ArrowType.Struct.INSTANCE, null, null);
+ return new NonNullableStructVector(name, allocator, fieldType, null, ConflictPolicy.CONFLICT_APPEND, true);
+ }
+
+ private final SingleStructReaderImpl reader = new SingleStructReaderImpl(this);
+ protected final FieldType fieldType;
+ public int valueCount;
+
+ /**
+ * Constructs a new instance.
+ *
+ * @param name The name of the instance.
+ * @param allocator The allocator to use to allocating/reallocating buffers.
+ * @param fieldType The type of this list.
+ */
+ public NonNullableStructVector(String name,
+ BufferAllocator allocator,
+ FieldType fieldType,
+ CallBack callBack) {
+ super(name,
+ allocator,
+ callBack,
+ null,
+ true);
+ this.fieldType = checkNotNull(fieldType);
+ this.valueCount = 0;
+ }
+
+ /**
+ * Constructs a new instance.
+ *
+ * @param name The name of the instance.
+ * @param allocator The allocator to use to allocating/reallocating buffers.
+ * @param fieldType The type of this list.
+ * @param callBack A schema change callback.
+ * @param conflictPolicy How to handle duplicate field names in the struct.
+ */
+ public NonNullableStructVector(String name,
+ BufferAllocator allocator,
+ FieldType fieldType,
+ CallBack callBack,
+ ConflictPolicy conflictPolicy,
+ boolean allowConflictPolicyChanges) {
+ super(name, allocator, callBack, conflictPolicy, allowConflictPolicyChanges);
+ this.fieldType = checkNotNull(fieldType);
+ this.valueCount = 0;
+ }
+
+ @Override
+ public FieldReader getReader() {
+ return reader;
+ }
+
+ private transient StructTransferPair ephPair;
+
+ /**
+ * Copies the element at fromIndex in the provided vector to thisIndex. Reallocates buffers
+ * if thisIndex is larger then current capacity.
+ */
+ @Override
+ public void copyFrom(int fromIndex, int thisIndex, ValueVector from) {
+ Preconditions.checkArgument(this.getMinorType() == from.getMinorType());
+ if (ephPair == null || ephPair.from != from) {
+ ephPair = (StructTransferPair) from.makeTransferPair(this);
+ }
+ ephPair.copyValueSafe(fromIndex, thisIndex);
+ }
+
+ @Override
+ public void copyFromSafe(int fromIndex, int thisIndex, ValueVector from) {
+ copyFrom(fromIndex, thisIndex, from);
+ }
+
+ @Override
+ protected boolean supportsDirectRead() {
+ return true;
+ }
+
+ public Iterator<String> fieldNameIterator() {
+ return getChildFieldNames().iterator();
+ }
+
+ @Override
+ public void setInitialCapacity(int numRecords) {
+ for (final ValueVector v : this) {
+ v.setInitialCapacity(numRecords);
+ }
+ }
+
+ @Override
+ public void setInitialCapacity(int valueCount, double density) {
+ for (final ValueVector vector : this) {
+ if (vector instanceof DensityAwareVector) {
+ ((DensityAwareVector) vector).setInitialCapacity(valueCount, density);
+ } else {
+ vector.setInitialCapacity(valueCount);
+ }
+ }
+ }
+
+ @Override
+ public int getBufferSize() {
+ if (valueCount == 0 || size() == 0) {
+ return 0;
+ }
+ long buffer = 0;
+ for (final ValueVector v : this) {
+ buffer += v.getBufferSize();
+ }
+
+ return (int) buffer;
+ }
+
+ @Override
+ public int getBufferSizeFor(final int valueCount) {
+ if (valueCount == 0) {
+ return 0;
+ }
+
+ long bufferSize = 0;
+ for (final ValueVector v : this) {
+ bufferSize += v.getBufferSizeFor(valueCount);
+ }
+
+ return (int) bufferSize;
+ }
+
+ @Override
+ public ArrowBuf getValidityBuffer() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public ArrowBuf getDataBuffer() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public ArrowBuf getOffsetBuffer() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public TransferPair getTransferPair(BufferAllocator allocator) {
+ return getTransferPair(name, allocator, null);
+ }
+
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator, CallBack callBack) {
+ return new StructTransferPair(this, new NonNullableStructVector(name,
+ allocator,
+ fieldType,
+ callBack,
+ getConflictPolicy(),
+ allowConflictPolicyChanges), false);
+ }
+
+ @Override
+ public TransferPair makeTransferPair(ValueVector to) {
+ return new StructTransferPair(this, (NonNullableStructVector) to);
+ }
+
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
+ return new StructTransferPair(this, new NonNullableStructVector(ref,
+ allocator,
+ fieldType,
+ callBack,
+ getConflictPolicy(),
+ allowConflictPolicyChanges), false);
+ }
+
+ /**
+ * {@link TransferPair} for this this class.
+ */
+ protected static class StructTransferPair implements TransferPair {
+ private final TransferPair[] pairs;
+ private final NonNullableStructVector from;
+ private final NonNullableStructVector to;
+
+ public StructTransferPair(NonNullableStructVector from, NonNullableStructVector to) {
+ this(from, to, true);
+ }
+
+ protected StructTransferPair(NonNullableStructVector from, NonNullableStructVector to, boolean allocate) {
+ this.from = from;
+ this.to = to;
+ this.pairs = new TransferPair[from.size()];
+ this.to.ephPair = null;
+
+ int i = 0;
+ FieldVector vector;
+ for (String child : from.getChildFieldNames()) {
+ int preSize = to.size();
+ vector = from.getChild(child);
+ if (vector == null) {
+ continue;
+ }
+ //DRILL-1872: we add the child fields for the vector, looking up the field by name. For a map vector,
+ // the child fields may be nested fields of the top level child. For example if the structure
+ // of a child field is oa.oab.oabc then we add oa, then add oab to oa then oabc to oab.
+ // But the children member of a Materialized field is a HashSet. If the fields are added in the
+ // children HashSet, and the hashCode of the Materialized field includes the hash code of the
+ // children, the hashCode value of oa changes *after* the field has been added to the HashSet.
+ // (This is similar to what happens in ScanBatch where the children cannot be added till they are
+ // read). To take care of this, we ensure that the hashCode of the MaterializedField does not
+ // include the hashCode of the children but is based only on MaterializedField$key.
+ final FieldVector newVector = to.addOrGet(child, vector.getField().getFieldType(), vector.getClass());
+ if (allocate && to.size() != preSize) {
+ newVector.allocateNew();
+ }
+ pairs[i++] = vector.makeTransferPair(newVector);
+ }
+ }
+
+ @Override
+ public void transfer() {
+ for (final TransferPair p : pairs) {
+ p.transfer();
+ }
+ to.valueCount = from.valueCount;
+ from.clear();
+ }
+
+ @Override
+ public ValueVector getTo() {
+ return to;
+ }
+
+ @Override
+ public void copyValueSafe(int from, int to) {
+ for (TransferPair p : pairs) {
+ p.copyValueSafe(from, to);
+ }
+ }
+
+ @Override
+ public void splitAndTransfer(int startIndex, int length) {
+ for (TransferPair p : pairs) {
+ p.splitAndTransfer(startIndex, length);
+ }
+ to.setValueCount(length);
+ }
+ }
+
+ @Override
+ public int getValueCapacity() {
+ if (size() == 0) {
+ return 0;
+ }
+
+ return getChildren().stream()
+ .mapToInt(child -> child.getValueCapacity())
+ .min()
+ .getAsInt();
+ }
+
+ @Override
+ public Map<String, ?> getObject(int index) {
+ Map<String, Object> vv = new JsonStringHashMap<>();
+ for (String child : getChildFieldNames()) {
+ ValueVector v = getChild(child);
+ if (v != null && index < v.getValueCount()) {
+ Object value = v.getObject(index);
+ if (value != null) {
+ vv.put(child, value);
+ }
+ }
+ }
+ return vv;
+ }
+
+ @Override
+ public int hashCode(int index) {
+ return hashCode(index, null);
+ }
+
+ @Override
+ public int hashCode(int index, ArrowBufHasher hasher) {
+ int hash = 0;
+ for (FieldVector v : getChildren()) {
+ if (index < v.getValueCount()) {
+ hash = ByteFunctionHelpers.combineHash(hash, v.hashCode(index, hasher));
+ }
+ }
+ return hash;
+ }
+
+ @Override
+ public <OUT, IN> OUT accept(VectorVisitor<OUT, IN> visitor, IN value) {
+ return visitor.visit(this, value);
+ }
+
+ @Override
+ public boolean isNull(int index) {
+ return false;
+ }
+
+ @Override
+ public int getNullCount() {
+ return 0;
+ }
+
+ public void get(int index, ComplexHolder holder) {
+ reader.setPosition(index);
+ holder.reader = reader;
+ }
+
+ @Override
+ public int getValueCount() {
+ return valueCount;
+ }
+
+ public ValueVector getVectorById(int id) {
+ return getChildByOrdinal(id);
+ }
+
+ @Override
+ public void setValueCount(int valueCount) {
+ for (final ValueVector v : getChildren()) {
+ v.setValueCount(valueCount);
+ }
+ NonNullableStructVector.this.valueCount = valueCount;
+ }
+
+ @Override
+ public void clear() {
+ for (final ValueVector v : getChildren()) {
+ v.clear();
+ }
+ valueCount = 0;
+ }
+
+ @Override
+ public void reset() {
+ for (final ValueVector v : getChildren()) {
+ v.reset();
+ }
+ valueCount = 0;
+ }
+
+ @Override
+ public Field getField() {
+ List<Field> children = new ArrayList<>();
+ for (ValueVector child : getChildren()) {
+ children.add(child.getField());
+ }
+ return new Field(name, fieldType, children);
+ }
+
+ @Override
+ public MinorType getMinorType() {
+ return MinorType.STRUCT;
+ }
+
+ @Override
+ public void close() {
+ final Collection<FieldVector> vectors = getChildren();
+ for (final FieldVector v : vectors) {
+ v.close();
+ }
+ vectors.clear();
+
+ valueCount = 0;
+
+ super.close();
+ }
+
+ /** Initializes the struct's members from the given Fields. */
+ public void initializeChildrenFromFields(List<Field> children) {
+ for (Field field : children) {
+ FieldVector vector = (FieldVector) this.add(field.getName(), field.getFieldType());
+ vector.initializeChildrenFromFields(field.getChildren());
+ }
+ }
+
+ public List<FieldVector> getChildrenFromFields() {
+ return getChildren();
+ }
+
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/Positionable.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/Positionable.java
new file mode 100644
index 000000000..dda495408
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/Positionable.java
@@ -0,0 +1,29 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.complex;
+
+/**
+ * Get and set position in a particular data structure.
+ *
+ */
+@SuppressWarnings("unused") // Used in when instantiating freemarker templates.
+public interface Positionable {
+ int getPosition();
+
+ void setPosition(int index);
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/PromotableVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/PromotableVector.java
new file mode 100644
index 000000000..d4dd94acb
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/PromotableVector.java
@@ -0,0 +1,32 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.complex;
+
+import org.apache.arrow.vector.AddOrGetResult;
+import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.types.pojo.FieldType;
+
+/**
+ * Vector that can store multiple {@linkplain FieldType} vectors as children.
+ */
+public interface PromotableVector {
+
+ <T extends ValueVector> AddOrGetResult<T> addOrGetVector(FieldType type);
+
+ UnionVector promoteToUnion();
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/RepeatedFixedWidthVectorLike.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/RepeatedFixedWidthVectorLike.java
new file mode 100644
index 000000000..e754f6913
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/RepeatedFixedWidthVectorLike.java
@@ -0,0 +1,32 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.complex;
+
+/**
+ * A {@link org.apache.arrow.vector.ValueVector} mix-in that can be used in conjunction with
+ * {@link RepeatedValueVector} subtypes.
+ */
+public interface RepeatedFixedWidthVectorLike {
+ /**
+ * Allocate a new memory space for this vector. Must be called prior to using the ValueVector.
+ *
+ * @param valueCount Number of separate repeating groupings.
+ * @param innerValueCount Number of supported values in the vector.
+ */
+ void allocateNew(int valueCount, int innerValueCount);
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/RepeatedValueVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/RepeatedValueVector.java
new file mode 100644
index 000000000..1cae881dd
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/RepeatedValueVector.java
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.complex;
+
+import org.apache.arrow.vector.DensityAwareVector;
+import org.apache.arrow.vector.UInt4Vector;
+import org.apache.arrow.vector.ValueVector;
+
+/**
+ * An abstraction representing repeated value vectors.
+ *
+ * <p>A repeated vector contains values that may either be flat or nested. A value consists of zero or more
+ * cells(inner values). Current design maintains data and offsets vectors. Each cell is stored in the data vector.
+ * Repeated vector uses the offset vector to determine the sequence of cells pertaining to an individual value.
+ */
+public interface RepeatedValueVector extends ValueVector, DensityAwareVector {
+
+ int DEFAULT_REPEAT_PER_RECORD = 5;
+
+ /**
+ * Get the offset vector.
+ * @deprecated This API will be removed, as the current implementations no longer hold inner offset vectors.
+ *
+ * @return the underlying offset vector or null if none exists.
+ */
+ @Deprecated
+ UInt4Vector getOffsetVector();
+
+ /**
+ * Get the data vector.
+ * @return the underlying data vector or null if none exists.
+ */
+ ValueVector getDataVector();
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/RepeatedVariableWidthVectorLike.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/RepeatedVariableWidthVectorLike.java
new file mode 100644
index 000000000..5f5324138
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/RepeatedVariableWidthVectorLike.java
@@ -0,0 +1,40 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.complex;
+
+/**
+ * A {@link org.apache.arrow.vector.ValueVector} mix-in that can be used in conjunction with
+ * variable {@link RepeatedValueVector} subtypes (e.g. Strings, Lists, etc).
+ */
+public interface RepeatedVariableWidthVectorLike {
+ /**
+ * Allocate a new memory space for this vector. Must be called prior to using the ValueVector.
+ *
+ * @param totalBytes Desired size of the underlying data buffer.
+ * @param parentValueCount Number of separate repeating groupings.
+ * @param childValueCount Number of supported values in the vector.
+ */
+ void allocateNew(int totalBytes, int parentValueCount, int childValueCount);
+
+ /**
+ * Provide the maximum amount of variable width bytes that can be stored int his vector.
+ *
+ * @return the byte capacity
+ */
+ int getByteCapacity();
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/StateTool.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/StateTool.java
new file mode 100644
index 000000000..0098f6836
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/StateTool.java
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.complex;
+
+import java.util.Arrays;
+
+/**
+ * Utility methods for state machines based on enums.
+ */
+public class StateTool {
+ private StateTool() {}
+
+ static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(StateTool.class);
+
+ /**
+ * Verifies <code>currentState</code> is in one of <code>expectedStates</code>,
+ * throws an IllegalArgumentException if it isn't.
+ */
+ public static <T extends Enum<?>> void check(T currentState, T... expectedStates) {
+ for (T s : expectedStates) {
+ if (s == currentState) {
+ return;
+ }
+ }
+ throw new IllegalArgumentException(String.format("Expected to be in one of these states %s but was actually in " +
+ "state %s", Arrays.toString(expectedStates), currentState));
+ }
+
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/StructVector.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/StructVector.java
new file mode 100644
index 000000000..2dabc6e01
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/StructVector.java
@@ -0,0 +1,608 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.complex;
+
+import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt;
+import static org.apache.arrow.util.Preconditions.checkNotNull;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.util.ArrowBufPointer;
+import org.apache.arrow.memory.util.CommonUtil;
+import org.apache.arrow.memory.util.hash.ArrowBufHasher;
+import org.apache.arrow.util.Preconditions;
+import org.apache.arrow.vector.BaseValueVector;
+import org.apache.arrow.vector.BitVectorHelper;
+import org.apache.arrow.vector.BufferBacked;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.complex.impl.NullableStructReaderImpl;
+import org.apache.arrow.vector.complex.impl.NullableStructWriter;
+import org.apache.arrow.vector.holders.ComplexHolder;
+import org.apache.arrow.vector.ipc.message.ArrowFieldNode;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.ArrowType.Struct;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.CallBack;
+import org.apache.arrow.vector.util.OversizedAllocationException;
+import org.apache.arrow.vector.util.TransferPair;
+
+/**
+ * A Struct vector consists of nullability/validity buffer and children vectors
+ * that make up the struct's fields. The children vectors are handled by the
+ * parent class.
+ */
+public class StructVector extends NonNullableStructVector implements FieldVector {
+
+ public static StructVector empty(String name, BufferAllocator allocator) {
+ FieldType fieldType = FieldType.nullable(Struct.INSTANCE);
+ return new StructVector(name, allocator, fieldType, null, ConflictPolicy.CONFLICT_REPLACE, false);
+ }
+
+ public static StructVector emptyWithDuplicates(String name, BufferAllocator allocator) {
+ FieldType fieldType = new FieldType(false, ArrowType.Struct.INSTANCE, null, null);
+ return new StructVector(name, allocator, fieldType, null, ConflictPolicy.CONFLICT_APPEND, true);
+ }
+
+ private final NullableStructReaderImpl reader = new NullableStructReaderImpl(this);
+ private final NullableStructWriter writer = new NullableStructWriter(this);
+
+ protected ArrowBuf validityBuffer;
+ private int validityAllocationSizeInBytes;
+
+ /**
+ * Constructs a new instance.
+ *
+ * @param name The name of the instance.
+ * @param allocator The allocator to use to allocating/reallocating buffers.
+ * @param fieldType The type of this list.
+ * @param callBack A schema change callback.
+ */
+ public StructVector(String name,
+ BufferAllocator allocator,
+ FieldType fieldType,
+ CallBack callBack) {
+ super(name,
+ checkNotNull(allocator),
+ fieldType,
+ callBack);
+ this.validityBuffer = allocator.getEmpty();
+ this.validityAllocationSizeInBytes =
+ BitVectorHelper.getValidityBufferSize(BaseValueVector.INITIAL_VALUE_ALLOCATION);
+ }
+
+ /**
+ * Constructs a new instance.
+ *
+ * @param name The name of the instance.
+ * @param allocator The allocator to use to allocating/reallocating buffers.
+ * @param fieldType The type of this list.
+ * @param callBack A schema change callback.
+ * @param conflictPolicy policy to determine how duplicate names are handled.
+ * @param allowConflictPolicyChanges wether duplicate names are allowed at all.
+ */
+ public StructVector(String name,
+ BufferAllocator allocator,
+ FieldType fieldType,
+ CallBack callBack,
+ ConflictPolicy conflictPolicy,
+ boolean allowConflictPolicyChanges) {
+ super(name, checkNotNull(allocator), fieldType, callBack, conflictPolicy, allowConflictPolicyChanges);
+ this.validityBuffer = allocator.getEmpty();
+ this.validityAllocationSizeInBytes =
+ BitVectorHelper.getValidityBufferSize(BaseValueVector.INITIAL_VALUE_ALLOCATION);
+ }
+
+ @Override
+ public void loadFieldBuffers(ArrowFieldNode fieldNode, List<ArrowBuf> ownBuffers) {
+ if (ownBuffers.size() != 1) {
+ throw new IllegalArgumentException("Illegal buffer count, expected " + 1 + ", got: " + ownBuffers.size());
+ }
+
+ ArrowBuf bitBuffer = ownBuffers.get(0);
+
+ validityBuffer.getReferenceManager().release();
+ validityBuffer = BitVectorHelper.loadValidityBuffer(fieldNode, bitBuffer, allocator);
+ valueCount = fieldNode.getLength();
+ validityAllocationSizeInBytes = checkedCastToInt(validityBuffer.capacity());
+ }
+
+ @Override
+ public List<ArrowBuf> getFieldBuffers() {
+ List<ArrowBuf> result = new ArrayList<>(1);
+ setReaderAndWriterIndex();
+ result.add(validityBuffer);
+
+ return result;
+ }
+
+ private void setReaderAndWriterIndex() {
+ validityBuffer.readerIndex(0);
+ validityBuffer.writerIndex(BitVectorHelper.getValidityBufferSize(valueCount));
+ }
+
+ /**
+ * Get the inner vectors.
+ *
+ * @deprecated This API will be removed as the current implementations no longer support inner vectors.
+ *
+ * @return the inner vectors for this field as defined by the TypeLayout
+ */
+ @Deprecated
+ @Override
+ public List<BufferBacked> getFieldInnerVectors() {
+ throw new UnsupportedOperationException("There are no inner vectors. Use getFieldBuffers");
+ }
+
+ @Override
+ public NullableStructReaderImpl getReader() {
+ return reader;
+ }
+
+ public NullableStructWriter getWriter() {
+ return writer;
+ }
+
+ @Override
+ public TransferPair getTransferPair(BufferAllocator allocator) {
+ return new NullableStructTransferPair(this, new StructVector(name,
+ allocator,
+ fieldType,
+ null,
+ getConflictPolicy(),
+ allowConflictPolicyChanges), false);
+ }
+
+ @Override
+ public TransferPair makeTransferPair(ValueVector to) {
+ return new NullableStructTransferPair(this, (StructVector) to, false);
+ }
+
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
+ return new NullableStructTransferPair(this, new StructVector(ref,
+ allocator,
+ fieldType,
+ null,
+ getConflictPolicy(),
+ allowConflictPolicyChanges), false);
+ }
+
+ @Override
+ public TransferPair getTransferPair(String ref, BufferAllocator allocator, CallBack callBack) {
+ return new NullableStructTransferPair(this, new StructVector(ref,
+ allocator,
+ fieldType,
+ callBack,
+ getConflictPolicy(),
+ allowConflictPolicyChanges), false);
+ }
+
+ /**
+ * {@link TransferPair} for this (nullable) {@link StructVector}.
+ */
+ protected class NullableStructTransferPair extends StructTransferPair {
+
+ private StructVector target;
+
+ protected NullableStructTransferPair(StructVector from, StructVector to, boolean allocate) {
+ super(from, to, allocate);
+ this.target = to;
+ }
+
+ @Override
+ public void transfer() {
+ target.clear();
+ target.validityBuffer = BaseValueVector.transferBuffer(validityBuffer, target.allocator);
+ super.transfer();
+ clear();
+ }
+
+ @Override
+ public void copyValueSafe(int fromIndex, int toIndex) {
+ while (toIndex >= target.getValidityBufferValueCapacity()) {
+ target.reallocValidityBuffer();
+ }
+ BitVectorHelper.setValidityBit(target.validityBuffer, toIndex, isSet(fromIndex));
+ super.copyValueSafe(fromIndex, toIndex);
+ }
+
+ @Override
+ public void splitAndTransfer(int startIndex, int length) {
+ Preconditions.checkArgument(startIndex >= 0 && length >= 0 && startIndex + length <= valueCount,
+ "Invalid parameters startIndex: %s, length: %s for valueCount: %s", startIndex, length, valueCount);
+ target.clear();
+ splitAndTransferValidityBuffer(startIndex, length, target);
+ super.splitAndTransfer(startIndex, length);
+ }
+ }
+
+ /*
+ * transfer the validity.
+ */
+ private void splitAndTransferValidityBuffer(int startIndex, int length, StructVector target) {
+ int firstByteSource = BitVectorHelper.byteIndex(startIndex);
+ int lastByteSource = BitVectorHelper.byteIndex(valueCount - 1);
+ int byteSizeTarget = BitVectorHelper.getValidityBufferSize(length);
+ int offset = startIndex % 8;
+
+ if (length > 0) {
+ if (offset == 0) {
+ // slice
+ if (target.validityBuffer != null) {
+ target.validityBuffer.getReferenceManager().release();
+ }
+ target.validityBuffer = validityBuffer.slice(firstByteSource, byteSizeTarget);
+ target.validityBuffer.getReferenceManager().retain(1);
+ } else {
+ /* Copy data
+ * When the first bit starts from the middle of a byte (offset != 0),
+ * copy data from src BitVector.
+ * Each byte in the target is composed by a part in i-th byte,
+ * another part in (i+1)-th byte.
+ */
+ target.allocateValidityBuffer(byteSizeTarget);
+
+ for (int i = 0; i < byteSizeTarget - 1; i++) {
+ byte b1 = BitVectorHelper.getBitsFromCurrentByte(validityBuffer, firstByteSource + i, offset);
+ byte b2 = BitVectorHelper.getBitsFromNextByte(validityBuffer, firstByteSource + i + 1, offset);
+
+ target.validityBuffer.setByte(i, (b1 + b2));
+ }
+
+ /* Copying the last piece is done in the following manner:
+ * if the source vector has 1 or more bytes remaining, we copy
+ * the last piece as a byte formed by shifting data
+ * from the current byte and the next byte.
+ *
+ * if the source vector has no more bytes remaining
+ * (we are at the last byte), we copy the last piece as a byte
+ * by shifting data from the current byte.
+ */
+ if ((firstByteSource + byteSizeTarget - 1) < lastByteSource) {
+ byte b1 = BitVectorHelper.getBitsFromCurrentByte(validityBuffer,
+ firstByteSource + byteSizeTarget - 1, offset);
+ byte b2 = BitVectorHelper.getBitsFromNextByte(validityBuffer,
+ firstByteSource + byteSizeTarget, offset);
+
+ target.validityBuffer.setByte(byteSizeTarget - 1, b1 + b2);
+ } else {
+ byte b1 = BitVectorHelper.getBitsFromCurrentByte(validityBuffer,
+ firstByteSource + byteSizeTarget - 1, offset);
+ target.validityBuffer.setByte(byteSizeTarget - 1, b1);
+ }
+ }
+ }
+ }
+
+ /**
+ * Get the value capacity of the internal validity buffer.
+ * @return number of elements that validity buffer can hold
+ */
+ private int getValidityBufferValueCapacity() {
+ return checkedCastToInt(validityBuffer.capacity() * 8);
+ }
+
+ /**
+ * Get the current value capacity for the vector.
+ * @return number of elements that vector can hold.
+ */
+ @Override
+ public int getValueCapacity() {
+ return Math.min(getValidityBufferValueCapacity(),
+ super.getValueCapacity());
+ }
+
+ /**
+ * Return the underlying buffers associated with this vector. Note that this doesn't
+ * impact the reference counts for this buffer so it only should be used for in-context
+ * access. Also note that this buffer changes regularly thus
+ * external classes shouldn't hold a reference to it (unless they change it).
+ *
+ * @param clear Whether to clear vector before returning; the buffers will still be refcounted
+ * but the returned array will be the only reference to them
+ * @return The underlying {@link ArrowBuf buffers} that is used by this
+ * vector instance.
+ */
+ @Override
+ public ArrowBuf[] getBuffers(boolean clear) {
+ setReaderAndWriterIndex();
+ final ArrowBuf[] buffers;
+ if (getBufferSize() == 0) {
+ buffers = new ArrowBuf[0];
+ } else {
+ List<ArrowBuf> list = new ArrayList<>();
+ list.add(validityBuffer);
+ list.addAll(Arrays.asList(super.getBuffers(false)));
+ buffers = list.toArray(new ArrowBuf[list.size()]);
+ }
+ if (clear) {
+ for (ArrowBuf buffer : buffers) {
+ buffer.getReferenceManager().retain();
+ }
+ clear();
+ }
+
+ return buffers;
+ }
+
+ /**
+ * Close the vector and release the associated buffers.
+ */
+ @Override
+ public void close() {
+ clearValidityBuffer();
+ super.close();
+ }
+
+ /**
+ * Same as {@link #close()}.
+ */
+ @Override
+ public void clear() {
+ clearValidityBuffer();
+ super.clear();
+ }
+
+ /**
+ * Reset this vector to empty, does not release buffers.
+ */
+ @Override
+ public void reset() {
+ super.reset();
+ validityBuffer.setZero(0, validityBuffer.capacity());
+ }
+
+ /**
+ * Release the validity buffer.
+ */
+ private void clearValidityBuffer() {
+ validityBuffer.getReferenceManager().release();
+ validityBuffer = allocator.getEmpty();
+ }
+
+ /**
+ * Get the size (number of bytes) of underlying buffers used by this vector.
+ *
+ * @return size of underlying buffers.
+ */
+ @Override
+ public int getBufferSize() {
+ if (valueCount == 0) {
+ return 0;
+ }
+ return super.getBufferSize() +
+ BitVectorHelper.getValidityBufferSize(valueCount);
+ }
+
+ /**
+ * Get the potential buffer size for a particular number of records.
+ *
+ * @param valueCount desired number of elements in the vector
+ * @return estimated size of underlying buffers if the vector holds
+ * a given number of elements
+ */
+ @Override
+ public int getBufferSizeFor(final int valueCount) {
+ if (valueCount == 0) {
+ return 0;
+ }
+ return super.getBufferSizeFor(valueCount) +
+ BitVectorHelper.getValidityBufferSize(valueCount);
+ }
+
+ @Override
+ public void setInitialCapacity(int numRecords) {
+ validityAllocationSizeInBytes = BitVectorHelper.getValidityBufferSize(numRecords);
+ super.setInitialCapacity(numRecords);
+ }
+
+ @Override
+ public void setInitialCapacity(int numRecords, double density) {
+ validityAllocationSizeInBytes = BitVectorHelper.getValidityBufferSize(numRecords);
+ super.setInitialCapacity(numRecords, density);
+ }
+
+ @Override
+ public boolean allocateNewSafe() {
+ /* Boolean to keep track if all the memory allocations were successful
+ * Used in the case of composite vectors when we need to allocate multiple
+ * buffers for multiple vectors. If one of the allocations failed we need to
+ * clear all the memory that we allocated
+ */
+ boolean success = false;
+ try {
+ clear();
+ allocateValidityBuffer(validityAllocationSizeInBytes);
+ success = super.allocateNewSafe();
+ } finally {
+ if (!success) {
+ clear();
+ return false;
+ }
+ }
+ return true;
+ }
+
+ private void allocateValidityBuffer(final long size) {
+ final int curSize = (int) size;
+ validityBuffer = allocator.buffer(curSize);
+ validityBuffer.readerIndex(0);
+ validityAllocationSizeInBytes = curSize;
+ validityBuffer.setZero(0, validityBuffer.capacity());
+ }
+
+ @Override
+ public void reAlloc() {
+ /* reallocate the validity buffer */
+ reallocValidityBuffer();
+ super.reAlloc();
+ }
+
+ private void reallocValidityBuffer() {
+ final int currentBufferCapacity = checkedCastToInt(validityBuffer.capacity());
+ long newAllocationSize = currentBufferCapacity * 2;
+ if (newAllocationSize == 0) {
+ if (validityAllocationSizeInBytes > 0) {
+ newAllocationSize = validityAllocationSizeInBytes;
+ } else {
+ newAllocationSize = BitVectorHelper.getValidityBufferSize(BaseValueVector.INITIAL_VALUE_ALLOCATION) * 2;
+ }
+ }
+ newAllocationSize = CommonUtil.nextPowerOfTwo(newAllocationSize);
+ assert newAllocationSize >= 1;
+
+ if (newAllocationSize > BaseValueVector.MAX_ALLOCATION_SIZE) {
+ throw new OversizedAllocationException("Unable to expand the buffer");
+ }
+
+ final ArrowBuf newBuf = allocator.buffer((int) newAllocationSize);
+ newBuf.setBytes(0, validityBuffer, 0, currentBufferCapacity);
+ newBuf.setZero(currentBufferCapacity, newBuf.capacity() - currentBufferCapacity);
+ validityBuffer.getReferenceManager().release(1);
+ validityBuffer = newBuf;
+ validityAllocationSizeInBytes = (int) newAllocationSize;
+ }
+
+ @Override
+ public long getValidityBufferAddress() {
+ return validityBuffer.memoryAddress();
+ }
+
+ @Override
+ public long getDataBufferAddress() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public long getOffsetBufferAddress() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public ArrowBuf getValidityBuffer() {
+ return validityBuffer;
+ }
+
+ @Override
+ public ArrowBuf getDataBuffer() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public ArrowBuf getOffsetBuffer() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public Map<String, ?> getObject(int index) {
+ if (isSet(index) == 0) {
+ return null;
+ } else {
+ return super.getObject(index);
+ }
+ }
+
+ @Override
+ public int hashCode(int index) {
+ return hashCode(index, null);
+ }
+
+ @Override
+ public int hashCode(int index, ArrowBufHasher hasher) {
+ if (isSet(index) == 0) {
+ return ArrowBufPointer.NULL_HASH_CODE;
+ } else {
+ return super.hashCode(index, hasher);
+ }
+ }
+
+ @Override
+ public void get(int index, ComplexHolder holder) {
+ holder.isSet = isSet(index);
+ if (holder.isSet == 0) {
+ holder.reader = null;
+ return;
+ }
+ super.get(index, holder);
+ }
+
+ /**
+ * Return the number of null values in the vector.
+ */
+ public int getNullCount() {
+ return BitVectorHelper.getNullCount(validityBuffer, valueCount);
+ }
+
+ /**
+ * Returns true if the value at the provided index is null.
+ */
+ public boolean isNull(int index) {
+ return isSet(index) == 0;
+ }
+
+ /**
+ * Returns true the value at the given index is set (i.e. not null).
+ */
+ public int isSet(int index) {
+ final int byteIndex = index >> 3;
+ final byte b = validityBuffer.getByte(byteIndex);
+ final int bitIndex = index & 7;
+ return (b >> bitIndex) & 0x01;
+ }
+
+ /**
+ * Marks the value at index as being set. Reallocates the validity buffer
+ * if index is larger than current capacity.
+ */
+ public void setIndexDefined(int index) {
+ while (index >= getValidityBufferValueCapacity()) {
+ /* realloc the inner buffers if needed */
+ reallocValidityBuffer();
+ }
+ BitVectorHelper.setBit(validityBuffer, index);
+ }
+
+ /**
+ * Marks the value at index as null/not set.
+ */
+ public void setNull(int index) {
+ while (index >= getValidityBufferValueCapacity()) {
+ /* realloc the inner buffers if needed */
+ reallocValidityBuffer();
+ }
+ BitVectorHelper.unsetBit(validityBuffer, index);
+ }
+
+ @Override
+ public void setValueCount(int valueCount) {
+ Preconditions.checkArgument(valueCount >= 0);
+ while (valueCount > getValidityBufferValueCapacity()) {
+ /* realloc the inner buffers if needed */
+ reallocValidityBuffer();
+ }
+ super.setValueCount(valueCount);
+ this.valueCount = valueCount;
+ }
+
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/VectorWithOrdinal.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/VectorWithOrdinal.java
new file mode 100644
index 000000000..fa00f4b63
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/VectorWithOrdinal.java
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.complex;
+
+import org.apache.arrow.vector.ValueVector;
+
+/**
+ * Tuple of a {@link ValueVector} and an index into a data structure containing the {@link ValueVector}.
+ * Useful for composite types to determine the index of a child.
+ */
+public class VectorWithOrdinal {
+ public final ValueVector vector;
+ public final int ordinal;
+
+ public VectorWithOrdinal(ValueVector v, int ordinal) {
+ this.vector = v;
+ this.ordinal = ordinal;
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/AbstractBaseReader.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/AbstractBaseReader.java
new file mode 100644
index 000000000..c80fcb89d
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/AbstractBaseReader.java
@@ -0,0 +1,118 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.complex.impl;
+
+import java.util.Iterator;
+
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.complex.writer.BaseWriter.ListWriter;
+import org.apache.arrow.vector.complex.writer.BaseWriter.MapWriter;
+import org.apache.arrow.vector.complex.writer.FieldWriter;
+import org.apache.arrow.vector.holders.DenseUnionHolder;
+import org.apache.arrow.vector.holders.UnionHolder;
+
+/**
+ * Base class providing common functionality for {@link FieldReader} implementations.
+ *
+ * <p>This includes tracking the current index and throwing implementations of optional methods.
+ */
+abstract class AbstractBaseReader implements FieldReader {
+
+ static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(AbstractBaseReader.class);
+
+ private int index;
+
+ public AbstractBaseReader() {
+ super();
+ }
+
+ @Override
+ public int getPosition() {
+ return index;
+ }
+
+ public void setPosition(int index) {
+ this.index = index;
+ }
+
+ protected int idx() {
+ return index;
+ }
+
+ @Override
+ public void reset() {
+ index = 0;
+ }
+
+ @Override
+ public Iterator<String> iterator() {
+ throw new IllegalStateException("The current reader doesn't support reading as a map.");
+ }
+
+ @Override
+ public boolean next() {
+ throw new IllegalStateException("The current reader doesn't support getting next information.");
+ }
+
+ @Override
+ public int size() {
+ throw new IllegalStateException("The current reader doesn't support getting size information.");
+ }
+
+ @Override
+ public void read(UnionHolder holder) {
+ holder.reader = this;
+ holder.isSet = this.isSet() ? 1 : 0;
+ }
+
+ @Override
+ public void read(int index, UnionHolder holder) {
+ throw new IllegalStateException("The current reader doesn't support reading union type");
+ }
+
+ @Override
+ public void copyAsValue(UnionWriter writer) {
+ throw new IllegalStateException("The current reader doesn't support reading union type");
+ }
+
+ @Override
+ public void read(DenseUnionHolder holder) {
+ holder.reader = this;
+ holder.isSet = this.isSet() ? 1 : 0;
+ }
+
+ @Override
+ public void read(int index, DenseUnionHolder holder) {
+ throw new IllegalStateException("The current reader doesn't support reading dense union type");
+ }
+
+ @Override
+ public void copyAsValue(DenseUnionWriter writer) {
+ throw new IllegalStateException("The current reader doesn't support reading dense union type");
+ }
+
+ @Override
+ public void copyAsValue(ListWriter writer) {
+ ComplexCopier.copy(this, (FieldWriter) writer);
+ }
+
+ @Override
+ public void copyAsValue(MapWriter writer) {
+ ComplexCopier.copy(this, (FieldWriter) writer);
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/AbstractBaseWriter.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/AbstractBaseWriter.java
new file mode 100644
index 000000000..cc3c5deed
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/AbstractBaseWriter.java
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.complex.impl;
+
+import org.apache.arrow.vector.complex.writer.FieldWriter;
+
+
+/**
+ * Base class providing common functionality for {@link FieldWriter} implementations.
+ *
+ * <p>Currently this only includes index tracking.
+ */
+abstract class AbstractBaseWriter implements FieldWriter {
+ //private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(AbstractBaseWriter.class);
+
+ private int index;
+
+ @Override
+ public String toString() {
+ return super.toString() + "[index = " + index + "]";
+ }
+
+ int idx() {
+ return index;
+ }
+
+ @Override
+ public int getPosition() {
+ return index;
+ }
+
+ @Override
+ public void setPosition(int index) {
+ this.index = index;
+ }
+
+ @Override
+ public void end() {
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/ComplexWriterImpl.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/ComplexWriterImpl.java
new file mode 100644
index 000000000..13b26bb67
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/ComplexWriterImpl.java
@@ -0,0 +1,227 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.complex.impl;
+
+import org.apache.arrow.util.Preconditions;
+import org.apache.arrow.vector.complex.ListVector;
+import org.apache.arrow.vector.complex.NonNullableStructVector;
+import org.apache.arrow.vector.complex.StateTool;
+import org.apache.arrow.vector.complex.StructVector;
+import org.apache.arrow.vector.complex.writer.BaseWriter.ComplexWriter;
+import org.apache.arrow.vector.types.pojo.Field;
+
+/**
+ * Concrete implementation of {@link ComplexWriter}.
+ */
+public class ComplexWriterImpl extends AbstractFieldWriter implements ComplexWriter {
+
+ private NullableStructWriter structRoot;
+ private UnionListWriter listRoot;
+ private final NonNullableStructVector container;
+
+ Mode mode = Mode.INIT;
+ private final String name;
+ private final boolean unionEnabled;
+ private final NullableStructWriterFactory nullableStructWriterFactory;
+
+ private enum Mode { INIT, STRUCT, LIST }
+
+ /**
+ * Constructs a new instance.
+ *
+ * @param name The name of the writer (for tracking).
+ * @param container A container for the data field to be written.
+ * @param unionEnabled Unused.
+ * @param caseSensitive Whether field names are case sensitive (if false field names will be lowercase.
+ */
+ public ComplexWriterImpl(
+ String name,
+ NonNullableStructVector container,
+ boolean unionEnabled,
+ boolean caseSensitive) {
+ this.name = name;
+ this.container = container;
+ this.unionEnabled = unionEnabled;
+ nullableStructWriterFactory = caseSensitive ?
+ NullableStructWriterFactory.getNullableCaseSensitiveStructWriterFactoryInstance() :
+ NullableStructWriterFactory.getNullableStructWriterFactoryInstance();
+ }
+
+ public ComplexWriterImpl(String name, NonNullableStructVector container, boolean unionEnabled) {
+ this(name, container, unionEnabled, false);
+ }
+
+ public ComplexWriterImpl(String name, NonNullableStructVector container) {
+ this(name, container, false);
+ }
+
+ @Override
+ public Field getField() {
+ return container.getField();
+ }
+
+ @Override
+ public int getValueCapacity() {
+ return container.getValueCapacity();
+ }
+
+ private void check(Mode... modes) {
+ StateTool.check(mode, modes);
+ }
+
+ @Override
+ public void reset() {
+ setPosition(0);
+ }
+
+ @Override
+ public void close() throws Exception {
+ clear();
+ structRoot.close();
+ if (listRoot != null) {
+ listRoot.close();
+ }
+ }
+
+ @Override
+ public void clear() {
+ switch (mode) {
+ case STRUCT:
+ structRoot.clear();
+ break;
+ case LIST:
+ listRoot.clear();
+ break;
+ default:
+ break;
+ }
+ }
+
+ @Override
+ public void setValueCount(int count) {
+ switch (mode) {
+ case STRUCT:
+ structRoot.setValueCount(count);
+ break;
+ case LIST:
+ listRoot.setValueCount(count);
+ break;
+ default:
+ break;
+ }
+ }
+
+ @Override
+ public void setPosition(int index) {
+ super.setPosition(index);
+ switch (mode) {
+ case STRUCT:
+ structRoot.setPosition(index);
+ break;
+ case LIST:
+ listRoot.setPosition(index);
+ break;
+ default:
+ break;
+ }
+ }
+
+ /**
+ * Returns a StructWriter, initializing it necessary from the constructor this instance
+ * was constructed with.
+ */
+ public StructWriter directStruct() {
+ Preconditions.checkArgument(name == null);
+
+ switch (mode) {
+
+ case INIT:
+ structRoot = nullableStructWriterFactory.build((StructVector) container);
+ structRoot.setPosition(idx());
+ mode = Mode.STRUCT;
+ break;
+
+ case STRUCT:
+ break;
+
+ default:
+ check(Mode.INIT, Mode.STRUCT);
+ }
+
+ return structRoot;
+ }
+
+ @Override
+ public StructWriter rootAsStruct() {
+ switch (mode) {
+
+ case INIT:
+ // TODO allow dictionaries in complex types
+ StructVector struct = container.addOrGetStruct(name);
+ structRoot = nullableStructWriterFactory.build(struct);
+ structRoot.setPosition(idx());
+ mode = Mode.STRUCT;
+ break;
+
+ case STRUCT:
+ break;
+
+ default:
+ check(Mode.INIT, Mode.STRUCT);
+ }
+
+ return structRoot;
+ }
+
+ @Override
+ public void allocate() {
+ if (structRoot != null) {
+ structRoot.allocate();
+ } else if (listRoot != null) {
+ listRoot.allocate();
+ }
+ }
+
+ @Override
+ public ListWriter rootAsList() {
+ switch (mode) {
+
+ case INIT:
+ int vectorCount = container.size();
+ // TODO allow dictionaries in complex types
+ ListVector listVector = container.addOrGetList(name);
+ if (container.size() > vectorCount) {
+ listVector.allocateNew();
+ }
+ listRoot = new UnionListWriter(listVector, nullableStructWriterFactory);
+ listRoot.setPosition(idx());
+ mode = Mode.LIST;
+ break;
+
+ case LIST:
+ break;
+
+ default:
+ check(Mode.INIT, Mode.STRUCT);
+ }
+
+ return listRoot;
+ }
+
+
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/NullableStructReaderImpl.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/NullableStructReaderImpl.java
new file mode 100644
index 000000000..5c098f627
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/NullableStructReaderImpl.java
@@ -0,0 +1,59 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.complex.impl;
+
+import org.apache.arrow.vector.complex.NonNullableStructVector;
+import org.apache.arrow.vector.complex.StructVector;
+import org.apache.arrow.vector.complex.writer.BaseWriter.StructWriter;
+import org.apache.arrow.vector.types.pojo.Field;
+
+/**
+ * An {@link org.apache.arrow.vector.complex.reader.FieldReader} for
+ * reading nullable struct vectors.
+ */
+public class NullableStructReaderImpl extends SingleStructReaderImpl {
+
+ private StructVector nullableStructVector;
+
+ public NullableStructReaderImpl(NonNullableStructVector vector) {
+ super(vector);
+ this.nullableStructVector = (StructVector) vector;
+ }
+
+ @Override
+ public Field getField() {
+ return nullableStructVector.getField();
+ }
+
+ @Override
+ public void copyAsValue(StructWriter writer) {
+ NullableStructWriter impl = (NullableStructWriter) writer;
+ impl.container.copyFromSafe(idx(), impl.idx(), nullableStructVector);
+ }
+
+ @Override
+ public void copyAsField(String name, StructWriter writer) {
+ NullableStructWriter impl = (NullableStructWriter) writer.struct(name);
+ impl.container.copyFromSafe(idx(), impl.idx(), nullableStructVector);
+ }
+
+ @Override
+ public boolean isSet() {
+ return !nullableStructVector.isNull(idx());
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/NullableStructWriterFactory.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/NullableStructWriterFactory.java
new file mode 100644
index 000000000..458aa7b61
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/NullableStructWriterFactory.java
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.complex.impl;
+
+import org.apache.arrow.vector.complex.StructVector;
+
+/**
+ * A factory for {@link NullableStructWriter} instances. The factory allows for configuring if field
+ * names should be considered case sensitive.
+ */
+public class NullableStructWriterFactory {
+ private final boolean caseSensitive;
+ private static final NullableStructWriterFactory nullableStructWriterFactory =
+ new NullableStructWriterFactory(false);
+ private static final NullableStructWriterFactory nullableCaseSensitiveWriterFactory =
+ new NullableStructWriterFactory(true);
+
+ public NullableStructWriterFactory(boolean caseSensitive) {
+ this.caseSensitive = caseSensitive;
+ }
+
+ public NullableStructWriter build(StructVector container) {
+ return this.caseSensitive ? new NullableCaseSensitiveStructWriter(container) : new NullableStructWriter(container);
+ }
+
+ public static NullableStructWriterFactory getNullableStructWriterFactoryInstance() {
+ return nullableStructWriterFactory;
+ }
+
+ public static NullableStructWriterFactory getNullableCaseSensitiveStructWriterFactoryInstance() {
+ return nullableCaseSensitiveWriterFactory;
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/PromotableWriter.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/PromotableWriter.java
new file mode 100644
index 000000000..06b064fda
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/PromotableWriter.java
@@ -0,0 +1,398 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.complex.impl;
+
+import java.math.BigDecimal;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.NullVector;
+import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.complex.AbstractStructVector;
+import org.apache.arrow.vector.complex.FixedSizeListVector;
+import org.apache.arrow.vector.complex.LargeListVector;
+import org.apache.arrow.vector.complex.ListVector;
+import org.apache.arrow.vector.complex.MapVector;
+import org.apache.arrow.vector.complex.StructVector;
+import org.apache.arrow.vector.complex.UnionVector;
+import org.apache.arrow.vector.complex.writer.FieldWriter;
+import org.apache.arrow.vector.holders.Decimal256Holder;
+import org.apache.arrow.vector.holders.DecimalHolder;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.TransferPair;
+
+/**
+ * This FieldWriter implementation delegates all FieldWriter API calls to an inner FieldWriter. This inner field writer
+ * can start as a specific type, and this class will promote the writer to a UnionWriter if a call is made that the
+ * specifically typed writer cannot handle. A new UnionVector is created, wrapping the original vector, and replaces the
+ * original vector in the parent vector, which can be either an AbstractStructVector or a ListVector.
+ *
+ * <p>The writer used can either be for single elements (struct) or lists.</p>
+ */
+public class PromotableWriter extends AbstractPromotableFieldWriter {
+
+ private final AbstractStructVector parentContainer;
+ private final ListVector listVector;
+ private final FixedSizeListVector fixedListVector;
+ private final LargeListVector largeListVector;
+ private final NullableStructWriterFactory nullableStructWriterFactory;
+ private int position;
+ private static final int MAX_DECIMAL_PRECISION = 38;
+ private static final int MAX_DECIMAL256_PRECISION = 76;
+
+ private enum State {
+ UNTYPED, SINGLE, UNION
+ }
+
+ private MinorType type;
+ private ValueVector vector;
+ private UnionVector unionVector;
+ private State state;
+ private FieldWriter writer;
+
+ /**
+ * Constructs a new instance.
+ *
+ * @param v The vector to write.
+ * @param parentContainer The parent container for the vector.
+ */
+ public PromotableWriter(ValueVector v, AbstractStructVector parentContainer) {
+ this(v, parentContainer, NullableStructWriterFactory.getNullableStructWriterFactoryInstance());
+ }
+
+ /**
+ * Constructs a new instance.
+ *
+ * @param v The vector to initialize the writer with.
+ * @param parentContainer The parent container for the vector.
+ * @param nullableStructWriterFactory The factory to create the delegate writer.
+ */
+ public PromotableWriter(
+ ValueVector v,
+ AbstractStructVector parentContainer,
+ NullableStructWriterFactory nullableStructWriterFactory) {
+ this.parentContainer = parentContainer;
+ this.listVector = null;
+ this.fixedListVector = null;
+ this.largeListVector = null;
+ this.nullableStructWriterFactory = nullableStructWriterFactory;
+ init(v);
+ }
+
+ /**
+ * Constructs a new instance.
+ *
+ * @param v The vector to initialize the writer with.
+ * @param listVector The vector that serves as a parent of v.
+ */
+ public PromotableWriter(ValueVector v, ListVector listVector) {
+ this(v, listVector, NullableStructWriterFactory.getNullableStructWriterFactoryInstance());
+ }
+
+ /**
+ * Constructs a new instance.
+ *
+ * @param v The vector to initialize the writer with.
+ * @param fixedListVector The vector that serves as a parent of v.
+ */
+ public PromotableWriter(ValueVector v, FixedSizeListVector fixedListVector) {
+ this(v, fixedListVector, NullableStructWriterFactory.getNullableStructWriterFactoryInstance());
+ }
+
+ /**
+ * Constructs a new instance.
+ *
+ * @param v The vector to initialize the writer with.
+ * @param largeListVector The vector that serves as a parent of v.
+ */
+ public PromotableWriter(ValueVector v, LargeListVector largeListVector) {
+ this(v, largeListVector, NullableStructWriterFactory.getNullableStructWriterFactoryInstance());
+ }
+
+ /**
+ * Constructs a new instance.
+ *
+ * @param v The vector to initialize the writer with.
+ * @param listVector The vector that serves as a parent of v.
+ * @param nullableStructWriterFactory The factory to create the delegate writer.
+ */
+ public PromotableWriter(
+ ValueVector v,
+ ListVector listVector,
+ NullableStructWriterFactory nullableStructWriterFactory) {
+ this.listVector = listVector;
+ this.parentContainer = null;
+ this.fixedListVector = null;
+ this.largeListVector = null;
+ this.nullableStructWriterFactory = nullableStructWriterFactory;
+ init(v);
+ }
+
+ /**
+ * Constructs a new instance.
+ *
+ * @param v The vector to initialize the writer with.
+ * @param fixedListVector The vector that serves as a parent of v.
+ * @param nullableStructWriterFactory The factory to create the delegate writer.
+ */
+ public PromotableWriter(
+ ValueVector v,
+ FixedSizeListVector fixedListVector,
+ NullableStructWriterFactory nullableStructWriterFactory) {
+ this.fixedListVector = fixedListVector;
+ this.parentContainer = null;
+ this.listVector = null;
+ this.largeListVector = null;
+ this.nullableStructWriterFactory = nullableStructWriterFactory;
+ init(v);
+ }
+
+ /**
+ * Constructs a new instance.
+ *
+ * @param v The vector to initialize the writer with.
+ * @param largeListVector The vector that serves as a parent of v.
+ * @param nullableStructWriterFactory The factory to create the delegate writer.
+ */
+ public PromotableWriter(
+ ValueVector v,
+ LargeListVector largeListVector,
+ NullableStructWriterFactory nullableStructWriterFactory) {
+ this.largeListVector = largeListVector;
+ this.fixedListVector = null;
+ this.parentContainer = null;
+ this.listVector = null;
+ this.nullableStructWriterFactory = nullableStructWriterFactory;
+ init(v);
+ }
+
+ private void init(ValueVector v) {
+ if (v instanceof UnionVector) {
+ state = State.UNION;
+ unionVector = (UnionVector) v;
+ writer = new UnionWriter(unionVector, nullableStructWriterFactory);
+ } else if (v instanceof NullVector) {
+ state = State.UNTYPED;
+ } else {
+ setWriter(v);
+ }
+ }
+
+ @Override
+ public void setAddVectorAsNullable(boolean nullable) {
+ super.setAddVectorAsNullable(nullable);
+ if (writer instanceof AbstractFieldWriter) {
+ ((AbstractFieldWriter) writer).setAddVectorAsNullable(nullable);
+ }
+ }
+
+ private void setWriter(ValueVector v) {
+ state = State.SINGLE;
+ vector = v;
+ type = v.getMinorType();
+ switch (type) {
+ case STRUCT:
+ writer = nullableStructWriterFactory.build((StructVector) vector);
+ break;
+ case LIST:
+ writer = new UnionListWriter((ListVector) vector, nullableStructWriterFactory);
+ break;
+ case MAP:
+ writer = new UnionMapWriter((MapVector) vector);
+ break;
+ case UNION:
+ writer = new UnionWriter((UnionVector) vector, nullableStructWriterFactory);
+ break;
+ default:
+ writer = type.getNewFieldWriter(vector);
+ break;
+ }
+ }
+
+ @Override
+ public void writeNull() {
+ FieldWriter w = getWriter();
+ if (w != null) {
+ w.writeNull();
+ }
+ setPosition(idx() + 1);
+ }
+
+ @Override
+ public void setPosition(int index) {
+ super.setPosition(index);
+ FieldWriter w = getWriter();
+ if (w == null) {
+ position = index;
+ } else {
+ w.setPosition(index);
+ }
+ }
+
+ @Override
+ protected FieldWriter getWriter(MinorType type, ArrowType arrowType) {
+ if (state == State.UNION) {
+ if (type == MinorType.DECIMAL || type == MinorType.MAP) {
+ ((UnionWriter) writer).getWriter(type, arrowType);
+ } else {
+ ((UnionWriter) writer).getWriter(type);
+ }
+ } else if (state == State.UNTYPED) {
+ if (type == null) {
+ // ???
+ return null;
+ }
+ if (arrowType == null) {
+ arrowType = type.getType();
+ }
+ FieldType fieldType = new FieldType(addVectorAsNullable, arrowType, null, null);
+ ValueVector v;
+ if (listVector != null) {
+ v = listVector.addOrGetVector(fieldType).getVector();
+ } else if (fixedListVector != null) {
+ v = fixedListVector.addOrGetVector(fieldType).getVector();
+ } else {
+ v = largeListVector.addOrGetVector(fieldType).getVector();
+ }
+ v.allocateNew();
+ setWriter(v);
+ writer.setPosition(position);
+ } else if (type != this.type) {
+ promoteToUnion();
+ if (type == MinorType.DECIMAL || type == MinorType.MAP) {
+ ((UnionWriter) writer).getWriter(type, arrowType);
+ } else {
+ ((UnionWriter) writer).getWriter(type);
+ }
+ }
+ return writer;
+ }
+
+ @Override
+ public boolean isEmptyStruct() {
+ return writer.isEmptyStruct();
+ }
+
+ protected FieldWriter getWriter() {
+ return writer;
+ }
+
+ private FieldWriter promoteToUnion() {
+ String name = vector.getField().getName();
+ TransferPair tp = vector.getTransferPair(vector.getMinorType().name().toLowerCase(), vector.getAllocator());
+ tp.transfer();
+ if (parentContainer != null) {
+ // TODO allow dictionaries in complex types
+ unionVector = parentContainer.addOrGetUnion(name);
+ unionVector.allocateNew();
+ } else if (listVector != null) {
+ unionVector = listVector.promoteToUnion();
+ } else if (fixedListVector != null) {
+ unionVector = fixedListVector.promoteToUnion();
+ } else if (largeListVector != null) {
+ unionVector = largeListVector.promoteToUnion();
+ }
+ unionVector.addVector((FieldVector) tp.getTo());
+ writer = new UnionWriter(unionVector, nullableStructWriterFactory);
+ writer.setPosition(idx());
+ for (int i = 0; i <= idx(); i++) {
+ unionVector.setType(i, vector.getMinorType());
+ }
+ vector = null;
+ state = State.UNION;
+ return writer;
+ }
+
+ @Override
+ public void write(DecimalHolder holder) {
+ getWriter(MinorType.DECIMAL,
+ new ArrowType.Decimal(MAX_DECIMAL_PRECISION, holder.scale, /*bitWidth=*/128)).write(holder);
+ }
+
+ @Override
+ public void writeDecimal(long start, ArrowBuf buffer, ArrowType arrowType) {
+ getWriter(MinorType.DECIMAL, new ArrowType.Decimal(MAX_DECIMAL_PRECISION,
+ ((ArrowType.Decimal) arrowType).getScale(), /*bitWidth=*/128)).writeDecimal(start, buffer, arrowType);
+ }
+
+ @Override
+ public void writeDecimal(BigDecimal value) {
+ getWriter(MinorType.DECIMAL,
+ new ArrowType.Decimal(MAX_DECIMAL_PRECISION, value.scale(), /*bitWidth=*/128)).writeDecimal(value);
+ }
+
+ @Override
+ public void writeBigEndianBytesToDecimal(byte[] value, ArrowType arrowType) {
+ getWriter(MinorType.DECIMAL, new ArrowType.Decimal(MAX_DECIMAL_PRECISION,
+ ((ArrowType.Decimal) arrowType).getScale(), /*bitWidth=*/128)).writeBigEndianBytesToDecimal(value, arrowType);
+ }
+
+ @Override
+ public void write(Decimal256Holder holder) {
+ getWriter(MinorType.DECIMAL256,
+ new ArrowType.Decimal(MAX_DECIMAL256_PRECISION, holder.scale, /*bitWidth=*/256)).write(holder);
+ }
+
+ @Override
+ public void writeDecimal256(long start, ArrowBuf buffer, ArrowType arrowType) {
+ getWriter(MinorType.DECIMAL256, new ArrowType.Decimal(MAX_DECIMAL256_PRECISION,
+ ((ArrowType.Decimal) arrowType).getScale(), /*bitWidth=*/256)).writeDecimal256(start, buffer, arrowType);
+ }
+
+ @Override
+ public void writeDecimal256(BigDecimal value) {
+ getWriter(MinorType.DECIMAL256,
+ new ArrowType.Decimal(MAX_DECIMAL256_PRECISION, value.scale(), /*bitWidth=*/256)).writeDecimal256(value);
+ }
+
+ @Override
+ public void writeBigEndianBytesToDecimal256(byte[] value, ArrowType arrowType) {
+ getWriter(MinorType.DECIMAL256, new ArrowType.Decimal(MAX_DECIMAL256_PRECISION,
+ ((ArrowType.Decimal) arrowType).getScale(),
+ /*bitWidth=*/256)).writeBigEndianBytesToDecimal256(value, arrowType);
+ }
+
+
+ @Override
+ public void allocate() {
+ getWriter().allocate();
+ }
+
+ @Override
+ public void clear() {
+ getWriter().clear();
+ }
+
+ @Override
+ public Field getField() {
+ return getWriter().getField();
+ }
+
+ @Override
+ public int getValueCapacity() {
+ return getWriter().getValueCapacity();
+ }
+
+ @Override
+ public void close() throws Exception {
+ getWriter().close();
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/SingleListReaderImpl.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/SingleListReaderImpl.java
new file mode 100644
index 000000000..9bbe60421
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/SingleListReaderImpl.java
@@ -0,0 +1,91 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.complex.impl;
+
+
+import org.apache.arrow.vector.complex.AbstractContainerVector;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.complex.writer.BaseWriter.ListWriter;
+import org.apache.arrow.vector.complex.writer.BaseWriter.StructWriter;
+import org.apache.arrow.vector.types.Types.MinorType;
+
+/**
+ * An implementation of {@link AbstractFieldReader} for lists vectors.
+ */
+@SuppressWarnings("unused")
+public class SingleListReaderImpl extends AbstractFieldReader {
+
+ private final String name;
+ private final AbstractContainerVector container;
+ private FieldReader reader;
+
+ /**
+ * Constructs a new instance.
+ *
+ * @param name The name of field to read in container.
+ * @param container The container holding a list.
+ */
+ public SingleListReaderImpl(String name, AbstractContainerVector container) {
+ super();
+ this.name = name;
+ this.container = container;
+ }
+
+ @Override
+ public void setPosition(int index) {
+ super.setPosition(index);
+ if (reader != null) {
+ reader.setPosition(index);
+ }
+ }
+
+ @Override
+ public Object readObject() {
+ return reader.readObject();
+ }
+
+ @Override
+ public FieldReader reader() {
+ if (reader == null) {
+ reader = container.getChild(name).getReader();
+ setPosition(idx());
+ }
+ return reader;
+ }
+
+ @Override
+ public MinorType getMinorType() {
+ return MinorType.LIST;
+ }
+
+ @Override
+ public boolean isSet() {
+ return false;
+ }
+
+ @Override
+ public void copyAsValue(ListWriter writer) {
+ throw new UnsupportedOperationException("Generic list copying not yet supported. Please resolve to typed list.");
+ }
+
+ @Override
+ public void copyAsField(String name, StructWriter writer) {
+ throw new UnsupportedOperationException("Generic list copying not yet supported. Please resolve to typed list.");
+ }
+
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/SingleStructReaderImpl.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/SingleStructReaderImpl.java
new file mode 100644
index 000000000..3590e40ce
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/SingleStructReaderImpl.java
@@ -0,0 +1,113 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.complex.impl;
+
+
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.complex.NonNullableStructVector;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.complex.writer.BaseWriter.StructWriter;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.Field;
+
+/**
+ * {@link FieldReader} for a single {@link org.apache.arrow.vector.complex.NonNullableStructVector}.
+ */
+@SuppressWarnings("unused")
+public class SingleStructReaderImpl extends AbstractFieldReader {
+
+ private final NonNullableStructVector vector;
+ private final Map<String, FieldReader> fields = new HashMap<>();
+
+ public SingleStructReaderImpl(NonNullableStructVector vector) {
+ this.vector = vector;
+ }
+
+ private void setChildrenPosition(int index) {
+ for (FieldReader r : fields.values()) {
+ r.setPosition(index);
+ }
+ }
+
+ @Override
+ public Field getField() {
+ return vector.getField();
+ }
+
+ @Override
+ public FieldReader reader(String name) {
+ FieldReader reader = fields.get(name);
+ if (reader == null) {
+ ValueVector child = vector.getChild(name);
+ if (child == null) {
+ reader = NullReader.INSTANCE;
+ } else {
+ reader = child.getReader();
+ }
+ fields.put(name, reader);
+ reader.setPosition(idx());
+ }
+ return reader;
+ }
+
+ @Override
+ public void setPosition(int index) {
+ super.setPosition(index);
+ for (FieldReader r : fields.values()) {
+ r.setPosition(index);
+ }
+ }
+
+ @Override
+ public Object readObject() {
+ return vector.getObject(idx());
+ }
+
+ @Override
+ public MinorType getMinorType() {
+ return MinorType.STRUCT;
+ }
+
+ @Override
+ public boolean isSet() {
+ return true;
+ }
+
+ @Override
+ public java.util.Iterator<String> iterator() {
+ return vector.fieldNameIterator();
+ }
+
+ @Override
+ public void copyAsValue(StructWriter writer) {
+ SingleStructWriter impl = (SingleStructWriter) writer;
+ impl.container.copyFromSafe(idx(), impl.idx(), vector);
+ }
+
+ @Override
+ public void copyAsField(String name, StructWriter writer) {
+ SingleStructWriter impl = (SingleStructWriter) writer.struct(name);
+ impl.container.copyFromSafe(idx(), impl.idx(), vector);
+ }
+
+
+}
+
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/StructOrListWriterImpl.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/StructOrListWriterImpl.java
new file mode 100644
index 000000000..e9c0825dd
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/StructOrListWriterImpl.java
@@ -0,0 +1,137 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.complex.impl;
+
+import org.apache.arrow.vector.complex.writer.BaseWriter;
+import org.apache.arrow.vector.complex.writer.BaseWriter.StructOrListWriter;
+import org.apache.arrow.vector.complex.writer.BigIntWriter;
+import org.apache.arrow.vector.complex.writer.BitWriter;
+import org.apache.arrow.vector.complex.writer.Float4Writer;
+import org.apache.arrow.vector.complex.writer.Float8Writer;
+import org.apache.arrow.vector.complex.writer.IntWriter;
+import org.apache.arrow.vector.complex.writer.VarBinaryWriter;
+import org.apache.arrow.vector.complex.writer.VarCharWriter;
+
+/**
+ * Concrete implementation of {@link StructOrListWriter}.
+ */
+public class StructOrListWriterImpl implements StructOrListWriter {
+
+ public final BaseWriter.StructWriter struct;
+ public final BaseWriter.ListWriter list;
+
+ /**
+ * Constructs a new instance using a {@link BaseWriter.StructWriter}
+ * (instead of an {@link BaseWriter.ListWriter}).
+ */
+ public StructOrListWriterImpl(final BaseWriter.StructWriter writer) {
+ this.struct = writer;
+ this.list = null;
+ }
+
+ /**
+ * Constructs a new instance using a {@link BaseWriter.ListWriter}
+ * (instead of a {@link BaseWriter.StructWriter}).
+ */
+ public StructOrListWriterImpl(final BaseWriter.ListWriter writer) {
+ this.struct = null;
+ this.list = writer;
+ }
+
+ /**
+ * Start writing to either the list or the struct.
+ */
+ public void start() {
+ if (struct != null) {
+ struct.start();
+ } else {
+ list.startList();
+ }
+ }
+
+ /**
+ * Finish writing to the list or struct.
+ */
+ public void end() {
+ if (struct != null) {
+ struct.end();
+ } else {
+ list.endList();
+ }
+ }
+
+ /**
+ * Creates a new writer for a struct with the given name.
+ */
+ public StructOrListWriter struct(final String name) {
+ assert struct != null;
+ return new StructOrListWriterImpl(struct.struct(name));
+ }
+
+ /**
+ * Creates a new writer for a list of structs.
+ *
+ * @param name Unused.
+ */
+ public StructOrListWriter listoftstruct(final String name) {
+ assert list != null;
+ return new StructOrListWriterImpl(list.struct());
+ }
+
+ public StructOrListWriter list(final String name) {
+ assert struct != null;
+ return new StructOrListWriterImpl(struct.list(name));
+ }
+
+ public boolean isStructWriter() {
+ return struct != null;
+ }
+
+ public boolean isListWriter() {
+ return list != null;
+ }
+
+ public VarCharWriter varChar(final String name) {
+ return (struct != null) ? struct.varChar(name) : list.varChar();
+ }
+
+ public IntWriter integer(final String name) {
+ return (struct != null) ? struct.integer(name) : list.integer();
+ }
+
+ public BigIntWriter bigInt(final String name) {
+ return (struct != null) ? struct.bigInt(name) : list.bigInt();
+ }
+
+ public Float4Writer float4(final String name) {
+ return (struct != null) ? struct.float4(name) : list.float4();
+ }
+
+ public Float8Writer float8(final String name) {
+ return (struct != null) ? struct.float8(name) : list.float8();
+ }
+
+ public BitWriter bit(final String name) {
+ return (struct != null) ? struct.bit(name) : list.bit();
+ }
+
+ public VarBinaryWriter binary(final String name) {
+ return (struct != null) ? struct.varBinary(name) : list.varBinary();
+ }
+
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionFixedSizeListReader.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionFixedSizeListReader.java
new file mode 100644
index 000000000..ece729ae5
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionFixedSizeListReader.java
@@ -0,0 +1,105 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.complex.impl;
+
+import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.complex.FixedSizeListVector;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.complex.writer.BaseWriter.ListWriter;
+import org.apache.arrow.vector.complex.writer.FieldWriter;
+import org.apache.arrow.vector.holders.UnionHolder;
+import org.apache.arrow.vector.types.Types.MinorType;
+
+/**
+ * Reader for fixed size list vectors.
+ */
+public class UnionFixedSizeListReader extends AbstractFieldReader {
+
+ private final FixedSizeListVector vector;
+ private final ValueVector data;
+ private final int listSize;
+
+ private int currentOffset;
+
+ /**
+ * Constructs a new instance that reads data in <code>vector</code>.
+ */
+ public UnionFixedSizeListReader(FixedSizeListVector vector) {
+ this.vector = vector;
+ this.data = vector.getDataVector();
+ this.listSize = vector.getListSize();
+ }
+
+ @Override
+ public boolean isSet() {
+ return !vector.isNull(idx());
+ }
+
+ @Override
+ public FieldReader reader() {
+ return data.getReader();
+ }
+
+ @Override
+ public Object readObject() {
+ return vector.getObject(idx());
+ }
+
+ @Override
+ public MinorType getMinorType() {
+ return vector.getMinorType();
+ }
+
+ @Override
+ public void setPosition(int index) {
+ super.setPosition(index);
+ data.getReader().setPosition(index * listSize);
+ currentOffset = 0;
+ }
+
+ @Override
+ public void read(int index, UnionHolder holder) {
+ setPosition(idx());
+ for (int i = -1; i < index; i++) {
+ if (!next()) {
+ throw new IndexOutOfBoundsException("Requested " + index + ", size " + listSize);
+ }
+ }
+ holder.reader = data.getReader();
+ holder.isSet = vector.isNull(idx()) ? 0 : 1;
+ }
+
+ @Override
+ public int size() {
+ return listSize;
+ }
+
+ @Override
+ public boolean next() {
+ if (currentOffset < listSize) {
+ data.getReader().setPosition(idx() * listSize + currentOffset++);
+ return true;
+ } else {
+ return false;
+ }
+ }
+
+ public void copyAsValue(ListWriter writer) {
+ ComplexCopier.copy(this, (FieldWriter) writer);
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionLargeListReader.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionLargeListReader.java
new file mode 100644
index 000000000..faf088b55
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionLargeListReader.java
@@ -0,0 +1,109 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.complex.impl;
+
+import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt;
+
+import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.complex.LargeListVector;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.complex.writer.FieldWriter;
+import org.apache.arrow.vector.holders.UnionHolder;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.Field;
+
+/**
+ * {@link FieldReader} for list of union types.
+ */
+public class UnionLargeListReader extends AbstractFieldReader {
+
+ private LargeListVector vector;
+ private ValueVector data;
+ private long index;
+ private static final long OFFSET_WIDTH = 8L;
+
+ public UnionLargeListReader(LargeListVector vector) {
+ this.vector = vector;
+ this.data = vector.getDataVector();
+ }
+
+ @Override
+ public Field getField() {
+ return vector.getField();
+ }
+
+ @Override
+ public boolean isSet() {
+ return !vector.isNull(idx());
+ }
+
+ private long currentOffset;
+ private long maxOffset;
+
+ @Override
+ public void setPosition(int index) {
+ super.setPosition(index);
+ currentOffset = vector.getOffsetBuffer().getLong((long) index * OFFSET_WIDTH) - 1;
+ maxOffset = vector.getOffsetBuffer().getLong(((long) index + 1L) * OFFSET_WIDTH);
+ }
+
+ @Override
+ public FieldReader reader() {
+ return data.getReader();
+ }
+
+ @Override
+ public Object readObject() {
+ return vector.getObject(idx());
+ }
+
+ @Override
+ public MinorType getMinorType() {
+ return MinorType.LARGELIST;
+ }
+
+ @Override
+ public void read(int index, UnionHolder holder) {
+ setPosition(index);
+ for (int i = -1; i < index; i++) {
+ next();
+ }
+ holder.reader = data.getReader();
+ holder.isSet = data.getReader().isSet() ? 1 : 0;
+ }
+
+ @Override
+ public int size() {
+ int size = checkedCastToInt(maxOffset - currentOffset - 1); //todo revisit when int64 vectors are done
+ return size < 0 ? 0 : size;
+ }
+
+ @Override
+ public boolean next() {
+ if (currentOffset + 1 < maxOffset) {
+ data.getReader().setPosition(checkedCastToInt(++currentOffset)); // todo revisit when int64 vectors are done
+ return true;
+ } else {
+ return false;
+ }
+ }
+
+ public void copyAsValue(UnionLargeListWriter writer) {
+ ComplexCopier.copy(this, (FieldWriter) writer);
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionListReader.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionListReader.java
new file mode 100644
index 000000000..a8c185aef
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionListReader.java
@@ -0,0 +1,107 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.complex.impl;
+
+import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.complex.ListVector;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.complex.writer.BaseWriter.ListWriter;
+import org.apache.arrow.vector.complex.writer.FieldWriter;
+import org.apache.arrow.vector.holders.UnionHolder;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.Field;
+
+/**
+ * {@link FieldReader} for list of union types.
+ */
+public class UnionListReader extends AbstractFieldReader {
+
+ private ListVector vector;
+ private ValueVector data;
+ private static final int OFFSET_WIDTH = 4;
+
+ public UnionListReader(ListVector vector) {
+ this.vector = vector;
+ this.data = vector.getDataVector();
+ }
+
+ @Override
+ public Field getField() {
+ return vector.getField();
+ }
+
+ @Override
+ public boolean isSet() {
+ return !vector.isNull(idx());
+ }
+
+ private int currentOffset;
+ private int maxOffset;
+
+ @Override
+ public void setPosition(int index) {
+ super.setPosition(index);
+ currentOffset = vector.getOffsetBuffer().getInt(index * OFFSET_WIDTH) - 1;
+ maxOffset = vector.getOffsetBuffer().getInt((index + 1) * OFFSET_WIDTH);
+ }
+
+ @Override
+ public FieldReader reader() {
+ return data.getReader();
+ }
+
+ @Override
+ public Object readObject() {
+ return vector.getObject(idx());
+ }
+
+ @Override
+ public MinorType getMinorType() {
+ return MinorType.LIST;
+ }
+
+ @Override
+ public void read(int index, UnionHolder holder) {
+ setPosition(idx());
+ for (int i = -1; i < index; i++) {
+ next();
+ }
+ holder.reader = data.getReader();
+ holder.isSet = data.getReader().isSet() ? 1 : 0;
+ }
+
+ @Override
+ public int size() {
+ int size = maxOffset - currentOffset - 1;
+ return size < 0 ? 0 : size;
+ }
+
+ @Override
+ public boolean next() {
+ if (currentOffset + 1 < maxOffset) {
+ data.getReader().setPosition(++currentOffset);
+ return true;
+ } else {
+ return false;
+ }
+ }
+
+ public void copyAsValue(ListWriter writer) {
+ ComplexCopier.copy(this, (FieldWriter) writer);
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionMapReader.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionMapReader.java
new file mode 100644
index 000000000..7a1bdce9b
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionMapReader.java
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.complex.impl;
+
+import org.apache.arrow.vector.complex.MapVector;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.types.Types.MinorType;
+
+/**
+ * Reader for a MapVector.
+ */
+public class UnionMapReader extends UnionListReader {
+
+ private String keyName = MapVector.KEY_NAME;
+ private String valueName = MapVector.VALUE_NAME;
+
+ /**
+ * Construct a new reader for the given vector.
+ *
+ * @param vector Vector to read from.
+ */
+ public UnionMapReader(MapVector vector) {
+ super(vector);
+ }
+
+ /**
+ * Set the key, value field names to read.
+ *
+ * @param key Field name for key.
+ * @param value Field name for value.
+ */
+ public void setKeyValueNames(String key, String value) {
+ keyName = key;
+ valueName = value;
+ }
+
+ /**
+ * Start reading a key from the map entry.
+ *
+ * @return reader that can be used to read the key.
+ */
+ public FieldReader key() {
+ return reader().reader(keyName);
+ }
+
+ /**
+ * Start reading a value element from the map entry.
+ *
+ * @return reader that can be used to read the value.
+ */
+ public FieldReader value() {
+ return reader().reader(valueName);
+ }
+
+ /**
+ * Return the MinorType of the reader as MAP.
+ */
+ @Override
+ public MinorType getMinorType() {
+ return MinorType.MAP;
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/reader/FieldReader.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/reader/FieldReader.java
new file mode 100644
index 000000000..a888abbaa
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/reader/FieldReader.java
@@ -0,0 +1,35 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.complex.reader;
+
+import org.apache.arrow.vector.complex.reader.BaseReader.ListReader;
+import org.apache.arrow.vector.complex.reader.BaseReader.MapReader;
+import org.apache.arrow.vector.complex.reader.BaseReader.RepeatedListReader;
+import org.apache.arrow.vector.complex.reader.BaseReader.RepeatedMapReader;
+import org.apache.arrow.vector.complex.reader.BaseReader.RepeatedStructReader;
+import org.apache.arrow.vector.complex.reader.BaseReader.ScalarReader;
+import org.apache.arrow.vector.complex.reader.BaseReader.StructReader;
+
+
+/**
+ * Composite of all Reader types (e.g. {@link StructReader}, {@link ScalarReader}, etc). Each reader type
+ * is in essence a way of iterating over a {@link org.apache.arrow.vector.ValueVector}.
+ */
+public interface FieldReader extends StructReader, ListReader, MapReader, ScalarReader,
+ RepeatedStructReader, RepeatedListReader, RepeatedMapReader {
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/writer/FieldWriter.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/writer/FieldWriter.java
new file mode 100644
index 000000000..a3cb7108a
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/complex/writer/FieldWriter.java
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.complex.writer;
+
+import org.apache.arrow.vector.complex.writer.BaseWriter.ListWriter;
+import org.apache.arrow.vector.complex.writer.BaseWriter.MapWriter;
+import org.apache.arrow.vector.complex.writer.BaseWriter.ScalarWriter;
+import org.apache.arrow.vector.complex.writer.BaseWriter.StructWriter;
+
+/**
+ * Composite of all writer types. Writers are convenience classes for incrementally
+ * adding values to {@linkplain org.apache.arrow.vector.ValueVector}s.
+ */
+public interface FieldWriter extends StructWriter, ListWriter, MapWriter, ScalarWriter {
+ void allocate();
+
+ void clear();
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/compression/AbstractCompressionCodec.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/compression/AbstractCompressionCodec.java
new file mode 100644
index 000000000..39b32968d
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/compression/AbstractCompressionCodec.java
@@ -0,0 +1,116 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.compression;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.util.MemoryUtil;
+import org.apache.arrow.util.Preconditions;
+
+/**
+ * The base class for concrete compression codecs, providing
+ * common logic for all compression codecs.
+ */
+public abstract class AbstractCompressionCodec implements CompressionCodec {
+
+ @Override
+ public ArrowBuf compress(BufferAllocator allocator, ArrowBuf uncompressedBuffer) {
+ if (uncompressedBuffer.writerIndex() == 0L) {
+ // shortcut for empty buffer
+ ArrowBuf compressedBuffer = allocator.buffer(CompressionUtil.SIZE_OF_UNCOMPRESSED_LENGTH);
+ compressedBuffer.setLong(0, 0);
+ compressedBuffer.writerIndex(CompressionUtil.SIZE_OF_UNCOMPRESSED_LENGTH);
+ uncompressedBuffer.close();
+ return compressedBuffer;
+ }
+
+ ArrowBuf compressedBuffer = doCompress(allocator, uncompressedBuffer);
+ long compressedLength = compressedBuffer.writerIndex() - CompressionUtil.SIZE_OF_UNCOMPRESSED_LENGTH;
+ long uncompressedLength = uncompressedBuffer.writerIndex();
+
+ if (compressedLength > uncompressedLength) {
+ // compressed buffer is larger, send the raw buffer
+ compressedBuffer.close();
+ compressedBuffer = CompressionUtil.packageRawBuffer(allocator, uncompressedBuffer);
+ } else {
+ writeUncompressedLength(compressedBuffer, uncompressedLength);
+ }
+
+ uncompressedBuffer.close();
+ return compressedBuffer;
+ }
+
+ @Override
+ public ArrowBuf decompress(BufferAllocator allocator, ArrowBuf compressedBuffer) {
+ Preconditions.checkArgument(compressedBuffer.writerIndex() >= CompressionUtil.SIZE_OF_UNCOMPRESSED_LENGTH,
+ "Not enough data to decompress.");
+
+ long decompressedLength = readUncompressedLength(compressedBuffer);
+
+ if (decompressedLength == 0L) {
+ // shortcut for empty buffer
+ compressedBuffer.close();
+ return allocator.getEmpty();
+ }
+
+ if (decompressedLength == CompressionUtil.NO_COMPRESSION_LENGTH) {
+ // no compression
+ return CompressionUtil.extractUncompressedBuffer(compressedBuffer);
+ }
+
+ ArrowBuf decompressedBuffer = doDecompress(allocator, compressedBuffer);
+ compressedBuffer.close();
+ return decompressedBuffer;
+ }
+
+ protected void writeUncompressedLength(ArrowBuf compressedBuffer, long uncompressedLength) {
+ if (!MemoryUtil.LITTLE_ENDIAN) {
+ uncompressedLength = Long.reverseBytes(uncompressedLength);
+ }
+ // first 8 bytes reserved for uncompressed length, according to the specification
+ compressedBuffer.setLong(0, uncompressedLength);
+ }
+
+ protected long readUncompressedLength(ArrowBuf compressedBuffer) {
+ long decompressedLength = compressedBuffer.getLong(0);
+ if (!MemoryUtil.LITTLE_ENDIAN) {
+ decompressedLength = Long.reverseBytes(decompressedLength);
+ }
+ return decompressedLength;
+ }
+
+ /**
+ * The method that actually performs the data compression.
+ * The layout of the returned compressed buffer is the compressed data,
+ * plus 8 bytes reserved at the beginning of the buffer for the uncompressed data size.
+ * <p>
+ * Please note that this method is not responsible for releasing the uncompressed buffer.
+ * </p>
+ */
+ protected abstract ArrowBuf doCompress(BufferAllocator allocator, ArrowBuf uncompressedBuffer);
+
+ /**
+ * The method that actually performs the data decompression.
+ * The layout of the compressed buffer is the compressed data,
+ * plus 8 bytes at the beginning of the buffer storing the uncompressed data size.
+ * <p>
+ * Please note that this method is not responsible for releasing the compressed buffer.
+ * </p>
+ */
+ protected abstract ArrowBuf doDecompress(BufferAllocator allocator, ArrowBuf compressedBuffer);
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/compression/CompressionCodec.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/compression/CompressionCodec.java
new file mode 100644
index 000000000..a6dd8b51f
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/compression/CompressionCodec.java
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.compression;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+
+/**
+ * The codec for compression/decompression.
+ */
+public interface CompressionCodec {
+
+ /**
+ * Compress a buffer.
+ * @param allocator the allocator for allocating memory for compressed buffer.
+ * @param uncompressedBuffer the buffer to compress.
+ * Implementation of this method should take care of releasing this buffer.
+ * @return the compressed buffer
+ */
+ ArrowBuf compress(BufferAllocator allocator, ArrowBuf uncompressedBuffer);
+
+ /**
+ * Decompress a buffer.
+ * @param allocator the allocator for allocating memory for decompressed buffer.
+ * @param compressedBuffer the buffer to be decompressed.
+ * Implementation of this method should take care of releasing this buffer.
+ * @return the decompressed buffer.
+ */
+ ArrowBuf decompress(BufferAllocator allocator, ArrowBuf compressedBuffer);
+
+ /**
+ * Gets the type of the codec.
+ * @return the type of the codec.
+ */
+ CompressionUtil.CodecType getCodecType();
+
+ /**
+ * Factory to create compression codec.
+ */
+ interface Factory {
+
+ /**
+ * Creates the codec based on the codec type.
+ */
+ CompressionCodec createCodec(CompressionUtil.CodecType codecType);
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/compression/CompressionUtil.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/compression/CompressionUtil.java
new file mode 100644
index 000000000..1deb38c84
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/compression/CompressionUtil.java
@@ -0,0 +1,103 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.compression;
+
+import org.apache.arrow.flatbuf.BodyCompressionMethod;
+import org.apache.arrow.flatbuf.CompressionType;
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.ipc.message.ArrowBodyCompression;
+
+/**
+ * Utilities for data compression/decompression.
+ */
+public class CompressionUtil {
+
+ /**
+ * Compression codec types corresponding to flat buffer implementation in {@link CompressionType}.
+ */
+ public enum CodecType {
+
+ NO_COMPRESSION(NoCompressionCodec.COMPRESSION_TYPE),
+
+ LZ4_FRAME(org.apache.arrow.flatbuf.CompressionType.LZ4_FRAME),
+
+ ZSTD(org.apache.arrow.flatbuf.CompressionType.ZSTD);
+
+ private final byte type;
+
+ CodecType(byte type) {
+ this.type = type;
+ }
+
+ public byte getType() {
+ return type;
+ }
+
+ /**
+ * Gets the codec type from the compression type defined in {@link CompressionType}.
+ */
+ public static CodecType fromCompressionType(byte type) {
+ for (CodecType codecType : values()) {
+ if (codecType.type == type) {
+ return codecType;
+ }
+ }
+ return NO_COMPRESSION;
+ }
+ }
+
+ public static final long SIZE_OF_UNCOMPRESSED_LENGTH = 8L;
+
+ /**
+ * Special flag to indicate no compression.
+ * (e.g. when the compressed buffer has a larger size.)
+ */
+ public static final long NO_COMPRESSION_LENGTH = -1L;
+
+ private CompressionUtil() {
+ }
+
+ /**
+ * Creates the {@link ArrowBodyCompression} object, given the {@link CompressionCodec}.
+ * The implementation of this method should depend on the values of
+ * {@link org.apache.arrow.flatbuf.CompressionType#names}.
+ */
+ public static ArrowBodyCompression createBodyCompression(CompressionCodec codec) {
+ return new ArrowBodyCompression(codec.getCodecType().getType(), BodyCompressionMethod.BUFFER);
+ }
+
+ /**
+ * Process compression by compressing the buffer as is.
+ */
+ public static ArrowBuf packageRawBuffer(BufferAllocator allocator, ArrowBuf inputBuffer) {
+ ArrowBuf compressedBuffer = allocator.buffer(SIZE_OF_UNCOMPRESSED_LENGTH + inputBuffer.writerIndex());
+ compressedBuffer.setLong(0, NO_COMPRESSION_LENGTH);
+ compressedBuffer.setBytes(SIZE_OF_UNCOMPRESSED_LENGTH, inputBuffer, 0, inputBuffer.writerIndex());
+ compressedBuffer.writerIndex(SIZE_OF_UNCOMPRESSED_LENGTH + inputBuffer.writerIndex());
+ return compressedBuffer;
+ }
+
+ /**
+ * Process decompression by slicing the buffer that contains the uncompressed bytes.
+ */
+ public static ArrowBuf extractUncompressedBuffer(ArrowBuf inputBuffer) {
+ return inputBuffer.slice(SIZE_OF_UNCOMPRESSED_LENGTH,
+ inputBuffer.writerIndex() - SIZE_OF_UNCOMPRESSED_LENGTH);
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/compression/NoCompressionCodec.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/compression/NoCompressionCodec.java
new file mode 100644
index 000000000..e5e8e9d46
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/compression/NoCompressionCodec.java
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.compression;
+
+import org.apache.arrow.flatbuf.BodyCompressionMethod;
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.ipc.message.ArrowBodyCompression;
+
+/**
+ * The default compression codec that does no compression.
+ */
+public class NoCompressionCodec implements CompressionCodec {
+
+ public static final NoCompressionCodec INSTANCE = new NoCompressionCodec();
+
+ public static final byte COMPRESSION_TYPE = -1;
+
+ public static final ArrowBodyCompression DEFAULT_BODY_COMPRESSION =
+ new ArrowBodyCompression(COMPRESSION_TYPE, BodyCompressionMethod.BUFFER);
+
+ private NoCompressionCodec() {
+ }
+
+ @Override
+ public ArrowBuf compress(BufferAllocator allocator, ArrowBuf uncompressedBuffer) {
+ return uncompressedBuffer;
+ }
+
+ @Override
+ public ArrowBuf decompress(BufferAllocator allocator, ArrowBuf compressedBuffer) {
+ return compressedBuffer;
+ }
+
+ @Override
+ public CompressionUtil.CodecType getCodecType() {
+ return CompressionUtil.CodecType.NO_COMPRESSION;
+ }
+
+ /**
+ * The default factory that creates a {@link NoCompressionCodec}.
+ */
+ public static class Factory implements CompressionCodec.Factory {
+
+ public static final NoCompressionCodec.Factory INSTANCE = new NoCompressionCodec.Factory();
+
+ @Override
+ public CompressionCodec createCodec(CompressionUtil.CodecType codecType) {
+ return NoCompressionCodec.INSTANCE;
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/dictionary/Dictionary.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/dictionary/Dictionary.java
new file mode 100644
index 000000000..6f40e5814
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/dictionary/Dictionary.java
@@ -0,0 +1,75 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.dictionary;
+
+import java.util.Objects;
+
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.compare.VectorEqualsVisitor;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.DictionaryEncoding;
+
+/**
+ * A dictionary (integer to Value mapping) that is used to facilitate
+ * dictionary encoding compression.
+ */
+public class Dictionary {
+
+ private final DictionaryEncoding encoding;
+ private final FieldVector dictionary;
+
+ public Dictionary(FieldVector dictionary, DictionaryEncoding encoding) {
+ this.dictionary = dictionary;
+ this.encoding = encoding;
+ }
+
+ public FieldVector getVector() {
+ return dictionary;
+ }
+
+ public DictionaryEncoding getEncoding() {
+ return encoding;
+ }
+
+ public ArrowType getVectorType() {
+ return dictionary.getField().getType();
+ }
+
+ @Override
+ public String toString() {
+ return "Dictionary " + encoding + " " + dictionary;
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) {
+ return true;
+ }
+ if (o == null || getClass() != o.getClass()) {
+ return false;
+ }
+ Dictionary that = (Dictionary) o;
+ return Objects.equals(encoding, that.encoding) &&
+ new VectorEqualsVisitor().vectorEquals(that.dictionary, dictionary);
+ }
+
+ @Override
+ public int hashCode() {
+ return Objects.hash(encoding, dictionary);
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/dictionary/DictionaryEncoder.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/dictionary/DictionaryEncoder.java
new file mode 100644
index 000000000..babb0dbd3
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/dictionary/DictionaryEncoder.java
@@ -0,0 +1,196 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.dictionary;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.util.hash.ArrowBufHasher;
+import org.apache.arrow.memory.util.hash.SimpleHasher;
+import org.apache.arrow.util.Preconditions;
+import org.apache.arrow.vector.BaseIntVector;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.TransferPair;
+
+/**
+ * Encoder/decoder for Dictionary encoded {@link ValueVector}. Dictionary encoding produces an
+ * integer {@link ValueVector}. Each entry in the Vector is index into the dictionary which can hold
+ * values of any type.
+ */
+public class DictionaryEncoder {
+
+ private final DictionaryHashTable hashTable;
+ private final Dictionary dictionary;
+ private final BufferAllocator allocator;
+
+ /**
+ * Construct an instance.
+ */
+ public DictionaryEncoder(Dictionary dictionary, BufferAllocator allocator) {
+ this (dictionary, allocator, SimpleHasher.INSTANCE);
+ }
+
+ /**
+ * Construct an instance.
+ */
+ public DictionaryEncoder(Dictionary dictionary, BufferAllocator allocator, ArrowBufHasher hasher) {
+ this.dictionary = dictionary;
+ this.allocator = allocator;
+ hashTable = new DictionaryHashTable(dictionary.getVector(), hasher);
+ }
+
+ /**
+ * Dictionary encodes a vector with a provided dictionary. The dictionary must contain all values in the vector.
+ *
+ * @param vector vector to encode
+ * @param dictionary dictionary used for encoding
+ * @return dictionary encoded vector
+ */
+ public static ValueVector encode(ValueVector vector, Dictionary dictionary) {
+ DictionaryEncoder encoder = new DictionaryEncoder(dictionary, vector.getAllocator());
+ return encoder.encode(vector);
+ }
+
+ /**
+ * Decodes a dictionary encoded array using the provided dictionary.
+ *
+ * @param indices dictionary encoded values, must be int type
+ * @param dictionary dictionary used to decode the values
+ * @return vector with values restored from dictionary
+ */
+ public static ValueVector decode(ValueVector indices, Dictionary dictionary) {
+ DictionaryEncoder encoder = new DictionaryEncoder(dictionary, indices.getAllocator());
+ return encoder.decode(indices);
+ }
+
+ /**
+ * Get the indexType according to the dictionary vector valueCount.
+ * @param valueCount dictionary vector valueCount.
+ * @return index type.
+ */
+ public static ArrowType.Int getIndexType(int valueCount) {
+ Preconditions.checkArgument(valueCount >= 0);
+ if (valueCount <= Byte.MAX_VALUE) {
+ return new ArrowType.Int(8, true);
+ } else if (valueCount <= Character.MAX_VALUE) {
+ return new ArrowType.Int(16, true);
+ } else if (valueCount <= Integer.MAX_VALUE) {
+ return new ArrowType.Int(32, true);
+ } else {
+ return new ArrowType.Int(64, true);
+ }
+ }
+
+ /**
+ * Populates indices between start and end with the encoded values of vector.
+ * @param vector the vector to encode
+ * @param indices the index vector
+ * @param encoding the hash table for encoding
+ * @param start the start index
+ * @param end the end index
+ */
+ static void buildIndexVector(
+ ValueVector vector,
+ BaseIntVector indices,
+ DictionaryHashTable encoding,
+ int start,
+ int end) {
+
+ for (int i = start; i < end; i++) {
+ if (!vector.isNull(i)) {
+ // if it's null leave it null
+ // note: this may fail if value was not included in the dictionary
+ int encoded = encoding.getIndex(i, vector);
+ if (encoded == -1) {
+ throw new IllegalArgumentException("Dictionary encoding not defined for value:" + vector.getObject(i));
+ }
+ indices.setWithPossibleTruncate(i, encoded);
+ }
+ }
+ }
+
+ /**
+ * Retrieve values to target vector from index vector.
+ * @param indices the index vector
+ * @param transfer the {@link TransferPair} to copy dictionary data into target vector.
+ * @param dictionaryCount the value count of dictionary vector.
+ * @param start the start index
+ * @param end the end index
+ */
+ static void retrieveIndexVector(
+ BaseIntVector indices,
+ TransferPair transfer,
+ int dictionaryCount,
+ int start,
+ int end) {
+ for (int i = start; i < end; i++) {
+ if (!indices.isNull(i)) {
+ int indexAsInt = (int) indices.getValueAsLong(i);
+ if (indexAsInt > dictionaryCount) {
+ throw new IllegalArgumentException("Provided dictionary does not contain value for index " + indexAsInt);
+ }
+ transfer.copyValueSafe(indexAsInt, i);
+ }
+ }
+ }
+
+ /**
+ * Encodes a vector with the built hash table in this encoder.
+ */
+ public ValueVector encode(ValueVector vector) {
+
+ Field valueField = vector.getField();
+ FieldType indexFieldType = new FieldType(valueField.isNullable(), dictionary.getEncoding().getIndexType(),
+ dictionary.getEncoding(), valueField.getMetadata());
+ Field indexField = new Field(valueField.getName(), indexFieldType, null);
+
+ // vector to hold our indices (dictionary encoded values)
+ FieldVector createdVector = indexField.createVector(allocator);
+ if (! (createdVector instanceof BaseIntVector)) {
+ throw new IllegalArgumentException("Dictionary encoding does not have a valid int type:" +
+ createdVector.getClass());
+ }
+
+ BaseIntVector indices = (BaseIntVector) createdVector;
+ indices.allocateNew();
+
+ buildIndexVector(vector, indices, hashTable, 0, vector.getValueCount());
+ indices.setValueCount(vector.getValueCount());
+ return indices;
+ }
+
+ /**
+ * Decodes a vector with the built hash table in this encoder.
+ */
+ public ValueVector decode(ValueVector indices) {
+ int count = indices.getValueCount();
+ ValueVector dictionaryVector = dictionary.getVector();
+ int dictionaryCount = dictionaryVector.getValueCount();
+ // copy the dictionary values into the decoded vector
+ TransferPair transfer = dictionaryVector.getTransferPair(allocator);
+ transfer.getTo().allocateNewSafe();
+
+ BaseIntVector baseIntVector = (BaseIntVector) indices;
+ retrieveIndexVector(baseIntVector, transfer, dictionaryCount, 0, count);
+ ValueVector decoded = transfer.getTo();
+ decoded.setValueCount(count);
+ return decoded;
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/dictionary/DictionaryHashTable.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/dictionary/DictionaryHashTable.java
new file mode 100644
index 000000000..9926a8e2a
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/dictionary/DictionaryHashTable.java
@@ -0,0 +1,295 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.dictionary;
+
+import org.apache.arrow.memory.util.hash.ArrowBufHasher;
+import org.apache.arrow.memory.util.hash.SimpleHasher;
+import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.compare.Range;
+import org.apache.arrow.vector.compare.RangeEqualsVisitor;
+
+/**
+ * HashTable used for Dictionary encoding. It holds two vectors (the vector to encode and dictionary vector)
+ * It stores the index in dictionary vector and for a given index in encode vector,
+ * it could return dictionary index.
+ */
+public class DictionaryHashTable {
+
+ /**
+ * Represents a null value in map.
+ */
+ static final int NULL_VALUE = -1;
+
+ /**
+ * The default initial capacity - MUST be a power of two.
+ */
+ static final int DEFAULT_INITIAL_CAPACITY = 1 << 4;
+
+ /**
+ * The maximum capacity, used if a higher value is implicitly specified
+ * by either of the constructors with arguments.
+ */
+ static final int MAXIMUM_CAPACITY = 1 << 30;
+
+ /**
+ * The load factor used when none specified in constructor.
+ */
+ static final float DEFAULT_LOAD_FACTOR = 0.75f;
+
+ static final DictionaryHashTable.Entry[] EMPTY_TABLE = {};
+
+ /**
+ * The table, initialized on first use, and resized as
+ * necessary. When allocated, length is always a power of two.
+ */
+ transient DictionaryHashTable.Entry[] table = EMPTY_TABLE;
+
+ /**
+ * The number of key-value mappings contained in this map.
+ */
+ transient int size;
+
+ /**
+ * The next size value at which to resize (capacity * load factor).
+ */
+ int threshold;
+
+ /**
+ * The load factor for the hash table.
+ */
+ final float loadFactor;
+
+ private final ValueVector dictionary;
+
+ private final ArrowBufHasher hasher;
+
+ /**
+ * Constructs an empty map with the specified initial capacity and load factor.
+ */
+ public DictionaryHashTable(int initialCapacity, ValueVector dictionary, ArrowBufHasher hasher) {
+ if (initialCapacity < 0) {
+ throw new IllegalArgumentException("Illegal initial capacity: " +
+ initialCapacity);
+ }
+ if (initialCapacity > MAXIMUM_CAPACITY) {
+ initialCapacity = MAXIMUM_CAPACITY;
+ }
+ this.loadFactor = DEFAULT_LOAD_FACTOR;
+ this.threshold = initialCapacity;
+
+ this.dictionary = dictionary;
+
+ this.hasher = hasher;
+
+ // build hash table
+ for (int i = 0; i < this.dictionary.getValueCount(); i++) {
+ put(i);
+ }
+ }
+
+ public DictionaryHashTable(ValueVector dictionary, ArrowBufHasher hasher) {
+ this(DEFAULT_INITIAL_CAPACITY, dictionary, hasher);
+ }
+
+ public DictionaryHashTable(ValueVector dictionary) {
+ this(dictionary, SimpleHasher.INSTANCE);
+ }
+
+ /**
+ * Compute the capacity with given threshold and create init table.
+ */
+ private void inflateTable(int threshold) {
+ int capacity = roundUpToPowerOf2(threshold);
+ this.threshold = (int) Math.min(capacity * loadFactor, MAXIMUM_CAPACITY + 1);
+ table = new DictionaryHashTable.Entry[capacity];
+ }
+
+ /**
+ * Computes the storage location in an array for the given hashCode.
+ */
+ static int indexFor(int h, int length) {
+ return h & (length - 1);
+ }
+
+ /**
+ * Returns a power of two size for the given size.
+ */
+ static final int roundUpToPowerOf2(int size) {
+ int n = size - 1;
+ n |= n >>> 1;
+ n |= n >>> 2;
+ n |= n >>> 4;
+ n |= n >>> 8;
+ n |= n >>> 16;
+ return (n < 0) ? 1 : (n >= MAXIMUM_CAPACITY) ? MAXIMUM_CAPACITY : n + 1;
+ }
+
+ /**
+ * get the corresponding dictionary index with the given index in vector which to encode.
+ * @param indexInArray index in vector.
+ * @return dictionary vector index or -1 if no value equals.
+ */
+ public int getIndex(int indexInArray, ValueVector toEncode) {
+ int hash = toEncode.hashCode(indexInArray, this.hasher);
+ int index = indexFor(hash, table.length);
+
+ RangeEqualsVisitor equalVisitor = new RangeEqualsVisitor(dictionary, toEncode, null);
+ Range range = new Range(0, 0, 1);
+
+ for (DictionaryHashTable.Entry e = table[index]; e != null ; e = e.next) {
+ if (e.hash == hash) {
+ int dictIndex = e.index;
+
+ range = range.setRightStart(indexInArray)
+ .setLeftStart(dictIndex);
+ if (equalVisitor.rangeEquals(range)) {
+ return dictIndex;
+ }
+ }
+ }
+ return NULL_VALUE;
+ }
+
+ /**
+ * put the index of dictionary vector to build hash table.
+ */
+ private void put(int indexInDictionary) {
+ if (table == EMPTY_TABLE) {
+ inflateTable(threshold);
+ }
+
+ int hash = dictionary.hashCode(indexInDictionary, this.hasher);
+ int i = indexFor(hash, table.length);
+ for (DictionaryHashTable.Entry e = table[i]; e != null; e = e.next) {
+ if (e.hash == hash && e.index == indexInDictionary) {
+ //already has this index, return
+ return;
+ }
+ }
+
+ addEntry(hash, indexInDictionary, i);
+ }
+
+ /**
+ * Create a new Entry at the specific position of table.
+ */
+ void createEntry(int hash, int index, int bucketIndex) {
+ DictionaryHashTable.Entry e = table[bucketIndex];
+ table[bucketIndex] = new DictionaryHashTable.Entry(hash, index, e);
+ size++;
+ }
+
+ /**
+ * Add Entry at the specified location of the table.
+ */
+ void addEntry(int hash, int index, int bucketIndex) {
+ if ((size >= threshold) && (null != table[bucketIndex])) {
+ resize(2 * table.length);
+ bucketIndex = indexFor(hash, table.length);
+ }
+
+ createEntry(hash, index, bucketIndex);
+ }
+
+ /**
+ * Resize table with given new capacity.
+ */
+ void resize(int newCapacity) {
+ DictionaryHashTable.Entry[] oldTable = table;
+ int oldCapacity = oldTable.length;
+ if (oldCapacity == MAXIMUM_CAPACITY) {
+ threshold = Integer.MAX_VALUE;
+ return;
+ }
+
+ DictionaryHashTable.Entry[] newTable = new DictionaryHashTable.Entry[newCapacity];
+ transfer(newTable);
+ table = newTable;
+ threshold = (int) Math.min(newCapacity * loadFactor, MAXIMUM_CAPACITY + 1);
+ }
+
+ /**
+ * Transfer entries into new table from old table.
+ * @param newTable new table
+ */
+ void transfer(DictionaryHashTable.Entry[] newTable) {
+ int newCapacity = newTable.length;
+ for (DictionaryHashTable.Entry e : table) {
+ while (null != e) {
+ DictionaryHashTable.Entry next = e.next;
+ int i = indexFor(e.hash, newCapacity);
+ e.next = newTable[i];
+ newTable[i] = e;
+ e = next;
+ }
+ }
+ }
+
+ /**
+ * Returns the number of mappings in this Map.
+ */
+ public int size() {
+ return size;
+ }
+
+ /**
+ * Removes all elements from this map, leaving it empty.
+ */
+ public void clear() {
+ size = 0;
+ for (int i = 0; i < table.length; i++) {
+ table[i] = null;
+ }
+ }
+
+ /**
+ * Class to keep dictionary index data within hash table.
+ */
+ static class Entry {
+ //dictionary index
+ int index;
+ DictionaryHashTable.Entry next;
+ int hash;
+
+ Entry(int hash, int index, DictionaryHashTable.Entry next) {
+ this.index = index;
+ this.hash = hash;
+ this.next = next;
+ }
+
+ public final int getIndex() {
+ return this.index;
+ }
+
+ @Override
+ public int hashCode() {
+ return hash;
+ }
+
+ public final boolean equals(Object o) {
+ if (!(o instanceof DictionaryHashTable.Entry)) {
+ return false;
+ }
+ DictionaryHashTable.Entry e = (DictionaryHashTable.Entry) o;
+ if (index == e.getIndex()) {
+ return true;
+ }
+ return false;
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/dictionary/DictionaryProvider.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/dictionary/DictionaryProvider.java
new file mode 100644
index 000000000..21165c07d
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/dictionary/DictionaryProvider.java
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.dictionary;
+
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Set;
+
+/**
+ * A manager for association of dictionary IDs to their corresponding {@link Dictionary}.
+ */
+public interface DictionaryProvider {
+
+ /** Return the dictionary for the given ID. */
+ Dictionary lookup(long id);
+
+ /**
+ * Implementation of {@link DictionaryProvider} that is backed by a hash-map.
+ */
+ class MapDictionaryProvider implements DictionaryProvider {
+
+ private final Map<Long, Dictionary> map;
+
+ /**
+ * Constructs a new instance from the given dictionaries.
+ */
+ public MapDictionaryProvider(Dictionary... dictionaries) {
+ this.map = new HashMap<>();
+ for (Dictionary dictionary : dictionaries) {
+ put(dictionary);
+ }
+ }
+
+ public void put(Dictionary dictionary) {
+ map.put(dictionary.getEncoding().getId(), dictionary);
+ }
+
+ public final Set<Long> getDictionaryIds() {
+ return map.keySet();
+ }
+
+ @Override
+ public Dictionary lookup(long id) {
+ return map.get(id);
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/dictionary/ListSubfieldEncoder.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/dictionary/ListSubfieldEncoder.java
new file mode 100644
index 000000000..dd2bb26e3
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/dictionary/ListSubfieldEncoder.java
@@ -0,0 +1,137 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.dictionary;
+
+import java.util.Collections;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.util.hash.ArrowBufHasher;
+import org.apache.arrow.memory.util.hash.SimpleHasher;
+import org.apache.arrow.vector.BaseIntVector;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.complex.BaseListVector;
+import org.apache.arrow.vector.ipc.message.ArrowFieldNode;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.TransferPair;
+
+/**
+ * Sub fields encoder/decoder for Dictionary encoded {@link BaseListVector}.
+ */
+public class ListSubfieldEncoder {
+
+ private final DictionaryHashTable hashTable;
+ private final Dictionary dictionary;
+ private final BufferAllocator allocator;
+
+ public ListSubfieldEncoder(Dictionary dictionary, BufferAllocator allocator) {
+ this (dictionary, allocator, SimpleHasher.INSTANCE);
+ }
+
+ /**
+ * Construct an instance.
+ */
+ public ListSubfieldEncoder(Dictionary dictionary, BufferAllocator allocator, ArrowBufHasher hasher) {
+ this.dictionary = dictionary;
+ this.allocator = allocator;
+ BaseListVector dictVector = (BaseListVector) dictionary.getVector();
+ hashTable = new DictionaryHashTable(getDataVector(dictVector), hasher);
+ }
+
+ private FieldVector getDataVector(BaseListVector vector) {
+ return vector.getChildrenFromFields().get(0);
+ }
+
+ private BaseListVector cloneVector(BaseListVector vector) {
+
+ final FieldType fieldType = vector.getField().getFieldType();
+ BaseListVector cloned = (BaseListVector) fieldType.createNewSingleVector(vector.getField().getName(),
+ allocator, /*schemaCallBack=*/null);
+
+ final ArrowFieldNode fieldNode = new ArrowFieldNode(vector.getValueCount(), vector.getNullCount());
+ cloned.loadFieldBuffers(fieldNode, vector.getFieldBuffers());
+
+ return cloned;
+ }
+
+ /**
+ * Dictionary encodes subfields for complex vector with a provided dictionary.
+ * The dictionary must contain all values in the sub fields vector.
+ * @param vector vector to encode
+ * @return dictionary encoded vector
+ */
+ public BaseListVector encodeListSubField(BaseListVector vector) {
+ final int valueCount = vector.getValueCount();
+
+ FieldType indexFieldType = new FieldType(vector.getField().isNullable(),
+ dictionary.getEncoding().getIndexType(), dictionary.getEncoding(), vector.getField().getMetadata());
+ Field valueField = new Field(vector.getField().getName(), indexFieldType, null);
+
+ // clone list vector and initialize data vector
+ BaseListVector encoded = cloneVector(vector);
+ encoded.initializeChildrenFromFields(Collections.singletonList(valueField));
+ BaseIntVector indices = (BaseIntVector) getDataVector(encoded);
+
+ ValueVector dataVector = getDataVector(vector);
+ for (int i = 0; i < valueCount; i++) {
+ if (!vector.isNull(i)) {
+ int start = vector.getElementStartIndex(i);
+ int end = vector.getElementEndIndex(i);
+
+ DictionaryEncoder.buildIndexVector(dataVector, indices, hashTable, start, end);
+ }
+ }
+
+ return encoded;
+ }
+
+ /**
+ * Decodes a dictionary subfields encoded vector using the provided dictionary.
+ * @param vector dictionary encoded vector, its data vector must be int type
+ * @return vector with values restored from dictionary
+ */
+ public BaseListVector decodeListSubField(BaseListVector vector) {
+
+ int valueCount = vector.getValueCount();
+ BaseListVector dictionaryVector = (BaseListVector) dictionary.getVector();
+ int dictionaryValueCount = getDataVector(dictionaryVector).getValueCount();
+
+ // clone list vector and initialize data vector
+ BaseListVector decoded = cloneVector(vector);
+ Field dataVectorField = getDataVector(dictionaryVector).getField();
+ decoded.initializeChildrenFromFields(Collections.singletonList(dataVectorField));
+
+ // get data vector
+ ValueVector dataVector = getDataVector(decoded);
+
+ TransferPair transfer = getDataVector(dictionaryVector).makeTransferPair(dataVector);
+ BaseIntVector indices = (BaseIntVector) getDataVector(vector);
+
+ for (int i = 0; i < valueCount; i++) {
+
+ if (!vector.isNull(i)) {
+ int start = vector.getElementStartIndex(i);
+ int end = vector.getElementEndIndex(i);
+
+ DictionaryEncoder.retrieveIndexVector(indices, transfer, dictionaryValueCount, start, end);
+ }
+ }
+ return decoded;
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/dictionary/StructSubfieldEncoder.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/dictionary/StructSubfieldEncoder.java
new file mode 100644
index 000000000..6542b298d
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/dictionary/StructSubfieldEncoder.java
@@ -0,0 +1,196 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.dictionary;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.util.hash.ArrowBufHasher;
+import org.apache.arrow.memory.util.hash.SimpleHasher;
+import org.apache.arrow.util.Preconditions;
+import org.apache.arrow.vector.BaseIntVector;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.complex.StructVector;
+import org.apache.arrow.vector.ipc.message.ArrowFieldNode;
+import org.apache.arrow.vector.types.pojo.DictionaryEncoding;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.TransferPair;
+
+/**
+ * Sub fields encoder/decoder for Dictionary encoded {@link StructVector}.
+ * Notes that child vectors within struct vector can either be dictionary encodable or not.
+ */
+public class StructSubfieldEncoder {
+
+ private final BufferAllocator allocator;
+
+ private final DictionaryProvider.MapDictionaryProvider provider;
+ private final Map<Long, DictionaryHashTable> dictionaryIdToHashTable;
+
+ /**
+ * Construct an instance.
+ */
+ public StructSubfieldEncoder(BufferAllocator allocator, DictionaryProvider.MapDictionaryProvider provider) {
+ this (allocator, provider, SimpleHasher.INSTANCE);
+ }
+
+ /**
+ * Construct an instance.
+ */
+ public StructSubfieldEncoder(
+ BufferAllocator allocator,
+ DictionaryProvider.MapDictionaryProvider provider,
+ ArrowBufHasher hasher) {
+
+ this.allocator = allocator;
+ this.provider = provider;
+
+ this.dictionaryIdToHashTable = new HashMap<>();
+
+ provider.getDictionaryIds().forEach(id ->
+ dictionaryIdToHashTable.put(id, new DictionaryHashTable(provider.lookup(id).getVector(), hasher)));
+ }
+
+ private FieldVector getChildVector(StructVector vector, int index) {
+ return vector.getChildrenFromFields().get(index);
+ }
+
+ private StructVector cloneVector(StructVector vector) {
+
+ final FieldType fieldType = vector.getField().getFieldType();
+ StructVector cloned = (StructVector) fieldType.createNewSingleVector(
+ vector.getField().getName(), allocator, /*schemaCallback=*/null);
+
+ final ArrowFieldNode fieldNode = new ArrowFieldNode(vector.getValueCount(), vector.getNullCount());
+ cloned.loadFieldBuffers(fieldNode, vector.getFieldBuffers());
+
+ return cloned;
+ }
+
+ /**
+ * Dictionary encodes subfields for complex vector with a provided dictionary.
+ * The dictionary must contain all values in the sub fields vector.
+ * @param vector vector to encode
+ * @param columnToDictionaryId the mappings between child vector index and dictionary id. A null dictionary
+ * id indicates the child vector is not encodable.
+ * @return dictionary encoded vector
+ */
+ public StructVector encode(StructVector vector, Map<Integer, Long> columnToDictionaryId) {
+ final int valueCount = vector.getValueCount();
+ final int childCount = vector.getChildrenFromFields().size();
+
+ List<Field> childrenFields = new ArrayList<>();
+
+ // initialize child fields.
+ for (int i = 0; i < childCount; i++) {
+ FieldVector childVector = getChildVector(vector, i);
+ Long dictionaryId = columnToDictionaryId.get(i);
+ // A null dictionaryId indicates the child vector shouldn't be encoded.
+ if (dictionaryId == null) {
+ childrenFields.add(childVector.getField());
+ } else {
+ Dictionary dictionary = provider.lookup(dictionaryId);
+ Preconditions.checkNotNull(dictionary, "Dictionary not found with id:" + dictionaryId);
+ FieldType indexFieldType = new FieldType(childVector.getField().isNullable(),
+ dictionary.getEncoding().getIndexType(), dictionary.getEncoding());
+ childrenFields.add(new Field(childVector.getField().getName(), indexFieldType, /*children=*/null));
+ }
+ }
+
+ // clone list vector and initialize data vector
+ StructVector encoded = cloneVector(vector);
+ encoded.initializeChildrenFromFields(childrenFields);
+ encoded.setValueCount(valueCount);
+
+ for (int index = 0; index < childCount; index++) {
+ FieldVector childVector = getChildVector(vector, index);
+ FieldVector encodedChildVector = getChildVector(encoded, index);
+ Long dictionaryId = columnToDictionaryId.get(index);
+ if (dictionaryId != null) {
+ BaseIntVector indices = (BaseIntVector) encodedChildVector;
+ DictionaryEncoder.buildIndexVector(childVector, indices, dictionaryIdToHashTable.get(dictionaryId),
+ 0, valueCount);
+ } else {
+ childVector.makeTransferPair(encodedChildVector).splitAndTransfer(0, valueCount);
+ }
+ }
+
+ return encoded;
+ }
+
+ /**
+ * Decodes a dictionary subfields encoded vector using the provided dictionary.
+ * @param vector dictionary encoded vector, its child vector must be int type
+ * @return vector with values restored from dictionary
+ */
+ public StructVector decode(StructVector vector) {
+
+ final int valueCount = vector.getValueCount();
+ final int childCount = vector.getChildrenFromFields().size();
+
+ // clone list vector and initialize child vectors
+ StructVector decoded = cloneVector(vector);
+ List<Field> childFields = new ArrayList<>();
+ for (int i = 0; i < childCount; i++) {
+ FieldVector childVector = getChildVector(vector, i);
+ Dictionary dictionary = getChildVectorDictionary(childVector);
+ // childVector is not encoded.
+ if (dictionary == null) {
+ childFields.add(childVector.getField());
+ } else {
+ childFields.add(dictionary.getVector().getField());
+ }
+ }
+ decoded.initializeChildrenFromFields(childFields);
+ decoded.setValueCount(valueCount);
+
+ for (int index = 0; index < childCount; index++) {
+ // get child vector
+ FieldVector childVector = getChildVector(vector, index);
+ FieldVector decodedChildVector = getChildVector(decoded, index);
+ Dictionary dictionary = getChildVectorDictionary(childVector);
+ if (dictionary == null) {
+ childVector.makeTransferPair(decodedChildVector).splitAndTransfer(0, valueCount);
+ } else {
+ TransferPair transfer = dictionary.getVector().makeTransferPair(decodedChildVector);
+ BaseIntVector indices = (BaseIntVector) childVector;
+
+ DictionaryEncoder.retrieveIndexVector(indices, transfer, valueCount, 0, valueCount);
+ }
+ }
+
+ return decoded;
+ }
+
+ /**
+ * Get the child vector dictionary, return null if not dictionary encoded.
+ */
+ private Dictionary getChildVectorDictionary(FieldVector childVector) {
+ DictionaryEncoding dictionaryEncoding = childVector.getField().getDictionary();
+ if (dictionaryEncoding != null) {
+ Dictionary dictionary = provider.lookup(dictionaryEncoding.getId());
+ Preconditions.checkNotNull(dictionary, "Dictionary not found with id:" + dictionary);
+ return dictionary;
+ }
+ return null;
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/holders/ComplexHolder.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/holders/ComplexHolder.java
new file mode 100644
index 000000000..b4cb4882f
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/holders/ComplexHolder.java
@@ -0,0 +1,28 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.holders;
+
+import org.apache.arrow.vector.complex.reader.FieldReader;
+
+/**
+ * Represents a single value of a complex type (e.g. Union, Struct).
+ */
+public class ComplexHolder implements ValueHolder {
+ public FieldReader reader;
+ public int isSet;
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/holders/DenseUnionHolder.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/holders/DenseUnionHolder.java
new file mode 100644
index 000000000..c3052711e
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/holders/DenseUnionHolder.java
@@ -0,0 +1,38 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.holders;
+
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.types.Types;
+
+/**
+ * {@link ValueHolder} for Dense Union Vectors.
+ */
+public class DenseUnionHolder implements ValueHolder {
+ public FieldReader reader;
+ public int isSet;
+ public byte typeId;
+
+ public Types.MinorType getMinorType() {
+ return reader.getMinorType();
+ }
+
+ public boolean isSet() {
+ return isSet == 1;
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/holders/RepeatedListHolder.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/holders/RepeatedListHolder.java
new file mode 100644
index 000000000..fc17704f0
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/holders/RepeatedListHolder.java
@@ -0,0 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.holders;
+
+/**
+ * {@link ValueHolder} for a nested {@link org.apache.arrow.vector.complex.ListVector}.
+ */
+public final class RepeatedListHolder implements ValueHolder {
+ public int start;
+ public int end;
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/holders/RepeatedStructHolder.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/holders/RepeatedStructHolder.java
new file mode 100644
index 000000000..32c590a8a
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/holders/RepeatedStructHolder.java
@@ -0,0 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.holders;
+
+/**
+ * {@link ValueHolder} for a list of structs.
+ */
+public final class RepeatedStructHolder implements ValueHolder {
+ public int start;
+ public int end;
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/holders/UnionHolder.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/holders/UnionHolder.java
new file mode 100644
index 000000000..e67a0e941
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/holders/UnionHolder.java
@@ -0,0 +1,37 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.holders;
+
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.types.Types.MinorType;
+
+/**
+ * {@link ValueHolder} for Union Vectors.
+ */
+public class UnionHolder implements ValueHolder {
+ public FieldReader reader;
+ public int isSet;
+
+ public MinorType getMinorType() {
+ return reader.getMinorType();
+ }
+
+ public boolean isSet() {
+ return isSet == 1;
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/holders/ValueHolder.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/holders/ValueHolder.java
new file mode 100644
index 000000000..a809e6bb8
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/holders/ValueHolder.java
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.holders;
+
+/**
+ * Wrapper object for an individual value in Arrow.
+ *
+ * <p>ValueHolders are designed to be mutable wrapper objects for defining clean
+ * APIs that access data in Arrow. For performance, object creation is avoided
+ * at all costs throughout execution. For this reason, ValueHolders are
+ * disallowed from implementing any methods, this allows for them to be
+ * replaced by their java primitive inner members during optimization of
+ * run-time generated code.
+ */
+public interface ValueHolder {
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowFileReader.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowFileReader.java
new file mode 100644
index 000000000..f4e9e0db1
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowFileReader.java
@@ -0,0 +1,230 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.ipc;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.channels.SeekableByteChannel;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.arrow.flatbuf.Footer;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.util.VisibleForTesting;
+import org.apache.arrow.vector.compression.CompressionCodec;
+import org.apache.arrow.vector.compression.NoCompressionCodec;
+import org.apache.arrow.vector.ipc.message.ArrowBlock;
+import org.apache.arrow.vector.ipc.message.ArrowDictionaryBatch;
+import org.apache.arrow.vector.ipc.message.ArrowFooter;
+import org.apache.arrow.vector.ipc.message.ArrowRecordBatch;
+import org.apache.arrow.vector.ipc.message.MessageSerializer;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.apache.arrow.vector.validate.MetadataV4UnionChecker;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * An implementation of {@link ArrowReader} that reads the standard arrow binary
+ * file format.
+ */
+public class ArrowFileReader extends ArrowReader {
+
+ private static final Logger LOGGER = LoggerFactory.getLogger(ArrowFileReader.class);
+
+ private SeekableReadChannel in;
+ private ArrowFooter footer;
+ private int currentDictionaryBatch = 0;
+ private int currentRecordBatch = 0;
+
+ public ArrowFileReader(
+ SeekableReadChannel in, BufferAllocator allocator, CompressionCodec.Factory compressionFactory) {
+ super(allocator, compressionFactory);
+ this.in = in;
+ }
+
+ public ArrowFileReader(
+ SeekableByteChannel in, BufferAllocator allocator, CompressionCodec.Factory compressionFactory) {
+ this(new SeekableReadChannel(in), allocator, compressionFactory);
+ }
+
+ public ArrowFileReader(SeekableReadChannel in, BufferAllocator allocator) {
+ this(in, allocator, NoCompressionCodec.Factory.INSTANCE);
+ }
+
+ public ArrowFileReader(SeekableByteChannel in, BufferAllocator allocator) {
+ this(new SeekableReadChannel(in), allocator);
+ }
+
+ @Override
+ public long bytesRead() {
+ return in.bytesRead();
+ }
+
+ @Override
+ protected void closeReadSource() throws IOException {
+ in.close();
+ }
+
+ @Override
+ protected Schema readSchema() throws IOException {
+ if (footer == null) {
+ if (in.size() <= (ArrowMagic.MAGIC_LENGTH * 2 + 4)) {
+ throw new InvalidArrowFileException("file too small: " + in.size());
+ }
+ ByteBuffer buffer = ByteBuffer.allocate(4 + ArrowMagic.MAGIC_LENGTH);
+ long footerLengthOffset = in.size() - buffer.remaining();
+ in.setPosition(footerLengthOffset);
+ in.readFully(buffer);
+ buffer.flip();
+ byte[] array = buffer.array();
+ if (!ArrowMagic.validateMagic(Arrays.copyOfRange(array, 4, array.length))) {
+ throw new InvalidArrowFileException("missing Magic number " + Arrays.toString(buffer.array()));
+ }
+ int footerLength = MessageSerializer.bytesToInt(array);
+ if (footerLength <= 0 || footerLength + ArrowMagic.MAGIC_LENGTH * 2 + 4 > in.size()) {
+ throw new InvalidArrowFileException("invalid footer length: " + footerLength);
+ }
+ long footerOffset = footerLengthOffset - footerLength;
+ LOGGER.debug("Footer starts at {}, length: {}", footerOffset, footerLength);
+ ByteBuffer footerBuffer = ByteBuffer.allocate(footerLength);
+ in.setPosition(footerOffset);
+ in.readFully(footerBuffer);
+ footerBuffer.flip();
+ Footer footerFB = Footer.getRootAsFooter(footerBuffer);
+ this.footer = new ArrowFooter(footerFB);
+ }
+ MetadataV4UnionChecker.checkRead(footer.getSchema(), footer.getMetadataVersion());
+ return footer.getSchema();
+ }
+
+ @Override
+ public void initialize() throws IOException {
+ super.initialize();
+
+ // empty stream, has no dictionaries in IPC.
+ if (footer.getRecordBatches().size() == 0) {
+ return;
+ }
+ // Read and load all dictionaries from schema
+ for (int i = 0; i < dictionaries.size(); i++) {
+ ArrowDictionaryBatch dictionaryBatch = readDictionary();
+ loadDictionary(dictionaryBatch);
+ }
+ }
+
+ /**
+ * Get custom metadata.
+ */
+ public Map<String, String> getMetaData() {
+ if (footer != null) {
+ return footer.getMetaData();
+ }
+ return new HashMap<>();
+ }
+
+ /**
+ * Read a dictionary batch from the source, will be invoked after the schema has been read and
+ * called N times, where N is the number of dictionaries indicated by the schema Fields.
+ *
+ * @return the read ArrowDictionaryBatch
+ * @throws IOException on error
+ */
+ public ArrowDictionaryBatch readDictionary() throws IOException {
+ if (currentDictionaryBatch >= footer.getDictionaries().size()) {
+ throw new IOException("Requested more dictionaries than defined in footer: " + currentDictionaryBatch);
+ }
+ ArrowBlock block = footer.getDictionaries().get(currentDictionaryBatch++);
+ return readDictionaryBatch(in, block, allocator);
+ }
+
+ /** Returns true if a batch was read, false if no more batches. */
+ @Override
+ public boolean loadNextBatch() throws IOException {
+ prepareLoadNextBatch();
+
+ if (currentRecordBatch < footer.getRecordBatches().size()) {
+ ArrowBlock block = footer.getRecordBatches().get(currentRecordBatch++);
+ ArrowRecordBatch batch = readRecordBatch(in, block, allocator);
+ loadRecordBatch(batch);
+ return true;
+ } else {
+ return false;
+ }
+ }
+
+
+ public List<ArrowBlock> getDictionaryBlocks() throws IOException {
+ ensureInitialized();
+ return footer.getDictionaries();
+ }
+
+ /**
+ * Returns the {@link ArrowBlock} metadata from the file.
+ */
+ public List<ArrowBlock> getRecordBlocks() throws IOException {
+ ensureInitialized();
+ return footer.getRecordBatches();
+ }
+
+ /**
+ * Loads record batch for the given block.
+ */
+ public boolean loadRecordBatch(ArrowBlock block) throws IOException {
+ ensureInitialized();
+ int blockIndex = footer.getRecordBatches().indexOf(block);
+ if (blockIndex == -1) {
+ throw new IllegalArgumentException("Arrow block does not exist in record batches: " + block);
+ }
+ currentRecordBatch = blockIndex;
+ return loadNextBatch();
+ }
+
+ @VisibleForTesting
+ ArrowFooter getFooter() {
+ return footer;
+ }
+
+ private ArrowDictionaryBatch readDictionaryBatch(SeekableReadChannel in,
+ ArrowBlock block,
+ BufferAllocator allocator) throws IOException {
+ LOGGER.debug("DictionaryRecordBatch at {}, metadata: {}, body: {}",
+ block.getOffset(), block.getMetadataLength(), block.getBodyLength());
+ in.setPosition(block.getOffset());
+ ArrowDictionaryBatch batch = MessageSerializer.deserializeDictionaryBatch(in, block, allocator);
+ if (batch == null) {
+ throw new IOException("Invalid file. No batch at offset: " + block.getOffset());
+ }
+ return batch;
+ }
+
+ private ArrowRecordBatch readRecordBatch(SeekableReadChannel in,
+ ArrowBlock block,
+ BufferAllocator allocator) throws IOException {
+ LOGGER.debug("RecordBatch at {}, metadata: {}, body: {}",
+ block.getOffset(), block.getMetadataLength(),
+ block.getBodyLength());
+ in.setPosition(block.getOffset());
+ ArrowRecordBatch batch = MessageSerializer.deserializeRecordBatch(in, block, allocator);
+ if (batch == null) {
+ throw new IOException("Invalid file. No batch at offset: " + block.getOffset());
+ }
+ return batch;
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowFileWriter.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowFileWriter.java
new file mode 100644
index 000000000..55cd26285
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowFileWriter.java
@@ -0,0 +1,119 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.ipc;
+
+import java.io.IOException;
+import java.nio.channels.WritableByteChannel;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.arrow.util.VisibleForTesting;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.dictionary.DictionaryProvider;
+import org.apache.arrow.vector.ipc.message.ArrowBlock;
+import org.apache.arrow.vector.ipc.message.ArrowDictionaryBatch;
+import org.apache.arrow.vector.ipc.message.ArrowFooter;
+import org.apache.arrow.vector.ipc.message.ArrowRecordBatch;
+import org.apache.arrow.vector.ipc.message.IpcOption;
+import org.apache.arrow.vector.ipc.message.MessageSerializer;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * {@link ArrowWriter} that writes out a Arrow files (https://arrow.apache.org/docs/format/IPC.html#file-format).
+ */
+public class ArrowFileWriter extends ArrowWriter {
+
+ private static final Logger LOGGER = LoggerFactory.getLogger(ArrowFileWriter.class);
+
+ // All ArrowBlocks written are saved in these lists to be passed to ArrowFooter in endInternal.
+ private final List<ArrowBlock> dictionaryBlocks = new ArrayList<>();
+ private final List<ArrowBlock> recordBlocks = new ArrayList<>();
+
+ private Map<String, String> metaData;
+
+ public ArrowFileWriter(VectorSchemaRoot root, DictionaryProvider provider, WritableByteChannel out) {
+ super(root, provider, out);
+ }
+
+ public ArrowFileWriter(VectorSchemaRoot root, DictionaryProvider provider, WritableByteChannel out,
+ Map<String, String> metaData) {
+ super(root, provider, out);
+ this.metaData = metaData;
+ }
+
+ public ArrowFileWriter(VectorSchemaRoot root, DictionaryProvider provider, WritableByteChannel out,
+ IpcOption option) {
+ super(root, provider, out, option);
+ }
+
+ public ArrowFileWriter(VectorSchemaRoot root, DictionaryProvider provider, WritableByteChannel out,
+ Map<String, String> metaData, IpcOption option) {
+ super(root, provider, out, option);
+ this.metaData = metaData;
+ }
+
+ @Override
+ protected void startInternal(WriteChannel out) throws IOException {
+ ArrowMagic.writeMagic(out, true);
+ }
+
+ @Override
+ protected ArrowBlock writeDictionaryBatch(ArrowDictionaryBatch batch) throws IOException {
+ ArrowBlock block = super.writeDictionaryBatch(batch);
+ dictionaryBlocks.add(block);
+ return block;
+ }
+
+ @Override
+ protected ArrowBlock writeRecordBatch(ArrowRecordBatch batch) throws IOException {
+ ArrowBlock block = super.writeRecordBatch(batch);
+ recordBlocks.add(block);
+ return block;
+ }
+
+ @Override
+ protected void endInternal(WriteChannel out) throws IOException {
+ if (!option.write_legacy_ipc_format) {
+ out.writeIntLittleEndian(MessageSerializer.IPC_CONTINUATION_TOKEN);
+ }
+ out.writeIntLittleEndian(0);
+
+ long footerStart = out.getCurrentPosition();
+ out.write(new ArrowFooter(schema, dictionaryBlocks, recordBlocks, metaData, option.metadataVersion), false);
+ int footerLength = (int) (out.getCurrentPosition() - footerStart);
+ if (footerLength <= 0) {
+ throw new InvalidArrowFileException("invalid footer");
+ }
+ out.writeIntLittleEndian(footerLength);
+ LOGGER.debug("Footer starts at {}, length: {}", footerStart, footerLength);
+ ArrowMagic.writeMagic(out, false);
+ LOGGER.debug("magic written, now at {}", out.getCurrentPosition());
+ }
+
+ @VisibleForTesting
+ public List<ArrowBlock> getRecordBlocks() {
+ return recordBlocks;
+ }
+
+ @VisibleForTesting
+ public List<ArrowBlock> getDictionaryBlocks() {
+ return dictionaryBlocks;
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowMagic.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowMagic.java
new file mode 100644
index 000000000..9c399669a
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowMagic.java
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.ipc;
+
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.util.Arrays;
+
+/**
+ * Magic header/footer helpers for {@link ArrowFileWriter} and {@link ArrowFileReader} formatted files.
+ */
+class ArrowMagic {
+ private ArrowMagic(){}
+
+ private static final byte[] MAGIC = "ARROW1".getBytes(StandardCharsets.UTF_8);
+
+ public static final int MAGIC_LENGTH = MAGIC.length;
+
+ public static void writeMagic(WriteChannel out, boolean align) throws IOException {
+ out.write(MAGIC);
+ if (align) {
+ out.align();
+ }
+ }
+
+ public static boolean validateMagic(byte[] array) {
+ return Arrays.equals(MAGIC, array);
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowReader.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowReader.java
new file mode 100644
index 000000000..9d940deec
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowReader.java
@@ -0,0 +1,255 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.ipc;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.VectorLoader;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.compression.CompressionCodec;
+import org.apache.arrow.vector.compression.NoCompressionCodec;
+import org.apache.arrow.vector.dictionary.Dictionary;
+import org.apache.arrow.vector.dictionary.DictionaryProvider;
+import org.apache.arrow.vector.ipc.message.ArrowDictionaryBatch;
+import org.apache.arrow.vector.ipc.message.ArrowRecordBatch;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.apache.arrow.vector.util.DictionaryUtility;
+import org.apache.arrow.vector.util.VectorBatchAppender;
+
+/**
+ * Abstract class to read Schema and ArrowRecordBatches.
+ *
+ */
+public abstract class ArrowReader implements DictionaryProvider, AutoCloseable {
+
+ protected final BufferAllocator allocator;
+ private VectorLoader loader;
+ private VectorSchemaRoot root;
+ protected Map<Long, Dictionary> dictionaries;
+ private boolean initialized = false;
+
+ private final CompressionCodec.Factory compressionFactory;
+
+ protected ArrowReader(BufferAllocator allocator) {
+ this(allocator, NoCompressionCodec.Factory.INSTANCE);
+ }
+
+ protected ArrowReader(BufferAllocator allocator, CompressionCodec.Factory compressionFactory) {
+ this.allocator = allocator;
+ this.compressionFactory = compressionFactory;
+ }
+
+ /**
+ * Returns the vector schema root. This will be loaded with new values on every call to loadNextBatch.
+ *
+ * @return the vector schema root
+ * @throws IOException if reading of schema fails
+ */
+ public VectorSchemaRoot getVectorSchemaRoot() throws IOException {
+ ensureInitialized();
+ return root;
+ }
+
+ /**
+ * Returns any dictionaries that were loaded along with ArrowRecordBatches.
+ *
+ * @return Map of dictionaries to dictionary id, empty if no dictionaries loaded
+ * @throws IOException if reading of schema fails
+ */
+ public Map<Long, Dictionary> getDictionaryVectors() throws IOException {
+ ensureInitialized();
+ return dictionaries;
+ }
+
+ /**
+ * Lookup a dictionary that has been loaded using the dictionary id.
+ *
+ * @param id Unique identifier for a dictionary
+ * @return the requested dictionary or null if not found
+ */
+ @Override
+ public Dictionary lookup(long id) {
+ if (!initialized) {
+ throw new IllegalStateException("Unable to lookup until reader has been initialized");
+ }
+
+ return dictionaries.get(id);
+ }
+
+ /**
+ * Load the next ArrowRecordBatch to the vector schema root if available.
+ *
+ * @return true if a batch was read, false on EOS
+ * @throws IOException on error
+ */
+ public abstract boolean loadNextBatch() throws IOException;
+
+ /**
+ * Return the number of bytes read from the ReadChannel.
+ *
+ * @return number of bytes read
+ */
+ public abstract long bytesRead();
+
+ /**
+ * Close resources, including vector schema root and dictionary vectors, and the
+ * underlying read source.
+ *
+ * @throws IOException on error
+ */
+ @Override
+ public void close() throws IOException {
+ close(true);
+ }
+
+ /**
+ * Close resources, including vector schema root and dictionary vectors. If the flag
+ * closeReadChannel is true then close the underlying read source, otherwise leave it open.
+ *
+ * @param closeReadSource Flag to control if closing the underlying read source
+ * @throws IOException on error
+ */
+ public void close(boolean closeReadSource) throws IOException {
+ if (initialized) {
+ root.close();
+ for (Dictionary dictionary : dictionaries.values()) {
+ dictionary.getVector().close();
+ }
+ }
+
+ if (closeReadSource) {
+ closeReadSource();
+ }
+ }
+
+ /**
+ * Close the underlying read source.
+ *
+ * @throws IOException on error
+ */
+ protected abstract void closeReadSource() throws IOException;
+
+ /**
+ * Read the Schema from the source, will be invoked at the beginning the initialization.
+ *
+ * @return the read Schema
+ * @throws IOException on error
+ */
+ protected abstract Schema readSchema() throws IOException;
+
+ /**
+ * Initialize if not done previously.
+ *
+ * @throws IOException on error
+ */
+ protected void ensureInitialized() throws IOException {
+ if (!initialized) {
+ initialize();
+ initialized = true;
+ }
+ }
+
+ /**
+ * Reads the schema and initializes the vectors.
+ */
+ protected void initialize() throws IOException {
+ Schema originalSchema = readSchema();
+ List<Field> fields = new ArrayList<>(originalSchema.getFields().size());
+ List<FieldVector> vectors = new ArrayList<>(originalSchema.getFields().size());
+ Map<Long, Dictionary> dictionaries = new HashMap<>();
+
+ // Convert fields with dictionaries to have the index type
+ for (Field field : originalSchema.getFields()) {
+ Field updated = DictionaryUtility.toMemoryFormat(field, allocator, dictionaries);
+ fields.add(updated);
+ vectors.add(updated.createVector(allocator));
+ }
+ Schema schema = new Schema(fields, originalSchema.getCustomMetadata());
+
+ this.root = new VectorSchemaRoot(schema, vectors, 0);
+ this.loader = new VectorLoader(root, compressionFactory);
+ this.dictionaries = Collections.unmodifiableMap(dictionaries);
+ }
+
+ /**
+ * Ensure the reader has been initialized and reset the VectorSchemaRoot row count to 0.
+ *
+ * @throws IOException on error
+ */
+ protected void prepareLoadNextBatch() throws IOException {
+ ensureInitialized();
+ root.setRowCount(0);
+ }
+
+ /**
+ * Load an ArrowRecordBatch to the readers VectorSchemaRoot.
+ *
+ * @param batch the record batch to load
+ */
+ protected void loadRecordBatch(ArrowRecordBatch batch) {
+ try {
+ loader.load(batch);
+ } finally {
+ batch.close();
+ }
+ }
+
+ /**
+ * Load an ArrowDictionaryBatch to the readers dictionary vectors.
+ *
+ * @param dictionaryBatch dictionary batch to load
+ */
+ protected void loadDictionary(ArrowDictionaryBatch dictionaryBatch) {
+ long id = dictionaryBatch.getDictionaryId();
+ Dictionary dictionary = dictionaries.get(id);
+ if (dictionary == null) {
+ throw new IllegalArgumentException("Dictionary ID " + id + " not defined in schema");
+ }
+ FieldVector vector = dictionary.getVector();
+ // if is deltaVector, concat it with non-delta vector with the same ID.
+ if (dictionaryBatch.isDelta()) {
+ try (FieldVector deltaVector = vector.getField().createVector(allocator)) {
+ load(dictionaryBatch, deltaVector);
+ VectorBatchAppender.batchAppend(vector, deltaVector);
+ }
+ return;
+ }
+
+ load(dictionaryBatch, vector);
+ }
+
+ private void load(ArrowDictionaryBatch dictionaryBatch, FieldVector vector) {
+ VectorSchemaRoot root = new VectorSchemaRoot(
+ Collections.singletonList(vector.getField()),
+ Collections.singletonList(vector), 0);
+ VectorLoader loader = new VectorLoader(root);
+ try {
+ loader.load(dictionaryBatch.getDictionary());
+ } finally {
+ dictionaryBatch.close();
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowStreamReader.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowStreamReader.java
new file mode 100644
index 000000000..a0096aaf3
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowStreamReader.java
@@ -0,0 +1,229 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.ipc;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.channels.Channels;
+import java.nio.channels.ReadableByteChannel;
+
+import org.apache.arrow.flatbuf.MessageHeader;
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.compression.CompressionCodec;
+import org.apache.arrow.vector.compression.NoCompressionCodec;
+import org.apache.arrow.vector.ipc.message.ArrowDictionaryBatch;
+import org.apache.arrow.vector.ipc.message.ArrowRecordBatch;
+import org.apache.arrow.vector.ipc.message.MessageChannelReader;
+import org.apache.arrow.vector.ipc.message.MessageResult;
+import org.apache.arrow.vector.ipc.message.MessageSerializer;
+import org.apache.arrow.vector.types.MetadataVersion;
+import org.apache.arrow.vector.types.pojo.DictionaryEncoding;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.apache.arrow.vector.validate.MetadataV4UnionChecker;
+
+/**
+ * This class reads from an input stream and produces ArrowRecordBatches.
+ */
+public class ArrowStreamReader extends ArrowReader {
+
+ private MessageChannelReader messageReader;
+
+ private int loadedDictionaryCount;
+
+ /**
+ * Constructs a streaming reader using a MessageChannelReader. Non-blocking.
+ *
+ * @param messageReader reader used to get messages from a ReadChannel
+ * @param allocator to allocate new buffers
+ * @param compressionFactory the factory to create compression codec.
+ */
+ public ArrowStreamReader(
+ MessageChannelReader messageReader, BufferAllocator allocator, CompressionCodec.Factory compressionFactory) {
+ super(allocator, compressionFactory);
+ this.messageReader = messageReader;
+ }
+
+ /**
+ * Constructs a streaming reader using a MessageChannelReader. Non-blocking.
+ *
+ * @param messageReader reader used to get messages from a ReadChannel
+ * @param allocator to allocate new buffers
+ */
+ public ArrowStreamReader(MessageChannelReader messageReader, BufferAllocator allocator) {
+ this(messageReader, allocator, NoCompressionCodec.Factory.INSTANCE);
+ }
+
+ /**
+ * Constructs a streaming reader from a ReadableByteChannel input. Non-blocking.
+ *
+ * @param in ReadableByteChannel to read messages from
+ * @param allocator to allocate new buffers
+ * @param compressionFactory the factory to create compression codec.
+ */
+ public ArrowStreamReader(
+ ReadableByteChannel in, BufferAllocator allocator, CompressionCodec.Factory compressionFactory) {
+ this(new MessageChannelReader(new ReadChannel(in), allocator), allocator, compressionFactory);
+ }
+
+ /**
+ * Constructs a streaming reader from a ReadableByteChannel input. Non-blocking.
+ *
+ * @param in ReadableByteChannel to read messages from
+ * @param allocator to allocate new buffers
+ */
+ public ArrowStreamReader(ReadableByteChannel in, BufferAllocator allocator) {
+ this(new MessageChannelReader(new ReadChannel(in), allocator), allocator);
+ }
+
+ /**
+ * Constructs a streaming reader from a ReadableByteChannel input. Non-blocking.
+ *
+ * @param in InputStream to read messages from
+ * @param allocator to allocate new buffers
+ * @param compressionFactory the factory to create compression codec.
+ */
+ public ArrowStreamReader(
+ InputStream in, BufferAllocator allocator, CompressionCodec.Factory compressionFactory) {
+ this(Channels.newChannel(in), allocator, compressionFactory);
+ }
+
+ /**
+ * Constructs a streaming reader from an InputStream. Non-blocking.
+ *
+ * @param in InputStream to read messages from
+ * @param allocator to allocate new buffers
+ */
+ public ArrowStreamReader(InputStream in, BufferAllocator allocator) {
+ this(Channels.newChannel(in), allocator);
+ }
+
+ /**
+ * Get the number of bytes read from the stream since constructing the reader.
+ *
+ * @return number of bytes
+ */
+ @Override
+ public long bytesRead() {
+ return messageReader.bytesRead();
+ }
+
+ /**
+ * Closes the underlying read source.
+ *
+ * @throws IOException on error
+ */
+ @Override
+ protected void closeReadSource() throws IOException {
+ messageReader.close();
+ }
+
+ /**
+ * Load the next ArrowRecordBatch to the vector schema root if available.
+ *
+ * @return true if a batch was read, false on EOS
+ * @throws IOException on error
+ */
+ public boolean loadNextBatch() throws IOException {
+ prepareLoadNextBatch();
+ MessageResult result = messageReader.readNext();
+
+ // Reached EOS
+ if (result == null) {
+ return false;
+ }
+
+ if (result.getMessage().headerType() == MessageHeader.RecordBatch) {
+ ArrowBuf bodyBuffer = result.getBodyBuffer();
+
+ // For zero-length batches, need an empty buffer to deserialize the batch
+ if (bodyBuffer == null) {
+ bodyBuffer = allocator.getEmpty();
+ }
+
+ ArrowRecordBatch batch = MessageSerializer.deserializeRecordBatch(result.getMessage(), bodyBuffer);
+ loadRecordBatch(batch);
+ checkDictionaries();
+ return true;
+ } else if (result.getMessage().headerType() == MessageHeader.DictionaryBatch) {
+ // if it's dictionary message, read dictionary message out and continue to read unless get a batch or eos.
+ ArrowDictionaryBatch dictionaryBatch = readDictionary(result);
+ loadDictionary(dictionaryBatch);
+ loadedDictionaryCount++;
+ return loadNextBatch();
+ } else {
+ throw new IOException("Expected RecordBatch or DictionaryBatch but header was " +
+ result.getMessage().headerType());
+ }
+ }
+
+ /**
+ * When read a record batch, check whether its dictionaries are available.
+ */
+ private void checkDictionaries() throws IOException {
+ // if all dictionaries are loaded, return.
+ if (loadedDictionaryCount == dictionaries.size()) {
+ return;
+ }
+ for (FieldVector vector : getVectorSchemaRoot().getFieldVectors()) {
+ DictionaryEncoding encoding = vector.getField().getDictionary();
+ if (encoding != null) {
+ // if the dictionaries it needs is not available and the vector is not all null, something was wrong.
+ if (!dictionaries.containsKey(encoding.getId()) && vector.getNullCount() < vector.getValueCount()) {
+ throw new IOException("The dictionary was not available, id was:" + encoding.getId());
+ }
+ }
+ }
+ }
+
+ /**
+ * Reads the schema message from the beginning of the stream.
+ *
+ * @return the deserialized arrow schema
+ */
+ @Override
+ protected Schema readSchema() throws IOException {
+ MessageResult result = messageReader.readNext();
+
+ if (result == null) {
+ throw new IOException("Unexpected end of input. Missing schema.");
+ }
+
+ if (result.getMessage().headerType() != MessageHeader.Schema) {
+ throw new IOException("Expected schema but header was " + result.getMessage().headerType());
+ }
+
+ final Schema schema = MessageSerializer.deserializeSchema(result.getMessage());
+ MetadataV4UnionChecker.checkRead(schema, MetadataVersion.fromFlatbufID(result.getMessage().version()));
+ return schema;
+ }
+
+
+ private ArrowDictionaryBatch readDictionary(MessageResult result) throws IOException {
+
+ ArrowBuf bodyBuffer = result.getBodyBuffer();
+
+ // For zero-length batches, need an empty buffer to deserialize the batch
+ if (bodyBuffer == null) {
+ bodyBuffer = allocator.getEmpty();
+ }
+
+ return MessageSerializer.deserializeDictionaryBatch(result.getMessage(), bodyBuffer);
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowStreamWriter.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowStreamWriter.java
new file mode 100644
index 000000000..deb98580f
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowStreamWriter.java
@@ -0,0 +1,86 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.ipc;
+
+import java.io.IOException;
+import java.io.OutputStream;
+import java.nio.channels.Channels;
+import java.nio.channels.WritableByteChannel;
+
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.dictionary.DictionaryProvider;
+import org.apache.arrow.vector.ipc.message.IpcOption;
+import org.apache.arrow.vector.ipc.message.MessageSerializer;
+
+/**
+ * Writer for the Arrow stream format to send ArrowRecordBatches over a WriteChannel.
+ */
+public class ArrowStreamWriter extends ArrowWriter {
+
+ /**
+ * Construct an ArrowStreamWriter with an optional DictionaryProvider for the OutputStream.
+ *
+ * @param root Existing VectorSchemaRoot with vectors to be written.
+ * @param provider DictionaryProvider for any vectors that are dictionary encoded.
+ * (Optional, can be null)
+ * @param out OutputStream for writing.
+ */
+ public ArrowStreamWriter(VectorSchemaRoot root, DictionaryProvider provider, OutputStream out) {
+ this(root, provider, Channels.newChannel(out));
+ }
+
+ /**
+ * Construct an ArrowStreamWriter with an optional DictionaryProvider for the WritableByteChannel.
+ */
+ public ArrowStreamWriter(VectorSchemaRoot root, DictionaryProvider provider, WritableByteChannel out) {
+ this(root, provider, out, IpcOption.DEFAULT);
+ }
+
+ /**
+ * Construct an ArrowStreamWriter with an optional DictionaryProvider for the WritableByteChannel.
+ *
+ * @param root Existing VectorSchemaRoot with vectors to be written.
+ * @param provider DictionaryProvider for any vectors that are dictionary encoded.
+ * (Optional, can be null)
+ * @param option IPC write options
+ * @param out WritableByteChannel for writing.
+ */
+ public ArrowStreamWriter(VectorSchemaRoot root, DictionaryProvider provider, WritableByteChannel out,
+ IpcOption option) {
+ super(root, provider, out, option);
+ }
+
+ /**
+ * Write an EOS identifier to the WriteChannel.
+ *
+ * @param out Open WriteChannel with an active Arrow stream.
+ * @param option IPC write option
+ * @throws IOException on error
+ */
+ public static void writeEndOfStream(WriteChannel out, IpcOption option) throws IOException {
+ if (!option.write_legacy_ipc_format) {
+ out.writeIntLittleEndian(MessageSerializer.IPC_CONTINUATION_TOKEN);
+ }
+ out.writeIntLittleEndian(0);
+ }
+
+ @Override
+ protected void endInternal(WriteChannel out) throws IOException {
+ writeEndOfStream(out, option);
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowWriter.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowWriter.java
new file mode 100644
index 000000000..7bc9a306f
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/ArrowWriter.java
@@ -0,0 +1,210 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.ipc;
+
+import java.io.IOException;
+import java.nio.channels.WritableByteChannel;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.arrow.util.AutoCloseables;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.VectorUnloader;
+import org.apache.arrow.vector.dictionary.Dictionary;
+import org.apache.arrow.vector.dictionary.DictionaryProvider;
+import org.apache.arrow.vector.ipc.message.ArrowBlock;
+import org.apache.arrow.vector.ipc.message.ArrowDictionaryBatch;
+import org.apache.arrow.vector.ipc.message.ArrowRecordBatch;
+import org.apache.arrow.vector.ipc.message.IpcOption;
+import org.apache.arrow.vector.ipc.message.MessageSerializer;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.apache.arrow.vector.util.DictionaryUtility;
+import org.apache.arrow.vector.validate.MetadataV4UnionChecker;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Abstract base class for implementing Arrow writers for IPC over a WriteChannel.
+ */
+public abstract class ArrowWriter implements AutoCloseable {
+
+ protected static final Logger LOGGER = LoggerFactory.getLogger(ArrowWriter.class);
+
+ // schema with fields in message format, not memory format
+ protected final Schema schema;
+ protected final WriteChannel out;
+
+ private final VectorUnloader unloader;
+ private final List<ArrowDictionaryBatch> dictionaries;
+
+ private boolean started = false;
+ private boolean ended = false;
+
+ private boolean dictWritten = false;
+
+ protected IpcOption option;
+
+ protected ArrowWriter(VectorSchemaRoot root, DictionaryProvider provider, WritableByteChannel out) {
+ this (root, provider, out, IpcOption.DEFAULT);
+ }
+
+ /**
+ * Note: fields are not closed when the writer is closed.
+ *
+ * @param root the vectors to write to the output
+ * @param provider where to find the dictionaries
+ * @param out the output where to write
+ * @param option IPC write options
+ */
+ protected ArrowWriter(VectorSchemaRoot root, DictionaryProvider provider, WritableByteChannel out, IpcOption option) {
+ this.unloader = new VectorUnloader(root);
+ this.out = new WriteChannel(out);
+ this.option = option;
+
+ List<Field> fields = new ArrayList<>(root.getSchema().getFields().size());
+ Set<Long> dictionaryIdsUsed = new HashSet<>();
+
+ MetadataV4UnionChecker.checkForUnion(root.getSchema().getFields().iterator(), option.metadataVersion);
+ // Convert fields with dictionaries to have dictionary type
+ for (Field field : root.getSchema().getFields()) {
+ fields.add(DictionaryUtility.toMessageFormat(field, provider, dictionaryIdsUsed));
+ }
+
+ // Create a record batch for each dictionary
+ this.dictionaries = new ArrayList<>(dictionaryIdsUsed.size());
+ for (long id : dictionaryIdsUsed) {
+ Dictionary dictionary = provider.lookup(id);
+ FieldVector vector = dictionary.getVector();
+ int count = vector.getValueCount();
+ VectorSchemaRoot dictRoot = new VectorSchemaRoot(
+ Collections.singletonList(vector.getField()),
+ Collections.singletonList(vector),
+ count);
+ VectorUnloader unloader = new VectorUnloader(dictRoot);
+ ArrowRecordBatch batch = unloader.getRecordBatch();
+ this.dictionaries.add(new ArrowDictionaryBatch(id, batch));
+ }
+
+ this.schema = new Schema(fields, root.getSchema().getCustomMetadata());
+ }
+
+ public void start() throws IOException {
+ ensureStarted();
+ }
+
+ /**
+ * Writes the record batch currently loaded in this instance's VectorSchemaRoot.
+ */
+ public void writeBatch() throws IOException {
+ ensureStarted();
+ ensureDictionariesWritten();
+ try (ArrowRecordBatch batch = unloader.getRecordBatch()) {
+ writeRecordBatch(batch);
+ }
+ }
+
+ protected ArrowBlock writeDictionaryBatch(ArrowDictionaryBatch batch) throws IOException {
+ ArrowBlock block = MessageSerializer.serialize(out, batch, option);
+ if (LOGGER.isDebugEnabled()) {
+ LOGGER.debug("DictionaryRecordBatch at {}, metadata: {}, body: {}",
+ block.getOffset(), block.getMetadataLength(), block.getBodyLength());
+ }
+ return block;
+ }
+
+ protected ArrowBlock writeRecordBatch(ArrowRecordBatch batch) throws IOException {
+ ArrowBlock block = MessageSerializer.serialize(out, batch, option);
+ if (LOGGER.isDebugEnabled()) {
+ LOGGER.debug("RecordBatch at {}, metadata: {}, body: {}",
+ block.getOffset(), block.getMetadataLength(), block.getBodyLength());
+ }
+ return block;
+ }
+
+ public void end() throws IOException {
+ ensureStarted();
+ ensureEnded();
+ }
+
+ public long bytesWritten() {
+ return out.getCurrentPosition();
+ }
+
+ private void ensureStarted() throws IOException {
+ if (!started) {
+ started = true;
+ startInternal(out);
+ // write the schema - for file formats this is duplicated in the footer, but matches
+ // the streaming format
+ MessageSerializer.serialize(out, schema, option);
+ }
+ }
+
+ /**
+ * Write dictionaries after schema and before recordBatches, dictionaries won't be
+ * written if empty stream (only has schema data in IPC).
+ */
+ private void ensureDictionariesWritten() throws IOException {
+ if (!dictWritten) {
+ dictWritten = true;
+ // write out any dictionaries
+ try {
+ for (ArrowDictionaryBatch batch : dictionaries) {
+ writeDictionaryBatch(batch);
+ }
+ } finally {
+ try {
+ AutoCloseables.close(dictionaries);
+ } catch (Exception e) {
+ throw new RuntimeException("Error occurred while closing dictionaries.", e);
+ }
+ }
+ }
+ }
+
+ private void ensureEnded() throws IOException {
+ if (!ended) {
+ ended = true;
+ endInternal(out);
+ }
+ }
+
+ protected void startInternal(WriteChannel out) throws IOException {
+ }
+
+ protected void endInternal(WriteChannel out) throws IOException {
+ }
+
+ @Override
+ public void close() {
+ try {
+ end();
+ out.close();
+ if (!dictWritten) {
+ AutoCloseables.close(dictionaries);
+ }
+ } catch (Exception e) {
+ throw new RuntimeException(e);
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/InvalidArrowFileException.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/InvalidArrowFileException.java
new file mode 100644
index 000000000..e234058e6
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/InvalidArrowFileException.java
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.ipc;
+
+/**
+ * Exception indicating a problem with an Arrow File (https://arrow.apache.org/docs/format/IPC.html#file-format).
+ */
+public class InvalidArrowFileException extends RuntimeException {
+ private static final long serialVersionUID = 1L;
+
+ public InvalidArrowFileException(String message) {
+ super(message);
+ }
+
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileReader.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileReader.java
new file mode 100644
index 000000000..d093e840a
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileReader.java
@@ -0,0 +1,806 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.ipc;
+
+import static com.fasterxml.jackson.core.JsonToken.END_ARRAY;
+import static com.fasterxml.jackson.core.JsonToken.END_OBJECT;
+import static com.fasterxml.jackson.core.JsonToken.START_ARRAY;
+import static com.fasterxml.jackson.core.JsonToken.START_OBJECT;
+import static java.nio.charset.StandardCharsets.UTF_8;
+import static org.apache.arrow.vector.BufferLayout.BufferType.DATA;
+import static org.apache.arrow.vector.BufferLayout.BufferType.OFFSET;
+import static org.apache.arrow.vector.BufferLayout.BufferType.TYPE;
+import static org.apache.arrow.vector.BufferLayout.BufferType.VALIDITY;
+
+import java.io.File;
+import java.io.IOException;
+import java.math.BigDecimal;
+import java.math.BigInteger;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.util.Preconditions;
+import org.apache.arrow.vector.BigIntVector;
+import org.apache.arrow.vector.BitVectorHelper;
+import org.apache.arrow.vector.BufferLayout.BufferType;
+import org.apache.arrow.vector.Decimal256Vector;
+import org.apache.arrow.vector.DecimalVector;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.Float4Vector;
+import org.apache.arrow.vector.Float8Vector;
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.IntervalDayVector;
+import org.apache.arrow.vector.IntervalMonthDayNanoVector;
+import org.apache.arrow.vector.SmallIntVector;
+import org.apache.arrow.vector.TinyIntVector;
+import org.apache.arrow.vector.TypeLayout;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.dictionary.Dictionary;
+import org.apache.arrow.vector.dictionary.DictionaryProvider;
+import org.apache.arrow.vector.ipc.message.ArrowFieldNode;
+import org.apache.arrow.vector.types.Types;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.apache.arrow.vector.util.DecimalUtility;
+import org.apache.arrow.vector.util.DictionaryUtility;
+import org.apache.commons.codec.DecoderException;
+import org.apache.commons.codec.binary.Hex;
+
+import com.fasterxml.jackson.core.JsonParseException;
+import com.fasterxml.jackson.core.JsonParser;
+import com.fasterxml.jackson.core.JsonToken;
+import com.fasterxml.jackson.databind.MapperFeature;
+import com.fasterxml.jackson.databind.MappingJsonFactory;
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+/**
+ * A reader for JSON files that translates them into vectors. This reader is used for integration tests.
+ *
+ * <p>This class uses a streaming parser API, method naming tends to reflect this implementation
+ * detail.
+ */
+public class JsonFileReader implements AutoCloseable, DictionaryProvider {
+ private final JsonParser parser;
+ private final BufferAllocator allocator;
+ private Schema schema;
+ private Map<Long, Dictionary> dictionaries;
+ private Boolean started = false;
+
+ /**
+ * Constructs a new instance.
+ * @param inputFile The file to read.
+ * @param allocator The allocator to use for allocating buffers.
+ */
+ public JsonFileReader(File inputFile, BufferAllocator allocator) throws JsonParseException, IOException {
+ super();
+ this.allocator = allocator;
+ MappingJsonFactory jsonFactory = new MappingJsonFactory(new ObjectMapper()
+ //ignore case for enums
+ .configure(MapperFeature.ACCEPT_CASE_INSENSITIVE_ENUMS, true)
+ );
+ this.parser = jsonFactory.createParser(inputFile);
+ // Allow reading NaN for floating point values
+ this.parser.configure(JsonParser.Feature.ALLOW_NON_NUMERIC_NUMBERS, true);
+ }
+
+ @Override
+ public Dictionary lookup(long id) {
+ if (!started) {
+ throw new IllegalStateException("Unable to lookup until after read() has started");
+ }
+
+ return dictionaries.get(id);
+ }
+
+ /** Reads the beginning (schema section) of the json file and returns it. */
+ public Schema start() throws JsonParseException, IOException {
+ readToken(START_OBJECT);
+ {
+ Schema originalSchema = readNextField("schema", Schema.class);
+ List<Field> fields = new ArrayList<>();
+ dictionaries = new HashMap<>();
+
+ // Convert fields with dictionaries to have the index type
+ for (Field field : originalSchema.getFields()) {
+ fields.add(DictionaryUtility.toMemoryFormat(field, allocator, dictionaries));
+ }
+ this.schema = new Schema(fields, originalSchema.getCustomMetadata());
+
+ if (!dictionaries.isEmpty()) {
+ nextFieldIs("dictionaries");
+ readDictionaryBatches();
+ }
+
+ nextFieldIs("batches");
+ readToken(START_ARRAY);
+ started = true;
+ return this.schema;
+ }
+ }
+
+ private void readDictionaryBatches() throws JsonParseException, IOException {
+ readToken(START_ARRAY);
+ JsonToken token = parser.nextToken();
+ boolean haveDictionaryBatch = token == START_OBJECT;
+ while (haveDictionaryBatch) {
+
+ // Lookup what dictionary for the batch about to be read
+ long id = readNextField("id", Long.class);
+ Dictionary dict = dictionaries.get(id);
+ if (dict == null) {
+ throw new IllegalArgumentException("Dictionary with id: " + id + " missing encoding from schema Field");
+ }
+
+ // Read the dictionary record batch
+ nextFieldIs("data");
+ FieldVector vector = dict.getVector();
+ List<Field> fields = Collections.singletonList(vector.getField());
+ List<FieldVector> vectors = Collections.singletonList(vector);
+ VectorSchemaRoot root = new VectorSchemaRoot(fields, vectors, vector.getValueCount());
+ read(root);
+
+ readToken(END_OBJECT);
+ token = parser.nextToken();
+ haveDictionaryBatch = token == START_OBJECT;
+ }
+
+ if (token != END_ARRAY) {
+ throw new IllegalArgumentException("Invalid token: " + token + " expected end of array at " +
+ parser.getTokenLocation());
+ }
+ }
+
+ /**
+ * Reads the next record batch from the file into <code>root</code>.
+ */
+ public boolean read(VectorSchemaRoot root) throws IOException {
+ JsonToken t = parser.nextToken();
+ if (t == START_OBJECT) {
+ {
+ int count = readNextField("count", Integer.class);
+ nextFieldIs("columns");
+ readToken(START_ARRAY);
+ {
+ for (Field field : root.getSchema().getFields()) {
+ FieldVector vector = root.getVector(field);
+ readFromJsonIntoVector(field, vector);
+ }
+ }
+ readToken(END_ARRAY);
+ root.setRowCount(count);
+ }
+ readToken(END_OBJECT);
+ return true;
+ } else if (t == END_ARRAY) {
+ root.setRowCount(0);
+ return false;
+ } else {
+ throw new IllegalArgumentException("Invalid token: " + t);
+ }
+ }
+
+ /**
+ * Returns the next record batch from the file.
+ */
+ public VectorSchemaRoot read() throws IOException {
+ JsonToken t = parser.nextToken();
+ if (t == START_OBJECT) {
+ VectorSchemaRoot recordBatch = VectorSchemaRoot.create(schema, allocator);
+ {
+ int count = readNextField("count", Integer.class);
+ recordBatch.setRowCount(count);
+ nextFieldIs("columns");
+ readToken(START_ARRAY);
+ {
+ for (Field field : schema.getFields()) {
+ FieldVector vector = recordBatch.getVector(field);
+ readFromJsonIntoVector(field, vector);
+ }
+ }
+ readToken(END_ARRAY);
+ }
+ readToken(END_OBJECT);
+ return recordBatch;
+ } else if (t == END_ARRAY) {
+ return null;
+ } else {
+ throw new IllegalArgumentException("Invalid token: " + t);
+ }
+ }
+
+ private abstract class BufferReader {
+ protected abstract ArrowBuf read(BufferAllocator allocator, int count) throws IOException;
+
+ ArrowBuf readBuffer(BufferAllocator allocator, int count) throws IOException {
+ readToken(START_ARRAY);
+ ArrowBuf buf = read(allocator, count);
+ readToken(END_ARRAY);
+ return buf;
+ }
+ }
+
+ private class BufferHelper {
+ BufferReader BIT = new BufferReader() {
+ @Override
+ protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException {
+ final int bufferSize = BitVectorHelper.getValidityBufferSize(count);
+ ArrowBuf buf = allocator.buffer(bufferSize);
+
+ // C++ integration test fails without this.
+ buf.setZero(0, bufferSize);
+
+ for (int i = 0; i < count; i++) {
+ parser.nextToken();
+ BitVectorHelper.setValidityBit(buf, i, parser.readValueAs(Boolean.class) ? 1 : 0);
+ }
+
+ buf.writerIndex(bufferSize);
+ return buf;
+ }
+ };
+
+ BufferReader DAY_MILLIS = new BufferReader() {
+ @Override
+ protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException {
+ final long size = (long) count * IntervalDayVector.TYPE_WIDTH;
+ ArrowBuf buf = allocator.buffer(size);
+
+ for (int i = 0; i < count; i++) {
+ readToken(START_OBJECT);
+ buf.writeInt(readNextField("days", Integer.class));
+ buf.writeInt(readNextField("milliseconds", Integer.class));
+ readToken(END_OBJECT);
+ }
+
+ return buf;
+ }
+ };
+
+ BufferReader MONTH_DAY_NANOS = new BufferReader() {
+ @Override
+ protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException {
+ final long size = (long) count * IntervalMonthDayNanoVector.TYPE_WIDTH;
+ ArrowBuf buf = allocator.buffer(size);
+
+ for (int i = 0; i < count; i++) {
+ readToken(START_OBJECT);
+ buf.writeInt(readNextField("months", Integer.class));
+ buf.writeInt(readNextField("days", Integer.class));
+ buf.writeLong(readNextField("nanoseconds", Long.class));
+ readToken(END_OBJECT);
+ }
+
+ return buf;
+ }
+ };
+
+
+ BufferReader INT1 = new BufferReader() {
+ @Override
+ protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException {
+ final long size = (long) count * TinyIntVector.TYPE_WIDTH;
+ ArrowBuf buf = allocator.buffer(size);
+
+ for (int i = 0; i < count; i++) {
+ parser.nextToken();
+ buf.writeByte(parser.getByteValue());
+ }
+
+ return buf;
+ }
+ };
+
+ BufferReader INT2 = new BufferReader() {
+ @Override
+ protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException {
+ final long size = (long) count * SmallIntVector.TYPE_WIDTH;
+ ArrowBuf buf = allocator.buffer(size);
+
+ for (int i = 0; i < count; i++) {
+ parser.nextToken();
+ buf.writeShort(parser.getShortValue());
+ }
+
+ return buf;
+ }
+ };
+
+ BufferReader INT4 = new BufferReader() {
+ @Override
+ protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException {
+ final long size = (long) count * IntVector.TYPE_WIDTH;
+ ArrowBuf buf = allocator.buffer(size);
+
+ for (int i = 0; i < count; i++) {
+ parser.nextToken();
+ buf.writeInt(parser.getIntValue());
+ }
+
+ return buf;
+ }
+ };
+
+ BufferReader INT8 = new BufferReader() {
+ @Override
+ protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException {
+ final long size = (long) count * BigIntVector.TYPE_WIDTH;
+ ArrowBuf buf = allocator.buffer(size);
+
+ for (int i = 0; i < count; i++) {
+ parser.nextToken();
+ String value = parser.getValueAsString();
+ buf.writeLong(Long.valueOf(value));
+ }
+
+ return buf;
+ }
+ };
+
+ BufferReader UINT1 = new BufferReader() {
+ @Override
+ protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException {
+ final long size = (long) count * TinyIntVector.TYPE_WIDTH;
+ ArrowBuf buf = allocator.buffer(size);
+
+ for (int i = 0; i < count; i++) {
+ parser.nextToken();
+ buf.writeByte(parser.getShortValue() & 0xFF);
+ }
+
+ return buf;
+ }
+ };
+
+ BufferReader UINT2 = new BufferReader() {
+ @Override
+ protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException {
+ final long size = (long) count * SmallIntVector.TYPE_WIDTH;
+ ArrowBuf buf = allocator.buffer(size);
+
+ for (int i = 0; i < count; i++) {
+ parser.nextToken();
+ buf.writeShort(parser.getIntValue() & 0xFFFF);
+ }
+
+ return buf;
+ }
+ };
+
+ BufferReader UINT4 = new BufferReader() {
+ @Override
+ protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException {
+ final long size = (long) count * IntVector.TYPE_WIDTH;
+ ArrowBuf buf = allocator.buffer(size);
+
+ for (int i = 0; i < count; i++) {
+ parser.nextToken();
+ buf.writeInt((int) parser.getLongValue());
+ }
+
+ return buf;
+ }
+ };
+
+ BufferReader UINT8 = new BufferReader() {
+ @Override
+ protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException {
+ final long size = (long) count * BigIntVector.TYPE_WIDTH;
+ ArrowBuf buf = allocator.buffer(size);
+
+ for (int i = 0; i < count; i++) {
+ parser.nextToken();
+ BigInteger value = new BigInteger(parser.getValueAsString());
+ buf.writeLong(value.longValue());
+ }
+
+ return buf;
+ }
+ };
+
+ BufferReader FLOAT4 = new BufferReader() {
+ @Override
+ protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException {
+ final long size = (long) count * Float4Vector.TYPE_WIDTH;
+ ArrowBuf buf = allocator.buffer(size);
+
+ for (int i = 0; i < count; i++) {
+ parser.nextToken();
+ buf.writeFloat(parser.getFloatValue());
+ }
+
+ return buf;
+ }
+ };
+
+ BufferReader FLOAT8 = new BufferReader() {
+ @Override
+ protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException {
+ final long size = (long) count * Float8Vector.TYPE_WIDTH;
+ ArrowBuf buf = allocator.buffer(size);
+
+ for (int i = 0; i < count; i++) {
+ parser.nextToken();
+ buf.writeDouble(parser.getDoubleValue());
+ }
+
+ return buf;
+ }
+ };
+
+ BufferReader DECIMAL = new BufferReader() {
+ @Override
+ protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException {
+ final long size = (long) count * DecimalVector.TYPE_WIDTH;
+ ArrowBuf buf = allocator.buffer(size);
+
+ for (int i = 0; i < count; i++) {
+ parser.nextToken();
+ BigDecimal decimalValue = new BigDecimal(parser.readValueAs(String.class));
+ DecimalUtility.writeBigDecimalToArrowBuf(decimalValue, buf, i, DecimalVector.TYPE_WIDTH);
+ }
+
+ buf.writerIndex(size);
+ return buf;
+ }
+ };
+
+ BufferReader DECIMAL256 = new BufferReader() {
+ @Override
+ protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException {
+ final long size = (long) count * Decimal256Vector.TYPE_WIDTH;
+ ArrowBuf buf = allocator.buffer(size);
+
+ for (int i = 0; i < count; i++) {
+ parser.nextToken();
+ BigDecimal decimalValue = new BigDecimal(parser.readValueAs(String.class));
+ DecimalUtility.writeBigDecimalToArrowBuf(decimalValue, buf, i, Decimal256Vector.TYPE_WIDTH);
+ }
+
+ buf.writerIndex(size);
+ return buf;
+ }
+ };
+
+ ArrowBuf readBinaryValues(
+ BufferAllocator allocator, int count) throws IOException {
+ ArrayList<byte[]> values = new ArrayList<>(count);
+ long bufferSize = 0L;
+ for (int i = 0; i < count; i++) {
+ parser.nextToken();
+ final byte[] value = decodeHexSafe(parser.readValueAs(String.class));
+ values.add(value);
+ bufferSize += value.length;
+ }
+
+ ArrowBuf buf = allocator.buffer(bufferSize);
+
+ for (byte[] value : values) {
+ buf.writeBytes(value);
+ }
+
+ return buf;
+ }
+
+ ArrowBuf readStringValues(
+ BufferAllocator allocator, int count) throws IOException {
+ ArrayList<byte[]> values = new ArrayList<>(count);
+ long bufferSize = 0L;
+ for (int i = 0; i < count; i++) {
+ parser.nextToken();
+ final byte[] value = parser.getValueAsString().getBytes(UTF_8);
+ values.add(value);
+ bufferSize += value.length;
+ }
+
+ ArrowBuf buf = allocator.buffer(bufferSize);
+
+ for (byte[] value : values) {
+ buf.writeBytes(value);
+ }
+
+ return buf;
+ }
+
+ BufferReader FIXEDSIZEBINARY = new BufferReader() {
+ @Override
+ protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException {
+ return readBinaryValues(allocator, count);
+ }
+ };
+
+ BufferReader VARCHAR = new BufferReader() {
+ @Override
+ protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException {
+ return readStringValues(allocator, count);
+ }
+ };
+
+ BufferReader LARGEVARCHAR = new BufferReader() {
+ @Override
+ protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException {
+ return readStringValues(allocator, count);
+ }
+ };
+
+ BufferReader VARBINARY = new BufferReader() {
+ @Override
+ protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException {
+ return readBinaryValues(allocator, count);
+ }
+ };
+
+ BufferReader LARGEVARBINARY = new BufferReader() {
+ @Override
+ protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException {
+ return readBinaryValues(allocator, count);
+ }
+ };
+ }
+
+ private ArrowBuf readIntoBuffer(BufferAllocator allocator, BufferType bufferType,
+ Types.MinorType type, int count) throws IOException {
+ ArrowBuf buf;
+
+ BufferHelper helper = new BufferHelper();
+
+ BufferReader reader = null;
+
+ if (bufferType.equals(VALIDITY)) {
+ reader = helper.BIT;
+ } else if (bufferType.equals(OFFSET)) {
+ if (type == Types.MinorType.LARGELIST ||
+ type == Types.MinorType.LARGEVARCHAR ||
+ type == Types.MinorType.LARGEVARBINARY) {
+ reader = helper.INT8;
+ } else {
+ reader = helper.INT4;
+ }
+ } else if (bufferType.equals(TYPE)) {
+ reader = helper.INT1;
+ } else if (bufferType.equals(DATA)) {
+ switch (type) {
+ case BIT:
+ reader = helper.BIT;
+ break;
+ case TINYINT:
+ reader = helper.INT1;
+ break;
+ case SMALLINT:
+ reader = helper.INT2;
+ break;
+ case INT:
+ reader = helper.INT4;
+ break;
+ case BIGINT:
+ reader = helper.INT8;
+ break;
+ case UINT1:
+ reader = helper.UINT1;
+ break;
+ case UINT2:
+ reader = helper.UINT2;
+ break;
+ case UINT4:
+ reader = helper.UINT4;
+ break;
+ case UINT8:
+ reader = helper.UINT8;
+ break;
+ case FLOAT4:
+ reader = helper.FLOAT4;
+ break;
+ case FLOAT8:
+ reader = helper.FLOAT8;
+ break;
+ case DECIMAL:
+ reader = helper.DECIMAL;
+ break;
+ case DECIMAL256:
+ reader = helper.DECIMAL256;
+ break;
+ case FIXEDSIZEBINARY:
+ reader = helper.FIXEDSIZEBINARY;
+ break;
+ case VARCHAR:
+ reader = helper.VARCHAR;
+ break;
+ case LARGEVARCHAR:
+ reader = helper.LARGEVARCHAR;
+ break;
+ case VARBINARY:
+ reader = helper.VARBINARY;
+ break;
+ case LARGEVARBINARY:
+ reader = helper.LARGEVARBINARY;
+ break;
+ case DATEDAY:
+ reader = helper.INT4;
+ break;
+ case DATEMILLI:
+ reader = helper.INT8;
+ break;
+ case TIMESEC:
+ case TIMEMILLI:
+ reader = helper.INT4;
+ break;
+ case TIMEMICRO:
+ case TIMENANO:
+ reader = helper.INT8;
+ break;
+ case TIMESTAMPNANO:
+ case TIMESTAMPMICRO:
+ case TIMESTAMPMILLI:
+ case TIMESTAMPSEC:
+ case TIMESTAMPNANOTZ:
+ case TIMESTAMPMICROTZ:
+ case TIMESTAMPMILLITZ:
+ case TIMESTAMPSECTZ:
+ reader = helper.INT8;
+ break;
+ case INTERVALYEAR:
+ reader = helper.INT4;
+ break;
+ case INTERVALDAY:
+ reader = helper.DAY_MILLIS;
+ break;
+ case INTERVALMONTHDAYNANO:
+ reader = helper.MONTH_DAY_NANOS;
+ break;
+ case DURATION:
+ reader = helper.INT8;
+ break;
+ default:
+ throw new UnsupportedOperationException("Cannot read array of type " + type);
+ }
+ } else {
+ throw new InvalidArrowFileException("Unrecognized buffer type " + bufferType);
+ }
+
+ buf = reader.readBuffer(allocator, count);
+
+ Preconditions.checkNotNull(buf);
+ return buf;
+ }
+
+ private void readFromJsonIntoVector(Field field, FieldVector vector) throws JsonParseException, IOException {
+ TypeLayout typeLayout = TypeLayout.getTypeLayout(field.getType());
+ List<BufferType> vectorTypes = typeLayout.getBufferTypes();
+ ArrowBuf[] vectorBuffers = new ArrowBuf[vectorTypes.size()];
+ /*
+ * The order of inner buffers is :
+ * Fixed width vector:
+ * -- validity buffer
+ * -- data buffer
+ * Variable width vector:
+ * -- validity buffer
+ * -- offset buffer
+ * -- data buffer
+ *
+ * This is similar to what getFieldInnerVectors() used to give but now that we don't have
+ * inner vectors anymore, we will work directly at the buffer level -- populate buffers
+ * locally as we read from Json parser and do loadFieldBuffers on the vector followed by
+ * releasing the local buffers.
+ */
+ readToken(START_OBJECT);
+ {
+ // If currently reading dictionaries, field name is not important so don't check
+ String name = readNextField("name", String.class);
+ if (started && !Objects.equals(field.getName(), name)) {
+ throw new IllegalArgumentException("Expected field " + field.getName() + " but got " + name);
+ }
+
+ /* Initialize the vector with required capacity but don't allocateNew since we would
+ * be doing loadFieldBuffers.
+ */
+ int valueCount = readNextField("count", Integer.class);
+ vector.setInitialCapacity(valueCount);
+
+ for (int v = 0; v < vectorTypes.size(); v++) {
+ BufferType bufferType = vectorTypes.get(v);
+ nextFieldIs(bufferType.getName());
+ int innerBufferValueCount = valueCount;
+ if (bufferType.equals(OFFSET) && !field.getType().getTypeID().equals(ArrowType.ArrowTypeID.Union)) {
+ /* offset buffer has 1 additional value capacity */
+ innerBufferValueCount = valueCount + 1;
+ }
+
+ vectorBuffers[v] = readIntoBuffer(allocator, bufferType, vector.getMinorType(), innerBufferValueCount);
+ }
+
+ if (vectorBuffers.length == 0) {
+ readToken(END_OBJECT);
+ return;
+ }
+
+ int nullCount = 0;
+ if (!(vector.getField().getFieldType().getType() instanceof ArrowType.Union)) {
+ nullCount = BitVectorHelper.getNullCount(vectorBuffers[0], valueCount);
+ }
+ final ArrowFieldNode fieldNode = new ArrowFieldNode(valueCount, nullCount);
+ vector.loadFieldBuffers(fieldNode, Arrays.asList(vectorBuffers));
+
+ /* read child vectors (if any) */
+ List<Field> fields = field.getChildren();
+ if (!fields.isEmpty()) {
+ List<FieldVector> vectorChildren = vector.getChildrenFromFields();
+ if (fields.size() != vectorChildren.size()) {
+ throw new IllegalArgumentException(
+ "fields and children are not the same size: " + fields.size() + " != " + vectorChildren.size());
+ }
+ nextFieldIs("children");
+ readToken(START_ARRAY);
+ for (int i = 0; i < fields.size(); i++) {
+ Field childField = fields.get(i);
+ FieldVector childVector = vectorChildren.get(i);
+ readFromJsonIntoVector(childField, childVector);
+ }
+ readToken(END_ARRAY);
+ }
+ }
+ readToken(END_OBJECT);
+
+ for (ArrowBuf buffer: vectorBuffers) {
+ buffer.getReferenceManager().release();
+ }
+ }
+
+ private byte[] decodeHexSafe(String hexString) throws IOException {
+ try {
+ return Hex.decodeHex(hexString.toCharArray());
+ } catch (DecoderException e) {
+ throw new IOException("Unable to decode hex string: " + hexString, e);
+ }
+ }
+
+ @Override
+ public void close() throws IOException {
+ parser.close();
+ for (Dictionary dictionary : dictionaries.values()) {
+ dictionary.getVector().close();
+ }
+ }
+
+ private <T> T readNextField(String expectedFieldName, Class<T> c) throws IOException, JsonParseException {
+ nextFieldIs(expectedFieldName);
+ parser.nextToken();
+ return parser.readValueAs(c);
+ }
+
+ private void nextFieldIs(String expectedFieldName) throws IOException, JsonParseException {
+ String name = parser.nextFieldName();
+ if (name == null || !name.equals(expectedFieldName)) {
+ throw new IllegalStateException("Expected " + expectedFieldName + " but got " + name);
+ }
+ }
+
+ private void readToken(JsonToken expected) throws JsonParseException, IOException {
+ JsonToken t = parser.nextToken();
+ if (t != expected) {
+ throw new IllegalStateException("Expected " + expected + " but got " + t);
+ }
+ }
+
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileWriter.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileWriter.java
new file mode 100644
index 000000000..58760c1a9
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileWriter.java
@@ -0,0 +1,417 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.ipc;
+
+import static org.apache.arrow.vector.BufferLayout.BufferType.*;
+
+import java.io.File;
+import java.io.IOException;
+import java.math.BigDecimal;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.util.Preconditions;
+import org.apache.arrow.vector.BaseVariableWidthVector;
+import org.apache.arrow.vector.BigIntVector;
+import org.apache.arrow.vector.BitVectorHelper;
+import org.apache.arrow.vector.BufferLayout.BufferType;
+import org.apache.arrow.vector.DateDayVector;
+import org.apache.arrow.vector.DateMilliVector;
+import org.apache.arrow.vector.Decimal256Vector;
+import org.apache.arrow.vector.DecimalVector;
+import org.apache.arrow.vector.DurationVector;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.FixedSizeBinaryVector;
+import org.apache.arrow.vector.Float4Vector;
+import org.apache.arrow.vector.Float8Vector;
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.IntervalDayVector;
+import org.apache.arrow.vector.IntervalMonthDayNanoVector;
+import org.apache.arrow.vector.IntervalYearVector;
+import org.apache.arrow.vector.SmallIntVector;
+import org.apache.arrow.vector.TimeMicroVector;
+import org.apache.arrow.vector.TimeMilliVector;
+import org.apache.arrow.vector.TimeNanoVector;
+import org.apache.arrow.vector.TimeSecVector;
+import org.apache.arrow.vector.TimeStampMicroTZVector;
+import org.apache.arrow.vector.TimeStampMicroVector;
+import org.apache.arrow.vector.TimeStampMilliTZVector;
+import org.apache.arrow.vector.TimeStampMilliVector;
+import org.apache.arrow.vector.TimeStampNanoTZVector;
+import org.apache.arrow.vector.TimeStampNanoVector;
+import org.apache.arrow.vector.TimeStampSecTZVector;
+import org.apache.arrow.vector.TimeStampSecVector;
+import org.apache.arrow.vector.TinyIntVector;
+import org.apache.arrow.vector.TypeLayout;
+import org.apache.arrow.vector.UInt1Vector;
+import org.apache.arrow.vector.UInt2Vector;
+import org.apache.arrow.vector.UInt4Vector;
+import org.apache.arrow.vector.UInt8Vector;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.dictionary.Dictionary;
+import org.apache.arrow.vector.dictionary.DictionaryProvider;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.apache.arrow.vector.util.DecimalUtility;
+import org.apache.arrow.vector.util.DictionaryUtility;
+import org.apache.commons.codec.binary.Hex;
+
+import com.fasterxml.jackson.core.JsonEncoding;
+import com.fasterxml.jackson.core.JsonGenerator;
+import com.fasterxml.jackson.core.util.DefaultPrettyPrinter;
+import com.fasterxml.jackson.core.util.DefaultPrettyPrinter.NopIndenter;
+import com.fasterxml.jackson.databind.MappingJsonFactory;
+
+/**
+ * A writer that converts binary Vectors into a JSON format suitable
+ * for integration testing.
+ */
+public class JsonFileWriter implements AutoCloseable {
+
+ /**
+ * Configuration POJO for writing JSON files.
+ */
+ public static final class JSONWriteConfig {
+ private final boolean pretty;
+
+ private JSONWriteConfig(boolean pretty) {
+ this.pretty = pretty;
+ }
+
+ private JSONWriteConfig() {
+ this.pretty = false;
+ }
+
+ public JSONWriteConfig pretty(boolean pretty) {
+ return new JSONWriteConfig(pretty);
+ }
+ }
+
+ public static JSONWriteConfig config() {
+ return new JSONWriteConfig();
+ }
+
+ private final JsonGenerator generator;
+ private Schema schema;
+
+ /**
+ * Constructs a new writer that will output to <code>outputFile</code>.
+ */
+ public JsonFileWriter(File outputFile) throws IOException {
+ this(outputFile, config());
+ }
+
+ /**
+ * Constructs a new writer that will output to <code>outputFile</code> with the given options.
+ */
+ public JsonFileWriter(File outputFile, JSONWriteConfig config) throws IOException {
+ MappingJsonFactory jsonFactory = new MappingJsonFactory();
+ this.generator = jsonFactory.createGenerator(outputFile, JsonEncoding.UTF8);
+ if (config.pretty) {
+ DefaultPrettyPrinter prettyPrinter = new DefaultPrettyPrinter();
+ prettyPrinter.indentArraysWith(NopIndenter.instance);
+ this.generator.setPrettyPrinter(prettyPrinter);
+ }
+ // Allow writing of floating point NaN values not as strings
+ this.generator.configure(JsonGenerator.Feature.QUOTE_NON_NUMERIC_NUMBERS, false);
+ }
+
+ /**
+ * Writes out the "header" of the file including the schema and any dictionaries required.
+ */
+ public void start(Schema schema, DictionaryProvider provider) throws IOException {
+ List<Field> fields = new ArrayList<>(schema.getFields().size());
+ Set<Long> dictionaryIdsUsed = new HashSet<>();
+ this.schema = schema; // Store original Schema to ensure batches written match
+
+ // Convert fields with dictionaries to have dictionary type
+ for (Field field : schema.getFields()) {
+ fields.add(DictionaryUtility.toMessageFormat(field, provider, dictionaryIdsUsed));
+ }
+ Schema updatedSchema = new Schema(fields, schema.getCustomMetadata());
+
+ generator.writeStartObject();
+ generator.writeObjectField("schema", updatedSchema);
+
+ // Write all dictionaries that were used
+ if (!dictionaryIdsUsed.isEmpty()) {
+ writeDictionaryBatches(generator, dictionaryIdsUsed, provider);
+ }
+
+ // Start writing of record batches
+ generator.writeArrayFieldStart("batches");
+ }
+
+ private void writeDictionaryBatches(JsonGenerator generator, Set<Long> dictionaryIdsUsed, DictionaryProvider provider)
+ throws IOException {
+ generator.writeArrayFieldStart("dictionaries");
+ for (Long id : dictionaryIdsUsed) {
+ generator.writeStartObject();
+ generator.writeObjectField("id", id);
+
+ generator.writeFieldName("data");
+ Dictionary dictionary = provider.lookup(id);
+ FieldVector vector = dictionary.getVector();
+ List<Field> fields = Collections.singletonList(vector.getField());
+ List<FieldVector> vectors = Collections.singletonList(vector);
+ VectorSchemaRoot root = new VectorSchemaRoot(fields, vectors, vector.getValueCount());
+ writeBatch(root);
+
+ generator.writeEndObject();
+ }
+ generator.writeEndArray();
+ }
+
+ /** Writes the record batch to the JSON file. */
+ public void write(VectorSchemaRoot recordBatch) throws IOException {
+ if (!recordBatch.getSchema().equals(schema)) {
+ throw new IllegalArgumentException("record batches must have the same schema: " + schema);
+ }
+ writeBatch(recordBatch);
+ }
+
+ private void writeBatch(VectorSchemaRoot recordBatch) throws IOException {
+ generator.writeStartObject();
+ {
+ generator.writeObjectField("count", recordBatch.getRowCount());
+ generator.writeArrayFieldStart("columns");
+ for (Field field : recordBatch.getSchema().getFields()) {
+ FieldVector vector = recordBatch.getVector(field);
+ writeFromVectorIntoJson(field, vector);
+ }
+ generator.writeEndArray();
+ }
+ generator.writeEndObject();
+ }
+
+ private void writeFromVectorIntoJson(Field field, FieldVector vector) throws IOException {
+ List<BufferType> vectorTypes = TypeLayout.getTypeLayout(field.getType()).getBufferTypes();
+ List<ArrowBuf> vectorBuffers = vector.getFieldBuffers();
+ if (vectorTypes.size() != vectorBuffers.size()) {
+ throw new IllegalArgumentException("vector types and inner vector buffers are not the same size: " +
+ vectorTypes.size() + " != " + vectorBuffers.size());
+ }
+ generator.writeStartObject();
+ {
+ generator.writeObjectField("name", field.getName());
+ int valueCount = vector.getValueCount();
+ generator.writeObjectField("count", valueCount);
+
+ for (int v = 0; v < vectorTypes.size(); v++) {
+ BufferType bufferType = vectorTypes.get(v);
+ ArrowBuf vectorBuffer = vectorBuffers.get(v);
+ generator.writeArrayFieldStart(bufferType.getName());
+ final int bufferValueCount = (bufferType.equals(OFFSET) && vector.getMinorType() != MinorType.DENSEUNION) ?
+ valueCount + 1 : valueCount;
+ for (int i = 0; i < bufferValueCount; i++) {
+ if (bufferType.equals(DATA) && (vector.getMinorType() == MinorType.VARCHAR ||
+ vector.getMinorType() == MinorType.VARBINARY)) {
+ writeValueToGenerator(bufferType, vectorBuffer, vectorBuffers.get(v - 1), vector, i);
+ } else if (bufferType.equals(OFFSET) && vector.getValueCount() == 0 &&
+ (vector.getMinorType() == MinorType.VARBINARY || vector.getMinorType() == MinorType.VARCHAR)) {
+ ArrowBuf vectorBufferTmp = vector.getAllocator().buffer(4);
+ vectorBufferTmp.setInt(0, 0);
+ writeValueToGenerator(bufferType, vectorBufferTmp, null, vector, i);
+ vectorBufferTmp.close();
+ } else {
+ writeValueToGenerator(bufferType, vectorBuffer, null, vector, i);
+ }
+ }
+ generator.writeEndArray();
+ }
+ List<Field> fields = field.getChildren();
+ List<FieldVector> children = vector.getChildrenFromFields();
+ if (fields.size() != children.size()) {
+ throw new IllegalArgumentException("fields and children are not the same size: " + fields.size() + " != " +
+ children.size());
+ }
+ if (fields.size() > 0) {
+ generator.writeArrayFieldStart("children");
+ for (int i = 0; i < fields.size(); i++) {
+ Field childField = fields.get(i);
+ FieldVector childVector = children.get(i);
+ writeFromVectorIntoJson(childField, childVector);
+ }
+ generator.writeEndArray();
+ }
+ }
+ generator.writeEndObject();
+ }
+
+ private void writeValueToGenerator(
+ BufferType bufferType,
+ ArrowBuf buffer,
+ ArrowBuf offsetBuffer,
+ FieldVector vector,
+ final int index) throws IOException {
+ if (bufferType.equals(TYPE)) {
+ generator.writeNumber(buffer.getByte(index * TinyIntVector.TYPE_WIDTH));
+ } else if (bufferType.equals(OFFSET)) {
+ generator.writeNumber(buffer.getInt(index * BaseVariableWidthVector.OFFSET_WIDTH));
+ } else if (bufferType.equals(VALIDITY)) {
+ generator.writeNumber(vector.isNull(index) ? 0 : 1);
+ } else if (bufferType.equals(DATA)) {
+ switch (vector.getMinorType()) {
+ case TINYINT:
+ generator.writeNumber(TinyIntVector.get(buffer, index));
+ break;
+ case SMALLINT:
+ generator.writeNumber(SmallIntVector.get(buffer, index));
+ break;
+ case INT:
+ generator.writeNumber(IntVector.get(buffer, index));
+ break;
+ case BIGINT:
+ generator.writeString(String.valueOf(BigIntVector.get(buffer, index)));
+ break;
+ case UINT1:
+ generator.writeNumber(UInt1Vector.getNoOverflow(buffer, index));
+ break;
+ case UINT2:
+ generator.writeNumber(UInt2Vector.get(buffer, index));
+ break;
+ case UINT4:
+ generator.writeNumber(UInt4Vector.getNoOverflow(buffer, index));
+ break;
+ case UINT8:
+ generator.writeString(UInt8Vector.getNoOverflow(buffer, index).toString());
+ break;
+ case FLOAT4:
+ generator.writeNumber(Float4Vector.get(buffer, index));
+ break;
+ case FLOAT8:
+ generator.writeNumber(Float8Vector.get(buffer, index));
+ break;
+ case DATEDAY:
+ generator.writeNumber(DateDayVector.get(buffer, index));
+ break;
+ case DATEMILLI:
+ generator.writeNumber(DateMilliVector.get(buffer, index));
+ break;
+ case TIMESEC:
+ generator.writeNumber(TimeSecVector.get(buffer, index));
+ break;
+ case TIMEMILLI:
+ generator.writeNumber(TimeMilliVector.get(buffer, index));
+ break;
+ case TIMEMICRO:
+ generator.writeNumber(TimeMicroVector.get(buffer, index));
+ break;
+ case TIMENANO:
+ generator.writeNumber(TimeNanoVector.get(buffer, index));
+ break;
+ case TIMESTAMPSEC:
+ generator.writeNumber(TimeStampSecVector.get(buffer, index));
+ break;
+ case TIMESTAMPMILLI:
+ generator.writeNumber(TimeStampMilliVector.get(buffer, index));
+ break;
+ case TIMESTAMPMICRO:
+ generator.writeNumber(TimeStampMicroVector.get(buffer, index));
+ break;
+ case TIMESTAMPNANO:
+ generator.writeNumber(TimeStampNanoVector.get(buffer, index));
+ break;
+ case TIMESTAMPSECTZ:
+ generator.writeNumber(TimeStampSecTZVector.get(buffer, index));
+ break;
+ case TIMESTAMPMILLITZ:
+ generator.writeNumber(TimeStampMilliTZVector.get(buffer, index));
+ break;
+ case TIMESTAMPMICROTZ:
+ generator.writeNumber(TimeStampMicroTZVector.get(buffer, index));
+ break;
+ case TIMESTAMPNANOTZ:
+ generator.writeNumber(TimeStampNanoTZVector.get(buffer, index));
+ break;
+ case DURATION:
+ generator.writeNumber(DurationVector.get(buffer, index));
+ break;
+ case INTERVALYEAR:
+ generator.writeNumber(IntervalYearVector.getTotalMonths(buffer, index));
+ break;
+ case INTERVALDAY:
+ generator.writeStartObject();
+ generator.writeObjectField("days", IntervalDayVector.getDays(buffer, index));
+ generator.writeObjectField("milliseconds", IntervalDayVector.getMilliseconds(buffer, index));
+ generator.writeEndObject();
+ break;
+ case INTERVALMONTHDAYNANO:
+ generator.writeStartObject();
+ generator.writeObjectField("months", IntervalMonthDayNanoVector.getMonths(buffer, index));
+ generator.writeObjectField("days", IntervalMonthDayNanoVector.getDays(buffer, index));
+ generator.writeObjectField("nanoseconds", IntervalMonthDayNanoVector.getNanoseconds(buffer, index));
+ generator.writeEndObject();
+ break;
+ case BIT:
+ generator.writeNumber(BitVectorHelper.get(buffer, index));
+ break;
+ case VARBINARY: {
+ Preconditions.checkNotNull(offsetBuffer);
+ String hexString = Hex.encodeHexString(BaseVariableWidthVector.get(buffer,
+ offsetBuffer, index));
+ generator.writeObject(hexString);
+ break;
+ }
+ case FIXEDSIZEBINARY:
+ int byteWidth = ((FixedSizeBinaryVector) vector).getByteWidth();
+ String fixedSizeHexString = Hex.encodeHexString(FixedSizeBinaryVector.get(buffer, index, byteWidth));
+ generator.writeObject(fixedSizeHexString);
+ break;
+ case VARCHAR: {
+ Preconditions.checkNotNull(offsetBuffer);
+ byte[] b = (BaseVariableWidthVector.get(buffer, offsetBuffer, index));
+ generator.writeString(new String(b, "UTF-8"));
+ break;
+ }
+ case DECIMAL: {
+ int scale = ((DecimalVector) vector).getScale();
+ BigDecimal decimalValue = DecimalUtility.getBigDecimalFromArrowBuf(buffer, index, scale,
+ DecimalVector.TYPE_WIDTH);
+ // We write the unscaled value, because the scale is stored in the type metadata.
+ generator.writeString(decimalValue.unscaledValue().toString());
+ break;
+ }
+ case DECIMAL256: {
+ int scale = ((Decimal256Vector) vector).getScale();
+ BigDecimal decimalValue = DecimalUtility.getBigDecimalFromArrowBuf(buffer, index, scale,
+ Decimal256Vector.TYPE_WIDTH);
+ // We write the unscaled value, because the scale is stored in the type metadata.
+ generator.writeString(decimalValue.unscaledValue().toString());
+ break;
+ }
+
+ default:
+ throw new UnsupportedOperationException("minor type: " + vector.getMinorType());
+ }
+ }
+ }
+
+ @Override
+ public void close() throws IOException {
+ generator.writeEndArray();
+ generator.writeEndObject();
+ generator.close();
+ }
+
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/ReadChannel.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/ReadChannel.java
new file mode 100644
index 000000000..db79661a8
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/ReadChannel.java
@@ -0,0 +1,102 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.ipc;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.channels.ReadableByteChannel;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Adapter around {@link ReadableByteChannel} that reads into {@linkplain ArrowBuf}s.
+ */
+public class ReadChannel implements AutoCloseable {
+
+ private static final Logger LOGGER = LoggerFactory.getLogger(ReadChannel.class);
+
+ private ReadableByteChannel in;
+ private long bytesRead = 0;
+
+ public ReadChannel(ReadableByteChannel in) {
+ this.in = in;
+ }
+
+ public long bytesRead() {
+ return bytesRead;
+ }
+
+ /**
+ * Reads bytes into buffer until it is full (buffer.remaining() == 0). Returns the
+ * number of bytes read which can be less than full if there are no more.
+ *
+ * @param buffer The buffer to read to
+ * @return the number of byte read
+ * @throws IOException if nit enough bytes left to read
+ */
+ public int readFully(ByteBuffer buffer) throws IOException {
+ if (LOGGER.isDebugEnabled()) {
+ LOGGER.debug("Reading buffer with size: {}", buffer.remaining());
+ }
+ int totalRead = 0;
+ while (buffer.remaining() != 0) {
+ int read = in.read(buffer);
+ if (read == -1) {
+ this.bytesRead += totalRead;
+ return totalRead;
+ }
+ totalRead += read;
+ if (read == 0) {
+ break;
+ }
+ }
+ this.bytesRead += totalRead;
+ return totalRead;
+ }
+
+ /**
+ * Reads up to len into buffer. Returns bytes read.
+ *
+ * @param buffer the buffer to read to
+ * @param length the amount of bytes to read
+ * @return the number of bytes read
+ * @throws IOException if nit enough bytes left to read
+ */
+ public long readFully(ArrowBuf buffer, long length) throws IOException {
+ boolean fullRead = true;
+ long bytesLeft = length;
+ while (fullRead && bytesLeft > 0) {
+ int bytesToRead = (int) Math.min(bytesLeft, Integer.MAX_VALUE);
+ int n = readFully(buffer.nioBuffer(buffer.writerIndex(), bytesToRead));
+ buffer.writerIndex(buffer.writerIndex() + n);
+ fullRead = n == bytesToRead;
+ bytesLeft -= n;
+ }
+ return length - bytesLeft;
+ }
+
+ @Override
+ public void close() throws IOException {
+ if (this.in != null) {
+ in.close();
+ in = null;
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/SeekableReadChannel.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/SeekableReadChannel.java
new file mode 100644
index 000000000..4b6e0ed76
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/SeekableReadChannel.java
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.ipc;
+
+import java.io.IOException;
+import java.nio.channels.SeekableByteChannel;
+
+/**
+ * An {@link ReadChannel} that supports seeking to a
+ * random position.
+ */
+public class SeekableReadChannel extends ReadChannel {
+
+ private final SeekableByteChannel in;
+
+ public SeekableReadChannel(SeekableByteChannel in) {
+ super(in);
+ this.in = in;
+ }
+
+ public void setPosition(long position) throws IOException {
+ in.position(position);
+ }
+
+ public long size() throws IOException {
+ return in.size();
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/WriteChannel.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/WriteChannel.java
new file mode 100644
index 000000000..9ad71f6fe
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/WriteChannel.java
@@ -0,0 +1,162 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.ipc;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.channels.WritableByteChannel;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.vector.ipc.message.FBSerializable;
+import org.apache.arrow.vector.ipc.message.MessageSerializer;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.flatbuffers.FlatBufferBuilder;
+
+/**
+ * Wrapper around a WritableByteChannel that maintains the position as well adding
+ * some common serialization utilities.
+ *
+ * <p>All write methods in this class follow full write semantics, i.e., write calls
+ * only return after requested data has been fully written. Note this is different
+ * from java WritableByteChannel interface where partial write is allowed
+ * </p>
+ * <p>
+ * Please note that objects of this class are not thread-safe.
+ * </p>
+ */
+public class WriteChannel implements AutoCloseable {
+ private static final Logger LOGGER = LoggerFactory.getLogger(WriteChannel.class);
+
+ private static final byte[] ZERO_BYTES = new byte[8];
+
+ private final byte[] intBuf = new byte[4];
+
+ private long currentPosition = 0;
+
+ private final WritableByteChannel out;
+
+ public WriteChannel(WritableByteChannel out) {
+ this.out = out;
+ }
+
+ @Override
+ public void close() throws IOException {
+ out.close();
+ }
+
+ public long getCurrentPosition() {
+ return currentPosition;
+ }
+
+ public long write(byte[] buffer) throws IOException {
+ return write(ByteBuffer.wrap(buffer));
+ }
+
+ long write(byte[] buffer, int offset, int length) throws IOException {
+ return write(ByteBuffer.wrap(buffer, offset, length));
+ }
+
+ /**
+ * Writes <zeroCount>zeroCount</zeroCount> zeros the underlying channel.
+ */
+ public long writeZeros(long zeroCount) throws IOException {
+ long bytesWritten = 0;
+ long wholeWordsEnd = zeroCount - 8;
+ while (bytesWritten <= wholeWordsEnd) {
+ bytesWritten += write(ZERO_BYTES);
+ }
+
+ if (bytesWritten < zeroCount) {
+ bytesWritten += write(ZERO_BYTES, 0, (int) (zeroCount - bytesWritten));
+ }
+ return bytesWritten;
+ }
+
+ /**
+ * Writes enough bytes to align the channel to an 8-byte boundary.
+ */
+ public long align() throws IOException {
+ int trailingByteSize = (int) (currentPosition % 8);
+ if (trailingByteSize != 0) { // align on 8 byte boundaries
+ return writeZeros(8 - trailingByteSize);
+ }
+ return 0;
+ }
+
+ /**
+ * Writes all data from <code>buffer</code> to the underlying channel.
+ */
+ public long write(ByteBuffer buffer) throws IOException {
+ long length = buffer.remaining();
+ if (LOGGER.isDebugEnabled()) {
+ LOGGER.debug("Writing buffer with size: {}", length);
+ }
+ while (buffer.hasRemaining()) {
+ out.write(buffer);
+ }
+ currentPosition += length;
+ return length;
+ }
+
+ /**
+ * Writes <code>v</code> in little-endian format to the underlying channel.
+ */
+ public long writeIntLittleEndian(int v) throws IOException {
+ MessageSerializer.intToBytes(v, intBuf);
+ return write(intBuf);
+ }
+
+ /**
+ * Writes the buffer to the underlying channel.
+ */
+ public void write(ArrowBuf buffer) throws IOException {
+ long bytesWritten = 0;
+ while (bytesWritten < buffer.readableBytes()) {
+ int bytesToWrite = (int) Math.min(Integer.MAX_VALUE, buffer.readableBytes() - bytesWritten);
+ ByteBuffer nioBuffer = buffer.nioBuffer(buffer.readerIndex() + bytesWritten,
+ bytesToWrite);
+ write(nioBuffer);
+ bytesWritten += bytesToWrite;
+ }
+
+ }
+
+ /**
+ * Writes the serialized flatbuffer to the underlying channel. If withSizePrefix
+ * is true then the length in bytes of the buffer will first be written in little endian format.
+ */
+ public long write(FBSerializable writer, boolean withSizePrefix) throws IOException {
+ ByteBuffer buffer = serialize(writer);
+ if (withSizePrefix) {
+ writeIntLittleEndian(buffer.remaining());
+ }
+ return write(buffer);
+ }
+
+ /**
+ * Serializes writer to a ByteBuffer.
+ */
+ public static ByteBuffer serialize(FBSerializable writer) {
+ FlatBufferBuilder builder = new FlatBufferBuilder();
+ int root = writer.writeTo(builder);
+ builder.finish(root);
+ return builder.dataBuffer();
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowBlock.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowBlock.java
new file mode 100644
index 000000000..a235102ce
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowBlock.java
@@ -0,0 +1,95 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.ipc.message;
+
+import org.apache.arrow.flatbuf.Block;
+
+import com.google.flatbuffers.FlatBufferBuilder;
+
+/** Metadata for an arrow message in a channel. */
+public class ArrowBlock implements FBSerializable {
+
+ private final long offset;
+ private final int metadataLength;
+ private final long bodyLength;
+
+ /**
+ * Constructs a new instance.
+ *
+ * @param offset The offset into the channel file where the block was written.
+ * @param metadataLength The length of the flatbuffer metadata in the block.
+ * @param bodyLength The length of data in the block.
+ */
+ public ArrowBlock(long offset, int metadataLength, long bodyLength) {
+ super();
+ this.offset = offset;
+ this.metadataLength = metadataLength;
+ this.bodyLength = bodyLength;
+ }
+
+ public long getOffset() {
+ return offset;
+ }
+
+ public int getMetadataLength() {
+ return metadataLength;
+ }
+
+ public long getBodyLength() {
+ return bodyLength;
+ }
+
+ @Override
+ public int writeTo(FlatBufferBuilder builder) {
+ return Block.createBlock(builder, offset, metadataLength, bodyLength);
+ }
+
+ @Override
+ public int hashCode() {
+ final int prime = 31;
+ int result = 1;
+ result = prime * result + (int) (bodyLength ^ (bodyLength >>> 32));
+ result = prime * result + metadataLength;
+ result = prime * result + (int) (offset ^ (offset >>> 32));
+ return result;
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ if (this == obj) {
+ return true;
+ }
+ if (obj == null) {
+ return false;
+ }
+ if (getClass() != obj.getClass()) {
+ return false;
+ }
+ ArrowBlock other = (ArrowBlock) obj;
+ if (bodyLength != other.bodyLength) {
+ return false;
+ }
+ if (metadataLength != other.metadataLength) {
+ return false;
+ }
+ if (offset != other.offset) {
+ return false;
+ }
+ return true;
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowBodyCompression.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowBodyCompression.java
new file mode 100644
index 000000000..5370ddfa0
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowBodyCompression.java
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.ipc.message;
+
+import org.apache.arrow.flatbuf.BodyCompression;
+
+import com.google.flatbuffers.FlatBufferBuilder;
+
+/**
+ * Compression information about data written to a channel.
+ */
+public class ArrowBodyCompression implements FBSerializable {
+
+ private final byte codec;
+
+ private final byte method;
+
+ public ArrowBodyCompression(byte codec, byte method) {
+ this.codec = codec;
+ this.method = method;
+ }
+
+ @Override
+ public int writeTo(FlatBufferBuilder builder) {
+ return BodyCompression.createBodyCompression(builder, codec, method);
+ }
+
+ public byte getCodec() {
+ return codec;
+ }
+
+ public byte getMethod() {
+ return method;
+ }
+
+ @Override
+ public String toString() {
+ return "ArrowBodyCompression [codec=" + codec + ", method=" + method + "]";
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowBuffer.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowBuffer.java
new file mode 100644
index 000000000..d3aec6fb7
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowBuffer.java
@@ -0,0 +1,90 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.ipc.message;
+
+import org.apache.arrow.flatbuf.Buffer;
+
+import com.google.flatbuffers.FlatBufferBuilder;
+
+/** Metadata for a buffer written to a channel. */
+public class ArrowBuffer implements FBSerializable {
+
+ private long offset;
+ private long size;
+
+ /**
+ * Constructs a new instance.
+ *
+ * @param offset The offset to the start of the buffer in the channel.
+ * @param size The size of the buffer.
+ */
+ public ArrowBuffer(long offset, long size) {
+ super();
+ this.offset = offset;
+ this.size = size;
+ }
+
+ public long getOffset() {
+ return offset;
+ }
+
+ public long getSize() {
+ return size;
+ }
+
+ @Override
+ public int hashCode() {
+ final int prime = 31;
+ int result = 1;
+ result = prime * result + (int) (offset ^ (offset >>> 32));
+ result = prime * result + (int) (size ^ (size >>> 32));
+ return result;
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ if (this == obj) {
+ return true;
+ }
+ if (obj == null) {
+ return false;
+ }
+ if (getClass() != obj.getClass()) {
+ return false;
+ }
+ ArrowBuffer other = (ArrowBuffer) obj;
+ if (offset != other.offset) {
+ return false;
+ }
+ if (size != other.size) {
+ return false;
+ }
+ return true;
+ }
+
+ @Override
+ public int writeTo(FlatBufferBuilder builder) {
+ return Buffer.createBuffer(builder, offset, size);
+ }
+
+ @Override
+ public String toString() {
+ return "ArrowBuffer [offset=" + offset + ", size=" + size + "]";
+ }
+
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowDictionaryBatch.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowDictionaryBatch.java
new file mode 100644
index 000000000..cac2a1cb8
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowDictionaryBatch.java
@@ -0,0 +1,94 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.ipc.message;
+
+import org.apache.arrow.flatbuf.DictionaryBatch;
+import org.apache.arrow.flatbuf.MessageHeader;
+
+import com.google.flatbuffers.FlatBufferBuilder;
+
+/**
+ * POJO wrapper around a Dictionary Batch IPC messages
+ * (https://arrow.apache.org/docs/format/IPC.html#dictionary-batches)
+ */
+public class ArrowDictionaryBatch implements ArrowMessage {
+
+ private final long dictionaryId;
+ private final ArrowRecordBatch dictionary;
+ private final boolean isDelta;
+
+ @Deprecated
+ public ArrowDictionaryBatch(long dictionaryId, ArrowRecordBatch dictionary) {
+ this (dictionaryId, dictionary, false);
+ }
+
+ /**
+ * Constructs new instance.
+ */
+ public ArrowDictionaryBatch(long dictionaryId, ArrowRecordBatch dictionary, boolean isDelta) {
+ this.dictionaryId = dictionaryId;
+ this.dictionary = dictionary;
+ this.isDelta = isDelta;
+ }
+
+ public boolean isDelta() {
+ return isDelta;
+ }
+
+ public byte getMessageType() {
+ return MessageHeader.DictionaryBatch;
+ }
+
+ public long getDictionaryId() {
+ return dictionaryId;
+ }
+
+ public ArrowRecordBatch getDictionary() {
+ return dictionary;
+ }
+
+ @Override
+ public int writeTo(FlatBufferBuilder builder) {
+ int dataOffset = dictionary.writeTo(builder);
+ DictionaryBatch.startDictionaryBatch(builder);
+ DictionaryBatch.addId(builder, dictionaryId);
+ DictionaryBatch.addData(builder, dataOffset);
+ DictionaryBatch.addIsDelta(builder, isDelta);
+ return DictionaryBatch.endDictionaryBatch(builder);
+ }
+
+ @Override
+ public long computeBodyLength() {
+ return dictionary.computeBodyLength();
+ }
+
+ @Override
+ public <T> T accepts(ArrowMessageVisitor<T> visitor) {
+ return visitor.visit(this);
+ }
+
+ @Override
+ public String toString() {
+ return "ArrowDictionaryBatch [dictionaryId=" + dictionaryId + ", dictionary=" + dictionary + "]";
+ }
+
+ @Override
+ public void close() {
+ dictionary.close();
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowFieldNode.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowFieldNode.java
new file mode 100644
index 000000000..9ce5e2e4d
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowFieldNode.java
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.ipc.message;
+
+import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt;
+
+import org.apache.arrow.flatbuf.FieldNode;
+
+import com.google.flatbuffers.FlatBufferBuilder;
+
+/**
+ * Metadata about Vectors/Arrays that is written to a channel.
+ */
+public class ArrowFieldNode implements FBSerializable {
+
+ private final int length;
+ private final int nullCount;
+
+ /**
+ * Constructs a new instance.
+ *
+ * @param length The number of values written.
+ * @param nullCount The number of null values.
+ */
+ public ArrowFieldNode(long length, long nullCount) {
+ super();
+ this.length = checkedCastToInt(length);
+ this.nullCount = checkedCastToInt(nullCount);
+ }
+
+ @Override
+ public int writeTo(FlatBufferBuilder builder) {
+ return FieldNode.createFieldNode(builder, length, nullCount);
+ }
+
+ public int getNullCount() {
+ return nullCount;
+ }
+
+ public int getLength() {
+ return length;
+ }
+
+ @Override
+ public String toString() {
+ return "ArrowFieldNode [length=" + length + ", nullCount=" + nullCount + "]";
+ }
+
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowFooter.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowFooter.java
new file mode 100644
index 000000000..567fabc1d
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowFooter.java
@@ -0,0 +1,226 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.ipc.message;
+
+import static org.apache.arrow.vector.ipc.message.FBSerializables.writeAllStructsToVector;
+import static org.apache.arrow.vector.ipc.message.FBSerializables.writeKeyValues;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.arrow.flatbuf.Block;
+import org.apache.arrow.flatbuf.Footer;
+import org.apache.arrow.flatbuf.KeyValue;
+import org.apache.arrow.vector.types.MetadataVersion;
+import org.apache.arrow.vector.types.pojo.Schema;
+
+import com.google.flatbuffers.FlatBufferBuilder;
+
+/** Footer metadata for the arrow file format. */
+public class ArrowFooter implements FBSerializable {
+
+ private final Schema schema;
+
+ private final List<ArrowBlock> dictionaries;
+
+ private final List<ArrowBlock> recordBatches;
+
+ private final Map<String, String> metaData;
+
+ private final MetadataVersion metadataVersion;
+
+ public ArrowFooter(Schema schema, List<ArrowBlock> dictionaries, List<ArrowBlock> recordBatches) {
+ this(schema, dictionaries, recordBatches, null);
+ }
+
+ /**
+ * Constructs a new instance.
+ *
+ * @param schema The schema for record batches in the file.
+ * @param dictionaries The dictionaries relevant to the file.
+ * @param recordBatches The recordBatches written to the file.
+ * @param metaData user-defined k-v meta data.
+ */
+ public ArrowFooter(
+ Schema schema,
+ List<ArrowBlock> dictionaries,
+ List<ArrowBlock> recordBatches,
+ Map<String, String> metaData) {
+ this(schema, dictionaries, recordBatches, metaData, MetadataVersion.DEFAULT);
+ }
+
+ /**
+ * Constructs a new instance.
+ *
+ * @param schema The schema for record batches in the file.
+ * @param dictionaries The dictionaries relevant to the file.
+ * @param recordBatches The recordBatches written to the file.
+ * @param metaData user-defined k-v meta data.
+ * @param metadataVersion The Arrow metadata version.
+ */
+ public ArrowFooter(
+ Schema schema,
+ List<ArrowBlock> dictionaries,
+ List<ArrowBlock> recordBatches,
+ Map<String, String> metaData,
+ MetadataVersion metadataVersion) {
+ this.schema = schema;
+ this.dictionaries = dictionaries;
+ this.recordBatches = recordBatches;
+ this.metaData = metaData;
+ this.metadataVersion = metadataVersion;
+ }
+
+ /**
+ * Constructs from the corresponding Flatbuffer message.
+ */
+ public ArrowFooter(Footer footer) {
+ this(
+ Schema.convertSchema(footer.schema()),
+ dictionaries(footer),
+ recordBatches(footer),
+ metaData(footer),
+ MetadataVersion.fromFlatbufID(footer.version())
+ );
+ }
+
+ private static List<ArrowBlock> recordBatches(Footer footer) {
+ List<ArrowBlock> recordBatches = new ArrayList<>();
+ Block tempBlock = new Block();
+ int recordBatchesLength = footer.recordBatchesLength();
+ for (int i = 0; i < recordBatchesLength; i++) {
+ Block block = footer.recordBatches(tempBlock, i);
+ recordBatches.add(new ArrowBlock(block.offset(), block.metaDataLength(), block.bodyLength()));
+ }
+ return recordBatches;
+ }
+
+ private static List<ArrowBlock> dictionaries(Footer footer) {
+ List<ArrowBlock> dictionaries = new ArrayList<>();
+ Block tempBlock = new Block();
+
+ int dictionariesLength = footer.dictionariesLength();
+ for (int i = 0; i < dictionariesLength; i++) {
+ Block block = footer.dictionaries(tempBlock, i);
+ dictionaries.add(new ArrowBlock(block.offset(), block.metaDataLength(), block.bodyLength()));
+ }
+ return dictionaries;
+ }
+
+ private static Map<String, String> metaData(Footer footer) {
+ Map<String, String> metaData = new HashMap<>();
+
+ int metaDataLength = footer.customMetadataLength();
+ for (int i = 0; i < metaDataLength; i++) {
+ KeyValue kv = footer.customMetadata(i);
+ metaData.put(kv.key(), kv.value());
+ }
+
+ return metaData;
+ }
+
+ public Schema getSchema() {
+ return schema;
+ }
+
+ public List<ArrowBlock> getDictionaries() {
+ return dictionaries;
+ }
+
+ public List<ArrowBlock> getRecordBatches() {
+ return recordBatches;
+ }
+
+ public Map<String, String> getMetaData() {
+ return metaData;
+ }
+
+ public MetadataVersion getMetadataVersion() {
+ return metadataVersion;
+ }
+
+ @Override
+ public int writeTo(FlatBufferBuilder builder) {
+ int schemaIndex = schema.getSchema(builder);
+ Footer.startDictionariesVector(builder, dictionaries.size());
+ int dicsOffset = writeAllStructsToVector(builder, dictionaries);
+ Footer.startRecordBatchesVector(builder, recordBatches.size());
+ int rbsOffset = writeAllStructsToVector(builder, recordBatches);
+
+ int metaDataOffset = 0;
+ if (metaData != null) {
+ metaDataOffset = writeKeyValues(builder, metaData);
+ }
+
+ Footer.startFooter(builder);
+ Footer.addSchema(builder, schemaIndex);
+ Footer.addDictionaries(builder, dicsOffset);
+ Footer.addRecordBatches(builder, rbsOffset);
+ Footer.addCustomMetadata(builder, metaDataOffset);
+ Footer.addVersion(builder, metadataVersion.toFlatbufID());
+ return Footer.endFooter(builder);
+ }
+
+ @Override
+ public int hashCode() {
+ final int prime = 31;
+ int result = 1;
+ result = prime * result + ((dictionaries == null) ? 0 : dictionaries.hashCode());
+ result = prime * result + ((recordBatches == null) ? 0 : recordBatches.hashCode());
+ result = prime * result + ((schema == null) ? 0 : schema.hashCode());
+ return result;
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ if (this == obj) {
+ return true;
+ }
+ if (obj == null) {
+ return false;
+ }
+ if (getClass() != obj.getClass()) {
+ return false;
+ }
+ ArrowFooter other = (ArrowFooter) obj;
+ if (dictionaries == null) {
+ if (other.dictionaries != null) {
+ return false;
+ }
+ } else if (!dictionaries.equals(other.dictionaries)) {
+ return false;
+ }
+ if (recordBatches == null) {
+ if (other.recordBatches != null) {
+ return false;
+ }
+ } else if (!recordBatches.equals(other.recordBatches)) {
+ return false;
+ }
+ if (schema == null) {
+ if (other.schema != null) {
+ return false;
+ }
+ } else if (!schema.equals(other.schema)) {
+ return false;
+ }
+ return true;
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowMessage.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowMessage.java
new file mode 100644
index 000000000..4cbc87b4e
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowMessage.java
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.ipc.message;
+
+/**
+ * Interface for Arrow IPC messages (https://arrow.apache.org/docs/format/IPC.html).
+ */
+public interface ArrowMessage extends FBSerializable, AutoCloseable {
+
+ long computeBodyLength();
+
+ <T> T accepts(ArrowMessageVisitor<T> visitor);
+
+ /** Returns the flatbuffer enum value indicating the type of the message. */
+ byte getMessageType();
+
+ /**
+ * Visitor interface for implementations of {@link ArrowMessage}.
+ *
+ * @param <T> The type of value to return after visiting.
+ */
+ interface ArrowMessageVisitor<T> {
+ T visit(ArrowDictionaryBatch message);
+
+ T visit(ArrowRecordBatch message);
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowRecordBatch.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowRecordBatch.java
new file mode 100644
index 000000000..dbf2774fb
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowRecordBatch.java
@@ -0,0 +1,259 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.ipc.message;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.stream.Collectors;
+
+import org.apache.arrow.flatbuf.RecordBatch;
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.util.Preconditions;
+import org.apache.arrow.vector.compression.NoCompressionCodec;
+import org.apache.arrow.vector.util.DataSizeRoundingUtil;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.flatbuffers.FlatBufferBuilder;
+
+/**
+ * POJO representation of a RecordBatch IPC message (https://arrow.apache.org/docs/format/IPC.html).
+ */
+public class ArrowRecordBatch implements ArrowMessage {
+
+ private static final Logger LOGGER = LoggerFactory.getLogger(ArrowRecordBatch.class);
+
+ /**
+ * Number of records.
+ */
+ private final int length;
+
+ /**
+ * Nodes correspond to the pre-ordered flattened logical schema.
+ */
+ private final List<ArrowFieldNode> nodes;
+
+ private final List<ArrowBuf> buffers;
+
+ private final ArrowBodyCompression bodyCompression;
+
+ private final List<ArrowBuffer> buffersLayout;
+
+ private boolean closed = false;
+
+ public ArrowRecordBatch(
+ int length, List<ArrowFieldNode> nodes, List<ArrowBuf> buffers) {
+ this(length, nodes, buffers, NoCompressionCodec.DEFAULT_BODY_COMPRESSION, true);
+ }
+
+ public ArrowRecordBatch(
+ int length, List<ArrowFieldNode> nodes, List<ArrowBuf> buffers,
+ ArrowBodyCompression bodyCompression) {
+ this(length, nodes, buffers, bodyCompression, true);
+ }
+
+ /**
+ * Construct a record batch from nodes.
+ *
+ * @param length how many rows in this batch
+ * @param nodes field level info
+ * @param buffers will be retained until this recordBatch is closed
+ * @param bodyCompression compression info.
+ */
+ public ArrowRecordBatch(
+ int length, List<ArrowFieldNode> nodes, List<ArrowBuf> buffers,
+ ArrowBodyCompression bodyCompression, boolean alignBuffers) {
+ super();
+ this.length = length;
+ this.nodes = nodes;
+ this.buffers = buffers;
+ Preconditions.checkArgument(bodyCompression != null, "body compression cannot be null");
+ this.bodyCompression = bodyCompression;
+ List<ArrowBuffer> arrowBuffers = new ArrayList<>(buffers.size());
+ long offset = 0;
+ for (ArrowBuf arrowBuf : buffers) {
+ arrowBuf.getReferenceManager().retain();
+ long size = arrowBuf.readableBytes();
+ arrowBuffers.add(new ArrowBuffer(offset, size));
+ if (LOGGER.isDebugEnabled()) {
+ LOGGER.debug("Buffer in RecordBatch at {}, length: {}", offset, size);
+ }
+ offset += size;
+ if (alignBuffers) { // align on 8 byte boundaries
+ offset = DataSizeRoundingUtil.roundUpTo8Multiple(offset);
+ }
+ }
+ this.buffersLayout = Collections.unmodifiableList(arrowBuffers);
+ }
+
+ // clone constructor
+ // this constructor is different from the public ones in that the reference manager's
+ // <code>retain</code> method is not called, so the first <code>dummy</code> parameter is used
+ // to distinguish this from the public constructor.
+ private ArrowRecordBatch(
+ boolean dummy, int length, List<ArrowFieldNode> nodes,
+ List<ArrowBuf> buffers, ArrowBodyCompression bodyCompression) {
+ this.length = length;
+ this.nodes = nodes;
+ this.buffers = buffers;
+ Preconditions.checkArgument(bodyCompression != null, "body compression cannot be null");
+ this.bodyCompression = bodyCompression;
+ this.closed = false;
+ List<ArrowBuffer> arrowBuffers = new ArrayList<>();
+ long offset = 0;
+ for (ArrowBuf arrowBuf : buffers) {
+ long size = arrowBuf.readableBytes();
+ arrowBuffers.add(new ArrowBuffer(offset, size));
+ offset += size;
+ }
+ this.buffersLayout = Collections.unmodifiableList(arrowBuffers);
+ }
+
+ public byte getMessageType() {
+ return org.apache.arrow.flatbuf.MessageHeader.RecordBatch;
+ }
+
+ public int getLength() {
+ return length;
+ }
+
+ public ArrowBodyCompression getBodyCompression() {
+ return bodyCompression;
+ }
+
+ /**
+ * Get the nodes in this record batch.
+ *
+ * @return the FieldNodes corresponding to the schema
+ */
+ public List<ArrowFieldNode> getNodes() {
+ return nodes;
+ }
+
+ /**
+ * Get the record batch buffers.
+ *
+ * @return the buffers containing the data
+ */
+ public List<ArrowBuf> getBuffers() {
+ if (closed) {
+ throw new IllegalStateException("already closed");
+ }
+ return buffers;
+ }
+
+ /**
+ * Create a new ArrowRecordBatch which has the same information as this batch but whose buffers
+ * are owned by that Allocator.
+ *
+ * <p>This will also close this record batch and make it no longer useful.
+ *
+ * @return A cloned ArrowRecordBatch
+ */
+ public ArrowRecordBatch cloneWithTransfer(final BufferAllocator allocator) {
+ final List<ArrowBuf> newBufs = buffers.stream()
+ .map(buf ->
+ (buf.getReferenceManager().transferOwnership(buf, allocator)
+ .getTransferredBuffer())
+ .writerIndex(buf.writerIndex()))
+ .collect(Collectors.toList());
+ close();
+ return new ArrowRecordBatch(false, length, nodes, newBufs, bodyCompression);
+ }
+
+ /**
+ * Get the serialized layout.
+ *
+ * @return the serialized layout if we send the buffers on the wire
+ */
+ public List<ArrowBuffer> getBuffersLayout() {
+ return buffersLayout;
+ }
+
+ @Override
+ public int writeTo(FlatBufferBuilder builder) {
+ RecordBatch.startNodesVector(builder, nodes.size());
+ int nodesOffset = FBSerializables.writeAllStructsToVector(builder, nodes);
+ RecordBatch.startBuffersVector(builder, buffers.size());
+ int buffersOffset = FBSerializables.writeAllStructsToVector(builder, buffersLayout);
+ int compressOffset = 0;
+ if (bodyCompression.getCodec() != NoCompressionCodec.COMPRESSION_TYPE) {
+ compressOffset = bodyCompression.writeTo(builder);
+ }
+ RecordBatch.startRecordBatch(builder);
+ RecordBatch.addLength(builder, length);
+ RecordBatch.addNodes(builder, nodesOffset);
+ RecordBatch.addBuffers(builder, buffersOffset);
+ if (bodyCompression.getCodec() != NoCompressionCodec.COMPRESSION_TYPE) {
+ RecordBatch.addCompression(builder, compressOffset);
+ }
+ return RecordBatch.endRecordBatch(builder);
+ }
+
+ @Override
+ public <T> T accepts(ArrowMessageVisitor<T> visitor) {
+ return visitor.visit(this);
+ }
+
+ /**
+ * Releases the buffers.
+ */
+ @Override
+ public void close() {
+ if (!closed) {
+ closed = true;
+ for (ArrowBuf arrowBuf : buffers) {
+ arrowBuf.getReferenceManager().release();
+ }
+ }
+ }
+
+ @Override
+ public String toString() {
+ return "ArrowRecordBatch [length=" + length + ", nodes=" + nodes + ", #buffers=" + buffers.size() +
+ ", buffersLayout=" + buffersLayout + ", closed=" + closed + "]";
+ }
+
+ /**
+ * Computes the size of the serialized body for this recordBatch.
+ */
+ @Override
+ public long computeBodyLength() {
+ long size = 0;
+
+ List<ArrowBuf> buffers = getBuffers();
+ List<ArrowBuffer> buffersLayout = getBuffersLayout();
+ if (buffers.size() != buffersLayout.size()) {
+ throw new IllegalStateException("the layout does not match: " +
+ buffers.size() + " != " + buffersLayout.size());
+ }
+
+ for (int i = 0; i < buffers.size(); i++) {
+ ArrowBuf buffer = buffers.get(i);
+ ArrowBuffer layout = buffersLayout.get(i);
+ size = layout.getOffset() + buffer.readableBytes();
+
+ // round up size to the next multiple of 8
+ size = DataSizeRoundingUtil.roundUpTo8Multiple(size);
+ }
+ return size;
+ }
+
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/FBSerializable.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/FBSerializable.java
new file mode 100644
index 000000000..6b406b594
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/FBSerializable.java
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.ipc.message;
+
+import com.google.flatbuffers.FlatBufferBuilder;
+
+/**
+ * Interface for serializing to FlatBuffers.
+ */
+public interface FBSerializable {
+ /**
+ * Returns the number of bytes taken to serialize the data in builder after writing to it.
+ */
+ int writeTo(FlatBufferBuilder builder);
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/FBSerializables.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/FBSerializables.java
new file mode 100644
index 000000000..26736ed91
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/FBSerializables.java
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.ipc.message;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.arrow.flatbuf.KeyValue;
+
+import com.google.flatbuffers.FlatBufferBuilder;
+
+/**
+ * Utility methods for {@linkplain org.apache.arrow.vector.ipc.message.FBSerializable}s.
+ */
+public class FBSerializables {
+ private FBSerializables() {}
+
+ /**
+ * Writes every element of all to builder and calls {@link FlatBufferBuilder#endVector()} afterwards.
+ * Returns the number of result of calling endVector.
+ */
+ public static int writeAllStructsToVector(FlatBufferBuilder builder, List<? extends FBSerializable> all) {
+ // struct vectors have to be created in reverse order
+ List<? extends FBSerializable> reversed = new ArrayList<>(all);
+ Collections.reverse(reversed);
+ for (FBSerializable element : reversed) {
+ element.writeTo(builder);
+ }
+ return builder.endVector();
+ }
+
+ /**
+ * Writes map data with string type.
+ */
+ public static int writeKeyValues(FlatBufferBuilder builder, Map<String, String> metaData) {
+ int[] metadataOffsets = new int[metaData.size()];
+ Iterator<Map.Entry<String, String>> metadataIterator = metaData.entrySet().iterator();
+ for (int i = 0; i < metadataOffsets.length; i++) {
+ Map.Entry<String, String> kv = metadataIterator.next();
+ int keyOffset = builder.createString(kv.getKey());
+ int valueOffset = builder.createString(kv.getValue());
+ KeyValue.startKeyValue(builder);
+ KeyValue.addKey(builder, keyOffset);
+ KeyValue.addValue(builder, valueOffset);
+ metadataOffsets[i] = KeyValue.endKeyValue(builder);
+ }
+ return org.apache.arrow.flatbuf.Field.createCustomMetadataVector(builder, metadataOffsets);
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/IpcOption.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/IpcOption.java
new file mode 100644
index 000000000..51207584f
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/IpcOption.java
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.ipc.message;
+
+import org.apache.arrow.vector.types.MetadataVersion;
+
+/**
+ * IPC options, now only use for write.
+ */
+public class IpcOption {
+
+ // Write the pre-0.15.0 encapsulated IPC message format
+ // consisting of a 4-byte prefix instead of 8 byte
+ public final boolean write_legacy_ipc_format;
+
+ // The metadata version. Defaults to V5.
+ public final MetadataVersion metadataVersion;
+
+ public IpcOption() {
+ this(false, MetadataVersion.DEFAULT);
+ }
+
+ public IpcOption(boolean writeLegacyIpcFormat, MetadataVersion metadataVersion) {
+ this.write_legacy_ipc_format = writeLegacyIpcFormat;
+ this.metadataVersion = metadataVersion;
+ }
+
+ public static final IpcOption DEFAULT = new IpcOption();
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/MessageChannelReader.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/MessageChannelReader.java
new file mode 100644
index 000000000..1c7968d7f
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/MessageChannelReader.java
@@ -0,0 +1,91 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.ipc.message;
+
+import java.io.IOException;
+
+import org.apache.arrow.flatbuf.Message;
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.ipc.ReadChannel;
+
+/**
+ * Reads a sequence of messages using a ReadChannel.
+ */
+public class MessageChannelReader implements AutoCloseable {
+ protected ReadChannel in;
+ protected BufferAllocator allocator;
+
+ /**
+ * Construct a MessageReader to read streaming messages from an existing ReadChannel.
+ *
+ * @param in Channel to read messages from
+ * @param allocator BufferAllocator used to read Message body into an ArrowBuf.
+ */
+ public MessageChannelReader(ReadChannel in, BufferAllocator allocator) {
+ this.in = in;
+ this.allocator = allocator;
+ }
+
+ /**
+ * Read a message from the ReadChannel and return a MessageResult containing the Message
+ * metadata and optional message body data. Once the end-of-stream has been reached, a null
+ * value will be returned. If the message has no body, then MessageResult.getBodyBuffer()
+ * returns null.
+ *
+ * @return MessageResult or null if reached end-of-stream
+ * @throws IOException on error
+ */
+ public MessageResult readNext() throws IOException {
+
+ // Read the flatbuf message and check for end-of-stream
+ MessageMetadataResult result = MessageSerializer.readMessage(in);
+ if (result == null) {
+ return null;
+ }
+ Message message = result.getMessage();
+ ArrowBuf bodyBuffer = null;
+
+ // Read message body data if defined in message
+ if (result.messageHasBody()) {
+ long bodyLength = result.getMessageBodyLength();
+ bodyBuffer = MessageSerializer.readMessageBody(in, bodyLength, allocator);
+ }
+
+ return new MessageResult(message, bodyBuffer);
+ }
+
+ /**
+ * Get the number of bytes read from the ReadChannel.
+ *
+ * @return number of bytes
+ */
+ public long bytesRead() {
+ return in.bytesRead();
+ }
+
+ /**
+ * Close the ReadChannel.
+ *
+ * @throws IOException on error
+ */
+ @Override
+ public void close() throws IOException {
+ in.close();
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/MessageMetadataResult.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/MessageMetadataResult.java
new file mode 100644
index 000000000..e4728822d
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/MessageMetadataResult.java
@@ -0,0 +1,115 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.ipc.message;
+
+import java.nio.ByteBuffer;
+
+import org.apache.arrow.flatbuf.Message;
+
+/**
+ * Class to hold resulting Message metadata and buffer containing the serialized Flatbuffer
+ * message when reading messages from a ReadChannel. This handles Message metadata only and
+ * does not include the message body data, which should be subsequently read into an ArrowBuf.
+ */
+public class MessageMetadataResult {
+
+ /**
+ * Construct a container to hold a deserialized Message metadata, and buffer
+ * with the serialized Message as read from a ReadChannel.
+ *
+ * @param messageLength the length of the serialized Flatbuffer message in bytes
+ * @param messageBuffer contains the serialized Flatbuffer Message metadata
+ * @param message the deserialized Flatbuffer Message metadata description
+ */
+ MessageMetadataResult(int messageLength, ByteBuffer messageBuffer, Message message) {
+ this.messageLength = messageLength;
+ this.messageBuffer = messageBuffer;
+ this.message = message;
+ }
+
+ /**
+ * Creates a new {@link MessageMetadataResult} by parsing it from the beginning of the buffer.
+ *
+ * @param messageLength The length of the serialized flatbuffer message in bytes (might not be equal to the buffer
+ * size).
+ */
+ public static MessageMetadataResult create(ByteBuffer buffer, int messageLength) {
+ return new MessageMetadataResult(messageLength, buffer, Message.getRootAsMessage(buffer));
+ }
+
+ /**
+ * Get the length of the message metadata in bytes, not including the body length.
+ *
+ * @return number of bytes in the message metadata buffer.
+ */
+ public int getMessageLength() {
+ return messageLength;
+ }
+
+ /**
+ * Get the buffer containing the raw message metadata bytes, not including the message body data.
+ *
+ * @return buffer containing the message metadata.
+ */
+ public ByteBuffer getMessageBuffer() {
+ return messageBuffer;
+ }
+
+ /**
+ * Returns the bytes remaining in the buffer after parsing the message from it.
+ */
+ public int bytesAfterMessage() {
+ return message.getByteBuffer().remaining();
+ }
+
+ public byte headerType() {
+ return message.headerType();
+ }
+
+ /**
+ * Check if the message is followed by a body. This will be true if the message has a body
+ * length > 0, which indicates that a message body needs to be read from the input source.
+ *
+ * @return true if message has a defined body
+ */
+ public boolean messageHasBody() {
+ return message.bodyLength() > 0;
+ }
+
+ /**
+ * Get the length of the message body.
+ *
+ * @return number of bytes of the message body
+ */
+ public long getMessageBodyLength() {
+ return message.bodyLength();
+ }
+
+ /**
+ * Get the realized flatbuf Message metadata description.
+ *
+ * @return Message metadata
+ */
+ public Message getMessage() {
+ return message;
+ }
+
+ private final int messageLength;
+ private final ByteBuffer messageBuffer;
+ private final Message message;
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/MessageResult.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/MessageResult.java
new file mode 100644
index 000000000..591fbf106
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/MessageResult.java
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.ipc.message;
+
+import org.apache.arrow.flatbuf.Message;
+import org.apache.arrow.memory.ArrowBuf;
+
+/**
+ * Class to hold the Message metadata and body data when reading messages through a
+ * MessageChannelReader.
+ */
+public class MessageResult {
+
+ /**
+ * Construct with a valid Message metadata and optional ArrowBuf containing message body
+ * data, if any.
+ *
+ * @param message Deserialized Flatbuffer Message metadata description
+ * @param bodyBuffer Optional ArrowBuf containing message body data, null if message has no body
+ */
+ MessageResult(Message message, ArrowBuf bodyBuffer) {
+ this.message = message;
+ this.bodyBuffer = bodyBuffer;
+ }
+
+ /**
+ * Get the Message metadata.
+ *
+ * @return the Flatbuffer Message metadata
+ */
+ public Message getMessage() {
+ return message;
+ }
+
+ /**
+ * Get the message body data.
+ *
+ * @return an ArrowBuf containing the message body data or null if the message has no body
+ */
+ public ArrowBuf getBodyBuffer() {
+ return bodyBuffer;
+ }
+
+ private final Message message;
+ private final ArrowBuf bodyBuffer;
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/MessageSerializer.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/MessageSerializer.java
new file mode 100644
index 000000000..6597e0302
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/MessageSerializer.java
@@ -0,0 +1,736 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.ipc.message;
+
+import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.arrow.flatbuf.Buffer;
+import org.apache.arrow.flatbuf.DictionaryBatch;
+import org.apache.arrow.flatbuf.FieldNode;
+import org.apache.arrow.flatbuf.Message;
+import org.apache.arrow.flatbuf.MessageHeader;
+import org.apache.arrow.flatbuf.MetadataVersion;
+import org.apache.arrow.flatbuf.RecordBatch;
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.util.Preconditions;
+import org.apache.arrow.vector.compression.NoCompressionCodec;
+import org.apache.arrow.vector.ipc.ReadChannel;
+import org.apache.arrow.vector.ipc.WriteChannel;
+import org.apache.arrow.vector.types.pojo.Schema;
+
+import com.google.flatbuffers.FlatBufferBuilder;
+
+/**
+ * Utility class for serializing Messages. Messages are all serialized a similar way.
+ * 1. 4 byte little endian message header prefix
+ * 2. FB serialized Message: This includes it the body length, which is the serialized
+ * body and the type of the message.
+ * 3. Serialized message.
+ *
+ * <p>For schema messages, the serialization is simply the FB serialized Schema.
+ *
+ * <p>For RecordBatch messages the serialization is:
+ * 1. 4 byte little endian batch metadata header
+ * 2. FB serialized RowBatch
+ * 3. Padding to align to 8 byte boundary.
+ * 4. serialized RowBatch buffers.
+ */
+public class MessageSerializer {
+
+ // This 0xFFFFFFFF value is the first 4 bytes of a valid IPC message
+ public static final int IPC_CONTINUATION_TOKEN = -1;
+
+ /**
+ * Convert an array of 4 bytes in little-endian to an native-endian i32 value.
+ *
+ * @param bytes byte array with minimum length of 4 in little-endian
+ * @return converted an native-endian 32-bit integer
+ */
+ public static int bytesToInt(byte[] bytes) {
+ return ((bytes[3] & 255) << 24) +
+ ((bytes[2] & 255) << 16) +
+ ((bytes[1] & 255) << 8) +
+ ((bytes[0] & 255));
+ }
+
+ /**
+ * Convert an integer to a little endian 4 byte array.
+ *
+ * @param value integer value input
+ * @param bytes existing byte array with minimum length of 4 to contain the conversion output
+ */
+ public static void intToBytes(int value, byte[] bytes) {
+ bytes[3] = (byte) (value >>> 24);
+ bytes[2] = (byte) (value >>> 16);
+ bytes[1] = (byte) (value >>> 8);
+ bytes[0] = (byte) (value);
+ }
+
+ /**
+ * Convert a long to a little-endian 8 byte array.
+ *
+ * @param value long value input
+ * @param bytes existing byte array with minimum length of 8 to contain the conversion output
+ */
+ public static void longToBytes(long value, byte[] bytes) {
+ bytes[7] = (byte) (value >>> 56);
+ bytes[6] = (byte) (value >>> 48);
+ bytes[5] = (byte) (value >>> 40);
+ bytes[4] = (byte) (value >>> 32);
+ bytes[3] = (byte) (value >>> 24);
+ bytes[2] = (byte) (value >>> 16);
+ bytes[1] = (byte) (value >>> 8);
+ bytes[0] = (byte) (value);
+ }
+
+ public static int writeMessageBuffer(WriteChannel out, int messageLength, ByteBuffer messageBuffer)
+ throws IOException {
+ return writeMessageBuffer(out, messageLength, messageBuffer, IpcOption.DEFAULT);
+ }
+
+ /**
+ * Write the serialized Message metadata, prefixed by the length, to the output Channel. This
+ * ensures that it aligns to an 8 byte boundary and will adjust the message length to include
+ * any padding used for alignment.
+ *
+ * @param out Output Channel
+ * @param messageLength Number of bytes in the message buffer, written as little Endian prefix
+ * @param messageBuffer Message metadata buffer to be written, this does not include any
+ * message body data which should be subsequently written to the Channel
+ * @param option IPC write options
+ * @return Number of bytes written
+ * @throws IOException on error
+ */
+ public static int writeMessageBuffer(WriteChannel out, int messageLength, ByteBuffer messageBuffer, IpcOption option)
+ throws IOException {
+
+ // if write the pre-0.15.0 encapsulated IPC message format consisting of a 4-byte prefix instead of 8 byte
+ int prefixSize = option.write_legacy_ipc_format ? 4 : 8;
+
+ // ensure that message aligns to 8 byte padding - prefix_size bytes, then message body
+ if ((messageLength + prefixSize ) % 8 != 0) {
+ messageLength += 8 - (messageLength + prefixSize) % 8;
+ }
+ if (!option.write_legacy_ipc_format) {
+ out.writeIntLittleEndian(IPC_CONTINUATION_TOKEN);
+ }
+ out.writeIntLittleEndian(messageLength);
+ out.write(messageBuffer);
+ out.align();
+
+ // any bytes written are already captured by our size modification above
+ return messageLength + prefixSize;
+ }
+
+ /**
+ * Serialize a schema object.
+ */
+ public static long serialize(WriteChannel out, Schema schema) throws IOException {
+ return serialize(out, schema, IpcOption.DEFAULT);
+ }
+
+ /**
+ * Serialize a schema object.
+ *
+ * @param out where to write the schema
+ * @param schema the object to serialize to out
+ * @return the number of bytes written
+ * @throws IOException if something went wrong
+ */
+ public static long serialize(WriteChannel out, Schema schema, IpcOption option) throws IOException {
+ long start = out.getCurrentPosition();
+ Preconditions.checkArgument(start % 8 == 0, "out is not aligned");
+
+ ByteBuffer serializedMessage = serializeMetadata(schema, option);
+
+ int messageLength = serializedMessage.remaining();
+
+ int bytesWritten = writeMessageBuffer(out, messageLength, serializedMessage, option);
+ Preconditions.checkArgument(bytesWritten % 8 == 0, "out is not aligned");
+ return bytesWritten;
+ }
+
+ /**
+ * Returns the serialized flatbuffer bytes of the schema wrapped in a message table.
+ */
+ @Deprecated
+ public static ByteBuffer serializeMetadata(Schema schema) {
+ return serializeMetadata(schema, IpcOption.DEFAULT);
+ }
+
+ /**
+ * Returns the serialized flatbuffer bytes of the schema wrapped in a message table.
+ */
+ public static ByteBuffer serializeMetadata(Schema schema, IpcOption writeOption) {
+ FlatBufferBuilder builder = new FlatBufferBuilder();
+ int schemaOffset = schema.getSchema(builder);
+ return MessageSerializer.serializeMessage(builder, org.apache.arrow.flatbuf.MessageHeader.Schema, schemaOffset, 0,
+ writeOption);
+ }
+
+ /**
+ * Deserializes an Arrow Schema object from a schema message. Format is from serialize().
+ *
+ * @param schemaMessage a Message of type MessageHeader.Schema
+ * @return the deserialized Arrow Schema
+ */
+ public static Schema deserializeSchema(Message schemaMessage) {
+ Preconditions.checkArgument(schemaMessage.headerType() == MessageHeader.Schema,
+ "Expected schema but result was: %s", schemaMessage.headerType());
+ return Schema.convertSchema((org.apache.arrow.flatbuf.Schema)
+ schemaMessage.header(new org.apache.arrow.flatbuf.Schema()));
+ }
+
+ /**
+ * Deserializes an Arrow Schema read from the input channel. Format is from serialize().
+ *
+ * @param in the channel to deserialize from
+ * @return the deserialized Arrow Schema
+ * @throws IOException if something went wrong
+ */
+ public static Schema deserializeSchema(ReadChannel in) throws IOException {
+ MessageMetadataResult result = readMessage(in);
+ if (result == null) {
+ throw new IOException("Unexpected end of input when reading Schema");
+ }
+ if (result.getMessage().headerType() != MessageHeader.Schema) {
+ throw new IOException("Expected schema but header was " + result.getMessage().headerType());
+ }
+ return deserializeSchema(result);
+ }
+
+ /**
+ * Deserializes an Arrow Schema object from a {@link MessageMetadataResult}. Format is from serialize().
+ *
+ * @param message a Message of type MessageHeader.Schema
+ * @return the deserialized Arrow Schema
+ */
+ public static Schema deserializeSchema(MessageMetadataResult message) {
+ return deserializeSchema(message.getMessage());
+ }
+
+ /**
+ * Serializes an ArrowRecordBatch. Returns the offset and length of the written batch.
+ */
+ public static ArrowBlock serialize(WriteChannel out, ArrowRecordBatch batch) throws IOException {
+ return serialize(out, batch, IpcOption.DEFAULT);
+ }
+
+ /**
+ * Serializes an ArrowRecordBatch. Returns the offset and length of the written batch.
+ *
+ * @param out where to write the batch
+ * @param batch the object to serialize to out
+ * @return the serialized block metadata
+ * @throws IOException if something went wrong
+ */
+ public static ArrowBlock serialize(WriteChannel out, ArrowRecordBatch batch, IpcOption option) throws IOException {
+
+ long start = out.getCurrentPosition();
+ long bodyLength = batch.computeBodyLength();
+ Preconditions.checkArgument(bodyLength % 8 == 0, "batch is not aligned");
+
+ ByteBuffer serializedMessage = serializeMetadata(batch, option);
+
+ int metadataLength = serializedMessage.remaining();
+
+ int prefixSize = 4;
+ if (!option.write_legacy_ipc_format) {
+ out.writeIntLittleEndian(IPC_CONTINUATION_TOKEN);
+ prefixSize = 8;
+ }
+
+ // calculate alignment bytes so that metadata length points to the correct location after alignment
+ int padding = (int) ((start + metadataLength + prefixSize) % 8);
+ if (padding != 0) {
+ metadataLength += (8 - padding);
+ }
+
+ out.writeIntLittleEndian(metadataLength);
+ out.write(serializedMessage);
+
+ // Align the output to 8 byte boundary.
+ out.align();
+
+ long bufferLength = writeBatchBuffers(out, batch);
+ Preconditions.checkArgument(bufferLength % 8 == 0, "out is not aligned");
+
+ // Metadata size in the Block account for the size prefix
+ return new ArrowBlock(start, metadataLength + prefixSize, bufferLength);
+ }
+
+ /**
+ * Write the Arrow buffers of the record batch to the output channel.
+ *
+ * @param out the output channel to write the buffers to
+ * @param batch an ArrowRecordBatch containing buffers to be written
+ * @return the number of bytes written
+ * @throws IOException on error
+ */
+ public static long writeBatchBuffers(WriteChannel out, ArrowRecordBatch batch) throws IOException {
+ long bufferStart = out.getCurrentPosition();
+ List<ArrowBuf> buffers = batch.getBuffers();
+ List<ArrowBuffer> buffersLayout = batch.getBuffersLayout();
+
+ for (int i = 0; i < buffers.size(); i++) {
+ ArrowBuf buffer = buffers.get(i);
+ ArrowBuffer layout = buffersLayout.get(i);
+ long startPosition = bufferStart + layout.getOffset();
+ if (startPosition != out.getCurrentPosition()) {
+ out.writeZeros(startPosition - out.getCurrentPosition());
+ }
+ out.write(buffer);
+ if (out.getCurrentPosition() != startPosition + layout.getSize()) {
+ throw new IllegalStateException("wrong buffer size: " + out.getCurrentPosition() +
+ " != " + startPosition + layout.getSize());
+ }
+ }
+ out.align();
+ return out.getCurrentPosition() - bufferStart;
+ }
+
+ /**
+ * Returns the serialized form of {@link RecordBatch} wrapped in a {@link org.apache.arrow.flatbuf.Message}.
+ */
+ @Deprecated
+ public static ByteBuffer serializeMetadata(ArrowMessage message) {
+ return serializeMetadata(message, IpcOption.DEFAULT);
+ }
+
+ /**
+ * Returns the serialized form of {@link RecordBatch} wrapped in a {@link org.apache.arrow.flatbuf.Message}.
+ */
+ public static ByteBuffer serializeMetadata(ArrowMessage message, IpcOption writeOption) {
+ FlatBufferBuilder builder = new FlatBufferBuilder();
+ int batchOffset = message.writeTo(builder);
+ return serializeMessage(builder, message.getMessageType(), batchOffset,
+ message.computeBodyLength(), writeOption);
+ }
+
+ /**
+ * Deserializes an ArrowRecordBatch from a record batch message and data in an ArrowBuf.
+ *
+ * @param recordBatchMessage a Message of type MessageHeader.RecordBatch
+ * @param bodyBuffer Arrow buffer containing the RecordBatch data
+ * @return the deserialized ArrowRecordBatch
+ * @throws IOException if something went wrong
+ */
+ public static ArrowRecordBatch deserializeRecordBatch(Message recordBatchMessage, ArrowBuf bodyBuffer)
+ throws IOException {
+ RecordBatch recordBatchFB = (RecordBatch) recordBatchMessage.header(new RecordBatch());
+ return deserializeRecordBatch(recordBatchFB, bodyBuffer);
+ }
+
+ /**
+ * Deserializes an ArrowRecordBatch read from the input channel. This uses the given allocator
+ * to create an ArrowBuf for the batch body data.
+ *
+ * @param in Channel to read a RecordBatch message and data from
+ * @param allocator BufferAllocator to allocate an Arrow buffer to read message body data
+ * @return the deserialized ArrowRecordBatch
+ * @throws IOException on error
+ */
+ public static ArrowRecordBatch deserializeRecordBatch(ReadChannel in, BufferAllocator allocator) throws IOException {
+ MessageMetadataResult result = readMessage(in);
+ if (result == null) {
+ throw new IOException("Unexpected end of input when reading a RecordBatch");
+ }
+ if (result.getMessage().headerType() != MessageHeader.RecordBatch) {
+ throw new IOException("Expected RecordBatch but header was " + result.getMessage().headerType());
+ }
+ long bodyLength = result.getMessageBodyLength();
+ ArrowBuf bodyBuffer = readMessageBody(in, bodyLength, allocator);
+ return deserializeRecordBatch(result.getMessage(), bodyBuffer);
+ }
+
+ /**
+ * Deserializes an ArrowRecordBatch knowing the size of the entire message up front. This
+ * minimizes the number of reads to the underlying stream.
+ *
+ * @param in the channel to deserialize from
+ * @param block the object to deserialize to
+ * @param alloc to allocate buffers
+ * @return the deserialized ArrowRecordBatch
+ * @throws IOException if something went wrong
+ */
+ public static ArrowRecordBatch deserializeRecordBatch(ReadChannel in, ArrowBlock block, BufferAllocator alloc)
+ throws IOException {
+ // Metadata length contains prefix_size bytes plus byte padding
+ long totalLen = block.getMetadataLength() + block.getBodyLength();
+
+ ArrowBuf buffer = alloc.buffer(totalLen);
+ if (in.readFully(buffer, totalLen) != totalLen) {
+ throw new IOException("Unexpected end of input trying to read batch.");
+ }
+
+ int prefixSize = buffer.getInt(0) == IPC_CONTINUATION_TOKEN ? 8 : 4;
+
+ ArrowBuf metadataBuffer = buffer.slice(prefixSize, block.getMetadataLength() - prefixSize);
+
+ Message messageFB =
+ Message.getRootAsMessage(metadataBuffer.nioBuffer().asReadOnlyBuffer());
+
+ RecordBatch recordBatchFB = (RecordBatch) messageFB.header(new RecordBatch());
+
+ // Now read the body
+ final ArrowBuf body = buffer.slice(block.getMetadataLength(),
+ totalLen - block.getMetadataLength());
+ return deserializeRecordBatch(recordBatchFB, body);
+ }
+
+ /**
+ * Deserializes an ArrowRecordBatch given the Flatbuffer metadata and in-memory body.
+ *
+ * @param recordBatchFB Deserialized FlatBuffer record batch
+ * @param body Read body of the record batch
+ * @return ArrowRecordBatch from metadata and in-memory body
+ * @throws IOException on error
+ */
+ public static ArrowRecordBatch deserializeRecordBatch(RecordBatch recordBatchFB, ArrowBuf body) throws IOException {
+ // Now read the body
+ int nodesLength = recordBatchFB.nodesLength();
+ List<ArrowFieldNode> nodes = new ArrayList<>();
+ for (int i = 0; i < nodesLength; ++i) {
+ FieldNode node = recordBatchFB.nodes(i);
+ if ((int) node.length() != node.length() ||
+ (int) node.nullCount() != node.nullCount()) {
+ throw new IOException("Cannot currently deserialize record batches with " +
+ "node length larger than INT_MAX records.");
+ }
+ nodes.add(new ArrowFieldNode(node.length(), node.nullCount()));
+ }
+ List<ArrowBuf> buffers = new ArrayList<>();
+ for (int i = 0; i < recordBatchFB.buffersLength(); ++i) {
+ Buffer bufferFB = recordBatchFB.buffers(i);
+ ArrowBuf vectorBuffer = body.slice(bufferFB.offset(), bufferFB.length());
+ buffers.add(vectorBuffer);
+ }
+
+ ArrowBodyCompression bodyCompression = recordBatchFB.compression() == null ?
+ NoCompressionCodec.DEFAULT_BODY_COMPRESSION
+ : new ArrowBodyCompression(recordBatchFB.compression().codec(), recordBatchFB.compression().method());
+
+ if ((int) recordBatchFB.length() != recordBatchFB.length()) {
+ throw new IOException("Cannot currently deserialize record batches with more than INT_MAX records.");
+ }
+ ArrowRecordBatch arrowRecordBatch =
+ new ArrowRecordBatch(checkedCastToInt(recordBatchFB.length()), nodes, buffers, bodyCompression);
+ body.getReferenceManager().release();
+ return arrowRecordBatch;
+ }
+
+ /**
+ * Reads a record batch based on the metadata in serializedMessage and the underlying data buffer.
+ */
+ public static ArrowRecordBatch deserializeRecordBatch(MessageMetadataResult serializedMessage,
+ ArrowBuf underlying) throws
+ IOException {
+ return deserializeRecordBatch(serializedMessage.getMessage(), underlying);
+ }
+
+ public static ArrowBlock serialize(WriteChannel out, ArrowDictionaryBatch batch) throws IOException {
+ return serialize(out, batch, IpcOption.DEFAULT);
+ }
+
+ /**
+ * Serializes a dictionary ArrowRecordBatch. Returns the offset and length of the written batch.
+ *
+ * @param out where to serialize
+ * @param batch the batch to serialize
+ * @param option options for IPC
+ * @return the metadata of the serialized block
+ * @throws IOException if something went wrong
+ */
+ public static ArrowBlock serialize(WriteChannel out, ArrowDictionaryBatch batch, IpcOption option)
+ throws IOException {
+ long start = out.getCurrentPosition();
+
+ long bodyLength = batch.computeBodyLength();
+ Preconditions.checkArgument(bodyLength % 8 == 0, "batch is not aligned");
+
+ ByteBuffer serializedMessage = serializeMetadata(batch, option);
+
+ int metadataLength = serializedMessage.remaining();
+
+ int prefixSize = 4;
+ if (!option.write_legacy_ipc_format) {
+ out.writeIntLittleEndian(IPC_CONTINUATION_TOKEN);
+ prefixSize = 8;
+ }
+
+ // calculate alignment bytes so that metadata length points to the correct location after alignment
+ int padding = (int) ((start + metadataLength + prefixSize) % 8);
+ if (padding != 0) {
+ metadataLength += (8 - padding);
+ }
+
+ out.writeIntLittleEndian(metadataLength);
+ out.write(serializedMessage);
+
+ // Align the output to 8 byte boundary.
+ out.align();
+
+ // write the embedded record batch
+ long bufferLength = writeBatchBuffers(out, batch.getDictionary());
+ Preconditions.checkArgument(bufferLength % 8 == 0, "out is not aligned");
+
+ // Metadata size in the Block account for the size prefix
+ return new ArrowBlock(start, metadataLength + prefixSize, bufferLength);
+ }
+
+ /**
+ * Deserializes an ArrowDictionaryBatch from a dictionary batch Message and data in an ArrowBuf.
+ *
+ * @param message a message of type MessageHeader.DictionaryBatch
+ * @param bodyBuffer Arrow buffer containing the DictionaryBatch data
+ * of type MessageHeader.DictionaryBatch
+ * @return the deserialized ArrowDictionaryBatch
+ * @throws IOException if something went wrong
+ */
+ public static ArrowDictionaryBatch deserializeDictionaryBatch(Message message, ArrowBuf bodyBuffer)
+ throws IOException {
+ DictionaryBatch dictionaryBatchFB = (DictionaryBatch) message.header(new DictionaryBatch());
+ ArrowRecordBatch recordBatch = deserializeRecordBatch(dictionaryBatchFB.data(), bodyBuffer);
+ return new ArrowDictionaryBatch(dictionaryBatchFB.id(), recordBatch, dictionaryBatchFB.isDelta());
+ }
+
+ /**
+ * Deserializes an ArrowDictionaryBatch from a dictionary batch Message and data in an ArrowBuf.
+ *
+ * @param message a message of type MessageHeader.DictionaryBatch
+ * @param bodyBuffer Arrow buffer containing the DictionaryBatch data
+ * of type MessageHeader.DictionaryBatch
+ * @return the deserialized ArrowDictionaryBatch
+ * @throws IOException if something went wrong
+ */
+ public static ArrowDictionaryBatch deserializeDictionaryBatch(MessageMetadataResult message, ArrowBuf bodyBuffer)
+ throws IOException {
+ return deserializeDictionaryBatch(message.getMessage(), bodyBuffer);
+ }
+
+ /**
+ * Deserializes an ArrowDictionaryBatch read from the input channel. This uses the given allocator
+ * to create an ArrowBuf for the batch body data.
+ *
+ * @param in Channel to read a DictionaryBatch message and data from
+ * @param allocator BufferAllocator to allocate an Arrow buffer to read message body data
+ * @return the deserialized ArrowDictionaryBatch
+ * @throws IOException on error
+ */
+ public static ArrowDictionaryBatch deserializeDictionaryBatch(ReadChannel in, BufferAllocator allocator)
+ throws IOException {
+ MessageMetadataResult result = readMessage(in);
+ if (result == null) {
+ throw new IOException("Unexpected end of input when reading a DictionaryBatch");
+ }
+ if (result.getMessage().headerType() != MessageHeader.DictionaryBatch) {
+ throw new IOException("Expected DictionaryBatch but header was " + result.getMessage().headerType());
+ }
+ long bodyLength = result.getMessageBodyLength();
+ ArrowBuf bodyBuffer = readMessageBody(in, bodyLength, allocator);
+ return deserializeDictionaryBatch(result.getMessage(), bodyBuffer);
+ }
+
+ /**
+ * Deserializes a DictionaryBatch knowing the size of the entire message up front. This
+ * minimizes the number of reads to the underlying stream.
+ *
+ * @param in where to read from
+ * @param block block metadata for deserializing
+ * @param alloc to allocate new buffers
+ * @return the deserialized ArrowDictionaryBatch
+ * @throws IOException if something went wrong
+ */
+ public static ArrowDictionaryBatch deserializeDictionaryBatch(
+ ReadChannel in,
+ ArrowBlock block,
+ BufferAllocator alloc) throws IOException {
+ // Metadata length contains integer prefix plus byte padding
+ long totalLen = block.getMetadataLength() + block.getBodyLength();
+
+ ArrowBuf buffer = alloc.buffer(totalLen);
+ if (in.readFully(buffer, totalLen) != totalLen) {
+ throw new IOException("Unexpected end of input trying to read batch.");
+ }
+
+ int prefixSize = buffer.getInt(0) == IPC_CONTINUATION_TOKEN ? 8 : 4;
+
+ ArrowBuf metadataBuffer = buffer.slice(prefixSize, block.getMetadataLength() - prefixSize);
+
+ Message messageFB =
+ Message.getRootAsMessage(metadataBuffer.nioBuffer().asReadOnlyBuffer());
+
+ DictionaryBatch dictionaryBatchFB = (DictionaryBatch) messageFB.header(new DictionaryBatch());
+
+ // Now read the body
+ final ArrowBuf body = buffer.slice(block.getMetadataLength(),
+ totalLen - block.getMetadataLength());
+ ArrowRecordBatch recordBatch = deserializeRecordBatch(dictionaryBatchFB.data(), body);
+ return new ArrowDictionaryBatch(dictionaryBatchFB.id(), recordBatch, dictionaryBatchFB.isDelta());
+ }
+
+ /**
+ * Deserialize a message that is either an ArrowDictionaryBatch or ArrowRecordBatch.
+ *
+ * @param reader MessageChannelReader to read a sequence of messages from a ReadChannel
+ * @return The deserialized record batch
+ * @throws IOException if the message is not an ArrowDictionaryBatch or ArrowRecordBatch
+ */
+ public static ArrowMessage deserializeMessageBatch(MessageChannelReader reader) throws IOException {
+ MessageResult result = reader.readNext();
+ if (result == null) {
+ return null;
+ } else if (result.getMessage().bodyLength() > Integer.MAX_VALUE) {
+ throw new IOException("Cannot currently deserialize record batches over 2GB");
+ }
+
+ if (result.getMessage().version() != MetadataVersion.V4 &&
+ result.getMessage().version() != MetadataVersion.V5) {
+ throw new IOException("Received metadata with an incompatible version number: " + result.getMessage().version());
+ }
+
+ switch (result.getMessage().headerType()) {
+ case MessageHeader.RecordBatch:
+ return deserializeRecordBatch(result.getMessage(), result.getBodyBuffer());
+ case MessageHeader.DictionaryBatch:
+ return deserializeDictionaryBatch(result.getMessage(), result.getBodyBuffer());
+ default:
+ throw new IOException("Unexpected message header type " + result.getMessage().headerType());
+ }
+ }
+
+ /**
+ * Deserialize a message that is either an ArrowDictionaryBatch or ArrowRecordBatch.
+ *
+ * @param in ReadChannel to read messages from
+ * @param alloc Allocator for message data
+ * @return The deserialized record batch
+ * @throws IOException if the message is not an ArrowDictionaryBatch or ArrowRecordBatch
+ */
+ public static ArrowMessage deserializeMessageBatch(ReadChannel in, BufferAllocator alloc) throws IOException {
+ return deserializeMessageBatch(new MessageChannelReader(in, alloc));
+ }
+
+ @Deprecated
+ public static ByteBuffer serializeMessage(
+ FlatBufferBuilder builder,
+ byte headerType,
+ int headerOffset,
+ long bodyLength) {
+ return serializeMessage(builder, headerType, headerOffset, bodyLength, IpcOption.DEFAULT);
+ }
+
+ /**
+ * Serializes a message header.
+ *
+ * @param builder to write the flatbuf to
+ * @param headerType headerType field
+ * @param headerOffset header offset field
+ * @param bodyLength body length field
+ * @param writeOption IPC write options
+ * @return the corresponding ByteBuffer
+ */
+ public static ByteBuffer serializeMessage(
+ FlatBufferBuilder builder,
+ byte headerType,
+ int headerOffset,
+ long bodyLength,
+ IpcOption writeOption) {
+ Message.startMessage(builder);
+ Message.addHeaderType(builder, headerType);
+ Message.addHeader(builder, headerOffset);
+ Message.addVersion(builder, writeOption.metadataVersion.toFlatbufID());
+ Message.addBodyLength(builder, bodyLength);
+ builder.finish(Message.endMessage(builder));
+ return builder.dataBuffer();
+ }
+
+ /**
+ * Read a Message from the input channel and return a MessageMetadataResult that contains the
+ * Message metadata, buffer containing the serialized Message metadata as read, and length of the
+ * Message in bytes. Returns null if the end-of-stream has been reached.
+ *
+ * @param in ReadChannel to read messages from
+ * @return MessageMetadataResult with deserialized Message metadata and message information if
+ * a valid Message was read, or null if end-of-stream
+ * @throws IOException on error
+ */
+ public static MessageMetadataResult readMessage(ReadChannel in) throws IOException {
+
+ // Read the message size. There is an i32 little endian prefix.
+ ByteBuffer buffer = ByteBuffer.allocate(4);
+ if (in.readFully(buffer) == 4) {
+
+ int messageLength = MessageSerializer.bytesToInt(buffer.array());
+ if (messageLength == IPC_CONTINUATION_TOKEN) {
+ buffer.clear();
+ // ARROW-6313, if the first 4 bytes are continuation message, read the next 4 for the length
+ if (in.readFully(buffer) == 4) {
+ messageLength = MessageSerializer.bytesToInt(buffer.array());
+ }
+ }
+
+ // Length of 0 indicates end of stream
+ if (messageLength != 0) {
+
+ // Read the message into the buffer.
+ ByteBuffer messageBuffer = ByteBuffer.allocate(messageLength);
+ if (in.readFully(messageBuffer) != messageLength) {
+ throw new IOException(
+ "Unexpected end of stream trying to read message.");
+ }
+ messageBuffer.rewind();
+
+ // Load the message.
+ Message message = Message.getRootAsMessage(messageBuffer);
+
+ return new MessageMetadataResult(messageLength, messageBuffer, message);
+ }
+ }
+ return null;
+ }
+
+ /**
+ * Read a Message body from the in channel into an ArrowBuf.
+ *
+ * @param in ReadChannel to read message body from
+ * @param bodyLength Length in bytes of the message body to read
+ * @param allocator Allocate the ArrowBuf to contain message body data
+ * @return an ArrowBuf containing the message body data
+ * @throws IOException on error
+ */
+ public static ArrowBuf readMessageBody(ReadChannel in, long bodyLength,
+ BufferAllocator allocator) throws IOException {
+ ArrowBuf bodyBuffer = allocator.buffer(bodyLength);
+ try {
+ if (in.readFully(bodyBuffer, bodyLength) != bodyLength) {
+ throw new IOException("Unexpected end of input trying to read batch.");
+ }
+ } catch (RuntimeException | IOException e) {
+ bodyBuffer.close();
+ throw e;
+ }
+ return bodyBuffer;
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/types/DateUnit.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/types/DateUnit.java
new file mode 100644
index 000000000..8fce12e83
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/types/DateUnit.java
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.types;
+
+/**
+ * Resolutions that dates can be stored at.
+ */
+public enum DateUnit {
+ /** Days since epoch. */
+ DAY(org.apache.arrow.flatbuf.DateUnit.DAY),
+ /** Milliseconds since epoch. */
+ MILLISECOND(org.apache.arrow.flatbuf.DateUnit.MILLISECOND);
+
+ private static final DateUnit[] valuesByFlatbufId = new DateUnit[DateUnit.values().length];
+
+ static {
+ for (DateUnit v : DateUnit.values()) {
+ valuesByFlatbufId[v.flatbufID] = v;
+ }
+ }
+
+ private final short flatbufID;
+
+ DateUnit(short flatbufID) {
+ this.flatbufID = flatbufID;
+ }
+
+ public short getFlatbufID() {
+ return flatbufID;
+ }
+
+ public static DateUnit fromFlatbufID(short id) {
+ return valuesByFlatbufId[id];
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/types/FloatingPointPrecision.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/types/FloatingPointPrecision.java
new file mode 100644
index 000000000..c52fc1243
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/types/FloatingPointPrecision.java
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.types;
+
+import org.apache.arrow.flatbuf.Precision;
+
+/**
+ * Precisions of primitive floating point numbers.
+ */
+public enum FloatingPointPrecision {
+ /** 16-bit (not a standard java type). */
+ HALF(Precision.HALF),
+ /** 32-bit (i.e. float in java). */
+ SINGLE(Precision.SINGLE),
+ /** 64-bit (i.e. double in java). */
+ DOUBLE(Precision.DOUBLE);
+
+ private static final FloatingPointPrecision[] valuesByFlatbufId =
+ new FloatingPointPrecision[FloatingPointPrecision.values().length];
+
+ static {
+ for (FloatingPointPrecision v : FloatingPointPrecision.values()) {
+ valuesByFlatbufId[v.flatbufID] = v;
+ }
+ }
+
+ private short flatbufID;
+
+ private FloatingPointPrecision(short flatbufID) {
+ this.flatbufID = flatbufID;
+ }
+
+ public short getFlatbufID() {
+ return flatbufID;
+ }
+
+ public static FloatingPointPrecision fromFlatbufID(short id) {
+ return valuesByFlatbufId[id];
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/types/IntervalUnit.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/types/IntervalUnit.java
new file mode 100644
index 000000000..1b17240d0
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/types/IntervalUnit.java
@@ -0,0 +1,52 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.types;
+
+/**
+ * Resolutions for Interval Vectors.
+ */
+public enum IntervalUnit {
+ /** Values are stored as number of months (which can be converted into years and months via division). */
+ YEAR_MONTH(org.apache.arrow.flatbuf.IntervalUnit.YEAR_MONTH),
+ /** Values are stored as some number of days and some number of milliseconds within that day. */
+ DAY_TIME(org.apache.arrow.flatbuf.IntervalUnit.DAY_TIME),
+ /** Values are stored as number of months, days and nanoseconds. */
+ MONTH_DAY_NANO(org.apache.arrow.flatbuf.IntervalUnit.MONTH_DAY_NANO);
+
+ private static final IntervalUnit[] valuesByFlatbufId = new IntervalUnit[IntervalUnit.values().length];
+
+ static {
+ for (IntervalUnit v : IntervalUnit.values()) {
+ valuesByFlatbufId[v.flatbufID] = v;
+ }
+ }
+
+ private short flatbufID;
+
+ private IntervalUnit(short flatbufID) {
+ this.flatbufID = flatbufID;
+ }
+
+ public short getFlatbufID() {
+ return flatbufID;
+ }
+
+ public static IntervalUnit fromFlatbufID(short id) {
+ return valuesByFlatbufId[id];
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/types/MetadataVersion.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/types/MetadataVersion.java
new file mode 100644
index 000000000..a0e281960
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/types/MetadataVersion.java
@@ -0,0 +1,65 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.types;
+
+/**
+ * Metadata version for Arrow metadata.
+ */
+public enum MetadataVersion {
+ /// 0.1.0
+ V1(org.apache.arrow.flatbuf.MetadataVersion.V1),
+
+ /// 0.2.0
+ V2(org.apache.arrow.flatbuf.MetadataVersion.V2),
+
+ /// 0.3.0 to 0.7.1
+ V3(org.apache.arrow.flatbuf.MetadataVersion.V3),
+
+ /// 0.8.0 to 0.17.1
+ V4(org.apache.arrow.flatbuf.MetadataVersion.V4),
+
+ /// >= 1.0.0
+ V5(org.apache.arrow.flatbuf.MetadataVersion.V5),
+
+ ;
+
+ public static final MetadataVersion DEFAULT = V5;
+
+ private static final MetadataVersion[] valuesByFlatbufId =
+ new MetadataVersion[MetadataVersion.values().length];
+
+ static {
+ for (MetadataVersion v : MetadataVersion.values()) {
+ valuesByFlatbufId[v.flatbufID] = v;
+ }
+ }
+
+ private final short flatbufID;
+
+ MetadataVersion(short flatbufID) {
+ this.flatbufID = flatbufID;
+ }
+
+ public short toFlatbufID() {
+ return flatbufID;
+ }
+
+ public static MetadataVersion fromFlatbufID(short id) {
+ return valuesByFlatbufId[id];
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/types/TimeUnit.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/types/TimeUnit.java
new file mode 100644
index 000000000..dcaebba48
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/types/TimeUnit.java
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.types;
+
+/**
+ * Resolutions that times can be stored with.
+ */
+public enum TimeUnit {
+ SECOND(org.apache.arrow.flatbuf.TimeUnit.SECOND),
+ MILLISECOND(org.apache.arrow.flatbuf.TimeUnit.MILLISECOND),
+ MICROSECOND(org.apache.arrow.flatbuf.TimeUnit.MICROSECOND),
+ NANOSECOND(org.apache.arrow.flatbuf.TimeUnit.NANOSECOND);
+
+ private static final TimeUnit[] valuesByFlatbufId = new TimeUnit[TimeUnit.values().length];
+
+ static {
+ for (TimeUnit v : TimeUnit.values()) {
+ valuesByFlatbufId[v.flatbufID] = v;
+ }
+ }
+
+ private final short flatbufID;
+
+ TimeUnit(short flatbufID) {
+ this.flatbufID = flatbufID;
+ }
+
+ public short getFlatbufID() {
+ return flatbufID;
+ }
+
+ public static TimeUnit fromFlatbufID(short id) {
+ return valuesByFlatbufId[id];
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java
new file mode 100644
index 000000000..d4c827859
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java
@@ -0,0 +1,1016 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.types;
+
+import static org.apache.arrow.vector.types.FloatingPointPrecision.DOUBLE;
+import static org.apache.arrow.vector.types.FloatingPointPrecision.SINGLE;
+import static org.apache.arrow.vector.types.UnionMode.Dense;
+import static org.apache.arrow.vector.types.UnionMode.Sparse;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.BigIntVector;
+import org.apache.arrow.vector.BitVector;
+import org.apache.arrow.vector.DateDayVector;
+import org.apache.arrow.vector.DateMilliVector;
+import org.apache.arrow.vector.Decimal256Vector;
+import org.apache.arrow.vector.DecimalVector;
+import org.apache.arrow.vector.DurationVector;
+import org.apache.arrow.vector.ExtensionTypeVector;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.FixedSizeBinaryVector;
+import org.apache.arrow.vector.Float4Vector;
+import org.apache.arrow.vector.Float8Vector;
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.IntervalDayVector;
+import org.apache.arrow.vector.IntervalMonthDayNanoVector;
+import org.apache.arrow.vector.IntervalYearVector;
+import org.apache.arrow.vector.LargeVarBinaryVector;
+import org.apache.arrow.vector.LargeVarCharVector;
+import org.apache.arrow.vector.NullVector;
+import org.apache.arrow.vector.SmallIntVector;
+import org.apache.arrow.vector.TimeMicroVector;
+import org.apache.arrow.vector.TimeMilliVector;
+import org.apache.arrow.vector.TimeNanoVector;
+import org.apache.arrow.vector.TimeSecVector;
+import org.apache.arrow.vector.TimeStampMicroTZVector;
+import org.apache.arrow.vector.TimeStampMicroVector;
+import org.apache.arrow.vector.TimeStampMilliTZVector;
+import org.apache.arrow.vector.TimeStampMilliVector;
+import org.apache.arrow.vector.TimeStampNanoTZVector;
+import org.apache.arrow.vector.TimeStampNanoVector;
+import org.apache.arrow.vector.TimeStampSecTZVector;
+import org.apache.arrow.vector.TimeStampSecVector;
+import org.apache.arrow.vector.TinyIntVector;
+import org.apache.arrow.vector.UInt1Vector;
+import org.apache.arrow.vector.UInt2Vector;
+import org.apache.arrow.vector.UInt4Vector;
+import org.apache.arrow.vector.UInt8Vector;
+import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.VarBinaryVector;
+import org.apache.arrow.vector.VarCharVector;
+import org.apache.arrow.vector.complex.DenseUnionVector;
+import org.apache.arrow.vector.complex.FixedSizeListVector;
+import org.apache.arrow.vector.complex.LargeListVector;
+import org.apache.arrow.vector.complex.ListVector;
+import org.apache.arrow.vector.complex.MapVector;
+import org.apache.arrow.vector.complex.StructVector;
+import org.apache.arrow.vector.complex.UnionVector;
+import org.apache.arrow.vector.complex.impl.BigIntWriterImpl;
+import org.apache.arrow.vector.complex.impl.BitWriterImpl;
+import org.apache.arrow.vector.complex.impl.DateDayWriterImpl;
+import org.apache.arrow.vector.complex.impl.DateMilliWriterImpl;
+import org.apache.arrow.vector.complex.impl.Decimal256WriterImpl;
+import org.apache.arrow.vector.complex.impl.DecimalWriterImpl;
+import org.apache.arrow.vector.complex.impl.DenseUnionWriter;
+import org.apache.arrow.vector.complex.impl.DurationWriterImpl;
+import org.apache.arrow.vector.complex.impl.FixedSizeBinaryWriterImpl;
+import org.apache.arrow.vector.complex.impl.Float4WriterImpl;
+import org.apache.arrow.vector.complex.impl.Float8WriterImpl;
+import org.apache.arrow.vector.complex.impl.IntWriterImpl;
+import org.apache.arrow.vector.complex.impl.IntervalDayWriterImpl;
+import org.apache.arrow.vector.complex.impl.IntervalMonthDayNanoWriterImpl;
+import org.apache.arrow.vector.complex.impl.IntervalYearWriterImpl;
+import org.apache.arrow.vector.complex.impl.LargeVarBinaryWriterImpl;
+import org.apache.arrow.vector.complex.impl.LargeVarCharWriterImpl;
+import org.apache.arrow.vector.complex.impl.NullableStructWriter;
+import org.apache.arrow.vector.complex.impl.SmallIntWriterImpl;
+import org.apache.arrow.vector.complex.impl.TimeMicroWriterImpl;
+import org.apache.arrow.vector.complex.impl.TimeMilliWriterImpl;
+import org.apache.arrow.vector.complex.impl.TimeNanoWriterImpl;
+import org.apache.arrow.vector.complex.impl.TimeSecWriterImpl;
+import org.apache.arrow.vector.complex.impl.TimeStampMicroTZWriterImpl;
+import org.apache.arrow.vector.complex.impl.TimeStampMicroWriterImpl;
+import org.apache.arrow.vector.complex.impl.TimeStampMilliTZWriterImpl;
+import org.apache.arrow.vector.complex.impl.TimeStampMilliWriterImpl;
+import org.apache.arrow.vector.complex.impl.TimeStampNanoTZWriterImpl;
+import org.apache.arrow.vector.complex.impl.TimeStampNanoWriterImpl;
+import org.apache.arrow.vector.complex.impl.TimeStampSecTZWriterImpl;
+import org.apache.arrow.vector.complex.impl.TimeStampSecWriterImpl;
+import org.apache.arrow.vector.complex.impl.TinyIntWriterImpl;
+import org.apache.arrow.vector.complex.impl.UInt1WriterImpl;
+import org.apache.arrow.vector.complex.impl.UInt2WriterImpl;
+import org.apache.arrow.vector.complex.impl.UInt4WriterImpl;
+import org.apache.arrow.vector.complex.impl.UInt8WriterImpl;
+import org.apache.arrow.vector.complex.impl.UnionLargeListWriter;
+import org.apache.arrow.vector.complex.impl.UnionListWriter;
+import org.apache.arrow.vector.complex.impl.UnionWriter;
+import org.apache.arrow.vector.complex.impl.VarBinaryWriterImpl;
+import org.apache.arrow.vector.complex.impl.VarCharWriterImpl;
+import org.apache.arrow.vector.complex.writer.FieldWriter;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.ArrowType.ArrowTypeVisitor;
+import org.apache.arrow.vector.types.pojo.ArrowType.Binary;
+import org.apache.arrow.vector.types.pojo.ArrowType.Bool;
+import org.apache.arrow.vector.types.pojo.ArrowType.Date;
+import org.apache.arrow.vector.types.pojo.ArrowType.Decimal;
+import org.apache.arrow.vector.types.pojo.ArrowType.Duration;
+import org.apache.arrow.vector.types.pojo.ArrowType.ExtensionType;
+import org.apache.arrow.vector.types.pojo.ArrowType.FixedSizeBinary;
+import org.apache.arrow.vector.types.pojo.ArrowType.FixedSizeList;
+import org.apache.arrow.vector.types.pojo.ArrowType.FloatingPoint;
+import org.apache.arrow.vector.types.pojo.ArrowType.Int;
+import org.apache.arrow.vector.types.pojo.ArrowType.Interval;
+import org.apache.arrow.vector.types.pojo.ArrowType.LargeBinary;
+import org.apache.arrow.vector.types.pojo.ArrowType.LargeUtf8;
+import org.apache.arrow.vector.types.pojo.ArrowType.List;
+import org.apache.arrow.vector.types.pojo.ArrowType.Map;
+import org.apache.arrow.vector.types.pojo.ArrowType.Null;
+import org.apache.arrow.vector.types.pojo.ArrowType.Struct;
+import org.apache.arrow.vector.types.pojo.ArrowType.Time;
+import org.apache.arrow.vector.types.pojo.ArrowType.Timestamp;
+import org.apache.arrow.vector.types.pojo.ArrowType.Union;
+import org.apache.arrow.vector.types.pojo.ArrowType.Utf8;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.CallBack;
+
+/** An enumeration of all logical types supported by this library. */
+public class Types {
+
+ /**
+ * The actual enumeration of types.
+ */
+ public enum MinorType {
+ NULL(Null.INSTANCE) {
+ @Override
+ public FieldVector getNewVector(
+ Field field,
+ BufferAllocator allocator,
+ CallBack schemaChangeCallback) {
+ return new NullVector(field.getName());
+ }
+
+ @Override
+ public FieldWriter getNewFieldWriter(ValueVector vector) {
+ return null;
+ }
+ },
+ STRUCT(Struct.INSTANCE) {
+ @Override
+ public FieldVector getNewVector(
+ Field field,
+ BufferAllocator allocator,
+ CallBack schemaChangeCallback) {
+ return new StructVector(field.getName(), allocator, field.getFieldType(), schemaChangeCallback);
+ }
+
+ @Override
+ public FieldWriter getNewFieldWriter(ValueVector vector) {
+ return new NullableStructWriter((StructVector) vector);
+ }
+ },
+ TINYINT(new Int(8, true)) {
+ @Override
+ public FieldVector getNewVector(
+ Field field,
+ BufferAllocator allocator,
+ CallBack schemaChangeCallback) {
+ return new TinyIntVector(field, allocator);
+ }
+
+ @Override
+ public FieldWriter getNewFieldWriter(ValueVector vector) {
+ return new TinyIntWriterImpl((TinyIntVector) vector);
+ }
+ },
+ SMALLINT(new Int(16, true)) {
+ @Override
+ public FieldVector getNewVector(
+ Field field,
+ BufferAllocator allocator,
+ CallBack schemaChangeCallback) {
+ return new SmallIntVector(field, allocator);
+ }
+
+ @Override
+ public FieldWriter getNewFieldWriter(ValueVector vector) {
+ return new SmallIntWriterImpl((SmallIntVector) vector);
+ }
+ },
+ INT(new Int(32, true)) {
+ @Override
+ public FieldVector getNewVector(
+ Field field,
+ BufferAllocator allocator,
+ CallBack schemaChangeCallback) {
+ return new IntVector(field, allocator);
+ }
+
+ @Override
+ public FieldWriter getNewFieldWriter(ValueVector vector) {
+ return new IntWriterImpl((IntVector) vector);
+ }
+ },
+ BIGINT(new Int(64, true)) {
+ @Override
+ public FieldVector getNewVector(
+ Field field,
+ BufferAllocator allocator,
+ CallBack schemaChangeCallback) {
+ return new BigIntVector(field, allocator);
+ }
+
+ @Override
+ public FieldWriter getNewFieldWriter(ValueVector vector) {
+ return new BigIntWriterImpl((BigIntVector) vector);
+ }
+ },
+ DATEDAY(new Date(DateUnit.DAY)) {
+ @Override
+ public FieldVector getNewVector(
+ Field field,
+ BufferAllocator allocator,
+ CallBack schemaChangeCallback) {
+ return new DateDayVector(field, allocator);
+ }
+
+ @Override
+ public FieldWriter getNewFieldWriter(ValueVector vector) {
+ return new DateDayWriterImpl((DateDayVector) vector);
+ }
+ },
+ DATEMILLI(new Date(DateUnit.MILLISECOND)) {
+ @Override
+ public FieldVector getNewVector(
+ Field field,
+ BufferAllocator allocator,
+ CallBack schemaChangeCallback) {
+ return new DateMilliVector(field, allocator);
+ }
+
+ @Override
+ public FieldWriter getNewFieldWriter(ValueVector vector) {
+ return new DateMilliWriterImpl((DateMilliVector) vector);
+ }
+ },
+ TIMESEC(new Time(TimeUnit.SECOND, 32)) {
+ @Override
+ public FieldVector getNewVector(
+ Field field,
+ BufferAllocator allocator,
+ CallBack schemaChangeCallback) {
+ return new TimeSecVector(field, allocator);
+ }
+
+ @Override
+ public FieldWriter getNewFieldWriter(ValueVector vector) {
+ return new TimeSecWriterImpl((TimeSecVector) vector);
+ }
+ },
+ TIMEMILLI(new Time(TimeUnit.MILLISECOND, 32)) {
+ @Override
+ public FieldVector getNewVector(
+ Field field,
+ BufferAllocator allocator,
+ CallBack schemaChangeCallback) {
+ return new TimeMilliVector(field, allocator);
+ }
+
+ @Override
+ public FieldWriter getNewFieldWriter(ValueVector vector) {
+ return new TimeMilliWriterImpl((TimeMilliVector) vector);
+ }
+ },
+ TIMEMICRO(new Time(TimeUnit.MICROSECOND, 64)) {
+ @Override
+ public FieldVector getNewVector(
+ Field field,
+ BufferAllocator allocator,
+ CallBack schemaChangeCallback) {
+ return new TimeMicroVector(field, allocator);
+ }
+
+ @Override
+ public FieldWriter getNewFieldWriter(ValueVector vector) {
+ return new TimeMicroWriterImpl((TimeMicroVector) vector);
+ }
+ },
+ TIMENANO(new Time(TimeUnit.NANOSECOND, 64)) {
+ @Override
+ public FieldVector getNewVector(
+ Field field,
+ BufferAllocator allocator,
+ CallBack schemaChangeCallback) {
+ return new TimeNanoVector(field, allocator);
+ }
+
+ @Override
+ public FieldWriter getNewFieldWriter(ValueVector vector) {
+ return new TimeNanoWriterImpl((TimeNanoVector) vector);
+ }
+ },
+ // time in second from the Unix epoch, 00:00:00.000000 on 1 January 1970, UTC.
+ TIMESTAMPSEC(new Timestamp(org.apache.arrow.vector.types.TimeUnit.SECOND, null)) {
+ @Override
+ public FieldVector getNewVector(
+ Field field,
+ BufferAllocator allocator,
+ CallBack schemaChangeCallback) {
+ return new TimeStampSecVector(field, allocator);
+ }
+
+ @Override
+ public FieldWriter getNewFieldWriter(ValueVector vector) {
+ return new TimeStampSecWriterImpl((TimeStampSecVector) vector);
+ }
+ },
+ // time in millis from the Unix epoch, 00:00:00.000 on 1 January 1970, UTC.
+ TIMESTAMPMILLI(new Timestamp(org.apache.arrow.vector.types.TimeUnit.MILLISECOND, null)) {
+ @Override
+ public FieldVector getNewVector(
+ Field field,
+ BufferAllocator allocator,
+ CallBack schemaChangeCallback) {
+ return new TimeStampMilliVector(field, allocator);
+ }
+
+ @Override
+ public FieldWriter getNewFieldWriter(ValueVector vector) {
+ return new TimeStampMilliWriterImpl((TimeStampMilliVector) vector);
+ }
+ },
+ // time in microsecond from the Unix epoch, 00:00:00.000000 on 1 January 1970, UTC.
+ TIMESTAMPMICRO(new Timestamp(org.apache.arrow.vector.types.TimeUnit.MICROSECOND, null)) {
+ @Override
+ public FieldVector getNewVector(
+ Field field,
+ BufferAllocator allocator,
+ CallBack schemaChangeCallback) {
+ return new TimeStampMicroVector(field, allocator);
+ }
+
+ @Override
+ public FieldWriter getNewFieldWriter(ValueVector vector) {
+ return new TimeStampMicroWriterImpl((TimeStampMicroVector) vector);
+ }
+ },
+ // time in nanosecond from the Unix epoch, 00:00:00.000000000 on 1 January 1970, UTC.
+ TIMESTAMPNANO(new Timestamp(org.apache.arrow.vector.types.TimeUnit.NANOSECOND, null)) {
+ @Override
+ public FieldVector getNewVector(
+ Field field,
+ BufferAllocator allocator,
+ CallBack schemaChangeCallback) {
+ return new TimeStampNanoVector(field, allocator);
+ }
+
+ @Override
+ public FieldWriter getNewFieldWriter(ValueVector vector) {
+ return new TimeStampNanoWriterImpl((TimeStampNanoVector) vector);
+ }
+ },
+ INTERVALDAY(new Interval(IntervalUnit.DAY_TIME)) {
+ @Override
+ public FieldVector getNewVector(
+ Field field,
+ BufferAllocator allocator,
+ CallBack schemaChangeCallback) {
+ return new IntervalDayVector(field, allocator);
+ }
+
+ @Override
+ public FieldWriter getNewFieldWriter(ValueVector vector) {
+ return new IntervalDayWriterImpl((IntervalDayVector) vector);
+ }
+ },
+ INTERVALMONTHDAYNANO(new Interval(IntervalUnit.MONTH_DAY_NANO)) {
+ @Override
+ public FieldVector getNewVector(
+ Field field,
+ BufferAllocator allocator,
+ CallBack schemaChangeCallback) {
+ return new IntervalMonthDayNanoVector(field, allocator);
+ }
+
+ @Override
+ public FieldWriter getNewFieldWriter(ValueVector vector) {
+ return new IntervalMonthDayNanoWriterImpl((IntervalMonthDayNanoVector) vector);
+ }
+ },
+ DURATION(null) {
+ @Override
+ public FieldVector getNewVector(
+ Field field,
+ BufferAllocator allocator,
+ CallBack schemaChangeCallback) {
+ return new DurationVector(field, allocator);
+ }
+
+ @Override
+ public FieldWriter getNewFieldWriter(ValueVector vector) {
+ return new DurationWriterImpl((DurationVector) vector);
+ }
+ },
+
+
+ INTERVALYEAR(new Interval(IntervalUnit.YEAR_MONTH)) {
+ @Override
+ public FieldVector getNewVector(
+ Field field,
+ BufferAllocator allocator,
+ CallBack schemaChangeCallback) {
+ return new IntervalYearVector(field, allocator);
+ }
+
+ @Override
+ public FieldWriter getNewFieldWriter(ValueVector vector) {
+ return new IntervalYearWriterImpl((IntervalYearVector) vector);
+ }
+ },
+ // 4 byte ieee 754
+ FLOAT4(new FloatingPoint(SINGLE)) {
+ @Override
+ public FieldVector getNewVector(
+ Field field,
+ BufferAllocator allocator,
+ CallBack schemaChangeCallback) {
+ return new Float4Vector(field, allocator);
+ }
+
+ @Override
+ public FieldWriter getNewFieldWriter(ValueVector vector) {
+ return new Float4WriterImpl((Float4Vector) vector);
+ }
+ },
+ // 8 byte ieee 754
+ FLOAT8(new FloatingPoint(DOUBLE)) {
+ @Override
+ public FieldVector getNewVector(
+ Field field,
+ BufferAllocator allocator,
+ CallBack schemaChangeCallback) {
+ return new Float8Vector(field, allocator);
+ }
+
+ @Override
+ public FieldWriter getNewFieldWriter(ValueVector vector) {
+ return new Float8WriterImpl((Float8Vector) vector);
+ }
+ },
+ BIT(Bool.INSTANCE) {
+ @Override
+ public FieldVector getNewVector(
+ Field field,
+ BufferAllocator allocator,
+ CallBack schemaChangeCallback) {
+ return new BitVector(field, allocator);
+ }
+
+ @Override
+ public FieldWriter getNewFieldWriter(ValueVector vector) {
+ return new BitWriterImpl((BitVector) vector);
+ }
+ },
+ VARCHAR(Utf8.INSTANCE) {
+ @Override
+ public FieldVector getNewVector(
+ Field field,
+ BufferAllocator allocator,
+ CallBack schemaChangeCallback) {
+ return new VarCharVector(field, allocator);
+ }
+
+ @Override
+ public FieldWriter getNewFieldWriter(ValueVector vector) {
+ return new VarCharWriterImpl((VarCharVector) vector);
+ }
+ },
+ LARGEVARCHAR(LargeUtf8.INSTANCE) {
+ @Override
+ public FieldVector getNewVector(
+ Field field,
+ BufferAllocator allocator,
+ CallBack schemaChangeCallback) {
+ return new LargeVarCharVector(field, allocator);
+ }
+
+ @Override
+ public FieldWriter getNewFieldWriter(ValueVector vector) {
+ return new LargeVarCharWriterImpl((LargeVarCharVector) vector);
+ }
+ },
+ LARGEVARBINARY(LargeBinary.INSTANCE) {
+ @Override
+ public FieldVector getNewVector(
+ Field field,
+ BufferAllocator allocator,
+ CallBack schemaChangeCallback) {
+ return new LargeVarBinaryVector(field, allocator);
+ }
+
+ @Override
+ public FieldWriter getNewFieldWriter(ValueVector vector) {
+ return new LargeVarBinaryWriterImpl((LargeVarBinaryVector) vector);
+ }
+ },
+ VARBINARY(Binary.INSTANCE) {
+ @Override
+ public FieldVector getNewVector(
+ Field field,
+ BufferAllocator allocator,
+ CallBack schemaChangeCallback) {
+ return new VarBinaryVector(field, allocator);
+ }
+
+ @Override
+ public FieldWriter getNewFieldWriter(ValueVector vector) {
+ return new VarBinaryWriterImpl((VarBinaryVector) vector);
+ }
+ },
+ DECIMAL(null) {
+ @Override
+ public FieldVector getNewVector(
+ Field field,
+ BufferAllocator allocator,
+ CallBack schemaChangeCallback) {
+ return new DecimalVector(field, allocator);
+ }
+
+ @Override
+ public FieldWriter getNewFieldWriter(ValueVector vector) {
+ return new DecimalWriterImpl((DecimalVector) vector);
+ }
+ },
+ DECIMAL256(null) {
+ @Override
+ public FieldVector getNewVector(
+ Field field,
+ BufferAllocator allocator,
+ CallBack schemaChangeCallback) {
+ return new Decimal256Vector(field, allocator);
+ }
+
+ @Override
+ public FieldWriter getNewFieldWriter(ValueVector vector) {
+ return new Decimal256WriterImpl((Decimal256Vector) vector);
+ }
+ },
+ FIXEDSIZEBINARY(null) {
+ @Override
+ public FieldVector getNewVector(
+ Field field,
+ BufferAllocator allocator,
+ CallBack schemaChangeCallback) {
+ return new FixedSizeBinaryVector(field, allocator);
+ }
+
+ @Override
+ public FieldWriter getNewFieldWriter(ValueVector vector) {
+ return new FixedSizeBinaryWriterImpl((FixedSizeBinaryVector) vector);
+ }
+ },
+ UINT1(new Int(8, false)) {
+ @Override
+ public FieldVector getNewVector(
+ Field field,
+ BufferAllocator allocator,
+ CallBack schemaChangeCallback) {
+ return new UInt1Vector(field, allocator);
+ }
+
+ @Override
+ public FieldWriter getNewFieldWriter(ValueVector vector) {
+ return new UInt1WriterImpl((UInt1Vector) vector);
+ }
+ },
+ UINT2(new Int(16, false)) {
+ @Override
+ public FieldVector getNewVector(
+ Field field,
+ BufferAllocator allocator,
+ CallBack schemaChangeCallback) {
+ return new UInt2Vector(field, allocator);
+ }
+
+ @Override
+ public FieldWriter getNewFieldWriter(ValueVector vector) {
+ return new UInt2WriterImpl((UInt2Vector) vector);
+ }
+ },
+ UINT4(new Int(32, false)) {
+ @Override
+ public FieldVector getNewVector(
+ Field field,
+ BufferAllocator allocator,
+ CallBack schemaChangeCallback) {
+ return new UInt4Vector(field, allocator);
+ }
+
+ @Override
+ public FieldWriter getNewFieldWriter(ValueVector vector) {
+ return new UInt4WriterImpl((UInt4Vector) vector);
+ }
+ },
+ UINT8(new Int(64, false)) {
+ @Override
+ public FieldVector getNewVector(
+ Field field,
+ BufferAllocator allocator,
+ CallBack schemaChangeCallback) {
+ return new UInt8Vector(field, allocator);
+ }
+
+ @Override
+ public FieldWriter getNewFieldWriter(ValueVector vector) {
+ return new UInt8WriterImpl((UInt8Vector) vector);
+ }
+ },
+ LIST(List.INSTANCE) {
+ @Override
+ public FieldVector getNewVector(
+ Field field,
+ BufferAllocator allocator,
+ CallBack schemaChangeCallback) {
+ return new ListVector(field.getName(), allocator, field.getFieldType(), schemaChangeCallback);
+ }
+
+ @Override
+ public FieldWriter getNewFieldWriter(ValueVector vector) {
+ return new UnionListWriter((ListVector) vector);
+ }
+ },
+ LARGELIST(ArrowType.LargeList.INSTANCE) {
+ @Override
+ public FieldVector getNewVector(Field field, BufferAllocator allocator, CallBack schemaChangeCallback) {
+ return new LargeListVector(field.getName(), allocator, field.getFieldType(), schemaChangeCallback);
+ }
+
+ @Override
+ public FieldWriter getNewFieldWriter(ValueVector vector) {
+ return new UnionLargeListWriter((LargeListVector) vector);
+ }
+ },
+ FIXED_SIZE_LIST(null) {
+ @Override
+ public FieldVector getNewVector(
+ Field field,
+ BufferAllocator allocator,
+ CallBack schemaChangeCallback) {
+ return new FixedSizeListVector(field.getName(), allocator, field.getFieldType(), schemaChangeCallback);
+ }
+
+ @Override
+ public FieldWriter getNewFieldWriter(ValueVector vector) {
+ throw new UnsupportedOperationException("FieldWriter not implemented for FixedSizeList " +
+ "type");
+ }
+ },
+ UNION(new Union(Sparse, null)) {
+ @Override
+ public FieldVector getNewVector(
+ Field field,
+ BufferAllocator allocator,
+ CallBack schemaChangeCallback) {
+ if (field.getFieldType().getDictionary() != null) {
+ throw new UnsupportedOperationException("Dictionary encoding not supported for complex " +
+ "types");
+ }
+ return new UnionVector(field.getName(), allocator, field.getFieldType(), schemaChangeCallback);
+ }
+
+ @Override
+ public FieldWriter getNewFieldWriter(ValueVector vector) {
+ return new UnionWriter((UnionVector) vector);
+ }
+ },
+ DENSEUNION(new Union(Dense, null)) {
+ @Override
+ public FieldVector getNewVector(
+ Field field,
+ BufferAllocator allocator,
+ CallBack schemaChangeCallback) {
+ if (field.getFieldType().getDictionary() != null) {
+ throw new UnsupportedOperationException("Dictionary encoding not supported for complex " +
+ "types");
+ }
+ return new DenseUnionVector(field.getName(), allocator, field.getFieldType(), schemaChangeCallback);
+ }
+
+ @Override
+ public FieldWriter getNewFieldWriter(ValueVector vector) {
+ return new DenseUnionWriter((DenseUnionVector) vector);
+ }
+ },
+ MAP(null) {
+ @Override
+ public FieldVector getNewVector(
+ Field field,
+ BufferAllocator allocator,
+ CallBack schemaChangeCallback) {
+ return new MapVector(field.getName(), allocator, field.getFieldType(), schemaChangeCallback);
+ }
+
+ @Override
+ public FieldWriter getNewFieldWriter(ValueVector vector) {
+ return new UnionListWriter((MapVector) vector);
+ }
+ },
+ TIMESTAMPSECTZ(null) {
+ @Override
+ public FieldVector getNewVector(
+ Field field,
+ BufferAllocator allocator,
+ CallBack schemaChangeCallback) {
+ return new TimeStampSecTZVector(field, allocator);
+ }
+
+ @Override
+ public FieldWriter getNewFieldWriter(ValueVector vector) {
+ return new TimeStampSecTZWriterImpl((TimeStampSecTZVector) vector);
+ }
+ },
+ TIMESTAMPMILLITZ(null) {
+ @Override
+ public FieldVector getNewVector(
+ Field field,
+ BufferAllocator allocator,
+ CallBack schemaChangeCallback) {
+ return new TimeStampMilliTZVector(field, allocator);
+ }
+
+ @Override
+ public FieldWriter getNewFieldWriter(ValueVector vector) {
+ return new TimeStampMilliTZWriterImpl((TimeStampMilliTZVector) vector);
+ }
+ },
+ TIMESTAMPMICROTZ(null) {
+ @Override
+ public FieldVector getNewVector(
+ Field field,
+ BufferAllocator allocator,
+ CallBack schemaChangeCallback) {
+ return new TimeStampMicroTZVector(field, allocator);
+ }
+
+ @Override
+ public FieldWriter getNewFieldWriter(ValueVector vector) {
+ return new TimeStampMicroTZWriterImpl((TimeStampMicroTZVector) vector);
+ }
+ },
+ TIMESTAMPNANOTZ(null) {
+ @Override
+ public FieldVector getNewVector(
+ Field field,
+ BufferAllocator allocator,
+ CallBack schemaChangeCallback) {
+ return new TimeStampNanoTZVector(field, allocator);
+ }
+
+ @Override
+ public FieldWriter getNewFieldWriter(ValueVector vector) {
+ return new TimeStampNanoTZWriterImpl((TimeStampNanoTZVector) vector);
+ }
+ },
+ EXTENSIONTYPE(null) {
+ @Override
+ public FieldVector getNewVector(
+ Field field,
+ BufferAllocator allocator,
+ CallBack schemaChangeCallback) {
+ return ((ExtensionType) field.getFieldType().getType()).getNewVector(field.getName(),
+ field.getFieldType(), allocator);
+ }
+
+ @Override
+ public FieldWriter getNewFieldWriter(ValueVector vector) {
+ return ((ExtensionTypeVector) vector).getUnderlyingVector().getMinorType().getNewFieldWriter(vector);
+ }
+ },
+ ;
+
+ private final ArrowType type;
+
+ MinorType(ArrowType type) {
+ this.type = type;
+ }
+
+ /**
+ * Returns the {@link ArrowType} equivalent of this type.
+ */
+ public final ArrowType getType() {
+ if (type == null) {
+ throw new UnsupportedOperationException("Cannot get simple type for type " + name());
+ }
+ return type;
+ }
+
+ /** Constructs a new vector for the given type. */
+ public final FieldVector getNewVector(
+ String name,
+ FieldType fieldType,
+ BufferAllocator allocator,
+ CallBack schemaChangeCallback) {
+ return getNewVector(new Field(name, fieldType, null), allocator, schemaChangeCallback);
+ }
+
+ /** Constructs a new vector for the given type. */
+ public abstract FieldVector getNewVector(
+ Field field,
+ BufferAllocator allocator,
+ CallBack schemaChangeCallback);
+
+ public abstract FieldWriter getNewFieldWriter(ValueVector vector);
+ }
+
+ /**
+ * Maps the ArrowType to the java implementations MinorType.
+ */
+ public static MinorType getMinorTypeForArrowType(ArrowType arrowType) {
+ return arrowType.accept(new ArrowTypeVisitor<MinorType>() {
+ @Override
+ public MinorType visit(Null type) {
+ return MinorType.NULL;
+ }
+
+ @Override
+ public MinorType visit(Struct type) {
+ return MinorType.STRUCT;
+ }
+
+ @Override
+ public MinorType visit(List type) {
+ return MinorType.LIST;
+ }
+
+ @Override
+ public MinorType visit(FixedSizeList type) {
+ return MinorType.FIXED_SIZE_LIST;
+ }
+
+ @Override
+ public MinorType visit(Union type) {
+ switch (type.getMode()) {
+ case Sparse:
+ return MinorType.UNION;
+ case Dense:
+ return MinorType.DENSEUNION;
+ default:
+ throw new IllegalArgumentException("only Dense or Sparse unions supported: " + type);
+ }
+ }
+
+ @Override
+ public MinorType visit(Map type) {
+ return MinorType.MAP;
+ }
+
+ @Override
+ public MinorType visit(ArrowType.LargeList type) {
+ return MinorType.LARGELIST;
+ }
+
+ @Override
+ public MinorType visit(Int type) {
+ switch (type.getBitWidth()) {
+ case 8:
+ return type.getIsSigned() ? MinorType.TINYINT : MinorType.UINT1;
+ case 16:
+ return type.getIsSigned() ? MinorType.SMALLINT : MinorType.UINT2;
+ case 32:
+ return type.getIsSigned() ? MinorType.INT : MinorType.UINT4;
+ case 64:
+ return type.getIsSigned() ? MinorType.BIGINT : MinorType.UINT8;
+ default:
+ throw new IllegalArgumentException("only 8, 16, 32, 64 supported: " + type);
+ }
+ }
+
+ @Override
+ public MinorType visit(FloatingPoint type) {
+ switch (type.getPrecision()) {
+ case HALF:
+ throw new UnsupportedOperationException("NYI: " + type);
+ case SINGLE:
+ return MinorType.FLOAT4;
+ case DOUBLE:
+ return MinorType.FLOAT8;
+ default:
+ throw new IllegalArgumentException("unknown precision: " + type);
+ }
+ }
+
+ @Override
+ public MinorType visit(Utf8 type) {
+ return MinorType.VARCHAR;
+ }
+
+ @Override
+ public Types.MinorType visit(LargeUtf8 type) {
+ return MinorType.LARGEVARCHAR;
+ }
+
+ @Override
+ public MinorType visit(Binary type) {
+ return MinorType.VARBINARY;
+ }
+
+ @Override
+ public MinorType visit(LargeBinary type) {
+ return MinorType.LARGEVARBINARY;
+ }
+
+ @Override
+ public MinorType visit(Bool type) {
+ return MinorType.BIT;
+ }
+
+ @Override
+ public MinorType visit(Decimal type) {
+ if (type.getBitWidth() == 256) {
+ return MinorType.DECIMAL256;
+ }
+ return MinorType.DECIMAL;
+ }
+
+ @Override
+ public MinorType visit(FixedSizeBinary type) {
+ return MinorType.FIXEDSIZEBINARY;
+ }
+
+ @Override
+ public MinorType visit(Date type) {
+ switch (type.getUnit()) {
+ case DAY:
+ return MinorType.DATEDAY;
+ case MILLISECOND:
+ return MinorType.DATEMILLI;
+ default:
+ throw new IllegalArgumentException("unknown unit: " + type);
+ }
+ }
+
+ @Override
+ public MinorType visit(Time type) {
+ switch (type.getUnit()) {
+ case SECOND:
+ return MinorType.TIMESEC;
+ case MILLISECOND:
+ return MinorType.TIMEMILLI;
+ case MICROSECOND:
+ return MinorType.TIMEMICRO;
+ case NANOSECOND:
+ return MinorType.TIMENANO;
+ default:
+ throw new IllegalArgumentException("unknown unit: " + type);
+ }
+ }
+
+ @Override
+ public MinorType visit(Timestamp type) {
+ String tz = type.getTimezone();
+ switch (type.getUnit()) {
+ case SECOND:
+ return tz == null ? MinorType.TIMESTAMPSEC : MinorType.TIMESTAMPSECTZ;
+ case MILLISECOND:
+ return tz == null ? MinorType.TIMESTAMPMILLI : MinorType.TIMESTAMPMILLITZ;
+ case MICROSECOND:
+ return tz == null ? MinorType.TIMESTAMPMICRO : MinorType.TIMESTAMPMICROTZ;
+ case NANOSECOND:
+ return tz == null ? MinorType.TIMESTAMPNANO : MinorType.TIMESTAMPNANOTZ;
+ default:
+ throw new IllegalArgumentException("unknown unit: " + type);
+ }
+ }
+
+ @Override
+ public MinorType visit(Interval type) {
+ switch (type.getUnit()) {
+ case DAY_TIME:
+ return MinorType.INTERVALDAY;
+ case YEAR_MONTH:
+ return MinorType.INTERVALYEAR;
+ case MONTH_DAY_NANO:
+ return MinorType.INTERVALMONTHDAYNANO;
+ default:
+ throw new IllegalArgumentException("unknown unit: " + type);
+ }
+ }
+
+ @Override
+ public MinorType visit(Duration type) {
+ return MinorType.DURATION;
+ }
+
+ @Override
+ public MinorType visit(ExtensionType type) {
+ return MinorType.EXTENSIONTYPE;
+ }
+ });
+ }
+
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/types/UnionMode.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/types/UnionMode.java
new file mode 100644
index 000000000..19956ac6a
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/types/UnionMode.java
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.types;
+
+/**
+ * Different memory layouts for Union Vectors.
+ */
+public enum UnionMode {
+ /**
+ * Each child vector is the same length as the overall vector, and there is one 8-bit integer buffer to indicate
+ * the index of a child vector to use at any given position.
+ */
+ Sparse(org.apache.arrow.flatbuf.UnionMode.Sparse),
+ /**
+ * Each child vector is of variable width. The parent vector contains both a child index vector (like in
+ * {@link #Sparse}) and in addition a slot index buffer to determine the offset into the child vector indicated
+ * by the index vector.
+ */
+ Dense(org.apache.arrow.flatbuf.UnionMode.Dense);
+
+ private static final UnionMode[] valuesByFlatbufId = new UnionMode[UnionMode.values().length];
+
+ static {
+ for (UnionMode v : UnionMode.values()) {
+ valuesByFlatbufId[v.flatbufID] = v;
+ }
+ }
+
+ private final short flatbufID;
+
+ private UnionMode(short flatbufID) {
+ this.flatbufID = flatbufID;
+ }
+
+ public short getFlatbufID() {
+ return flatbufID;
+ }
+
+ public static UnionMode fromFlatbufID(short id) {
+ return valuesByFlatbufId[id];
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/DictionaryEncoding.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/DictionaryEncoding.java
new file mode 100644
index 000000000..8d41b92d8
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/DictionaryEncoding.java
@@ -0,0 +1,88 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.types.pojo;
+
+import java.util.Objects;
+
+import org.apache.arrow.vector.types.pojo.ArrowType.Int;
+
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonGetter;
+import com.fasterxml.jackson.annotation.JsonProperty;
+
+/**
+ * A POJO representation of Arrow Dictionary metadata.
+ */
+public class DictionaryEncoding {
+
+ private final long id;
+ private final boolean ordered;
+ private final Int indexType;
+
+ /**
+ * Constructs a new instance.
+ *
+ * @param id The ID of the dictionary to use for encoding.
+ * @param ordered Whether the keys in values in the dictionary are ordered.
+ * @param indexType (nullable). The integer type to use for indexing in the dictionary. Defaults to a signed
+ * 32 bit integer.
+ */
+ @JsonCreator
+ public DictionaryEncoding(
+ @JsonProperty("id") long id,
+ @JsonProperty("isOrdered") boolean ordered,
+ @JsonProperty("indexType") Int indexType) {
+ this.id = id;
+ this.ordered = ordered;
+ this.indexType = indexType == null ? new Int(32, true) : indexType;
+ }
+
+ public long getId() {
+ return id;
+ }
+
+ @JsonGetter("isOrdered")
+ public boolean isOrdered() {
+ return ordered;
+ }
+
+ public Int getIndexType() {
+ return indexType;
+ }
+
+ @Override
+ public String toString() {
+ return "DictionaryEncoding[id=" + id + ",ordered=" + ordered + ",indexType=" + indexType + "]";
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) {
+ return true;
+ } else if (o == null || getClass() != o.getClass()) {
+ return false;
+ }
+ DictionaryEncoding that = (DictionaryEncoding) o;
+ return id == that.id && ordered == that.ordered && Objects.equals(indexType, that.indexType);
+ }
+
+ @Override
+ public int hashCode() {
+ return Objects.hash(id, ordered, indexType);
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/ExtensionTypeRegistry.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/ExtensionTypeRegistry.java
new file mode 100644
index 000000000..f347008b4
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/ExtensionTypeRegistry.java
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.types.pojo;
+
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ConcurrentMap;
+
+import org.apache.arrow.vector.types.pojo.ArrowType.ExtensionType;
+
+/**
+ * A registry of recognized extension types.
+ */
+public final class ExtensionTypeRegistry {
+ private static final ConcurrentMap<String, ExtensionType> registry = new ConcurrentHashMap<>();
+
+ public static void register(ExtensionType type) {
+ registry.put(type.extensionName(), type);
+ }
+
+ public static void unregister(ExtensionType type) {
+ registry.remove(type.extensionName());
+ }
+
+ public static ExtensionType lookup(String name) {
+ return registry.get(name);
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/Field.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/Field.java
new file mode 100644
index 000000000..3a5ef1153
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/Field.java
@@ -0,0 +1,306 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.types.pojo;
+
+import static org.apache.arrow.util.Preconditions.checkNotNull;
+import static org.apache.arrow.vector.complex.BaseRepeatedValueVector.DATA_VECTOR_NAME;
+import static org.apache.arrow.vector.types.pojo.ArrowType.getTypeForField;
+import static org.apache.arrow.vector.types.pojo.Schema.convertMetadata;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Objects;
+import java.util.stream.Collectors;
+
+import org.apache.arrow.flatbuf.KeyValue;
+import org.apache.arrow.flatbuf.Type;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.util.Collections2;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.TypeLayout;
+import org.apache.arrow.vector.types.pojo.ArrowType.ExtensionType;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonIgnore;
+import com.fasterxml.jackson.annotation.JsonInclude;
+import com.fasterxml.jackson.annotation.JsonInclude.Include;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.google.flatbuffers.FlatBufferBuilder;
+
+/**
+ * A POJO abstraction for the Flatbuffer description of Vector Type.
+ */
+public class Field {
+
+ private static final Logger logger = LoggerFactory.getLogger(Field.class);
+
+ public static Field nullablePrimitive(String name, ArrowType.PrimitiveType type) {
+ return nullable(name, type);
+ }
+
+ public static Field nullable(String name, ArrowType type) {
+ return new Field(name, FieldType.nullable(type), null);
+ }
+
+ private final String name;
+ private final FieldType fieldType;
+ private final List<Field> children;
+
+ private Field(
+ String name,
+ boolean nullable,
+ ArrowType type,
+ DictionaryEncoding dictionary,
+ List<Field> children,
+ Map<String, String> metadata) {
+ this(name, new FieldType(nullable, type, dictionary, metadata), children);
+ }
+
+ @JsonCreator
+ private Field(
+ @JsonProperty("name") String name,
+ @JsonProperty("nullable") boolean nullable,
+ @JsonProperty("type") ArrowType type,
+ @JsonProperty("dictionary") DictionaryEncoding dictionary,
+ @JsonProperty("children") List<Field> children,
+ @JsonProperty("metadata") List<Map<String, String>> metadata) {
+ this(name, new FieldType(nullable, type, dictionary, convertMetadata(metadata)), children);
+ }
+
+ private Field(String name, FieldType fieldType, List<Field> children, TypeLayout typeLayout) {
+ this.name = name;
+ this.fieldType = checkNotNull(fieldType);
+ this.children = children == null ? Collections.emptyList() : Collections2.toImmutableList(children);
+ }
+
+ public Field(String name, FieldType fieldType, List<Field> children) {
+ this(name, fieldType, children, fieldType == null ? null : TypeLayout.getTypeLayout(fieldType.getType()));
+ }
+
+ /**
+ * Construct a new vector of this type using the given allocator.
+ */
+ public FieldVector createVector(BufferAllocator allocator) {
+ FieldVector vector = fieldType.createNewSingleVector(this, allocator, null);
+ vector.initializeChildrenFromFields(children);
+ return vector;
+ }
+
+ /**
+ * Constructs a new instance from a flatbuffer representation of the field.
+ */
+ public static Field convertField(org.apache.arrow.flatbuf.Field field) {
+ Map<String, String> metadata = new HashMap<>();
+ for (int i = 0; i < field.customMetadataLength(); i++) {
+ KeyValue kv = field.customMetadata(i);
+ String key = kv.key();
+ String value = kv.value();
+ metadata.put(key == null ? "" : key, value == null ? "" : value);
+ }
+ metadata = Collections.unmodifiableMap(metadata);
+
+ String name = field.name();
+ boolean nullable = field.nullable();
+ ArrowType type = getTypeForField(field);
+
+ if (metadata.containsKey(ExtensionType.EXTENSION_METADATA_KEY_NAME)) {
+ final String extensionName = metadata.get(ExtensionType.EXTENSION_METADATA_KEY_NAME);
+ final String extensionMetadata = metadata.getOrDefault(ExtensionType.EXTENSION_METADATA_KEY_METADATA, "");
+ ExtensionType extensionType = ExtensionTypeRegistry.lookup(extensionName);
+ if (extensionType != null) {
+ type = extensionType.deserialize(type, extensionMetadata);
+ } else {
+ // Otherwise, we haven't registered the type
+ logger.info("Unrecognized extension type: {}", extensionName);
+ }
+ }
+
+ DictionaryEncoding dictionary = null;
+ org.apache.arrow.flatbuf.DictionaryEncoding dictionaryFB = field.dictionary();
+ if (dictionaryFB != null) {
+ ArrowType.Int indexType = null;
+ org.apache.arrow.flatbuf.Int indexTypeFB = dictionaryFB.indexType();
+ if (indexTypeFB != null) {
+ indexType = new ArrowType.Int(indexTypeFB.bitWidth(), indexTypeFB.isSigned());
+ }
+ dictionary = new DictionaryEncoding(dictionaryFB.id(), dictionaryFB.isOrdered(), indexType);
+ }
+ List<Field> children = new ArrayList<>();
+ for (int i = 0; i < field.childrenLength(); i++) {
+ Field childField = convertField(field.children(i));
+ childField = mutateOriginalNameIfNeeded(field, childField);
+ children.add(childField);
+ }
+ children = Collections.unmodifiableList(children);
+ return new Field(name, nullable, type, dictionary, children, metadata);
+ }
+
+ /**
+ * Helper method to ensure backward compatibility with schemas generated prior to ARROW-1347, ARROW-1663.
+ *
+ * @param field the field to check
+ * @param originalChildField original field which name might be mutated
+ * @return original or mutated field
+ */
+ private static Field mutateOriginalNameIfNeeded(org.apache.arrow.flatbuf.Field field, Field originalChildField) {
+ if ((field.typeType() == Type.List || field.typeType() == Type.FixedSizeList) &&
+ originalChildField.getName().equals("[DEFAULT]")) {
+ return
+ new Field(DATA_VECTOR_NAME,
+ originalChildField.isNullable(),
+ originalChildField.getType(),
+ originalChildField.getDictionary(),
+ originalChildField.getChildren(),
+ originalChildField.getMetadata());
+ }
+ return originalChildField;
+ }
+
+ /**
+ * Puts this object into <code>builder</code> and returns the length of the serialized flatbuffer.
+ */
+ public int getField(FlatBufferBuilder builder) {
+ int nameOffset = name == null ? -1 : builder.createString(name);
+ int typeOffset = getType().getType(builder);
+ int dictionaryOffset = -1;
+ DictionaryEncoding dictionary = getDictionary();
+ if (dictionary != null) {
+ int dictionaryType = dictionary.getIndexType().getType(builder);
+ org.apache.arrow.flatbuf.DictionaryEncoding.startDictionaryEncoding(builder);
+ org.apache.arrow.flatbuf.DictionaryEncoding.addId(builder, dictionary.getId());
+ org.apache.arrow.flatbuf.DictionaryEncoding.addIsOrdered(builder, dictionary.isOrdered());
+ org.apache.arrow.flatbuf.DictionaryEncoding.addIndexType(builder, dictionaryType);
+ dictionaryOffset = org.apache.arrow.flatbuf.DictionaryEncoding.endDictionaryEncoding(builder);
+ }
+ int[] childrenData = new int[children.size()];
+ for (int i = 0; i < children.size(); i++) {
+ childrenData[i] = children.get(i).getField(builder);
+ }
+ int childrenOffset = org.apache.arrow.flatbuf.Field.createChildrenVector(builder, childrenData);
+ int[] metadataOffsets = new int[getMetadata().size()];
+ Iterator<Entry<String, String>> metadataIterator = getMetadata().entrySet().iterator();
+ for (int i = 0; i < metadataOffsets.length; i++) {
+ Entry<String, String> kv = metadataIterator.next();
+ int keyOffset = builder.createString(kv.getKey());
+ int valueOffset = builder.createString(kv.getValue());
+ KeyValue.startKeyValue(builder);
+ KeyValue.addKey(builder, keyOffset);
+ KeyValue.addValue(builder, valueOffset);
+ metadataOffsets[i] = KeyValue.endKeyValue(builder);
+ }
+ int metadataOffset = org.apache.arrow.flatbuf.Field.createCustomMetadataVector(builder, metadataOffsets);
+ org.apache.arrow.flatbuf.Field.startField(builder);
+ if (name != null) {
+ org.apache.arrow.flatbuf.Field.addName(builder, nameOffset);
+ }
+ org.apache.arrow.flatbuf.Field.addNullable(builder, isNullable());
+ org.apache.arrow.flatbuf.Field.addTypeType(builder, getType().getTypeID().getFlatbufID());
+ org.apache.arrow.flatbuf.Field.addType(builder, typeOffset);
+ org.apache.arrow.flatbuf.Field.addChildren(builder, childrenOffset);
+ org.apache.arrow.flatbuf.Field.addCustomMetadata(builder, metadataOffset);
+ if (dictionary != null) {
+ org.apache.arrow.flatbuf.Field.addDictionary(builder, dictionaryOffset);
+ }
+ return org.apache.arrow.flatbuf.Field.endField(builder);
+ }
+
+ public String getName() {
+ return name;
+ }
+
+ public boolean isNullable() {
+ return fieldType.isNullable();
+ }
+
+ public ArrowType getType() {
+ return fieldType.getType();
+ }
+
+ @JsonIgnore
+ public FieldType getFieldType() {
+ return fieldType;
+ }
+
+ @JsonInclude(Include.NON_NULL)
+ public DictionaryEncoding getDictionary() {
+ return fieldType.getDictionary();
+ }
+
+ public List<Field> getChildren() {
+ return children;
+ }
+
+ @JsonIgnore
+ public Map<String, String> getMetadata() {
+ return fieldType.getMetadata();
+ }
+
+ @JsonProperty("metadata")
+ @JsonInclude(Include.NON_EMPTY)
+ List<Map<String, String>> getMetadataForJson() {
+ return convertMetadata(getMetadata());
+ }
+
+ @Override
+ public int hashCode() {
+ return Objects.hash(name, isNullable(), getType(), getDictionary(), getMetadata(), children);
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ if (!(obj instanceof Field)) {
+ return false;
+ }
+ Field that = (Field) obj;
+ return Objects.equals(this.name, that.name) &&
+ Objects.equals(this.isNullable(), that.isNullable()) &&
+ Objects.equals(this.getType(), that.getType()) &&
+ Objects.equals(this.getDictionary(), that.getDictionary()) &&
+ Objects.equals(this.getMetadata(), that.getMetadata()) &&
+ Objects.equals(this.children, that.children);
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder sb = new StringBuilder();
+ if (name != null) {
+ sb.append(name).append(": ");
+ }
+ sb.append(getType());
+ if (getDictionary() != null) {
+ sb.append("[dictionary: ").append(getDictionary().getId()).append("]");
+ }
+ if (!children.isEmpty()) {
+ sb.append("<").append(children.stream()
+ .map(t -> t.toString())
+ .collect(Collectors.joining(", ")))
+ .append(">");
+ }
+ if (!isNullable()) {
+ sb.append(" not null");
+ }
+ return sb.toString();
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/FieldType.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/FieldType.java
new file mode 100644
index 000000000..bb3250ef1
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/FieldType.java
@@ -0,0 +1,123 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.types.pojo;
+
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Objects;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.util.Collections2;
+import org.apache.arrow.util.Preconditions;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.types.Types;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.ArrowType.ExtensionType;
+import org.apache.arrow.vector.util.CallBack;
+
+/**
+ * POJO representation of an Arrow field type. It consists of a logical type, nullability and whether the field
+ * (column) is dictionary encoded.
+ */
+public class FieldType {
+
+ public static FieldType nullable(ArrowType type) {
+ return new FieldType(true, type, null, null);
+ }
+
+ private final boolean nullable;
+ private final ArrowType type;
+ private final DictionaryEncoding dictionary;
+ private final Map<String, String> metadata;
+
+ public FieldType(boolean nullable, ArrowType type, DictionaryEncoding dictionary) {
+ this(nullable, type, dictionary, null);
+ }
+
+ /**
+ * Constructs a new instance.
+ *
+ * @param nullable Whether the Vector is nullable
+ * @param type The logical arrow type of the field.
+ * @param dictionary The dictionary encoding of the field.
+ * @param metadata Custom metadata for the field.
+ */
+ public FieldType(boolean nullable, ArrowType type, DictionaryEncoding dictionary, Map<String, String> metadata) {
+ super();
+ this.nullable = nullable;
+ this.type = Preconditions.checkNotNull(type);
+ this.dictionary = dictionary;
+ if (type instanceof ExtensionType) {
+ // Save the extension type name/metadata
+ final Map<String, String> extensionMetadata = new HashMap<>();
+ extensionMetadata.put(ExtensionType.EXTENSION_METADATA_KEY_NAME, ((ExtensionType) type).extensionName());
+ extensionMetadata.put(ExtensionType.EXTENSION_METADATA_KEY_METADATA, ((ExtensionType) type).serialize());
+ if (metadata != null) {
+ extensionMetadata.putAll(metadata);
+ }
+ this.metadata = Collections.unmodifiableMap(extensionMetadata);
+ } else {
+ this.metadata = metadata == null ? java.util.Collections.emptyMap() : Collections2.immutableMapCopy(metadata);
+ }
+ }
+
+ public boolean isNullable() {
+ return nullable;
+ }
+
+ public ArrowType getType() {
+ return type;
+ }
+
+ public DictionaryEncoding getDictionary() {
+ return dictionary;
+ }
+
+ public Map<String, String> getMetadata() {
+ return metadata;
+ }
+
+ public FieldVector createNewSingleVector(String name, BufferAllocator allocator, CallBack schemaCallBack) {
+ MinorType minorType = Types.getMinorTypeForArrowType(type);
+ return minorType.getNewVector(name, this, allocator, schemaCallBack);
+ }
+
+ public FieldVector createNewSingleVector(Field field, BufferAllocator allocator, CallBack schemaCallBack) {
+ MinorType minorType = Types.getMinorTypeForArrowType(type);
+ return minorType.getNewVector(field, allocator, schemaCallBack);
+ }
+
+ @Override
+ public int hashCode() {
+ return Objects.hash(nullable, type, dictionary, metadata);
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ if (!(obj instanceof FieldType)) {
+ return false;
+ }
+ FieldType that = (FieldType) obj;
+ return Objects.equals(this.isNullable(), that.isNullable()) &&
+ Objects.equals(this.getType(), that.getType()) &&
+ Objects.equals(this.getDictionary(), that.getDictionary()) &&
+ Objects.equals(this.getMetadata(), that.getMetadata());
+ }
+
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/Schema.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/Schema.java
new file mode 100644
index 000000000..d377b395c
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/types/pojo/Schema.java
@@ -0,0 +1,247 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.types.pojo;
+
+
+import static org.apache.arrow.vector.types.pojo.Field.convertField;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.AbstractMap;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+import java.util.stream.Collectors;
+
+import org.apache.arrow.flatbuf.KeyValue;
+import org.apache.arrow.util.Collections2;
+import org.apache.arrow.util.Preconditions;
+import org.apache.arrow.vector.ipc.message.FBSerializables;
+
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonIgnore;
+import com.fasterxml.jackson.annotation.JsonInclude;
+import com.fasterxml.jackson.annotation.JsonInclude.Include;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.ObjectReader;
+import com.fasterxml.jackson.databind.ObjectWriter;
+import com.google.flatbuffers.FlatBufferBuilder;
+
+/**
+ * An Arrow Schema.
+ */
+public class Schema {
+
+ /**
+ * Search for a field by name in given the list of fields.
+ *
+ * @param fields the list of the fields
+ * @param name the name of the field to return
+ * @return the corresponding field
+ * @throws IllegalArgumentException if the field was not found
+ */
+ public static Field findField(List<Field> fields, String name) {
+ for (Field field : fields) {
+ if (field.getName().equals(name)) {
+ return field;
+ }
+ }
+ throw new IllegalArgumentException(String.format("field %s not found in %s", name, fields));
+ }
+
+ static final String METADATA_KEY = "key";
+ static final String METADATA_VALUE = "value";
+
+ private static final ObjectMapper mapper = new ObjectMapper();
+ private static final ObjectWriter writer = mapper.writerWithDefaultPrettyPrinter();
+ private static final ObjectReader reader = mapper.readerFor(Schema.class);
+
+ public static Schema fromJSON(String json) throws IOException {
+ return reader.readValue(Preconditions.checkNotNull(json));
+ }
+
+ public static Schema deserialize(ByteBuffer buffer) {
+ return convertSchema(org.apache.arrow.flatbuf.Schema.getRootAsSchema(buffer));
+ }
+
+ /** Converts a flatbuffer schema to its POJO representation. */
+ public static Schema convertSchema(org.apache.arrow.flatbuf.Schema schema) {
+ List<Field> fields = new ArrayList<>();
+ for (int i = 0; i < schema.fieldsLength(); i++) {
+ fields.add(convertField(schema.fields(i)));
+ }
+ Map<String, String> metadata = new HashMap<>();
+ for (int i = 0; i < schema.customMetadataLength(); i++) {
+ KeyValue kv = schema.customMetadata(i);
+ String key = kv.key();
+ String value = kv.value();
+ metadata.put(key == null ? "" : key, value == null ? "" : value);
+ }
+ return new Schema(true, Collections.unmodifiableList(fields), Collections.unmodifiableMap(metadata));
+ }
+
+ private final List<Field> fields;
+ private final Map<String, String> metadata;
+
+ public Schema(Iterable<Field> fields) {
+ this(fields, (Map<String, String>) null);
+ }
+
+ /**
+ * Constructor with metadata.
+ */
+ public Schema(Iterable<Field> fields,
+ Map<String, String> metadata) {
+ this(true,
+ Collections2.toImmutableList(fields),
+ metadata == null ? Collections.emptyMap() : Collections2.immutableMapCopy(metadata));
+ }
+
+
+ /**
+ * Constructor used for JSON deserialization.
+ */
+ @JsonCreator
+ private Schema(@JsonProperty("fields") Iterable<Field> fields,
+ @JsonProperty("metadata") List<Map<String, String>> metadata) {
+ this(fields, convertMetadata(metadata));
+ }
+
+
+ /**
+ * Private constructor to bypass automatic collection copy.
+ * @param unsafe a ignored argument. Its only purpose is to prevent using the constructor
+ * by accident because of type collisions (List vs Iterable).
+ */
+ private Schema(boolean unsafe, List<Field> fields, Map<String, String> metadata) {
+ this.fields = fields;
+ this.metadata = metadata;
+ }
+
+ static Map<String, String> convertMetadata(List<Map<String, String>> metadata) {
+ return (metadata == null) ? null : metadata.stream()
+ .map(e -> new AbstractMap.SimpleImmutableEntry<>(e.get(METADATA_KEY), e.get(METADATA_VALUE)))
+ .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
+ }
+
+ static List<Map<String, String>> convertMetadata(Map<String, String> metadata) {
+ return (metadata == null) ? null : metadata.entrySet()
+ .stream()
+ .map(Schema::convertEntryToKeyValueMap)
+ .collect(Collectors.toList());
+ }
+
+ private static Map<String, String> convertEntryToKeyValueMap(Map.Entry<String, String> entry) {
+ Map<String, String> map = new HashMap<>(2);
+ map.put(METADATA_KEY, entry.getKey());
+ map.put(METADATA_VALUE, entry.getValue());
+ return Collections.unmodifiableMap(map);
+ }
+
+ public List<Field> getFields() {
+ return fields;
+ }
+
+ @JsonIgnore
+ public Map<String, String> getCustomMetadata() {
+ return metadata;
+ }
+
+ @JsonProperty("metadata")
+ @JsonInclude(Include.NON_EMPTY)
+ List<Map<String, String>> getCustomMetadataForJson() {
+ return convertMetadata(getCustomMetadata());
+ }
+
+ /**
+ * Search for a field by name in this Schema.
+ *
+ * @param name the name of the field to return
+ * @return the corresponding field
+ * @throws IllegalArgumentException if the field was not found
+ */
+ public Field findField(String name) {
+ return findField(getFields(), name);
+ }
+
+ /**
+ * Returns the JSON string representation of this schema.
+ */
+ public String toJson() {
+ try {
+ return writer.writeValueAsString(this);
+ } catch (JsonProcessingException e) {
+ // this should not happen
+ throw new RuntimeException(e);
+ }
+ }
+
+ /**
+ * Adds this schema to the builder returning the size of the builder after adding.
+ */
+ public int getSchema(FlatBufferBuilder builder) {
+ int[] fieldOffsets = new int[fields.size()];
+ for (int i = 0; i < fields.size(); i++) {
+ fieldOffsets[i] = fields.get(i).getField(builder);
+ }
+ int fieldsOffset = org.apache.arrow.flatbuf.Schema.createFieldsVector(builder, fieldOffsets);
+ int metadataOffset = FBSerializables.writeKeyValues(builder, metadata);
+ org.apache.arrow.flatbuf.Schema.startSchema(builder);
+ org.apache.arrow.flatbuf.Schema.addFields(builder, fieldsOffset);
+ org.apache.arrow.flatbuf.Schema.addCustomMetadata(builder, metadataOffset);
+ return org.apache.arrow.flatbuf.Schema.endSchema(builder);
+ }
+
+ /**
+ * Returns the serialized flatbuffer representation of this schema.
+ */
+ public byte[] toByteArray() {
+ FlatBufferBuilder builder = new FlatBufferBuilder();
+ int schemaOffset = this.getSchema(builder);
+ builder.finish(schemaOffset);
+ ByteBuffer bb = builder.dataBuffer();
+ byte[] bytes = new byte[bb.remaining()];
+ bb.get(bytes);
+ return bytes;
+ }
+
+ @Override
+ public int hashCode() {
+ return Objects.hash(fields, metadata);
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ if (!(obj instanceof Schema)) {
+ return false;
+ }
+ return Objects.equals(this.fields, ((Schema) obj).fields) &&
+ Objects.equals(this.metadata, ((Schema) obj).metadata);
+ }
+
+ @Override
+ public String toString() {
+ String meta = metadata.isEmpty() ? "" : "(metadata: " + metadata.toString() + ")";
+ return "Schema<" + fields.stream().map(t -> t.toString()).collect(Collectors.joining(", ")) + ">" + meta;
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/ByteArrayReadableSeekableByteChannel.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/ByteArrayReadableSeekableByteChannel.java
new file mode 100644
index 000000000..b8ce9bde4
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/ByteArrayReadableSeekableByteChannel.java
@@ -0,0 +1,86 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.util;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.channels.SeekableByteChannel;
+
+/**
+ * A {@link SeekableByteChannel} backed by a byte array.
+ */
+public class ByteArrayReadableSeekableByteChannel implements SeekableByteChannel {
+ private byte[] byteArray;
+ private int position = 0;
+
+ /**
+ * Construct a new object using the given byteArray as a backing store.
+ */
+ public ByteArrayReadableSeekableByteChannel(byte[] byteArray) {
+ if (byteArray == null) {
+ throw new NullPointerException();
+ }
+ this.byteArray = byteArray;
+ }
+
+ @Override
+ public boolean isOpen() {
+ return byteArray != null;
+ }
+
+ @Override
+ public void close() throws IOException {
+ byteArray = null;
+ }
+
+ @Override
+ public int read(final ByteBuffer dst) throws IOException {
+ int remainingInBuf = byteArray.length - this.position;
+ int length = Math.min(dst.remaining(), remainingInBuf);
+ dst.put(this.byteArray, this.position, length);
+ this.position += length;
+ return length;
+ }
+
+ @Override
+ public long position() throws IOException {
+ return this.position;
+ }
+
+ @Override
+ public SeekableByteChannel position(final long newPosition) throws IOException {
+ this.position = (int) newPosition;
+ return this;
+ }
+
+ @Override
+ public long size() throws IOException {
+ return this.byteArray.length;
+ }
+
+ @Override
+ public int write(final ByteBuffer src) throws IOException {
+ throw new UnsupportedOperationException("Read only");
+ }
+
+ @Override
+ public SeekableByteChannel truncate(final long size) throws IOException {
+ throw new UnsupportedOperationException("Read only");
+ }
+
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/CallBack.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/CallBack.java
new file mode 100644
index 000000000..ddeca59b0
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/CallBack.java
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.util;
+
+/**
+ * Generic callback interface to be notified of events on value vectors.
+ */
+public interface CallBack {
+ void doWork();
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/DataSizeRoundingUtil.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/DataSizeRoundingUtil.java
new file mode 100644
index 000000000..3af2c9837
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/DataSizeRoundingUtil.java
@@ -0,0 +1,99 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.util;
+
+/**
+ * Utilities for rounding data size.
+ */
+public final class DataSizeRoundingUtil {
+
+ /**
+ * The mask for rounding an integer to a multiple of 8.
+ * (i.e. clear the lowest 3 bits)
+ */
+ public static int ROUND_8_MASK_INT = 0xFFFFFFF8;
+
+ /**
+ * The mask for rounding a long integer to a multiple of 8.
+ * (i.e. clear the lowest 3 bits)
+ */
+ public static long ROUND_8_MASK_LONG = 0xFFFFFFFFFFFFFFF8L;
+
+ /**
+ * The number of bits to shift for dividing by 8.
+ */
+ public static int DIVIDE_BY_8_SHIFT_BITS = 3;
+
+ /**
+ * Round up the number to the nearest multiple of 8.
+ * @param input the number to round.
+ * @return the rounded number.
+ */
+ public static int roundUpTo8Multiple(int input) {
+ return (input + 7) & ROUND_8_MASK_INT;
+ }
+
+ /**
+ * Round up the number to the nearest multiple of 8.
+ * @param input the number to round.
+ * @return the rounded number
+ */
+ public static long roundUpTo8Multiple(long input) {
+ return (input + 7L) & ROUND_8_MASK_LONG;
+ }
+
+ /**
+ * Round down the number to the nearest multiple of 8.
+ * @param input the number to round.
+ * @return the rounded number.
+ */
+ public static int roundDownTo8Multiple(int input) {
+ return input & ROUND_8_MASK_INT;
+ }
+
+ /**
+ * Round down the number to the nearest multiple of 8.
+ * @param input the number to round.
+ * @return the rounded number
+ */
+ public static long roundDownTo8Multiple(long input) {
+ return input & ROUND_8_MASK_LONG;
+ }
+
+ /**
+ * A fast way to compute Math.ceil(input / 8.0).
+ * @param input the input number.
+ * @return the computed number.
+ */
+ public static int divideBy8Ceil(int input) {
+ return (input + 7) >>> DIVIDE_BY_8_SHIFT_BITS;
+ }
+
+ /**
+ * A fast way to compute Math.ceil(input / 8.0).
+ * @param input the input number.
+ * @return the computed number.
+ */
+ public static long divideBy8Ceil(long input) {
+ return (input + 7) >>> (long) DIVIDE_BY_8_SHIFT_BITS;
+ }
+
+ private DataSizeRoundingUtil() {
+
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/DateUtility.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/DateUtility.java
new file mode 100644
index 000000000..9e8b6d26f
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/DateUtility.java
@@ -0,0 +1,134 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.util;
+
+import java.time.Instant;
+import java.time.LocalDateTime;
+import java.time.format.DateTimeFormatter;
+import java.time.format.DateTimeFormatterBuilder;
+import java.time.temporal.ChronoUnit;
+import java.util.TimeZone;
+
+/** Utility class for Date, DateTime, TimeStamp, Interval data types. */
+public class DateUtility {
+ private DateUtility() {}
+
+ private static final String UTC = "UTC";
+
+ public static final DateTimeFormatter formatDate = DateTimeFormatter.ofPattern("yyyy-MM-dd");
+ public static final DateTimeFormatter formatTimeStampMilli = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss.SSS");
+ public static final DateTimeFormatter formatTimeStampTZ = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss.SSS ZZZ");
+ public static final DateTimeFormatter formatTime = DateTimeFormatter.ofPattern("HH:mm:ss.SSS");
+
+ public static DateTimeFormatter dateTimeTZFormat = null;
+ public static DateTimeFormatter timeFormat = null;
+
+ public static final int yearsToMonths = 12;
+ public static final int hoursToMillis = 60 * 60 * 1000;
+ public static final int minutesToMillis = 60 * 1000;
+ public static final int secondsToMillis = 1000;
+ public static final int monthToStandardDays = 30;
+ public static final long monthsToMillis = 2592000000L; // 30 * 24 * 60 * 60 * 1000
+ public static final int daysToStandardMillis = 24 * 60 * 60 * 1000;
+
+ /** Returns the date time formatter used to parse date strings. */
+ public static DateTimeFormatter getDateTimeFormatter() {
+
+ if (dateTimeTZFormat == null) {
+ DateTimeFormatter dateFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd");
+ DateTimeFormatter optionalTime = DateTimeFormatter.ofPattern(" HH:mm:ss");
+ DateTimeFormatter optionalSec = DateTimeFormatter.ofPattern(".SSS");
+ DateTimeFormatter optionalZone = DateTimeFormatter.ofPattern(" ZZZ");
+
+ dateTimeTZFormat = new DateTimeFormatterBuilder().append(dateFormatter).appendOptional(optionalTime)
+ .appendOptional(optionalSec).appendOptional(optionalZone).toFormatter();
+ }
+
+ return dateTimeTZFormat;
+ }
+
+ /** Returns time formatter used to parse time strings. */
+ public static DateTimeFormatter getTimeFormatter() {
+ if (timeFormat == null) {
+ DateTimeFormatter timeFormatter = DateTimeFormatter.ofPattern("HH:mm:ss");
+ DateTimeFormatter optionalSec = DateTimeFormatter.ofPattern(".SSS");
+ timeFormat = new DateTimeFormatterBuilder().append(timeFormatter).appendOptional(optionalSec).toFormatter();
+ }
+ return timeFormat;
+ }
+
+ /**
+ * Convert milliseconds from epoch to a LocalDateTime with timeZone offset.
+ *
+ * @param epochMillis milliseconds from epoch
+ * @param timeZone current timeZone
+ * @return LocalDateTime object with timeZone offset
+ */
+ public static LocalDateTime getLocalDateTimeFromEpochMilli(long epochMillis, String timeZone) {
+ final LocalDateTime localDateTime = LocalDateTime.ofInstant(
+ Instant.ofEpochMilli(epochMillis), TimeZone.getTimeZone(timeZone).toZoneId());
+ return localDateTime;
+ }
+
+ /**
+ * Convert milliseconds from epoch to a LocalDateTime with UTC offset.
+ */
+ public static LocalDateTime getLocalDateTimeFromEpochMilli(long epochMillis) {
+ return getLocalDateTimeFromEpochMilli(epochMillis, UTC);
+ }
+
+ /**
+ * Convert microseconds from epoch to a LocalDateTime with timeZone offset.
+ *
+ * @param epochMicros microseconds from epoch
+ * @param timeZone current timeZone
+ * @return LocalDateTime object with timeZone offset
+ */
+ public static LocalDateTime getLocalDateTimeFromEpochMicro(long epochMicros, String timeZone) {
+ final long millis = java.util.concurrent.TimeUnit.MICROSECONDS.toMillis(epochMicros);
+ final long addl_micros = epochMicros - (millis * 1000);
+ return DateUtility.getLocalDateTimeFromEpochMilli(millis, timeZone).plus(addl_micros, ChronoUnit.MICROS);
+ }
+
+ /**
+ * Convert microseconds from epoch to a LocalDateTime with UTC offset.
+ */
+ public static LocalDateTime getLocalDateTimeFromEpochMicro(long epochMicros) {
+ return getLocalDateTimeFromEpochMicro(epochMicros, UTC);
+ }
+
+ /**
+ * Convert nanoseconds from epoch to a LocalDateTime with timeZone offset.
+ *
+ * @param epochNanos nanoseconds from epoch
+ * @param timeZone current timeZone
+ * @return LocalDateTime object with timeZone offset
+ */
+ public static LocalDateTime getLocalDateTimeFromEpochNano(long epochNanos, String timeZone) {
+ final long millis = java.util.concurrent.TimeUnit.NANOSECONDS.toMillis(epochNanos);
+ final long addl_nanos = epochNanos - (millis * 1000 * 1000);
+ return DateUtility.getLocalDateTimeFromEpochMilli(millis, timeZone).plusNanos(addl_nanos);
+ }
+
+ /**
+ * Convert nanoseconds from epoch to a LocalDateTime with UTC offset.
+ */
+ public static LocalDateTime getLocalDateTimeFromEpochNano(long epochNanos) {
+ return getLocalDateTimeFromEpochNano(epochNanos, UTC);
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/DecimalUtility.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/DecimalUtility.java
new file mode 100644
index 000000000..f778bcb20
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/DecimalUtility.java
@@ -0,0 +1,188 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.util;
+
+import java.math.BigDecimal;
+import java.math.BigInteger;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+
+import org.apache.arrow.memory.ArrowBuf;
+
+import io.netty.util.internal.PlatformDependent;
+
+/**
+ * Utility methods for configurable precision Decimal values (e.g. {@link BigDecimal}).
+ */
+public class DecimalUtility {
+ private DecimalUtility() {}
+
+ public static final byte [] zeroes = new byte[] {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+ public static final byte [] minus_one = new byte[] {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1};
+ private static final boolean LITTLE_ENDIAN = ByteOrder.nativeOrder() == ByteOrder.LITTLE_ENDIAN;
+
+ /**
+ * Read an ArrowType.Decimal at the given value index in the ArrowBuf and convert to a BigDecimal
+ * with the given scale.
+ */
+ public static BigDecimal getBigDecimalFromArrowBuf(ArrowBuf bytebuf, int index, int scale, int byteWidth) {
+ byte[] value = new byte[byteWidth];
+ byte temp;
+ final long startIndex = (long) index * byteWidth;
+
+ bytebuf.getBytes(startIndex, value, 0, byteWidth);
+ if (LITTLE_ENDIAN) {
+ // Decimal stored as native endian, need to swap bytes to make BigDecimal if native endian is LE
+ int stop = byteWidth / 2;
+ for (int i = 0, j; i < stop; i++) {
+ temp = value[i];
+ j = (byteWidth - 1) - i;
+ value[i] = value[j];
+ value[j] = temp;
+ }
+ }
+ BigInteger unscaledValue = new BigInteger(value);
+ return new BigDecimal(unscaledValue, scale);
+ }
+
+ /**
+ * Read an ArrowType.Decimal from the ByteBuffer and convert to a BigDecimal with the given
+ * scale.
+ */
+ public static BigDecimal getBigDecimalFromByteBuffer(ByteBuffer bytebuf, int scale, int byteWidth) {
+ byte[] value = new byte[byteWidth];
+ bytebuf.get(value);
+ BigInteger unscaledValue = new BigInteger(value);
+ return new BigDecimal(unscaledValue, scale);
+ }
+
+ /**
+ * Read an ArrowType.Decimal from the ArrowBuf at the given value index and return it as a byte
+ * array.
+ */
+ public static byte[] getByteArrayFromArrowBuf(ArrowBuf bytebuf, int index, int byteWidth) {
+ final byte[] value = new byte[byteWidth];
+ final long startIndex = (long) index * byteWidth;
+ bytebuf.getBytes(startIndex, value, 0, byteWidth);
+ return value;
+ }
+
+ /**
+ * Check that the BigDecimal scale equals the vectorScale and that the BigDecimal precision is
+ * less than or equal to the vectorPrecision. If not, then an UnsupportedOperationException is
+ * thrown, otherwise returns true.
+ */
+ public static boolean checkPrecisionAndScale(BigDecimal value, int vectorPrecision, int vectorScale) {
+ if (value.scale() != vectorScale) {
+ throw new UnsupportedOperationException("BigDecimal scale must equal that in the Arrow vector: " +
+ value.scale() + " != " + vectorScale);
+ }
+ if (value.precision() > vectorPrecision) {
+ throw new UnsupportedOperationException("BigDecimal precision can not be greater than that in the Arrow " +
+ "vector: " + value.precision() + " > " + vectorPrecision);
+ }
+ return true;
+ }
+
+ /**
+ * Check that the decimal scale equals the vectorScale and that the decimal precision is
+ * less than or equal to the vectorPrecision. If not, then an UnsupportedOperationException is
+ * thrown, otherwise returns true.
+ */
+ public static boolean checkPrecisionAndScale(int decimalPrecision, int decimalScale, int vectorPrecision,
+ int vectorScale) {
+ if (decimalScale != vectorScale) {
+ throw new UnsupportedOperationException("BigDecimal scale must equal that in the Arrow vector: " +
+ decimalScale + " != " + vectorScale);
+ }
+ if (decimalPrecision > vectorPrecision) {
+ throw new UnsupportedOperationException("BigDecimal precision can not be greater than that in the Arrow " +
+ "vector: " + decimalPrecision + " > " + vectorPrecision);
+ }
+ return true;
+ }
+
+ /**
+ * Write the given BigDecimal to the ArrowBuf at the given value index. Will throw an
+ * UnsupportedOperationException if the decimal size is greater than the Decimal vector byte
+ * width.
+ */
+ public static void writeBigDecimalToArrowBuf(BigDecimal value, ArrowBuf bytebuf, int index, int byteWidth) {
+ final byte[] bytes = value.unscaledValue().toByteArray();
+ writeByteArrayToArrowBufHelper(bytes, bytebuf, index, byteWidth);
+ }
+
+ /**
+ * Write the given long to the ArrowBuf at the given value index.
+ * This routine extends the original sign bit to a new upper area in 128-bit or 256-bit.
+ */
+ public static void writeLongToArrowBuf(long value, ArrowBuf bytebuf, int index, int byteWidth) {
+ if (byteWidth != 16 && byteWidth != 32) {
+ throw new UnsupportedOperationException("DecimalUtility.writeLongToArrowBuf() currently supports " +
+ "128-bit or 256-bit width data");
+ }
+ final long addressOfValue = bytebuf.memoryAddress() + (long) index * byteWidth;
+ final long padValue = Long.signum(value) == -1 ? -1L : 0L;
+ if (LITTLE_ENDIAN) {
+ PlatformDependent.putLong(addressOfValue, value);
+ for (int i = 1; i <= (byteWidth - 8) / 8; i++) {
+ PlatformDependent.putLong(addressOfValue + Long.BYTES * i, padValue);
+ }
+ } else {
+ for (int i = 0; i < (byteWidth - 8) / 8; i++) {
+ PlatformDependent.putLong(addressOfValue + Long.BYTES * i, padValue);
+ }
+ PlatformDependent.putLong(addressOfValue + Long.BYTES * (byteWidth - 8) / 8, value);
+ }
+ }
+
+ /**
+ * Write the given byte array to the ArrowBuf at the given value index. Will throw an
+ * UnsupportedOperationException if the decimal size is greater than the Decimal vector byte
+ * width.
+ */
+ public static void writeByteArrayToArrowBuf(byte[] bytes, ArrowBuf bytebuf, int index, int byteWidth) {
+ writeByteArrayToArrowBufHelper(bytes, bytebuf, index, byteWidth);
+ }
+
+ private static void writeByteArrayToArrowBufHelper(byte[] bytes, ArrowBuf bytebuf, int index, int byteWidth) {
+ final long startIndex = (long) index * byteWidth;
+ if (bytes.length > byteWidth) {
+ throw new UnsupportedOperationException("Decimal size greater than " + byteWidth + " bytes: " + bytes.length);
+ }
+
+ byte [] padBytes = bytes[0] < 0 ? minus_one : zeroes;
+ if (LITTLE_ENDIAN) {
+ // Decimal stored as native-endian, need to swap data bytes before writing to ArrowBuf if LE
+ byte[] bytesLE = new byte[bytes.length];
+ for (int i = 0; i < bytes.length; i++) {
+ bytesLE[i] = bytes[bytes.length - 1 - i];
+ }
+
+ // Write LE data
+ bytebuf.setBytes(startIndex, bytesLE, 0, bytes.length);
+ bytebuf.setBytes(startIndex + bytes.length, padBytes, 0, byteWidth - bytes.length);
+ } else {
+ // Write BE data
+ bytebuf.setBytes(startIndex + byteWidth - bytes.length, bytes, 0, bytes.length);
+ bytebuf.setBytes(startIndex, padBytes, 0, byteWidth - bytes.length);
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/DictionaryUtility.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/DictionaryUtility.java
new file mode 100644
index 000000000..9592f3975
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/DictionaryUtility.java
@@ -0,0 +1,145 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.util;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.dictionary.Dictionary;
+import org.apache.arrow.vector.dictionary.DictionaryProvider;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.DictionaryEncoding;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+
+/**
+ * Utility methods for working with Dictionaries used in Dictionary encodings.
+ */
+public class DictionaryUtility {
+ private DictionaryUtility() {}
+
+ /**
+ * Convert field and child fields that have a dictionary encoding to message format, so fields
+ * have the dictionary type.
+ *
+ * <p>NOTE: in the message format, fields have the dictionary type
+ * in the memory format, they have the index type
+ */
+ public static Field toMessageFormat(Field field, DictionaryProvider provider, Set<Long> dictionaryIdsUsed) {
+ if (!needConvertToMessageFormat(field)) {
+ return field;
+ }
+ DictionaryEncoding encoding = field.getDictionary();
+ List<Field> children;
+
+
+ ArrowType type;
+ if (encoding == null) {
+ type = field.getType();
+ children = field.getChildren();
+ } else {
+ long id = encoding.getId();
+ Dictionary dictionary = provider.lookup(id);
+ if (dictionary == null) {
+ throw new IllegalArgumentException("Could not find dictionary with ID " + id);
+ }
+ type = dictionary.getVectorType();
+ children = dictionary.getVector().getField().getChildren();
+
+ dictionaryIdsUsed.add(id);
+ }
+
+ final List<Field> updatedChildren = new ArrayList<>(children.size());
+ for (Field child : children) {
+ updatedChildren.add(toMessageFormat(child, provider, dictionaryIdsUsed));
+ }
+
+ return new Field(field.getName(), new FieldType(field.isNullable(), type, encoding, field.getMetadata()),
+ updatedChildren);
+ }
+
+ /**
+ * Checks if it is required to convert the field to message format.
+ * @param field the field to check.
+ * @return true if a conversion is required, and false otherwise.
+ */
+ public static boolean needConvertToMessageFormat(Field field) {
+ DictionaryEncoding encoding = field.getDictionary();
+
+ if (encoding != null) {
+ // when encoding is not null, the type must be determined from the
+ // dictionary, so conversion must be performed.
+ return true;
+ }
+
+ List<Field> children = field.getChildren();
+ for (Field child : children) {
+ if (needConvertToMessageFormat(child)) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ /**
+ * Convert field and child fields that have a dictionary encoding to memory format, so fields
+ * have the index type.
+ */
+ public static Field toMemoryFormat(Field field, BufferAllocator allocator, Map<Long, Dictionary> dictionaries) {
+ DictionaryEncoding encoding = field.getDictionary();
+ List<Field> children = field.getChildren();
+
+ if (encoding == null && children.isEmpty()) {
+ return field;
+ }
+
+ List<Field> updatedChildren = new ArrayList<>(children.size());
+ for (Field child : children) {
+ updatedChildren.add(toMemoryFormat(child, allocator, dictionaries));
+ }
+
+ ArrowType type;
+ List<Field> fieldChildren = null;
+ if (encoding == null) {
+ type = field.getType();
+ fieldChildren = updatedChildren;
+ } else {
+ // re-type the field for in-memory format
+ type = encoding.getIndexType();
+ if (type == null) {
+ type = new ArrowType.Int(32, true);
+ }
+ // get existing or create dictionary vector
+ if (!dictionaries.containsKey(encoding.getId())) {
+ // create a new dictionary vector for the values
+ String dictName = "DICT" + encoding.getId();
+ Field dictionaryField = new Field(dictName,
+ new FieldType(field.isNullable(), field.getType(), null, null), updatedChildren);
+ FieldVector dictionaryVector = dictionaryField.createVector(allocator);
+ dictionaries.put(encoding.getId(), new Dictionary(dictionaryVector, encoding));
+ }
+ }
+
+ return new Field(field.getName(), new FieldType(field.isNullable(), type, encoding, field.getMetadata()),
+ fieldChildren);
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/ElementAddressableVectorIterator.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/ElementAddressableVectorIterator.java
new file mode 100644
index 000000000..89c100779
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/ElementAddressableVectorIterator.java
@@ -0,0 +1,86 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.util;
+
+import java.util.Iterator;
+
+import org.apache.arrow.memory.util.ArrowBufPointer;
+import org.apache.arrow.memory.util.hash.ArrowBufHasher;
+import org.apache.arrow.memory.util.hash.SimpleHasher;
+import org.apache.arrow.vector.ElementAddressableVector;
+
+/**
+ * Iterator for traversing elements of a {@link ElementAddressableVector}.
+ * @param <T> vector type.
+ */
+public class ElementAddressableVectorIterator<T extends ElementAddressableVector>
+ implements Iterator<ArrowBufPointer> {
+
+ private final T vector;
+
+ /**
+ * Index of the next element to access.
+ */
+ private int index = 0;
+
+ private final ArrowBufPointer reusablePointer;
+
+ /**
+ * Constructs an iterator for the {@link ElementAddressableVector}.
+ * @param vector the vector to iterate.
+ */
+ public ElementAddressableVectorIterator(T vector) {
+ this(vector, SimpleHasher.INSTANCE);
+ }
+
+ /**
+ * Constructs an iterator for the {@link ElementAddressableVector}.
+ * @param vector the vector to iterate.
+ * @param hasher the hasher to calculate the hash code.
+ */
+ public ElementAddressableVectorIterator(T vector, ArrowBufHasher hasher) {
+ this.vector = vector;
+ reusablePointer = new ArrowBufPointer(hasher);
+ }
+
+ @Override
+ public boolean hasNext() {
+ return index < vector.getValueCount();
+ }
+
+ /**
+ * Retrieves the next pointer from the vector.
+ * @return the pointer pointing to the next element in the vector.
+ * Note that the returned pointer is only valid before the next call to this method.
+ */
+ @Override
+ public ArrowBufPointer next() {
+ vector.getDataPointer(index, reusablePointer);
+ index += 1;
+ return reusablePointer;
+ }
+
+ /**
+ * Retrieves the next pointer from the vector.
+ * @param outPointer the pointer to populate.
+ */
+ public void next(ArrowBufPointer outPointer) {
+ vector.getDataPointer(index, outPointer);
+ index += 1;
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/JsonStringArrayList.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/JsonStringArrayList.java
new file mode 100644
index 000000000..2ca71ec63
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/JsonStringArrayList.java
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.util;
+
+import java.util.ArrayList;
+
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+/**
+ * Extension of {@link ArrayList} that {@link #toString()} method returns the serialized JSON
+ * version of its members (or throws an exception if they can't be converted to JSON).
+ *
+ * @param <E> Type of value held in the list.
+ */
+public class JsonStringArrayList<E> extends ArrayList<E> {
+
+ private static ObjectMapper mapper;
+
+ static {
+ mapper = new ObjectMapper();
+ }
+
+ public JsonStringArrayList() {
+ super();
+ }
+
+ public JsonStringArrayList(int size) {
+ super(size);
+ }
+
+ @Override
+ public final String toString() {
+ try {
+ return mapper.writeValueAsString(this);
+ } catch (JsonProcessingException e) {
+ throw new IllegalStateException("Cannot serialize array list to JSON string", e);
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/JsonStringHashMap.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/JsonStringHashMap.java
new file mode 100644
index 000000000..f41ae4ee2
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/JsonStringHashMap.java
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.util;
+
+import java.util.LinkedHashMap;
+
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+/**
+ * Simple class that extends the regular java.util.HashMap but overrides the
+ * toString() method of the HashMap class to produce a JSON string instead
+ *
+ * @param <K> The type of the key for the map.
+ * @param <V> The type of the value for the map.
+ */
+public class JsonStringHashMap<K, V> extends LinkedHashMap<K, V> {
+
+ private static ObjectMapper mapper;
+
+ static {
+ mapper = new ObjectMapper();
+ }
+
+ @Override
+ public final String toString() {
+ try {
+ return mapper.writeValueAsString(this);
+ } catch (JsonProcessingException e) {
+ throw new IllegalStateException("Cannot serialize hash map to JSON string", e);
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/MapWithOrdinal.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/MapWithOrdinal.java
new file mode 100644
index 000000000..cf157031b
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/MapWithOrdinal.java
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.util;
+
+import java.util.Collection;
+import java.util.Map;
+import java.util.Set;
+
+/**
+ * An implementation of a map that supports constant time look-up by a generic key or an ordinal.
+ *
+ * <p>This class extends the functionality a regular {@link Map} with ordinal lookup support.
+ * Upon insertion an unused ordinal is assigned to the inserted (key, value) tuple.
+ * Upon update the same ordinal id is re-used while value is replaced.
+ * Upon deletion of an existing item, its corresponding ordinal is recycled and could be used by another item.
+ *
+ * <p>For any instance with N items, this implementation guarantees that ordinals are in the range of [0, N). However,
+ * the ordinal assignment is dynamic and may change after an insertion or deletion. Consumers of this class are
+ * responsible for explicitly checking the ordinal corresponding to a key via
+ * {@link MultiMapWithOrdinal#getOrdinal(Object)} before attempting to execute a lookup
+ * with an ordinal.
+ *
+ * @param <K> key type
+ * @param <V> value type
+ */
+public interface MapWithOrdinal<K, V> {
+ V getByOrdinal(int id);
+
+ int getOrdinal(K key);
+
+ int size();
+
+ boolean isEmpty();
+
+ V get(K key);
+
+ Collection<V> getAll(K key);
+
+ boolean put(K key, V value, boolean overwrite);
+
+ Collection<V> values();
+
+ boolean containsKey(K key);
+
+ boolean remove(K key, V value);
+
+ boolean removeAll(K key);
+
+ void clear();
+
+ Set<K> keys();
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/MapWithOrdinalImpl.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/MapWithOrdinalImpl.java
new file mode 100644
index 000000000..41ce1fc0d
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/MapWithOrdinalImpl.java
@@ -0,0 +1,248 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.util;
+
+import java.util.AbstractMap;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.stream.Collectors;
+import java.util.stream.StreamSupport;
+
+import org.apache.arrow.util.Preconditions;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import io.netty.util.collection.IntObjectHashMap;
+import io.netty.util.collection.IntObjectMap;
+
+/**
+ * An implementation of map that supports constant time look-up by a generic key or an ordinal.
+ *
+ * <p>This class extends the functionality a regular {@link Map} with ordinal lookup support.
+ * Upon insertion an unused ordinal is assigned to the inserted (key, value) tuple.
+ * Upon update the same ordinal id is re-used while value is replaced.
+ * Upon deletion of an existing item, its corresponding ordinal is recycled and could be used by another item.
+ *
+ * <p>For any instance with N items, this implementation guarantees that ordinals are in the range of [0, N). However,
+ * the ordinal assignment is dynamic and may change after an insertion or deletion. Consumers of this class are
+ * responsible for explicitly checking the ordinal corresponding to a key via
+ * {@link MapWithOrdinalImpl#getOrdinal(Object)} before attempting to execute a lookup
+ * with an ordinal.
+ *
+ * @param <K> key type
+ * @param <V> value type
+ */
+public class MapWithOrdinalImpl<K, V> implements MapWithOrdinal<K, V> {
+ private static final Logger logger = LoggerFactory.getLogger(MapWithOrdinalImpl.class);
+
+ private final Map<K, Map.Entry<Integer, V>> primary = new HashMap<>();
+ private final IntObjectHashMap<V> secondary = new IntObjectHashMap<>();
+
+ private final Map<K, V> delegate = new Map<K, V>() {
+ @Override
+ public boolean isEmpty() {
+ return size() == 0;
+ }
+
+ @Override
+ public int size() {
+ return primary.size();
+ }
+
+ @Override
+ public boolean containsKey(Object key) {
+ return primary.containsKey(key);
+ }
+
+ @Override
+ public boolean containsValue(Object value) {
+ return primary.containsValue(value);
+ }
+
+ @Override
+ public V get(Object key) {
+ Entry<Integer, V> pair = primary.get(key);
+ if (pair != null) {
+ return pair.getValue();
+ }
+ return null;
+ }
+
+ @Override
+ public V put(K key, V value) {
+ final Entry<Integer, V> oldPair = primary.get(key);
+ // if key exists try replacing otherwise, assign a new ordinal identifier
+ final int ordinal = oldPair == null ? primary.size() : oldPair.getKey();
+ primary.put(key, new AbstractMap.SimpleImmutableEntry<>(ordinal, value));
+ secondary.put(ordinal, value);
+ return oldPair == null ? null : oldPair.getValue();
+ }
+
+ public boolean put(K key, V value, boolean override) {
+ return put(key, value) != null;
+ }
+
+ @Override
+ public V remove(Object key) {
+ final Entry<Integer, V> oldPair = primary.remove(key);
+ if (oldPair != null) {
+ final int lastOrdinal = secondary.size();
+ final V last = secondary.get(lastOrdinal);
+ // normalize mappings so that all numbers until primary.size() is assigned
+ // swap the last element with the deleted one
+ secondary.put(oldPair.getKey(), last);
+ primary.put((K) key, new AbstractMap.SimpleImmutableEntry<>(oldPair.getKey(), last));
+ }
+ return oldPair == null ? null : oldPair.getValue();
+ }
+
+ @Override
+ public void putAll(Map<? extends K, ? extends V> m) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public void clear() {
+ primary.clear();
+ secondary.clear();
+ }
+
+ @Override
+ public Set<K> keySet() {
+ return primary.keySet();
+ }
+
+ @Override
+ public Collection<V> values() {
+ return StreamSupport.stream(secondary.entries().spliterator(), false)
+ .map((IntObjectMap.PrimitiveEntry<V> t) -> Preconditions.checkNotNull(t).value())
+ .collect(Collectors.toList());
+ }
+
+ @Override
+ public Set<Entry<K, V>> entrySet() {
+ return primary.entrySet().stream()
+ .map(entry -> new AbstractMap.SimpleImmutableEntry<>(entry.getKey(), entry.getValue().getValue()))
+ .collect(Collectors.toSet());
+ }
+ };
+
+ /**
+ * Returns the value corresponding to the given ordinal.
+ *
+ * @param id ordinal value for lookup
+ * @return an instance of V
+ */
+ public V getByOrdinal(int id) {
+ return secondary.get(id);
+ }
+
+ /**
+ * Returns the ordinal corresponding to the given key.
+ *
+ * @param key key for ordinal lookup
+ * @return ordinal value corresponding to key if it exists or -1
+ */
+ public int getOrdinal(K key) {
+ Map.Entry<Integer, V> pair = primary.get(key);
+ if (pair != null) {
+ return pair.getKey();
+ }
+ return -1;
+ }
+
+ @Override
+ public int size() {
+ return delegate.size();
+ }
+
+ @Override
+ public boolean isEmpty() {
+ return delegate.isEmpty();
+ }
+
+ @Override
+ public Collection<V> getAll(K key) {
+ if (delegate.containsKey(key)) {
+ List<V> list = new ArrayList<>(1);
+ list.add(get(key));
+ return list;
+ }
+ return null;
+ }
+
+ @Override
+ public V get(K key) {
+ return delegate.get(key);
+ }
+
+ /**
+ * Inserts the tuple (key, value) into the map extending the semantics of {@link Map#put} with automatic ordinal
+ * assignment. A new ordinal is assigned if key does not exists. Otherwise the same ordinal is re-used but the value
+ * is replaced.
+ *
+ * @see java.util.Map#put
+ */
+ @Override
+ public boolean put(K key, V value, boolean overwrite) {
+ return delegate.put(key, value) != null;
+ }
+
+ @Override
+ public Collection<V> values() {
+ return delegate.values();
+ }
+
+ @Override
+ public boolean remove(K key, V value) {
+ return false;
+ }
+
+ @Override
+ public boolean containsKey(Object key) {
+ return delegate.containsKey(key);
+ }
+
+ /**
+ * Removes the element corresponding to the key if exists extending the semantics of {@link java.util.Map#remove}
+ * with ordinal re-cycling. The ordinal corresponding to the given key may be re-assigned to another tuple. It is
+ * important that consumer checks the ordinal value via
+ * {@link MapWithOrdinalImpl#getOrdinal(Object)} before attempting to look-up by ordinal.
+ *
+ * @see java.util.Map#remove
+ */
+ @Override
+ public boolean removeAll(K key) {
+ return delegate.remove(key) != null;
+ }
+
+ @Override
+ public void clear() {
+ delegate.clear();
+ }
+
+ @Override
+ public Set<K> keys() {
+ return delegate.keySet();
+ }
+
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/MultiMapWithOrdinal.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/MultiMapWithOrdinal.java
new file mode 100644
index 000000000..5fbb45a7a
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/MultiMapWithOrdinal.java
@@ -0,0 +1,230 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.util;
+
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.LinkedHashMap;
+import java.util.Map;
+import java.util.Optional;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+import io.netty.util.collection.IntObjectHashMap;
+
+/**
+ * An implementation of a multimap that supports constant time look-up by a generic key or an ordinal.
+ *
+ * <p>This class extends the functionality a regular {@link Map} with ordinal lookup support.
+ * Upon insertion an unused ordinal is assigned to the inserted (key, value) tuple.
+ * Upon update the same ordinal id is re-used while value is replaced.
+ * Upon deletion of an existing item, its corresponding ordinal is recycled and could be used by another item.
+ *
+ * <p>For any instance with N items, this implementation guarantees that ordinals are in the range of [0, N). However,
+ * the ordinal assignment is dynamic and may change after an insertion or deletion. Consumers of this class are
+ * responsible for explicitly checking the ordinal corresponding to a key via
+ * {@link MultiMapWithOrdinal#getOrdinal(Object)} before attempting to execute a lookup
+ * with an ordinal.
+ *
+ * @param <K> key type
+ * @param <V> value type
+ */
+public class MultiMapWithOrdinal<K, V> implements MapWithOrdinal<K, V> {
+
+ private final Map<K, Set<Integer>> keyToOrdinal = new LinkedHashMap<>();
+ private final IntObjectHashMap<V> ordinalToValue = new IntObjectHashMap<>();
+
+ /**
+ * Returns the value corresponding to the given ordinal.
+ *
+ * @param id ordinal value for lookup
+ * @return an instance of V
+ */
+ @Override
+ public V getByOrdinal(int id) {
+ return ordinalToValue.get(id);
+ }
+
+ /**
+ * Returns the ordinal corresponding to the given key.
+ *
+ * @param key key for ordinal lookup
+ * @return ordinal value corresponding to key if it exists or -1
+ */
+ @Override
+ public int getOrdinal(K key) {
+ Set<Integer> pair = getOrdinals(key);
+ if (!pair.isEmpty()) {
+ return pair.iterator().next();
+ }
+ return -1;
+ }
+
+ private Set<Integer> getOrdinals(K key) {
+ return keyToOrdinal.getOrDefault(key, new HashSet<>());
+ }
+
+ @Override
+ public int size() {
+ return ordinalToValue.size();
+ }
+
+ @Override
+ public boolean isEmpty() {
+ return ordinalToValue.isEmpty();
+ }
+
+ /**
+ * get set of values for key.
+ */
+ @Override
+ public V get(K key) {
+ Set<Integer> ordinals = keyToOrdinal.get(key);
+ if (ordinals == null) {
+ return null;
+ }
+ return ordinals.stream().map(ordinalToValue::get).collect(Collectors.toList()).get(0);
+ }
+
+ /**
+ * get set of values for key.
+ */
+ @Override
+ public Collection<V> getAll(K key) {
+ Set<Integer> ordinals = keyToOrdinal.get(key);
+ if (ordinals == null) {
+ return null;
+ }
+ return ordinals.stream().map(ordinalToValue::get).collect(Collectors.toList());
+ }
+
+ /**
+ * Inserts the tuple (key, value) into the multimap with automatic ordinal assignment.
+ *
+ * A new ordinal is assigned if key/value pair does not exists.
+ *
+ * If overwrite is true the existing key will be overwritten with value else value will be appended to the multimap.
+ */
+ @Override
+ public boolean put(K key, V value, boolean overwrite) {
+ if (overwrite) {
+ removeAll(key);
+ }
+ Set<Integer> ordinalSet = getOrdinals(key);
+ int nextOrdinal = ordinalToValue.size();
+ ordinalToValue.put(nextOrdinal, value);
+ boolean changed = ordinalSet.add(nextOrdinal);
+ keyToOrdinal.put(key, ordinalSet);
+ return changed;
+ }
+
+ @Override
+ public Collection<V> values() {
+ return ordinalToValue.values();
+ }
+
+ @Override
+ public boolean containsKey(K key) {
+ return keyToOrdinal.containsKey(key);
+ }
+
+ /**
+ * Removes the element corresponding to the key/value if exists with ordinal re-cycling.
+ *
+ * The ordinal corresponding to the given key may be re-assigned to another tuple. It is
+ * important that consumer checks the ordinal value via
+ * {@link MultiMapWithOrdinal#getOrdinal(Object)} before attempting to look-up by ordinal.
+ *
+ * If the multimap is changed return true.
+ */
+ @Override
+ public synchronized boolean remove(K key, V value) {
+ Set<Integer> removalSet = getOrdinals(key);
+ if (removalSet.isEmpty()) {
+ return false;
+ }
+ Optional<V> removeValue = removalSet.stream().map(ordinalToValue::get).filter(value::equals).findFirst();
+ if (!removeValue.isPresent()) {
+ return false;
+ }
+ int removalOrdinal = removeKv(removalSet, key, value);
+ int lastOrdinal = ordinalToValue.size();
+ if (lastOrdinal != removalOrdinal) { //we didn't remove the last ordinal
+ swapOrdinal(lastOrdinal, removalOrdinal);
+ }
+ return true;
+ }
+
+ private void swapOrdinal(int lastOrdinal, int removalOrdinal) {
+ V swapOrdinalValue = ordinalToValue.remove(lastOrdinal);
+ ordinalToValue.put(removalOrdinal, swapOrdinalValue);
+ K swapOrdinalKey = keyToOrdinal.entrySet()
+ .stream()
+ .filter(kv -> kv.getValue().stream().anyMatch(o -> o == lastOrdinal))
+ .map(Map.Entry::getKey)
+ .findFirst()
+ .orElseThrow(() -> new IllegalStateException("MultimapWithOrdinal in bad state"));
+ ordinalToValue.put(removalOrdinal, swapOrdinalValue);
+ Set<Integer> swapSet = getOrdinals(swapOrdinalKey);
+ swapSet.remove(lastOrdinal);
+ swapSet.add(removalOrdinal);
+ keyToOrdinal.put(swapOrdinalKey, swapSet);
+ }
+
+ private int removeKv(Set<Integer> removalSet, K key, V value) {
+ Integer removalOrdinal = removalSet.stream()
+ .filter(i -> ordinalToValue.get(i).equals(value))
+ .findFirst()
+ .orElseThrow(() -> new IllegalStateException("MultimapWithOrdinal in bad state"));
+ ordinalToValue.remove(removalOrdinal);
+ removalSet.remove(removalOrdinal);
+ if (removalSet.isEmpty()) {
+ keyToOrdinal.remove(key);
+ } else {
+ keyToOrdinal.put(key, removalSet);
+ }
+ return removalOrdinal;
+ }
+
+ /**
+ * remove all entries of key.
+ */
+ @Override
+ public synchronized boolean removeAll(K key) {
+ Collection<V> values = this.getAll(key);
+ if (values == null) {
+ return false;
+ }
+ for (V v: values) {
+ this.remove(key, v);
+ }
+ return true;
+ }
+
+ @Override
+ public void clear() {
+ ordinalToValue.clear();
+ keyToOrdinal.clear();
+ }
+
+ @Override
+ public Set<K> keys() {
+ return keyToOrdinal.keySet();
+ }
+
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/OversizedAllocationException.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/OversizedAllocationException.java
new file mode 100644
index 000000000..a47d3ade0
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/OversizedAllocationException.java
@@ -0,0 +1,52 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.util;
+
+
+/**
+ * An exception that is used to signal that allocation request in bytes is greater than the maximum allowed by
+ * {@link org.apache.arrow.memory.BufferAllocator#buffer(int) allocator}.
+ *
+ * <p>Operators should handle this exception to split the batch and later resume the execution on the next
+ * iteration.</p>
+ */
+public class OversizedAllocationException extends RuntimeException {
+ public OversizedAllocationException() {
+ super();
+ }
+
+ public OversizedAllocationException(
+ String message,
+ Throwable cause,
+ boolean enableSuppression,
+ boolean writableStackTrace) {
+ super(message, cause, enableSuppression, writableStackTrace);
+ }
+
+ public OversizedAllocationException(String message, Throwable cause) {
+ super(message, cause);
+ }
+
+ public OversizedAllocationException(String message) {
+ super(message);
+ }
+
+ public OversizedAllocationException(Throwable cause) {
+ super(cause);
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/PromotableMultiMapWithOrdinal.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/PromotableMultiMapWithOrdinal.java
new file mode 100644
index 000000000..f2f838af9
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/PromotableMultiMapWithOrdinal.java
@@ -0,0 +1,133 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.util;
+
+import java.util.Collection;
+import java.util.Set;
+
+import org.apache.arrow.vector.complex.AbstractStructVector;
+
+/**
+ * Implementation of MapWithOrdinal that allows for promotion to multimap when duplicate fields exist.
+ * @param <K> key type
+ * @param <V> value type
+ */
+public class PromotableMultiMapWithOrdinal<K, V> implements MapWithOrdinal<K, V> {
+ private final MapWithOrdinalImpl<K, V> mapWithOrdinal = new MapWithOrdinalImpl<>();
+ private final MultiMapWithOrdinal<K, V> multiMapWithOrdinal = new MultiMapWithOrdinal<>();
+ private final boolean promotable;
+ private AbstractStructVector.ConflictPolicy conflictPolicy;
+ private MapWithOrdinal<K, V> delegate;
+
+ /**
+ * Create promotable map.
+ * @param promotable if promotion is allowed, otherwise delegate to MapWithOrdinal.
+ * @param conflictPolicy how to handle name conflicts.
+ */
+ public PromotableMultiMapWithOrdinal(boolean promotable, AbstractStructVector.ConflictPolicy conflictPolicy) {
+ this.promotable = promotable;
+ this.conflictPolicy = conflictPolicy;
+ delegate = mapWithOrdinal;
+ }
+
+ private void promote() {
+ if (delegate == multiMapWithOrdinal ||
+ !promotable ||
+ conflictPolicy.equals(AbstractStructVector.ConflictPolicy.CONFLICT_REPLACE)) {
+ return;
+ }
+ for (K key : mapWithOrdinal.keys()) {
+ V value = mapWithOrdinal.get(key);
+ multiMapWithOrdinal.put(key, value, false);
+ }
+ mapWithOrdinal.clear();
+ delegate = multiMapWithOrdinal;
+ }
+
+ @Override
+ public V getByOrdinal(int id) {
+ return delegate.getByOrdinal(id);
+ }
+
+ @Override
+ public int getOrdinal(K key) {
+ return delegate.getOrdinal(key);
+ }
+
+ @Override
+ public int size() {
+ return delegate.size();
+ }
+
+ @Override
+ public boolean isEmpty() {
+ return delegate.isEmpty();
+ }
+
+ @Override
+ public V get(K key) {
+ return delegate.get(key);
+ }
+
+ @Override
+ public Collection<V> getAll(K key) {
+ return delegate.getAll(key);
+ }
+
+ @Override
+ public boolean put(K key, V value, boolean overwrite) {
+ if (delegate.containsKey(key)) {
+ promote();
+ }
+ return delegate.put(key, value, overwrite);
+ }
+
+ @Override
+ public Collection<V> values() {
+ return delegate.values();
+ }
+
+ @Override
+ public boolean containsKey(K key) {
+ return delegate.containsKey(key);
+ }
+
+ @Override
+ public boolean remove(K key, V value) {
+ return delegate.remove(key, value);
+ }
+
+ @Override
+ public boolean removeAll(K key) {
+ return delegate.removeAll(key);
+ }
+
+ @Override
+ public void clear() {
+ delegate.clear();
+ }
+
+ @Override
+ public Set<K> keys() {
+ return delegate.keys();
+ }
+
+ public void setConflictPolicy(AbstractStructVector.ConflictPolicy conflictPolicy) {
+ this.conflictPolicy = conflictPolicy;
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/SchemaChangeRuntimeException.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/SchemaChangeRuntimeException.java
new file mode 100644
index 000000000..c29eb6ad3
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/SchemaChangeRuntimeException.java
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.util;
+
+
+/**
+ * Thrown when child vectors (e.g. in lists) don't match the expected type.
+ */
+public class SchemaChangeRuntimeException extends RuntimeException {
+ public SchemaChangeRuntimeException() {
+ super();
+ }
+
+ public SchemaChangeRuntimeException(
+ String message,
+ Throwable cause,
+ boolean enableSuppression,
+ boolean writableStackTrace) {
+ super(message, cause, enableSuppression, writableStackTrace);
+ }
+
+ public SchemaChangeRuntimeException(String message, Throwable cause) {
+ super(message, cause);
+ }
+
+ public SchemaChangeRuntimeException(String message) {
+ super(message);
+ }
+
+ public SchemaChangeRuntimeException(Throwable cause) {
+ super(cause);
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/SchemaUtility.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/SchemaUtility.java
new file mode 100644
index 000000000..f8167604c
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/SchemaUtility.java
@@ -0,0 +1,63 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.util;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.nio.channels.Channels;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.ipc.ReadChannel;
+import org.apache.arrow.vector.ipc.WriteChannel;
+import org.apache.arrow.vector.ipc.message.MessageChannelReader;
+import org.apache.arrow.vector.ipc.message.MessageResult;
+import org.apache.arrow.vector.ipc.message.MessageSerializer;
+import org.apache.arrow.vector.types.pojo.Schema;
+
+/**
+ * Schema utility class including serialization and deserialization.
+ */
+public class SchemaUtility {
+ private SchemaUtility() {}
+
+ /**
+ * Deserialize Arrow schema from byte array.
+ */
+ public static Schema deserialize(byte[] bytes, BufferAllocator allocator) throws IOException {
+ try (MessageChannelReader schemaReader =
+ new MessageChannelReader(
+ new ReadChannel(
+ new ByteArrayReadableSeekableByteChannel(bytes)), allocator)) {
+
+ MessageResult result = schemaReader.readNext();
+ if (result == null) {
+ throw new IOException("Unexpected end of input. Missing schema.");
+ }
+ return MessageSerializer.deserializeSchema(result.getMessage());
+ }
+ }
+
+ /**
+ * Serialize Arrow schema into byte array.
+ */
+ public static byte[] serialize(Schema schema) throws IOException {
+ final ByteArrayOutputStream out = new ByteArrayOutputStream();
+ MessageSerializer.serialize(new WriteChannel(Channels.newChannel(out)), schema);
+ return out.toByteArray();
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/Text.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/Text.java
new file mode 100644
index 000000000..b479305c6
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/Text.java
@@ -0,0 +1,688 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.util;
+
+import java.io.DataInput;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.CharacterCodingException;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.CodingErrorAction;
+import java.nio.charset.MalformedInputException;
+import java.text.CharacterIterator;
+import java.text.StringCharacterIterator;
+import java.util.Arrays;
+
+import com.fasterxml.jackson.core.JsonGenerationException;
+import com.fasterxml.jackson.core.JsonGenerator;
+import com.fasterxml.jackson.databind.SerializerProvider;
+import com.fasterxml.jackson.databind.annotation.JsonSerialize;
+import com.fasterxml.jackson.databind.ser.std.StdSerializer;
+
+/**
+ * A simplified byte wrapper similar to Hadoop's Text class without all the dependencies.
+ * Lifted from Hadoop 2.7.1
+ */
+@JsonSerialize(using = Text.TextSerializer.class)
+public class Text {
+
+ private static ThreadLocal<CharsetEncoder> ENCODER_FACTORY =
+ new ThreadLocal<CharsetEncoder>() {
+ @Override
+ protected CharsetEncoder initialValue() {
+ return Charset.forName("UTF-8").newEncoder()
+ .onMalformedInput(CodingErrorAction.REPORT)
+ .onUnmappableCharacter(CodingErrorAction.REPORT);
+ }
+ };
+
+ private static ThreadLocal<CharsetDecoder> DECODER_FACTORY =
+ new ThreadLocal<CharsetDecoder>() {
+ @Override
+ protected CharsetDecoder initialValue() {
+ return Charset.forName("UTF-8").newDecoder()
+ .onMalformedInput(CodingErrorAction.REPORT)
+ .onUnmappableCharacter(CodingErrorAction.REPORT);
+ }
+ };
+
+ private static final byte[] EMPTY_BYTES = new byte[0];
+
+ private byte[] bytes;
+ private int length;
+
+ public Text() {
+ bytes = EMPTY_BYTES;
+ }
+
+ /**
+ * Construct from a string.
+ *
+ * @param string initialize from that string
+ */
+ public Text(String string) {
+ set(string);
+ }
+
+ /**
+ * Construct from another text.
+ *
+ * @param utf8 initialize from that Text
+ */
+ public Text(Text utf8) {
+ set(utf8);
+ }
+
+ /**
+ * Construct from a byte array.
+ *
+ * @param utf8 initialize from that byte array
+ */
+ public Text(byte[] utf8) {
+ set(utf8);
+ }
+
+ /**
+ * Get a copy of the bytes that is exactly the length of the data. See {@link #getBytes()} for
+ * faster access to the underlying array.
+ *
+ * @return a copy of the underlying array
+ */
+ public byte[] copyBytes() {
+ byte[] result = new byte[length];
+ System.arraycopy(bytes, 0, result, 0, length);
+ return result;
+ }
+
+ /**
+ * Returns the raw bytes; however, only data up to {@link #getLength()} is valid. Please use
+ * {@link #copyBytes()} if you need the returned array to be precisely the length of the data.
+ *
+ * @return the underlying array
+ */
+ public byte[] getBytes() {
+ return bytes;
+ }
+
+ /**
+ * Get the number of bytes in the byte array.
+ *
+ * @return the number of bytes in the byte array
+ */
+ public int getLength() {
+ return length;
+ }
+
+ /**
+ * Returns the Unicode Scalar Value (32-bit integer value) for the character at
+ * <code>position</code>. Note that this method avoids using the converter or doing String
+ * instantiation.
+ *
+ * @param position the index of the char we want to retrieve
+ * @return the Unicode scalar value at position or -1 if the position is invalid or points to a
+ * trailing byte
+ */
+ public int charAt(int position) {
+ if (position > this.length) {
+ return -1; // too long
+ }
+ if (position < 0) {
+ return -1; // duh.
+ }
+
+ ByteBuffer bb = (ByteBuffer) ByteBuffer.wrap(bytes).position(position);
+ return bytesToCodePoint(bb.slice());
+ }
+
+ public int find(String what) {
+ return find(what, 0);
+ }
+
+ /**
+ * Finds any occurrence of <code>what</code> in the backing buffer, starting as position
+ * <code>start</code>. The starting position is measured in bytes and the return value is in terms
+ * of byte position in the buffer. The backing buffer is not converted to a string for this
+ * operation.
+ *
+ * @param what the string to search for
+ * @param start where to start from
+ * @return byte position of the first occurrence of the search string in the UTF-8 buffer or -1
+ * if not found
+ */
+ public int find(String what, int start) {
+ try {
+ ByteBuffer src = ByteBuffer.wrap(this.bytes, 0, this.length);
+ ByteBuffer tgt = encode(what);
+ byte b = tgt.get();
+ src.position(start);
+
+ while (src.hasRemaining()) {
+ if (b == src.get()) { // matching first byte
+ src.mark(); // save position in loop
+ tgt.mark(); // save position in target
+ boolean found = true;
+ int pos = src.position() - 1;
+ while (tgt.hasRemaining()) {
+ if (!src.hasRemaining()) { // src expired first
+ tgt.reset();
+ src.reset();
+ found = false;
+ break;
+ }
+ if (!(tgt.get() == src.get())) {
+ tgt.reset();
+ src.reset();
+ found = false;
+ break; // no match
+ }
+ }
+ if (found) {
+ return pos;
+ }
+ }
+ }
+ return -1; // not found
+ } catch (CharacterCodingException e) {
+ // can't get here
+ e.printStackTrace();
+ return -1;
+ }
+ }
+
+ /**
+ * Set to contain the contents of a string.
+ *
+ * @param string the string to initialize from
+ */
+ public void set(String string) {
+ try {
+ ByteBuffer bb = encode(string, true);
+ bytes = bb.array();
+ length = bb.limit();
+ } catch (CharacterCodingException e) {
+ throw new RuntimeException("Should not have happened ", e);
+ }
+ }
+
+ /**
+ * Set to a utf8 byte array.
+ *
+ * @param utf8 the byte array to initialize from
+ */
+ public void set(byte[] utf8) {
+ set(utf8, 0, utf8.length);
+ }
+
+ /**
+ * copy a text.
+ *
+ * @param other the text to initialize from
+ */
+ public void set(Text other) {
+ set(other.getBytes(), 0, other.getLength());
+ }
+
+ /**
+ * Set the Text to range of bytes.
+ *
+ * @param utf8 the data to copy from
+ * @param start the first position of the new string
+ * @param len the number of bytes of the new string
+ */
+ public void set(byte[] utf8, int start, int len) {
+ setCapacity(len, false);
+ System.arraycopy(utf8, start, bytes, 0, len);
+ this.length = len;
+ }
+
+ /**
+ * Append a range of bytes to the end of the given text.
+ *
+ * @param utf8 the data to copy from
+ * @param start the first position to append from utf8
+ * @param len the number of bytes to append
+ */
+ public void append(byte[] utf8, int start, int len) {
+ setCapacity(length + len, true);
+ System.arraycopy(utf8, start, bytes, length, len);
+ length += len;
+ }
+
+ /**
+ * Clear the string to empty.
+ *
+ * <em>Note</em>: For performance reasons, this call does not clear the underlying byte array that
+ * is retrievable via {@link #getBytes()}. In order to free the byte-array memory, call
+ * {@link #set(byte[])} with an empty byte array (For example, <code>new byte[0]</code>).
+ */
+ public void clear() {
+ length = 0;
+ }
+
+ /**
+ * Sets the capacity of this Text object to <em>at least</em> <code>len</code> bytes. If the
+ * current buffer is longer, then the capacity and existing content of the buffer are unchanged.
+ * If <code>len</code> is larger than the current capacity, the Text object's capacity is
+ * increased to match.
+ *
+ * @param len the number of bytes we need
+ * @param keepData should the old data be kept
+ */
+ private void setCapacity(int len, boolean keepData) {
+ if (bytes == null || bytes.length < len) {
+ if (bytes != null && keepData) {
+ bytes = Arrays.copyOf(bytes, Math.max(len, length << 1));
+ } else {
+ bytes = new byte[len];
+ }
+ }
+ }
+
+ @Override
+ public String toString() {
+ try {
+ return decode(bytes, 0, length);
+ } catch (CharacterCodingException e) {
+ throw new RuntimeException("Should not have happened ", e);
+ }
+ }
+
+ /**
+ * Read a Text object whose length is already known. This allows creating Text from a stream which
+ * uses a different serialization format.
+ *
+ * @param in the input to initialize from
+ * @param len how many bytes to read from in
+ * @throws IOException if something bad happens
+ */
+ public void readWithKnownLength(DataInput in, int len) throws IOException {
+ setCapacity(len, false);
+ in.readFully(bytes, 0, len);
+ length = len;
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (o == this) {
+ return true;
+ } else if (o == null) {
+ return false;
+ }
+ if (!(o instanceof Text)) {
+ return false;
+ }
+
+ final Text that = (Text) o;
+ if (this.getLength() != that.getLength()) {
+ return false;
+ }
+
+ // copied from Arrays.equals so we don'thave to copy the byte arrays
+ for (int i = 0; i < length; i++) {
+ if (bytes[i] != that.bytes[i]) {
+ return false;
+ }
+ }
+
+ return true;
+ }
+
+ /**
+ * Copied from Arrays.hashCode so we don't have to copy the byte array.
+ *
+ * @return hashCode
+ */
+ @Override
+ public int hashCode() {
+ if (bytes == null) {
+ return 0;
+ }
+
+ int result = 1;
+ for (int i = 0; i < length; i++) {
+ result = 31 * result + bytes[i];
+ }
+
+ return result;
+ }
+
+ // / STATIC UTILITIES FROM HERE DOWN
+
+ /**
+ * Converts the provided byte array to a String using the UTF-8 encoding. If the input is
+ * malformed, replace by a default value.
+ *
+ * @param utf8 bytes to decode
+ * @return the decoded string
+ * @throws CharacterCodingException if this is not valid UTF-8
+ */
+ public static String decode(byte[] utf8) throws CharacterCodingException {
+ return decode(ByteBuffer.wrap(utf8), true);
+ }
+
+ public static String decode(byte[] utf8, int start, int length)
+ throws CharacterCodingException {
+ return decode(ByteBuffer.wrap(utf8, start, length), true);
+ }
+
+ /**
+ * Converts the provided byte array to a String using the UTF-8 encoding. If <code>replace</code>
+ * is true, then malformed input is replaced with the substitution character, which is U+FFFD.
+ * Otherwise the method throws a MalformedInputException.
+ *
+ * @param utf8 the bytes to decode
+ * @param start where to start from
+ * @param length length of the bytes to decode
+ * @param replace whether to replace malformed characters with U+FFFD
+ * @return the decoded string
+ * @throws CharacterCodingException if the input could not be decoded
+ */
+ public static String decode(byte[] utf8, int start, int length, boolean replace)
+ throws CharacterCodingException {
+ return decode(ByteBuffer.wrap(utf8, start, length), replace);
+ }
+
+ private static String decode(ByteBuffer utf8, boolean replace)
+ throws CharacterCodingException {
+ CharsetDecoder decoder = DECODER_FACTORY.get();
+ if (replace) {
+ decoder.onMalformedInput(
+ java.nio.charset.CodingErrorAction.REPLACE);
+ decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
+ }
+ String str = decoder.decode(utf8).toString();
+ // set decoder back to its default value: REPORT
+ if (replace) {
+ decoder.onMalformedInput(CodingErrorAction.REPORT);
+ decoder.onUnmappableCharacter(CodingErrorAction.REPORT);
+ }
+ return str;
+ }
+
+ /**
+ * Converts the provided String to bytes using the UTF-8 encoding. If the input is malformed,
+ * invalid chars are replaced by a default value.
+ *
+ * @param string the string to encode
+ * @return ByteBuffer: bytes stores at ByteBuffer.array() and length is ByteBuffer.limit()
+ * @throws CharacterCodingException if the string could not be encoded
+ */
+ public static ByteBuffer encode(String string)
+ throws CharacterCodingException {
+ return encode(string, true);
+ }
+
+ /**
+ * Converts the provided String to bytes using the UTF-8 encoding. If <code>replace</code> is
+ * true, then malformed input is replaced with the substitution character, which is U+FFFD.
+ * Otherwise the method throws a MalformedInputException.
+ *
+ * @param string the string to encode
+ * @param replace whether to replace malformed characters with U+FFFD
+ * @return ByteBuffer: bytes stores at ByteBuffer.array() and length is ByteBuffer.limit()
+ * @throws CharacterCodingException if the string could not be encoded
+ */
+ public static ByteBuffer encode(String string, boolean replace)
+ throws CharacterCodingException {
+ CharsetEncoder encoder = ENCODER_FACTORY.get();
+ if (replace) {
+ encoder.onMalformedInput(CodingErrorAction.REPLACE);
+ encoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
+ }
+ ByteBuffer bytes =
+ encoder.encode(CharBuffer.wrap(string.toCharArray()));
+ if (replace) {
+ encoder.onMalformedInput(CodingErrorAction.REPORT);
+ encoder.onUnmappableCharacter(CodingErrorAction.REPORT);
+ }
+ return bytes;
+ }
+
+ public static final int DEFAULT_MAX_LEN = 1024 * 1024;
+
+ // //// states for validateUTF8
+
+ private static final int LEAD_BYTE = 0;
+
+ private static final int TRAIL_BYTE_1 = 1;
+
+ private static final int TRAIL_BYTE = 2;
+
+ /**
+ * Check if a byte array contains valid utf-8.
+ *
+ * @param utf8 byte array
+ * @throws MalformedInputException if the byte array contains invalid utf-8
+ */
+ public static void validateUTF8(byte[] utf8) throws MalformedInputException {
+ validateUTF8(utf8, 0, utf8.length);
+ }
+
+ /**
+ * Check to see if a byte array is valid utf-8.
+ *
+ * @param utf8 the array of bytes
+ * @param start the offset of the first byte in the array
+ * @param len the length of the byte sequence
+ * @throws MalformedInputException if the byte array contains invalid bytes
+ */
+ public static void validateUTF8(byte[] utf8, int start, int len)
+ throws MalformedInputException {
+ int count = start;
+ int leadByte = 0;
+ int length = 0;
+ int state = LEAD_BYTE;
+ while (count < start + len) {
+ int aByte = utf8[count] & 0xFF;
+
+ switch (state) {
+ case LEAD_BYTE:
+ leadByte = aByte;
+ length = bytesFromUTF8[aByte];
+
+ switch (length) {
+ case 0: // check for ASCII
+ if (leadByte > 0x7F) {
+ throw new MalformedInputException(count);
+ }
+ break;
+ case 1:
+ if (leadByte < 0xC2 || leadByte > 0xDF) {
+ throw new MalformedInputException(count);
+ }
+ state = TRAIL_BYTE_1;
+ break;
+ case 2:
+ if (leadByte < 0xE0 || leadByte > 0xEF) {
+ throw new MalformedInputException(count);
+ }
+ state = TRAIL_BYTE_1;
+ break;
+ case 3:
+ if (leadByte < 0xF0 || leadByte > 0xF4) {
+ throw new MalformedInputException(count);
+ }
+ state = TRAIL_BYTE_1;
+ break;
+ default:
+ // too long! Longest valid UTF-8 is 4 bytes (lead + three)
+ // or if < 0 we got a trail byte in the lead byte position
+ throw new MalformedInputException(count);
+ } // switch (length)
+ break;
+
+ case TRAIL_BYTE_1:
+ if (leadByte == 0xF0 && aByte < 0x90) {
+ throw new MalformedInputException(count);
+ }
+ if (leadByte == 0xF4 && aByte > 0x8F) {
+ throw new MalformedInputException(count);
+ }
+ if (leadByte == 0xE0 && aByte < 0xA0) {
+ throw new MalformedInputException(count);
+ }
+ if (leadByte == 0xED && aByte > 0x9F) {
+ throw new MalformedInputException(count);
+ }
+ // falls through to regular trail-byte test!!
+ case TRAIL_BYTE:
+ if (aByte < 0x80 || aByte > 0xBF) {
+ throw new MalformedInputException(count);
+ }
+ if (--length == 0) {
+ state = LEAD_BYTE;
+ } else {
+ state = TRAIL_BYTE;
+ }
+ break;
+ default:
+ break;
+ } // switch (state)
+ count++;
+ }
+ }
+
+ /**
+ * Magic numbers for UTF-8. These are the number of bytes that <em>follow</em> a given lead byte.
+ * Trailing bytes have the value -1. The values 4 and 5 are presented in this table, even though
+ * valid UTF-8 cannot include the five and six byte sequences.
+ */
+ static final int[] bytesFromUTF8 =
+ {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0,
+ // trail bytes
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
+ 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5};
+
+ /**
+ * Returns the next code point at the current position in the buffer. The buffer's position will
+ * be incremented. Any mark set on this buffer will be changed by this method!
+ *
+ * @param bytes the incoming bytes
+ * @return the corresponding unicode codepoint
+ */
+ public static int bytesToCodePoint(ByteBuffer bytes) {
+ bytes.mark();
+ byte b = bytes.get();
+ bytes.reset();
+ int extraBytesToRead = bytesFromUTF8[(b & 0xFF)];
+ if (extraBytesToRead < 0) {
+ return -1; // trailing byte!
+ }
+ int ch = 0;
+
+ switch (extraBytesToRead) {
+ case 5:
+ ch += (bytes.get() & 0xFF);
+ ch <<= 6; /* remember, illegal UTF-8 */
+ // fall through
+ case 4:
+ ch += (bytes.get() & 0xFF);
+ ch <<= 6; /* remember, illegal UTF-8 */
+ // fall through
+ case 3:
+ ch += (bytes.get() & 0xFF);
+ ch <<= 6;
+ // fall through
+ case 2:
+ ch += (bytes.get() & 0xFF);
+ ch <<= 6;
+ // fall through
+ case 1:
+ ch += (bytes.get() & 0xFF);
+ ch <<= 6;
+ // fall through
+ case 0:
+ ch += (bytes.get() & 0xFF);
+ break;
+ default: // do nothing
+ }
+ ch -= offsetsFromUTF8[extraBytesToRead];
+
+ return ch;
+ }
+
+ static final int[] offsetsFromUTF8 =
+ {0x00000000, 0x00003080, 0x000E2080, 0x03C82080, 0xFA082080, 0x82082080};
+
+ /**
+ * For the given string, returns the number of UTF-8 bytes required to encode the string.
+ *
+ * @param string text to encode
+ * @return number of UTF-8 bytes required to encode
+ */
+ public static int utf8Length(String string) {
+ CharacterIterator iter = new StringCharacterIterator(string);
+ char ch = iter.first();
+ int size = 0;
+ while (ch != CharacterIterator.DONE) {
+ if ((ch >= 0xD800) && (ch < 0xDC00)) {
+ // surrogate pair?
+ char trail = iter.next();
+ if ((trail > 0xDBFF) && (trail < 0xE000)) {
+ // valid pair
+ size += 4;
+ } else {
+ // invalid pair
+ size += 3;
+ iter.previous(); // rewind one
+ }
+ } else if (ch < 0x80) {
+ size++;
+ } else if (ch < 0x800) {
+ size += 2;
+ } else {
+ // ch < 0x10000, that is, the largest char value
+ size += 3;
+ }
+ ch = iter.next();
+ }
+ return size;
+ }
+
+ /**
+ * JSON serializer for {@link Text}.
+ */
+ public static class TextSerializer extends StdSerializer<Text> {
+
+ public TextSerializer() {
+ super(Text.class);
+ }
+
+ @Override
+ public void serialize(
+ Text text,
+ JsonGenerator jsonGenerator,
+ SerializerProvider serializerProvider) throws IOException, JsonGenerationException {
+ jsonGenerator.writeString(text.toString());
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/TransferPair.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/TransferPair.java
new file mode 100644
index 000000000..ca3876c7b
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/TransferPair.java
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.util;
+
+import org.apache.arrow.vector.ValueVector;
+
+/**
+ * Interface for copying values between a pair of two vectors of the same type.
+ */
+public interface TransferPair {
+ void transfer();
+
+ void splitAndTransfer(int startIndex, int length);
+
+ ValueVector getTo();
+
+ void copyValueSafe(int from, int to);
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/Validator.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/Validator.java
new file mode 100644
index 000000000..741972b4a
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/Validator.java
@@ -0,0 +1,190 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.util;
+
+import java.util.Arrays;
+import java.util.List;
+import java.util.Objects;
+
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.dictionary.Dictionary;
+import org.apache.arrow.vector.dictionary.DictionaryProvider;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.DictionaryEncoding;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.Schema;
+
+/**
+ * Utility class for validating arrow data structures.
+ */
+public class Validator {
+
+ /**
+ * Validate two arrow schemas are equal.
+ *
+ * @param schema1 the 1st schema to compare
+ * @param schema2 the 2nd schema to compare
+ * @throws IllegalArgumentException if they are different.
+ */
+ public static void compareSchemas(Schema schema1, Schema schema2) {
+ if (!schema2.equals(schema1)) {
+ throw new IllegalArgumentException("Different schemas:\n" + schema2 + "\n" + schema1);
+ }
+ }
+
+ /**
+ * Validate two Dictionary encodings and dictionaries with id's from the encodings.
+ */
+ public static void compareDictionaries(
+ List<DictionaryEncoding> encodings1,
+ List<DictionaryEncoding> encodings2,
+ DictionaryProvider provider1,
+ DictionaryProvider provider2) {
+
+ if (encodings1.size() != encodings2.size()) {
+ throw new IllegalArgumentException("Different dictionary encoding count:\n" +
+ encodings1.size() + "\n" + encodings2.size());
+ }
+
+ for (int i = 0; i < encodings1.size(); i++) {
+ if (!encodings1.get(i).equals(encodings2.get(i))) {
+ throw new IllegalArgumentException("Different dictionary encodings:\n" + encodings1.get(i) +
+ "\n" + encodings2.get(i));
+ }
+
+ long id = encodings1.get(i).getId();
+ Dictionary dict1 = provider1.lookup(id);
+ Dictionary dict2 = provider2.lookup(id);
+
+ if (dict1 == null || dict2 == null) {
+ throw new IllegalArgumentException("The DictionaryProvider did not contain the required " +
+ "dictionary with id: " + id + "\n" + dict1 + "\n" + dict2);
+ }
+
+ try {
+ compareFieldVectors(dict1.getVector(), dict2.getVector());
+ } catch (IllegalArgumentException e) {
+ throw new IllegalArgumentException("Different dictionaries:\n" + dict1 + "\n" + dict2, e);
+ }
+ }
+ }
+
+ /**
+ * Validate two arrow vectorSchemaRoot are equal.
+ *
+ * @param root1 the 1st schema to compare
+ * @param root2 the 2nd schema to compare
+ * @throws IllegalArgumentException if they are different.
+ */
+ public static void compareVectorSchemaRoot(VectorSchemaRoot root1, VectorSchemaRoot root2) {
+ compareSchemas(root2.getSchema(), root1.getSchema());
+ if (root1.getRowCount() != root2.getRowCount()) {
+ throw new IllegalArgumentException("Different row count:\n" + root1.getRowCount() + " != " + root2.getRowCount());
+ }
+ List<FieldVector> vectors1 = root1.getFieldVectors();
+ List<FieldVector> vectors2 = root2.getFieldVectors();
+ if (vectors1.size() != vectors2.size()) {
+ throw new IllegalArgumentException("Different column count:\n" + vectors1.toString() +
+ "\n!=\n" + vectors2.toString());
+ }
+ for (int i = 0; i < vectors1.size(); i++) {
+ compareFieldVectors(vectors1.get(i), vectors2.get(i));
+ }
+ }
+
+ /**
+ * Validate two arrow FieldVectors are equal.
+ *
+ * @param vector1 the 1st VectorField to compare
+ * @param vector2 the 2nd VectorField to compare
+ * @throws IllegalArgumentException if they are different
+ */
+ public static void compareFieldVectors(FieldVector vector1, FieldVector vector2) {
+ Field field1 = vector1.getField();
+ if (!field1.equals(vector2.getField())) {
+ throw new IllegalArgumentException("Different Fields:\n" + field1 + "\n!=\n" +
+ vector2.getField());
+ }
+ int valueCount = vector1.getValueCount();
+ if (valueCount != vector2.getValueCount()) {
+ throw new IllegalArgumentException("Different value count for field " + field1 + " : " +
+ valueCount + " != " + vector2.getValueCount());
+ }
+ for (int j = 0; j < valueCount; j++) {
+ Object obj1 = vector1.getObject(j);
+ Object obj2 = vector2.getObject(j);
+ if (!equals(field1.getType(), obj1, obj2)) {
+ throw new IllegalArgumentException(
+ "Different values in column:\n" + field1 + " at index " + j + ": " + obj1 + " != " + obj2);
+ }
+ }
+ }
+
+ static boolean equals(ArrowType type, final Object o1, final Object o2) {
+ if (type instanceof ArrowType.FloatingPoint) {
+ ArrowType.FloatingPoint fpType = (ArrowType.FloatingPoint) type;
+ switch (fpType.getPrecision()) {
+ case DOUBLE:
+ return equalEnough((Double) o1, (Double) o2);
+ case SINGLE:
+ return equalEnough((Float) o1, (Float) o2);
+ case HALF:
+ default:
+ throw new UnsupportedOperationException("unsupported precision: " + fpType);
+ }
+ } else if (type instanceof ArrowType.Binary || type instanceof ArrowType.LargeBinary ||
+ type instanceof ArrowType.FixedSizeBinary) {
+ return Arrays.equals((byte[]) o1, (byte[]) o2);
+ } else if (o1 instanceof byte[] && o2 instanceof byte[]) {
+ return Arrays.equals((byte[]) o1, (byte[]) o2);
+ }
+
+ return Objects.equals(o1, o2);
+ }
+
+ static boolean equalEnough(Float f1, Float f2) {
+ if (f1 == null || f2 == null) {
+ return f1 == null && f2 == null;
+ }
+ if (f1.isNaN()) {
+ return f2.isNaN();
+ }
+ if (f1.isInfinite()) {
+ return f2.isInfinite() && Math.signum(f1) == Math.signum(f2);
+ }
+ float average = Math.abs((f1 + f2) / 2);
+ float differenceScaled = Math.abs(f1 - f2) / (average == 0.0f ? 1f : average);
+ return differenceScaled < 1.0E-6f;
+ }
+
+ static boolean equalEnough(Double f1, Double f2) {
+ if (f1 == null || f2 == null) {
+ return f1 == null && f2 == null;
+ }
+ if (f1.isNaN()) {
+ return f2.isNaN();
+ }
+ if (f1.isInfinite()) {
+ return f2.isInfinite() && Math.signum(f1) == Math.signum(f2);
+ }
+ double average = Math.abs((f1 + f2) / 2);
+ double differenceScaled = Math.abs(f1 - f2) / (average == 0.0d ? 1d : average);
+ return differenceScaled < 1.0E-12d;
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/ValueVectorUtility.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/ValueVectorUtility.java
new file mode 100644
index 000000000..ceb7081e1
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/ValueVectorUtility.java
@@ -0,0 +1,187 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.util;
+
+import static org.apache.arrow.vector.validate.ValidateUtil.validateOrThrow;
+
+import java.util.function.BiFunction;
+
+import org.apache.arrow.util.Preconditions;
+import org.apache.arrow.vector.BaseFixedWidthVector;
+import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.validate.ValidateVectorBufferVisitor;
+import org.apache.arrow.vector.validate.ValidateVectorDataVisitor;
+import org.apache.arrow.vector.validate.ValidateVectorTypeVisitor;
+
+/**
+ * Utility methods for {@link ValueVector}.
+ */
+public class ValueVectorUtility {
+
+ private ValueVectorUtility() {
+ }
+
+ /**
+ * Get the toString() representation of vector suitable for debugging.
+ * Note since vectors may have millions of values, this method only shows max 20 values.
+ * Examples as below (v represents value):
+ * <li>
+ * vector with 0 value:
+ * []
+ * </li>
+ * <li>
+ * vector with 5 values (no more than 20 values):
+ * [v0, v1, v2, v3, v4]
+ * </li>
+ * <li>
+ * vector with 100 values (more than 20 values):
+ * [v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, ..., v90, v91, v92, v93, v94, v95, v96, v97, v98, v99]
+ * </li>
+ */
+ public static <V extends ValueVector> String getToString(V vector, int start, int end) {
+ return getToString(vector, start, end, (v, i) -> v.getObject(i));
+ }
+
+ /**
+ * Get the toString() representation of vector suitable for debugging.
+ * Note since vectors may have millions of values, this method only shows at most 20 values.
+ * @param vector the vector for which to get toString representation.
+ * @param start the starting index, inclusive.
+ * @param end the end index, exclusive.
+ * @param valueToString the function to transform individual elements to strings.
+ */
+ public static <V extends ValueVector> String getToString(
+ V vector, int start, int end, BiFunction<V, Integer, Object> valueToString) {
+ Preconditions.checkNotNull(vector);
+ final int length = end - start;
+ Preconditions.checkArgument(length >= 0);
+ Preconditions.checkArgument(start >= 0);
+ Preconditions.checkArgument(end <= vector.getValueCount());
+
+ if (length == 0) {
+ return "[]";
+ }
+
+ final int window = 10;
+ boolean skipComma = false;
+
+ StringBuilder sb = new StringBuilder();
+ sb.append('[');
+ for (int i = start; i < end; i++) {
+ if (skipComma) {
+ skipComma = false;
+ }
+ if (i - start >= window && i < end - window) {
+ sb.append("...");
+ i = end - window - 1;
+ skipComma = true;
+ } else {
+ sb.append(valueToString.apply(vector, i));
+ }
+
+ if (i == end - 1) {
+ sb.append(']');
+ } else {
+ if (!skipComma) {
+ sb.append(',');
+ }
+ sb.append(' ');
+ }
+ }
+
+ return sb.toString();
+ }
+
+ /**
+ * Utility to validate vector in O(1) time.
+ */
+ public static void validate(ValueVector vector) {
+ Preconditions.checkNotNull(vector);
+
+ ValidateVectorTypeVisitor typeVisitor = new ValidateVectorTypeVisitor();
+ vector.accept(typeVisitor, null);
+
+ ValidateVectorBufferVisitor bufferVisitor = new ValidateVectorBufferVisitor();
+ vector.accept(bufferVisitor, null);
+ }
+
+ /**
+ * Utility to validate vector in O(n) time, where n is the value count.
+ */
+ public static void validateFull(ValueVector vector) {
+ validate(vector);
+
+ ValidateVectorDataVisitor dataVisitor = new ValidateVectorDataVisitor();
+ vector.accept(dataVisitor, null);
+ }
+
+ /**
+ * Utility to validate vector schema root in O(1) time.
+ */
+ public static void validate(VectorSchemaRoot root) {
+ Preconditions.checkNotNull(root);
+ int valueCount = root.getRowCount();
+ validateOrThrow(valueCount >= 0, "The row count of vector schema root %s is negative.", valueCount);
+ for (ValueVector childVec : root.getFieldVectors()) {
+ validateOrThrow(valueCount == childVec.getValueCount(),
+ "Child vector and vector schema root have different value counts. " +
+ "Child vector value count %s, vector schema root value count %s", childVec.getValueCount(), valueCount);
+ validate(childVec);
+ }
+ }
+
+ /**
+ * Utility to validate vector in O(n) time, where n is the value count.
+ */
+ public static void validateFull(VectorSchemaRoot root) {
+ Preconditions.checkNotNull(root);
+ int valueCount = root.getRowCount();
+ validateOrThrow(valueCount >= 0, "The row count of vector schema root %s is negative.", valueCount);
+ for (ValueVector childVec : root.getFieldVectors()) {
+ validateOrThrow(valueCount == childVec.getValueCount(),
+ "Child vector and vector schema root have different value counts. " +
+ "Child vector value count %s, vector schema root value count %s", childVec.getValueCount(), valueCount);
+ validateFull(childVec);
+ }
+ }
+
+ /**
+ * Pre allocate memory for BaseFixedWidthVector.
+ */
+ public static void preAllocate(VectorSchemaRoot root, int targetSize) {
+ for (ValueVector vector : root.getFieldVectors()) {
+ if (vector instanceof BaseFixedWidthVector) {
+ ((BaseFixedWidthVector) vector).allocateNew(targetSize);
+ }
+ }
+ }
+
+ /**
+ * Ensure capacity for BaseFixedWidthVector.
+ */
+ public static void ensureCapacity(VectorSchemaRoot root, int targetCapacity) {
+ for (ValueVector vector : root.getFieldVectors()) {
+ if (vector instanceof BaseFixedWidthVector) {
+ while (vector.getValueCapacity() < targetCapacity) {
+ vector.reAlloc();
+ }
+ }
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/VectorAppender.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/VectorAppender.java
new file mode 100644
index 000000000..e5809e93e
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/VectorAppender.java
@@ -0,0 +1,542 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.util;
+
+import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt;
+
+import java.util.HashSet;
+
+import org.apache.arrow.util.Preconditions;
+import org.apache.arrow.vector.BaseFixedWidthVector;
+import org.apache.arrow.vector.BaseLargeVariableWidthVector;
+import org.apache.arrow.vector.BaseVariableWidthVector;
+import org.apache.arrow.vector.BitVectorHelper;
+import org.apache.arrow.vector.ExtensionTypeVector;
+import org.apache.arrow.vector.NullVector;
+import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.compare.TypeEqualsVisitor;
+import org.apache.arrow.vector.compare.VectorVisitor;
+import org.apache.arrow.vector.complex.DenseUnionVector;
+import org.apache.arrow.vector.complex.FixedSizeListVector;
+import org.apache.arrow.vector.complex.LargeListVector;
+import org.apache.arrow.vector.complex.ListVector;
+import org.apache.arrow.vector.complex.NonNullableStructVector;
+import org.apache.arrow.vector.complex.UnionVector;
+
+import io.netty.util.internal.PlatformDependent;
+
+/**
+ * Utility to append two vectors together.
+ */
+class VectorAppender implements VectorVisitor<ValueVector, Void> {
+
+ /**
+ * The targetVector to be appended.
+ */
+ private final ValueVector targetVector;
+
+ private final TypeEqualsVisitor typeVisitor;
+
+ /**
+ * Constructs a new targetVector appender, with the given targetVector.
+ * @param targetVector the targetVector to be appended.
+ */
+ VectorAppender(ValueVector targetVector) {
+ this.targetVector = targetVector;
+ typeVisitor = new TypeEqualsVisitor(targetVector, false, true);
+ }
+
+ @Override
+ public ValueVector visit(BaseFixedWidthVector deltaVector, Void value) {
+ Preconditions.checkArgument(targetVector.getField().getType().equals(deltaVector.getField().getType()),
+ "The targetVector to append must have the same type as the targetVector being appended");
+
+ if (deltaVector.getValueCount() == 0) {
+ return targetVector; // optimization, nothing to append, return
+ }
+
+ int newValueCount = targetVector.getValueCount() + deltaVector.getValueCount();
+
+ // make sure there is enough capacity
+ while (targetVector.getValueCapacity() < newValueCount) {
+ targetVector.reAlloc();
+ }
+
+ // append validity buffer
+ BitVectorHelper.concatBits(
+ targetVector.getValidityBuffer(), targetVector.getValueCount(),
+ deltaVector.getValidityBuffer(), deltaVector.getValueCount(), targetVector.getValidityBuffer());
+
+ // append data buffer
+ PlatformDependent.copyMemory(deltaVector.getDataBuffer().memoryAddress(),
+ targetVector.getDataBuffer().memoryAddress() + deltaVector.getTypeWidth() * targetVector.getValueCount(),
+ deltaVector.getTypeWidth() * deltaVector.getValueCount());
+ targetVector.setValueCount(newValueCount);
+ return targetVector;
+ }
+
+ @Override
+ public ValueVector visit(BaseVariableWidthVector deltaVector, Void value) {
+ Preconditions.checkArgument(targetVector.getField().getType().equals(deltaVector.getField().getType()),
+ "The targetVector to append must have the same type as the targetVector being appended");
+
+ if (deltaVector.getValueCount() == 0) {
+ return targetVector; // nothing to append, return
+ }
+
+ int newValueCount = targetVector.getValueCount() + deltaVector.getValueCount();
+
+ int targetDataSize = targetVector.getOffsetBuffer().getInt(
+ (long) targetVector.getValueCount() * BaseVariableWidthVector.OFFSET_WIDTH);
+ int deltaDataSize = deltaVector.getOffsetBuffer().getInt(
+ (long) deltaVector.getValueCount() * BaseVariableWidthVector.OFFSET_WIDTH);
+ int newValueCapacity = targetDataSize + deltaDataSize;
+
+ // make sure there is enough capacity
+ while (targetVector.getValueCapacity() < newValueCount) {
+ targetVector.reAlloc();
+ }
+ while (targetVector.getDataBuffer().capacity() < newValueCapacity) {
+ ((BaseVariableWidthVector) targetVector).reallocDataBuffer();
+ }
+
+ // append validity buffer
+ BitVectorHelper.concatBits(
+ targetVector.getValidityBuffer(), targetVector.getValueCount(),
+ deltaVector.getValidityBuffer(), deltaVector.getValueCount(), targetVector.getValidityBuffer());
+
+ // append data buffer
+ PlatformDependent.copyMemory(deltaVector.getDataBuffer().memoryAddress(),
+ targetVector.getDataBuffer().memoryAddress() + targetDataSize, deltaDataSize);
+
+ // copy offset buffer
+ PlatformDependent.copyMemory(
+ deltaVector.getOffsetBuffer().memoryAddress() + BaseVariableWidthVector.OFFSET_WIDTH,
+ targetVector.getOffsetBuffer().memoryAddress() + (targetVector.getValueCount() + 1) *
+ BaseVariableWidthVector.OFFSET_WIDTH,
+ deltaVector.getValueCount() * BaseVariableWidthVector.OFFSET_WIDTH);
+
+ // increase each offset from the second buffer
+ for (int i = 0; i < deltaVector.getValueCount(); i++) {
+ int oldOffset = targetVector.getOffsetBuffer().getInt((long) (targetVector.getValueCount() + 1 + i) *
+ BaseVariableWidthVector.OFFSET_WIDTH);
+ targetVector.getOffsetBuffer().setInt(
+ (long) (targetVector.getValueCount() + 1 + i) *
+ BaseVariableWidthVector.OFFSET_WIDTH, oldOffset + targetDataSize);
+ }
+ ((BaseVariableWidthVector) targetVector).setLastSet(newValueCount - 1);
+ targetVector.setValueCount(newValueCount);
+ return targetVector;
+ }
+
+ @Override
+ public ValueVector visit(BaseLargeVariableWidthVector deltaVector, Void value) {
+ Preconditions.checkArgument(targetVector.getField().getType().equals(deltaVector.getField().getType()),
+ "The targetVector to append must have the same type as the targetVector being appended");
+
+ if (deltaVector.getValueCount() == 0) {
+ return targetVector; // nothing to append, return
+ }
+
+ int newValueCount = targetVector.getValueCount() + deltaVector.getValueCount();
+
+ long targetDataSize = targetVector.getOffsetBuffer().getLong(
+ (long) targetVector.getValueCount() * BaseLargeVariableWidthVector.OFFSET_WIDTH);
+ long deltaDataSize = deltaVector.getOffsetBuffer().getLong(
+ (long) deltaVector.getValueCount() * BaseLargeVariableWidthVector.OFFSET_WIDTH);
+ long newValueCapacity = targetDataSize + deltaDataSize;
+
+ // make sure there is enough capacity
+ while (targetVector.getValueCapacity() < newValueCount) {
+ targetVector.reAlloc();
+ }
+ while (targetVector.getDataBuffer().capacity() < newValueCapacity) {
+ ((BaseLargeVariableWidthVector) targetVector).reallocDataBuffer();
+ }
+
+ // append validity buffer
+ BitVectorHelper.concatBits(
+ targetVector.getValidityBuffer(), targetVector.getValueCount(),
+ deltaVector.getValidityBuffer(), deltaVector.getValueCount(), targetVector.getValidityBuffer());
+
+ // append data buffer
+ PlatformDependent.copyMemory(deltaVector.getDataBuffer().memoryAddress(),
+ targetVector.getDataBuffer().memoryAddress() + targetDataSize, deltaDataSize);
+
+ // copy offset buffer
+ PlatformDependent.copyMemory(
+ deltaVector.getOffsetBuffer().memoryAddress() + BaseLargeVariableWidthVector.OFFSET_WIDTH,
+ targetVector.getOffsetBuffer().memoryAddress() + (targetVector.getValueCount() + 1) *
+ BaseLargeVariableWidthVector.OFFSET_WIDTH,
+ deltaVector.getValueCount() * BaseLargeVariableWidthVector.OFFSET_WIDTH);
+
+ // increase each offset from the second buffer
+ for (int i = 0; i < deltaVector.getValueCount(); i++) {
+ long oldOffset = targetVector.getOffsetBuffer().getLong((long) (targetVector.getValueCount() + 1 + i) *
+ BaseLargeVariableWidthVector.OFFSET_WIDTH);
+ targetVector.getOffsetBuffer().setLong(
+ (long) (targetVector.getValueCount() + 1 + i) *
+ BaseLargeVariableWidthVector.OFFSET_WIDTH, oldOffset + targetDataSize);
+ }
+ ((BaseLargeVariableWidthVector) targetVector).setLastSet(newValueCount - 1);
+ targetVector.setValueCount(newValueCount);
+ return targetVector;
+ }
+
+ @Override
+ public ValueVector visit(ListVector deltaVector, Void value) {
+ Preconditions.checkArgument(typeVisitor.equals(deltaVector),
+ "The targetVector to append must have the same type as the targetVector being appended");
+
+ if (deltaVector.getValueCount() == 0) {
+ return targetVector; // nothing to append, return
+ }
+
+ int newValueCount = targetVector.getValueCount() + deltaVector.getValueCount();
+
+ int targetListSize = targetVector.getOffsetBuffer().getInt(
+ (long) targetVector.getValueCount() * ListVector.OFFSET_WIDTH);
+ int deltaListSize = deltaVector.getOffsetBuffer().getInt(
+ (long) deltaVector.getValueCount() * ListVector.OFFSET_WIDTH);
+
+ ListVector targetListVector = (ListVector) targetVector;
+
+ // make sure the underlying vector has value count set
+ targetListVector.getDataVector().setValueCount(targetListSize);
+ deltaVector.getDataVector().setValueCount(deltaListSize);
+
+ // make sure there is enough capacity
+ while (targetVector.getValueCapacity() < newValueCount) {
+ targetVector.reAlloc();
+ }
+
+ // append validity buffer
+ BitVectorHelper.concatBits(
+ targetVector.getValidityBuffer(), targetVector.getValueCount(),
+ deltaVector.getValidityBuffer(), deltaVector.getValueCount(), targetVector.getValidityBuffer());
+
+ // append offset buffer
+ PlatformDependent.copyMemory(deltaVector.getOffsetBuffer().memoryAddress() + ListVector.OFFSET_WIDTH,
+ targetVector.getOffsetBuffer().memoryAddress() + (targetVector.getValueCount() + 1) *
+ ListVector.OFFSET_WIDTH,
+ (long) deltaVector.getValueCount() * ListVector.OFFSET_WIDTH);
+
+ // increase each offset from the second buffer
+ for (int i = 0; i < deltaVector.getValueCount(); i++) {
+ int oldOffset = targetVector.getOffsetBuffer().getInt(
+ (long) (targetVector.getValueCount() + 1 + i) * ListVector.OFFSET_WIDTH);
+ targetVector.getOffsetBuffer().setInt((long) (targetVector.getValueCount() + 1 + i) * ListVector.OFFSET_WIDTH,
+ oldOffset + targetListSize);
+ }
+ targetListVector.setLastSet(newValueCount - 1);
+
+ // append underlying vectors
+ VectorAppender innerAppender = new VectorAppender(targetListVector.getDataVector());
+ deltaVector.getDataVector().accept(innerAppender, null);
+
+ targetVector.setValueCount(newValueCount);
+ return targetVector;
+ }
+
+ @Override
+ public ValueVector visit(LargeListVector deltaVector, Void value) {
+ Preconditions.checkArgument(typeVisitor.equals(deltaVector),
+ "The targetVector to append must have the same type as the targetVector being appended");
+
+ if (deltaVector.getValueCount() == 0) {
+ return targetVector; // nothing to append, return
+ }
+
+ int newValueCount = targetVector.getValueCount() + deltaVector.getValueCount();
+
+ long targetListSize = targetVector.getOffsetBuffer().getLong(
+ (long) targetVector.getValueCount() * LargeListVector.OFFSET_WIDTH);
+ long deltaListSize = deltaVector.getOffsetBuffer().getLong(
+ (long) deltaVector.getValueCount() * LargeListVector.OFFSET_WIDTH);
+
+ ListVector targetListVector = (ListVector) targetVector;
+
+ // make sure the underlying vector has value count set
+ // todo recheck these casts when int64 vectors are supported
+ targetListVector.getDataVector().setValueCount(checkedCastToInt(targetListSize));
+ deltaVector.getDataVector().setValueCount(checkedCastToInt(deltaListSize));
+
+ // make sure there is enough capacity
+ while (targetVector.getValueCapacity() < newValueCount) {
+ targetVector.reAlloc();
+ }
+
+ // append validity buffer
+ BitVectorHelper.concatBits(
+ targetVector.getValidityBuffer(), targetVector.getValueCount(),
+ deltaVector.getValidityBuffer(), deltaVector.getValueCount(), targetVector.getValidityBuffer());
+
+ // append offset buffer
+ PlatformDependent.copyMemory(deltaVector.getOffsetBuffer().memoryAddress() + ListVector.OFFSET_WIDTH,
+ targetVector.getOffsetBuffer().memoryAddress() + (targetVector.getValueCount() + 1) *
+ LargeListVector.OFFSET_WIDTH,
+ (long) deltaVector.getValueCount() * ListVector.OFFSET_WIDTH);
+
+ // increase each offset from the second buffer
+ for (int i = 0; i < deltaVector.getValueCount(); i++) {
+ long oldOffset = targetVector.getOffsetBuffer().getLong(
+ (long) (targetVector.getValueCount() + 1 + i) * LargeListVector.OFFSET_WIDTH);
+ targetVector.getOffsetBuffer().setLong((long) (targetVector.getValueCount() + 1 + i) *
+ LargeListVector.OFFSET_WIDTH, oldOffset + targetListSize);
+ }
+ targetListVector.setLastSet(newValueCount - 1);
+
+ // append underlying vectors
+ VectorAppender innerAppender = new VectorAppender(targetListVector.getDataVector());
+ deltaVector.getDataVector().accept(innerAppender, null);
+
+ targetVector.setValueCount(newValueCount);
+ return targetVector;
+ }
+
+ @Override
+ public ValueVector visit(FixedSizeListVector deltaVector, Void value) {
+ Preconditions.checkArgument(typeVisitor.equals(deltaVector),
+ "The vector to append must have the same type as the targetVector being appended");
+
+ if (deltaVector.getValueCount() == 0) {
+ return targetVector; // optimization, nothing to append, return
+ }
+
+ FixedSizeListVector targetListVector = (FixedSizeListVector) targetVector;
+
+ Preconditions.checkArgument(targetListVector.getListSize() == deltaVector.getListSize(),
+ "FixedSizeListVector must have the same list size to append");
+
+ int newValueCount = targetVector.getValueCount() + deltaVector.getValueCount();
+
+ int targetListSize = targetListVector.getValueCount() * targetListVector.getListSize();
+ int deltaListSize = deltaVector.getValueCount() * deltaVector.getListSize();
+
+ // make sure the underlying vector has value count set
+ targetListVector.getDataVector().setValueCount(targetListSize);
+ deltaVector.getDataVector().setValueCount(deltaListSize);
+
+ // make sure there is enough capacity
+ while (targetVector.getValueCapacity() < newValueCount) {
+ targetVector.reAlloc();
+ }
+
+ // append validity buffer
+ BitVectorHelper.concatBits(
+ targetVector.getValidityBuffer(), targetVector.getValueCount(),
+ deltaVector.getValidityBuffer(), deltaVector.getValueCount(), targetVector.getValidityBuffer());
+
+ // append underlying vectors
+ VectorAppender innerAppender = new VectorAppender(targetListVector.getDataVector());
+ deltaVector.getDataVector().accept(innerAppender, null);
+
+ targetVector.setValueCount(newValueCount);
+ return targetVector;
+ }
+
+ @Override
+ public ValueVector visit(NonNullableStructVector deltaVector, Void value) {
+ Preconditions.checkArgument(typeVisitor.equals(deltaVector),
+ "The vector to append must have the same type as the targetVector being appended");
+
+ if (deltaVector.getValueCount() == 0) {
+ return targetVector; // optimization, nothing to append, return
+ }
+
+ NonNullableStructVector targetStructVector = (NonNullableStructVector) targetVector;
+ int newValueCount = targetVector.getValueCount() + deltaVector.getValueCount();
+
+ // make sure there is enough capacity
+ while (targetVector.getValueCapacity() < newValueCount) {
+ targetVector.reAlloc();
+ }
+
+ // append validity buffer
+ BitVectorHelper.concatBits(
+ targetVector.getValidityBuffer(), targetVector.getValueCount(),
+ deltaVector.getValidityBuffer(), deltaVector.getValueCount(), targetVector.getValidityBuffer());
+
+ // append child vectors
+ for (int i = 0; i < targetStructVector.getChildrenFromFields().size(); i++) {
+ ValueVector targetChild = targetStructVector.getVectorById(i);
+ ValueVector deltaChild = deltaVector.getVectorById(i);
+
+ targetChild.setValueCount(targetStructVector.getValueCount());
+ deltaChild.setValueCount(deltaVector.getValueCount());
+
+ VectorAppender innerAppender = new VectorAppender(targetChild);
+ deltaChild.accept(innerAppender, null);
+ }
+
+ targetVector.setValueCount(newValueCount);
+ return targetVector;
+ }
+
+ @Override
+ public ValueVector visit(UnionVector deltaVector, Void value) {
+ // we only make sure that both vectors are union vectors.
+ Preconditions.checkArgument(targetVector.getMinorType() == deltaVector.getMinorType(),
+ "The vector to append must have the same type as the targetVector being appended");
+
+ if (deltaVector.getValueCount() == 0) {
+ return targetVector; // optimization, nothing to append, return
+ }
+
+ UnionVector targetUnionVector = (UnionVector) targetVector;
+ int newValueCount = targetVector.getValueCount() + deltaVector.getValueCount();
+
+ // make sure there is enough capacity
+ while (targetUnionVector.getValueCapacity() < newValueCount) {
+ targetUnionVector.reAlloc();
+ }
+
+ // append type buffers
+ PlatformDependent.copyMemory(deltaVector.getTypeBufferAddress(),
+ targetUnionVector.getTypeBufferAddress() + targetVector.getValueCount(),
+ deltaVector.getValueCount());
+
+ // build the hash set for all types
+ HashSet<Integer> targetTypes = new HashSet<>();
+ for (int i = 0; i < targetUnionVector.getValueCount(); i++) {
+ targetTypes.add(targetUnionVector.getTypeValue(i));
+ }
+ HashSet<Integer> deltaTypes = new HashSet<>();
+ for (int i = 0; i < deltaVector.getValueCount(); i++) {
+ deltaTypes.add(deltaVector.getTypeValue(i));
+ }
+
+ // append child vectors
+ for (int i = 0; i < Byte.MAX_VALUE; i++) {
+ if (targetTypes.contains(i) || deltaTypes.contains(i)) {
+ ValueVector targetChild = targetUnionVector.getVectorByType(i);
+ if (!targetTypes.contains(i)) {
+ // if the vector type does not exist in the target, it must be newly created
+ // and we must make sure it has enough capacity.
+ while (targetChild.getValueCapacity() < newValueCount) {
+ targetChild.reAlloc();
+ }
+ }
+
+ if (deltaTypes.contains(i)) {
+ // append child vectors
+ ValueVector deltaChild = deltaVector.getVectorByType(i);
+
+ targetChild.setValueCount(targetUnionVector.getValueCount());
+ deltaChild.setValueCount(deltaVector.getValueCount());
+
+ VectorAppender innerAppender = new VectorAppender(targetChild);
+ deltaChild.accept(innerAppender, null);
+ }
+ targetChild.setValueCount(newValueCount);
+ }
+ }
+
+ targetVector.setValueCount(newValueCount);
+ return targetVector;
+ }
+
+ @Override
+ public ValueVector visit(DenseUnionVector deltaVector, Void value) {
+ // we only make sure that both vectors are union vectors.
+ Preconditions.checkArgument(targetVector.getMinorType() == deltaVector.getMinorType(),
+ "The vector to append must have the same type as the targetVector being appended");
+
+ if (deltaVector.getValueCount() == 0) {
+ return targetVector; // optimization, nothing to append, return
+ }
+
+ DenseUnionVector targetDenseUnionVector = (DenseUnionVector) targetVector;
+ int newValueCount = targetVector.getValueCount() + deltaVector.getValueCount();
+
+ // make sure there is enough capacity
+ while (targetDenseUnionVector.getValueCapacity() < newValueCount) {
+ targetDenseUnionVector.reAlloc();
+ }
+
+ // append type buffers
+ PlatformDependent.copyMemory(deltaVector.getTypeBuffer().memoryAddress(),
+ targetDenseUnionVector.getTypeBuffer() .memoryAddress() + targetVector.getValueCount(),
+ deltaVector.getValueCount());
+
+ // append offset buffers
+ for (int i = 0; i < deltaVector.getValueCount(); i++) {
+ byte typeId = deltaVector.getTypeId(i);
+ ValueVector targetChildVector = targetDenseUnionVector.getVectorByType(typeId);
+ int offsetBase = targetChildVector == null ? 0 : targetChildVector.getValueCount();
+ int deltaOffset = deltaVector.getOffset(i);
+ long index = (long) (targetVector.getValueCount() + i) * DenseUnionVector.OFFSET_WIDTH;
+
+ targetVector.getOffsetBuffer().setInt(index, offsetBase + deltaOffset);
+ }
+
+ // append child vectors
+ for (int i = 0; i <= Byte.MAX_VALUE; i++) {
+ ValueVector targetChildVector = targetDenseUnionVector.getVectorByType((byte) i);
+ ValueVector deltaChildVector = deltaVector.getVectorByType((byte) i);
+
+ if (targetChildVector == null && deltaChildVector == null) {
+ // the type id is not registered in either vector, we are done.
+ continue;
+ } else if (targetChildVector == null && deltaChildVector != null) {
+ // first register a new child in the target vector
+ targetDenseUnionVector.registerNewTypeId(deltaChildVector.getField());
+ targetChildVector = targetDenseUnionVector.addVector(
+ (byte) i, deltaChildVector.getField().createVector(targetDenseUnionVector.getAllocator()));
+
+ // now we have both child vecors not null, we can append them.
+ VectorAppender childAppender = new VectorAppender(targetChildVector);
+ deltaChildVector.accept(childAppender, null);
+ } else if (targetChildVector != null && deltaChildVector == null) {
+ // the value only exists in the target vector, so we are done
+ continue;
+ } else {
+ // both child vectors are non-null
+
+ // first check vector types
+ TypeEqualsVisitor childTypeVisitor =
+ new TypeEqualsVisitor(targetChildVector, /* check name */ false, /* check meta data*/ false);
+ if (!childTypeVisitor.equals(deltaChildVector)) {
+ throw new IllegalArgumentException("dense union vectors have different child vector types with type id " + i);
+ }
+
+ // append child vectors
+ VectorAppender childAppender = new VectorAppender(targetChildVector);
+ deltaChildVector.accept(childAppender, null);
+ }
+ }
+
+ targetVector.setValueCount(newValueCount);
+ return targetVector;
+ }
+
+ @Override
+ public ValueVector visit(NullVector deltaVector, Void value) {
+ Preconditions.checkArgument(targetVector.getField().getType().equals(deltaVector.getField().getType()),
+ "The targetVector to append must have the same type as the targetVector being appended");
+ return targetVector;
+ }
+
+ @Override
+ public ValueVector visit(ExtensionTypeVector<?> deltaVector, Void value) {
+ ValueVector targetUnderlying = ((ExtensionTypeVector<?>) targetVector).getUnderlyingVector();
+ VectorAppender underlyingAppender = new VectorAppender(targetUnderlying);
+ deltaVector.getUnderlyingVector().accept(underlyingAppender, null);
+ return targetVector;
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/VectorBatchAppender.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/VectorBatchAppender.java
new file mode 100644
index 000000000..570783d10
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/VectorBatchAppender.java
@@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.util;
+
+import org.apache.arrow.vector.ValueVector;
+
+/**
+ * Utility to add vector values in batch.
+ */
+public class VectorBatchAppender {
+
+ /**
+ * Add value vectors in batch.
+ * @param targetVector the target vector.
+ * @param vectorsToAppend the vectors to append.
+ * @param <V> the vector type.
+ */
+ public static <V extends ValueVector> void batchAppend(V targetVector, V... vectorsToAppend) {
+ VectorAppender appender = new VectorAppender(targetVector);
+ for (V delta : vectorsToAppend) {
+ delta.accept(appender, null);
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/VectorSchemaRootAppender.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/VectorSchemaRootAppender.java
new file mode 100644
index 000000000..3c6044ec5
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/util/VectorSchemaRootAppender.java
@@ -0,0 +1,83 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.util;
+
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.compare.TypeEqualsVisitor;
+
+/**
+ * Utility to append {@link org.apache.arrow.vector.VectorSchemaRoot}s with the same schema.
+ */
+public class VectorSchemaRootAppender {
+
+ /**
+ * Appends a number of {@link VectorSchemaRoot}s.
+ * @param checkSchema if we need to check schema for the vector schema roots.
+ * @param targetRoot the vector schema root to be appended.
+ * @param rootsToAppend the vector schema roots to append.
+ * @throws IllegalArgumentException throws if we need to check schema, and checking schema fails.
+ */
+ public static void append(boolean checkSchema, VectorSchemaRoot targetRoot, VectorSchemaRoot... rootsToAppend) {
+ // create appenders
+ VectorAppender[] appenders = new VectorAppender[targetRoot.getFieldVectors().size()];
+ for (int i = 0; i < appenders.length; i++) {
+ appenders[i] = new VectorAppender(targetRoot.getVector(i));
+ }
+
+ // create type checkers, if necessary
+ TypeEqualsVisitor[] typeCheckers = null;
+ if (checkSchema) {
+ typeCheckers = new TypeEqualsVisitor[targetRoot.getFieldVectors().size()];
+ for (int i = 0; i < typeCheckers.length; i++) {
+ typeCheckers[i] = new TypeEqualsVisitor(targetRoot.getVector(i),
+ /* check name */ false, /* check meta data */ false);
+ }
+ }
+
+ for (VectorSchemaRoot delta : rootsToAppend) {
+ // check schema, if necessary
+ if (checkSchema) {
+ if (delta.getFieldVectors().size() != targetRoot.getFieldVectors().size()) {
+ throw new IllegalArgumentException("Vector schema roots have different numbers of child vectors.");
+ }
+ for (int i = 0; i < typeCheckers.length; i++) {
+ if (!typeCheckers[i].equals(delta.getVector(i))) {
+ throw new IllegalArgumentException("Vector schema roots have different schemas.");
+ }
+ }
+ }
+
+ // append child vectors.
+ for (int i = 0; i < appenders.length; i++) {
+ delta.getVector(i).accept(appenders[i], null);
+ }
+ targetRoot.setRowCount(targetRoot.getRowCount() + delta.getRowCount());
+ }
+ }
+
+ /**
+ * Appends a number of {@link VectorSchemaRoot}s.
+ * This method performs schema checking before appending data.
+ * @param targetRoot the vector schema root to be appended.
+ * @param rootsToAppend the vector schema roots to append.
+ * @throws IllegalArgumentException throws if we need to check schema, and checking schema fails.
+ */
+ public static void append(VectorSchemaRoot targetRoot, VectorSchemaRoot... rootsToAppend) {
+ append(true, targetRoot, rootsToAppend);
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/validate/MetadataV4UnionChecker.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/validate/MetadataV4UnionChecker.java
new file mode 100644
index 000000000..2a7068365
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/validate/MetadataV4UnionChecker.java
@@ -0,0 +1,82 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.validate;
+
+import java.io.IOException;
+import java.util.Iterator;
+
+import org.apache.arrow.vector.types.MetadataVersion;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.Schema;
+
+/**
+ * Given a field, checks that no Union fields are present.
+ *
+ * This is intended to be used to prevent unions from being read/written with V4 metadata.
+ */
+public final class MetadataV4UnionChecker {
+ static boolean isUnion(Field field) {
+ return field.getType().getTypeID() == ArrowType.ArrowTypeID.Union;
+ }
+
+ static Field check(Field field) {
+ if (isUnion(field)) {
+ return field;
+ }
+ // Naive recursive DFS
+ for (final Field child : field.getChildren()) {
+ final Field result = check(child);
+ if (result != null) {
+ return result;
+ }
+ }
+ return null;
+ }
+
+ /**
+ * Check the schema, raising an error if an unsupported feature is used (e.g. unions with < V5 metadata).
+ */
+ public static void checkForUnion(Iterator<Field> fields, MetadataVersion metadataVersion) {
+ if (metadataVersion.toFlatbufID() >= MetadataVersion.V5.toFlatbufID()) {
+ return;
+ }
+ while (fields.hasNext()) {
+ Field union = check(fields.next());
+ if (union != null) {
+ throw new IllegalArgumentException(
+ "Cannot write union with V4 metadata version, use V5 instead. Found field: " + union);
+ }
+ }
+ }
+
+ /**
+ * Check the schema, raising an error if an unsupported feature is used (e.g. unions with < V5 metadata).
+ */
+ public static void checkRead(Schema schema, MetadataVersion metadataVersion) throws IOException {
+ if (metadataVersion.toFlatbufID() >= MetadataVersion.V5.toFlatbufID()) {
+ return;
+ }
+ for (final Field field : schema.getFields()) {
+ Field union = check(field);
+ if (union != null) {
+ throw new IOException("Cannot read union with V4 metadata version. Found field: " + union);
+ }
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateUtil.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateUtil.java
new file mode 100644
index 000000000..e1b60e926
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateUtil.java
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.validate;
+
+/**
+ * Utilities for vector validation.
+ */
+public class ValidateUtil {
+
+ private ValidateUtil() {
+ }
+
+ /**
+ * Validate the expression.
+ * @param expression the expression to validate.
+ * @param errorMessage the error message.
+ * @throws ValidateException if the expression evaluates to false.
+ */
+ public static void validateOrThrow(boolean expression, String errorMessage) {
+ if (!expression) {
+ throw new ValidateException(errorMessage);
+ }
+ }
+
+ /**
+ * Validate the expression.
+ * @param expression the expression to validate.
+ * @param errorMessage the error message template.
+ * @param args the error message arguments.
+ * @throws ValidateException if the expression evaluates to false.
+ */
+ public static void validateOrThrow(boolean expression, String errorMessage, Object... args) {
+ if (!expression) {
+ throw new ValidateException(String.format(errorMessage, args));
+ }
+ }
+
+ /**
+ * A exception that is thrown when the vector validation fails.
+ */
+ public static class ValidateException extends RuntimeException {
+ public ValidateException(String message) {
+ super(message);
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorBufferVisitor.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorBufferVisitor.java
new file mode 100644
index 000000000..d4abaa194
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorBufferVisitor.java
@@ -0,0 +1,246 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.validate;
+
+import static org.apache.arrow.vector.validate.ValidateUtil.validateOrThrow;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.vector.BaseFixedWidthVector;
+import org.apache.arrow.vector.BaseLargeVariableWidthVector;
+import org.apache.arrow.vector.BaseVariableWidthVector;
+import org.apache.arrow.vector.BitVector;
+import org.apache.arrow.vector.ExtensionTypeVector;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.NullVector;
+import org.apache.arrow.vector.TypeLayout;
+import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.compare.VectorVisitor;
+import org.apache.arrow.vector.complex.DenseUnionVector;
+import org.apache.arrow.vector.complex.FixedSizeListVector;
+import org.apache.arrow.vector.complex.LargeListVector;
+import org.apache.arrow.vector.complex.ListVector;
+import org.apache.arrow.vector.complex.NonNullableStructVector;
+import org.apache.arrow.vector.complex.UnionVector;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+
+/**
+ * Visitor to validate vector buffers.
+ */
+public class ValidateVectorBufferVisitor implements VectorVisitor<Void, Void> {
+
+ private void validateVectorCommon(ValueVector vector) {
+ ArrowType arrowType = vector.getField().getType();
+ validateOrThrow(vector.getValueCount() >= 0,
+ "Vector valueCount %s is negative.", vector.getValueCapacity());
+
+ if (vector instanceof FieldVector) {
+ FieldVector fieldVector = (FieldVector) vector;
+ int typeBufferCount = TypeLayout.getTypeBufferCount(arrowType);
+ validateOrThrow(fieldVector.getFieldBuffers().size() == typeBufferCount,
+ "Expected %s buffers in vector of type %s, got %s.",
+ typeBufferCount, vector.getField().getType().toString(), fieldVector.getFieldBuffers().size());
+ }
+ }
+
+ private void validateValidityBuffer(ValueVector vector, int valueCount) {
+ ArrowBuf validityBuffer = vector.getValidityBuffer();
+ validateOrThrow(validityBuffer != null, "The validity buffer is null.");
+ validateOrThrow(validityBuffer.capacity() * 8 >= valueCount,
+ "Not enough capacity for the validity buffer. Minimum capacity %s, actual capacity %s.",
+ (valueCount + 7) / 8, validityBuffer.capacity());
+ }
+
+ private void validateOffsetBuffer(ValueVector vector, long minCapacity) {
+ ArrowBuf offsetBuffer = vector.getOffsetBuffer();
+ validateOrThrow(offsetBuffer != null, "The offset buffer is null.");
+ validateOrThrow(offsetBuffer.capacity() >= minCapacity,
+ "Not enough capacity for the offset buffer. Minimum capacity %s, actual capacity %s.",
+ minCapacity, offsetBuffer.capacity());
+ }
+
+ private void validateFixedWidthDataBuffer(ValueVector vector, int valueCount, int bitWidth) {
+ ArrowBuf dataBuffer = vector.getDataBuffer();
+ validateOrThrow(dataBuffer != null, "The fixed width data buffer is null.");
+ validateOrThrow((long) bitWidth * valueCount <= dataBuffer.capacity() * 8L,
+ "Not enough capacity for fixed width data buffer. Minimum capacity %s, actual capacity %s.",
+ ((long) bitWidth * valueCount + 7L) / 8L, dataBuffer.capacity());
+ }
+
+ private void validateDataBuffer(ValueVector vector, long minCapacity) {
+ ArrowBuf dataBuffer = vector.getDataBuffer();
+ validateOrThrow(dataBuffer != null, "The data buffer is null.");
+ validateOrThrow(dataBuffer.capacity() >= minCapacity,
+ "Not enough capacity for data buffer. Minimum capacity %s, actual capacity %s.",
+ minCapacity, dataBuffer.capacity());
+ }
+
+ private void validateTypeBuffer(ArrowBuf typeBuf, long minCapacity) {
+ validateOrThrow(typeBuf != null, "The type buffer is null.");
+ validateOrThrow(typeBuf.capacity() >= minCapacity,
+ "Not enough capacity for type buffer. Minimum capacity %s, actual capacity %s.",
+ minCapacity, typeBuf.capacity());
+ }
+
+ @Override
+ public Void visit(BaseFixedWidthVector vector, Void value) {
+ int bitWidth = (vector instanceof BitVector) ? 1 : vector.getTypeWidth() * 8;
+ int valueCount = vector.getValueCount();
+ validateVectorCommon(vector);
+ validateValidityBuffer(vector, valueCount);
+ validateFixedWidthDataBuffer(vector, valueCount, bitWidth);
+ return null;
+ }
+
+ @Override
+ public Void visit(BaseVariableWidthVector vector, Void value) {
+ int valueCount = vector.getValueCount();
+ validateVectorCommon(vector);
+ validateValidityBuffer(vector, valueCount);
+ long minOffsetCapacity = valueCount == 0 ? 0L : (long) (valueCount + 1) * BaseVariableWidthVector.OFFSET_WIDTH;
+ validateOffsetBuffer(vector, minOffsetCapacity);
+ int lastOffset = valueCount == 0 ? 0 :
+ vector.getOffsetBuffer().getInt(valueCount * BaseVariableWidthVector.OFFSET_WIDTH);
+ validateDataBuffer(vector, lastOffset);
+ return null;
+ }
+
+ @Override
+ public Void visit(BaseLargeVariableWidthVector vector, Void value) {
+ int valueCount = vector.getValueCount();
+ validateVectorCommon(vector);
+ validateValidityBuffer(vector, valueCount);
+ long minOffsetCapacity = valueCount == 0 ? 0L
+ : (long) (valueCount + 1) * BaseLargeVariableWidthVector.OFFSET_WIDTH;
+ validateOffsetBuffer(vector, minOffsetCapacity);
+ long lastOffset = valueCount == 0 ? 0L :
+ vector.getOffsetBuffer().getLong((long) valueCount * BaseLargeVariableWidthVector.OFFSET_WIDTH);
+ validateDataBuffer(vector, lastOffset);
+ return null;
+ }
+
+ @Override
+ public Void visit(ListVector vector, Void value) {
+ int valueCount = vector.getValueCount();
+ validateVectorCommon(vector);
+ validateValidityBuffer(vector, valueCount);
+ long minOffsetCapacity = valueCount == 0 ? 0L : (long) (valueCount + 1) * ListVector.OFFSET_WIDTH;
+ validateOffsetBuffer(vector, minOffsetCapacity);
+
+ FieldVector dataVector = vector.getDataVector();
+ int lastOffset = valueCount == 0 ? 0 :
+ vector.getOffsetBuffer().getInt(valueCount * BaseVariableWidthVector.OFFSET_WIDTH);
+ int dataVectorLength = dataVector == null ? 0 : dataVector.getValueCount();
+ validateOrThrow(dataVectorLength >= lastOffset,
+ "Inner vector does not contain enough elements. Minimum element count %s, actual element count %s",
+ lastOffset + 1, dataVectorLength);
+
+ if (dataVector != null) {
+ dataVector.accept(this, null);
+ }
+ return null;
+ }
+
+ @Override
+ public Void visit(FixedSizeListVector vector, Void value) {
+ int valueCount = vector.getValueCount();
+ validateVectorCommon(vector);
+ validateValidityBuffer(vector, valueCount);
+ FieldVector dataVector = vector.getDataVector();
+ int dataVectorLength = dataVector == null ? 0 : dataVector.getValueCount();
+ validateOrThrow(dataVectorLength >= valueCount * vector.getListSize(),
+ "Inner vector does not contain enough elements. Minimum element count %s, actual element count %s.",
+ valueCount * vector.getListSize(), dataVectorLength);
+ if (dataVector != null) {
+ dataVector.accept(this, null);
+ }
+ return null;
+ }
+
+ @Override
+ public Void visit(LargeListVector vector, Void value) {
+ int valueCount = vector.getValueCount();
+ validateVectorCommon(vector);
+ validateValidityBuffer(vector, valueCount);
+ long minOffsetCapacity = valueCount == 0 ? 0L : (long) (valueCount + 1) * LargeListVector.OFFSET_WIDTH;
+ validateOffsetBuffer(vector, minOffsetCapacity);
+
+ FieldVector dataVector = vector.getDataVector();
+ long lastOffset = valueCount == 0 ? 0 :
+ vector.getOffsetBuffer().getLong(valueCount * BaseLargeVariableWidthVector.OFFSET_WIDTH);
+ int dataVectorLength = dataVector == null ? 0 : dataVector.getValueCount();
+ validateOrThrow(dataVectorLength >= lastOffset,
+ "Inner vector does not contain enough elements. Minimum element count %s, actual element count %s",
+ lastOffset + 1, dataVectorLength);
+
+ if (dataVector != null) {
+ dataVector.accept(this, null);
+ }
+ return null;
+ }
+
+ @Override
+ public Void visit(NonNullableStructVector vector, Void value) {
+ int valueCount = vector.getValueCount();
+ validateVectorCommon(vector);
+ validateValidityBuffer(vector, valueCount);
+ for (ValueVector subVector : vector.getChildrenFromFields()) {
+ validateOrThrow(valueCount == subVector.getValueCount(),
+ "Struct vector length not equal to child vector length. Struct vector length %s, child vector length %s",
+ valueCount, subVector.getValueCount());
+ subVector.accept(this, null);
+ }
+ return null;
+ }
+
+ @Override
+ public Void visit(UnionVector vector, Void value) {
+ int valueCount = vector.getValueCount();
+ validateVectorCommon(vector);
+ validateTypeBuffer(vector.getTypeBuffer(), valueCount * UnionVector.TYPE_WIDTH);
+ for (ValueVector subVector : vector.getChildrenFromFields()) {
+ validateOrThrow(valueCount == subVector.getValueCount(),
+ "Union vector length not equal to child vector length. Union vector length %s, child vector length %s",
+ valueCount, subVector.getValueCount());
+ subVector.accept(this, null);
+ }
+ return null;
+ }
+
+ @Override
+ public Void visit(DenseUnionVector vector, Void value) {
+ int valueCount = vector.getValueCount();
+ validateVectorCommon(vector);
+ validateOffsetBuffer(vector, (long) valueCount * DenseUnionVector.OFFSET_WIDTH);
+ validateTypeBuffer(vector.getTypeBuffer(), valueCount * DenseUnionVector.TYPE_WIDTH);
+ for (ValueVector subVector : vector.getChildrenFromFields()) {
+ subVector.accept(this, null);
+ }
+ return null;
+ }
+
+ @Override
+ public Void visit(NullVector vector, Void value) {
+ return null;
+ }
+
+ @Override
+ public Void visit(ExtensionTypeVector<?> vector, Void value) {
+ vector.getUnderlyingVector().accept(this, value);
+ return null;
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorDataVisitor.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorDataVisitor.java
new file mode 100644
index 000000000..cdeb4f1ea
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorDataVisitor.java
@@ -0,0 +1,180 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.validate;
+
+import static org.apache.arrow.vector.validate.ValidateUtil.validateOrThrow;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.vector.BaseFixedWidthVector;
+import org.apache.arrow.vector.BaseLargeVariableWidthVector;
+import org.apache.arrow.vector.BaseVariableWidthVector;
+import org.apache.arrow.vector.ExtensionTypeVector;
+import org.apache.arrow.vector.NullVector;
+import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.compare.VectorVisitor;
+import org.apache.arrow.vector.complex.DenseUnionVector;
+import org.apache.arrow.vector.complex.FixedSizeListVector;
+import org.apache.arrow.vector.complex.LargeListVector;
+import org.apache.arrow.vector.complex.ListVector;
+import org.apache.arrow.vector.complex.NonNullableStructVector;
+import org.apache.arrow.vector.complex.UnionVector;
+
+/**
+ * Utility for validating vector data.
+ */
+public class ValidateVectorDataVisitor implements VectorVisitor<Void, Void> {
+
+ private void validateOffsetBuffer(ValueVector vector, int valueCount) {
+ if (valueCount == 0) {
+ return;
+ }
+ ArrowBuf offsetBuffer = vector.getOffsetBuffer();
+
+ // verify that the values in the offset buffer is non-decreasing
+ int prevValue = offsetBuffer.getInt(0);
+ for (int i = 1; i <= valueCount; i++) {
+ int curValue = offsetBuffer.getInt(i * 4);
+ validateOrThrow(curValue >= 0, "The value at position %s of the offset buffer is negative: %s.", i, curValue);
+ validateOrThrow(curValue >= prevValue,
+ "The values in positions %s and %s of the offset buffer are decreasing: %s, %s.",
+ i - 1, i, prevValue, curValue);
+ prevValue = curValue;
+ }
+ }
+
+ private void validateLargeOffsetBuffer(ValueVector vector, int valueCount) {
+ if (valueCount == 0) {
+ return;
+ }
+ ArrowBuf offsetBuffer = vector.getOffsetBuffer();
+
+ // verify that the values in the large offset buffer is non-decreasing
+ long prevValue = offsetBuffer.getLong(0);
+ for (int i = 1; i <= valueCount; i++) {
+ long curValue = offsetBuffer.getLong((long) i * 8);
+ validateOrThrow(curValue >= 0L, "The value at position %s of the large offset buffer is negative: %s.",
+ i, curValue);
+ validateOrThrow(curValue >= prevValue,
+ "The values in positions %s and %s of the large offset buffer are decreasing: %s, %s.",
+ i - 1, i, prevValue, curValue);
+ prevValue = curValue;
+ }
+ }
+
+ private void validateTypeBuffer(ArrowBuf typeBuf, int valueCount) {
+ for (int i = 0; i < valueCount; i++) {
+ validateOrThrow(typeBuf.getByte(i) >= 0, "The type id at position %s is negative: %s.",
+ i, typeBuf.getByte(i));
+ }
+ }
+
+ @Override
+ public Void visit(BaseFixedWidthVector vector, Void value) {
+ return null;
+ }
+
+ @Override
+ public Void visit(BaseVariableWidthVector vector, Void value) {
+ validateOffsetBuffer(vector, vector.getValueCount());
+ return null;
+ }
+
+ @Override
+ public Void visit(BaseLargeVariableWidthVector vector, Void value) {
+ validateLargeOffsetBuffer(vector, vector.getValueCount());
+ return null;
+ }
+
+ @Override
+ public Void visit(ListVector vector, Void value) {
+ validateOffsetBuffer(vector, vector.getValueCount());
+ ValueVector innerVector = vector.getDataVector();
+ if (innerVector != null) {
+ innerVector.accept(this, null);
+ }
+ return null;
+ }
+
+ @Override
+ public Void visit(FixedSizeListVector vector, Void value) {
+ validateOffsetBuffer(vector, vector.getValueCount());
+ ValueVector innerVector = vector.getDataVector();
+ if (innerVector != null) {
+ innerVector.accept(this, null);
+ }
+ return null;
+ }
+
+ @Override
+ public Void visit(LargeListVector vector, Void value) {
+ validateLargeOffsetBuffer(vector, vector.getValueCount());
+ ValueVector innerVector = vector.getDataVector();
+ if (innerVector != null) {
+ innerVector.accept(this, null);
+ }
+ return null;
+ }
+
+ @Override
+ public Void visit(NonNullableStructVector vector, Void value) {
+ for (ValueVector subVector : vector.getChildrenFromFields()) {
+ subVector.accept(this, null);
+ }
+ return null;
+ }
+
+ @Override
+ public Void visit(UnionVector vector, Void value) {
+ validateTypeBuffer(vector.getTypeBuffer(), vector.getValueCount());
+ for (ValueVector subVector : vector.getChildrenFromFields()) {
+ subVector.accept(this, null);
+ }
+ return null;
+ }
+
+ @Override
+ public Void visit(DenseUnionVector vector, Void value) {
+ validateTypeBuffer(vector.getTypeBuffer(), vector.getValueCount());
+
+ // validate offset buffer
+ for (int i = 0; i < vector.getValueCount(); i++) {
+ int offset = vector.getOffset(i);
+ byte typeId = vector.getTypeId(i);
+ ValueVector subVector = vector.getVectorByType(typeId);
+ validateOrThrow(offset < subVector.getValueCount(),
+ "Dense union vector offset exceeds sub-vector boundary. Vector offset %s, sub vector size %s",
+ offset, subVector.getValueCount());
+ }
+
+ for (ValueVector subVector : vector.getChildrenFromFields()) {
+ subVector.accept(this, null);
+ }
+ return null;
+ }
+
+ @Override
+ public Void visit(NullVector vector, Void value) {
+ return null;
+ }
+
+ @Override
+ public Void visit(ExtensionTypeVector<?> vector, Void value) {
+ vector.getUnderlyingVector().accept(this, value);
+ return null;
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorTypeVisitor.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorTypeVisitor.java
new file mode 100644
index 000000000..65795b468
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorTypeVisitor.java
@@ -0,0 +1,378 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.validate;
+
+import static org.apache.arrow.vector.validate.ValidateUtil.validateOrThrow;
+
+import org.apache.arrow.vector.BaseFixedWidthVector;
+import org.apache.arrow.vector.BaseLargeVariableWidthVector;
+import org.apache.arrow.vector.BaseVariableWidthVector;
+import org.apache.arrow.vector.BigIntVector;
+import org.apache.arrow.vector.BitVector;
+import org.apache.arrow.vector.DateDayVector;
+import org.apache.arrow.vector.DateMilliVector;
+import org.apache.arrow.vector.Decimal256Vector;
+import org.apache.arrow.vector.DecimalVector;
+import org.apache.arrow.vector.DurationVector;
+import org.apache.arrow.vector.ExtensionTypeVector;
+import org.apache.arrow.vector.FixedSizeBinaryVector;
+import org.apache.arrow.vector.Float4Vector;
+import org.apache.arrow.vector.Float8Vector;
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.IntervalDayVector;
+import org.apache.arrow.vector.IntervalMonthDayNanoVector;
+import org.apache.arrow.vector.IntervalYearVector;
+import org.apache.arrow.vector.LargeVarBinaryVector;
+import org.apache.arrow.vector.LargeVarCharVector;
+import org.apache.arrow.vector.NullVector;
+import org.apache.arrow.vector.SmallIntVector;
+import org.apache.arrow.vector.TimeMicroVector;
+import org.apache.arrow.vector.TimeMilliVector;
+import org.apache.arrow.vector.TimeNanoVector;
+import org.apache.arrow.vector.TimeSecVector;
+import org.apache.arrow.vector.TimeStampMicroTZVector;
+import org.apache.arrow.vector.TimeStampMicroVector;
+import org.apache.arrow.vector.TimeStampMilliTZVector;
+import org.apache.arrow.vector.TimeStampMilliVector;
+import org.apache.arrow.vector.TimeStampNanoTZVector;
+import org.apache.arrow.vector.TimeStampNanoVector;
+import org.apache.arrow.vector.TimeStampSecTZVector;
+import org.apache.arrow.vector.TimeStampSecVector;
+import org.apache.arrow.vector.TinyIntVector;
+import org.apache.arrow.vector.UInt1Vector;
+import org.apache.arrow.vector.UInt2Vector;
+import org.apache.arrow.vector.UInt4Vector;
+import org.apache.arrow.vector.UInt8Vector;
+import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.VarBinaryVector;
+import org.apache.arrow.vector.VarCharVector;
+import org.apache.arrow.vector.compare.VectorVisitor;
+import org.apache.arrow.vector.complex.DenseUnionVector;
+import org.apache.arrow.vector.complex.FixedSizeListVector;
+import org.apache.arrow.vector.complex.LargeListVector;
+import org.apache.arrow.vector.complex.ListVector;
+import org.apache.arrow.vector.complex.NonNullableStructVector;
+import org.apache.arrow.vector.complex.UnionVector;
+import org.apache.arrow.vector.types.DateUnit;
+import org.apache.arrow.vector.types.FloatingPointPrecision;
+import org.apache.arrow.vector.types.IntervalUnit;
+import org.apache.arrow.vector.types.TimeUnit;
+import org.apache.arrow.vector.types.UnionMode;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.FieldType;
+
+/**
+ * Utility to validate vector type information.
+ */
+public class ValidateVectorTypeVisitor implements VectorVisitor<Void, Void> {
+
+ private void validateVectorCommon(ValueVector vector, Class<? extends ArrowType> expectedArrowType) {
+ validateOrThrow(vector.getField() != null, "Vector field is empty.");
+ validateOrThrow(vector.getField().getFieldType() != null, "Vector field type is empty.");
+ ArrowType arrowType = vector.getField().getFieldType().getType();
+ validateOrThrow(arrowType != null, "Vector arrow type is empty.");
+ validateOrThrow(expectedArrowType == arrowType.getClass(),
+ "Incorrect arrow type for " + vector.getClass() + " : " + arrowType.toString());
+ }
+
+ private void validateIntVector(ValueVector vector, int expectedWidth, boolean expectedSigned) {
+ validateOrThrow(vector.getField().getFieldType().getType() instanceof ArrowType.Int,
+ "Vector %s is not an integer vector.", vector.getClass());
+ ArrowType.Int intType = (ArrowType.Int) vector.getField().getFieldType().getType();
+ validateOrThrow(intType.getIsSigned() == expectedSigned,
+ "Expecting bit width %s, actual width %s.", expectedWidth, intType.getBitWidth());
+ validateOrThrow(intType.getBitWidth() == expectedWidth, "Expecting bit width %s, actual bit width %s.",
+ expectedWidth, intType.getBitWidth());
+ }
+
+ private void validateFloatingPointVector(ValueVector vector, FloatingPointPrecision expectedPrecision) {
+ validateOrThrow(vector.getField().getFieldType().getType() instanceof ArrowType.FloatingPoint,
+ "Vector %s is not a floating point vector.", vector.getClass());
+ ArrowType.FloatingPoint floatType = (ArrowType.FloatingPoint) vector.getField().getFieldType().getType();
+ validateOrThrow(floatType.getPrecision() == expectedPrecision, "Expecting precision %s, actual precision %s.",
+ expectedPrecision, floatType.getPrecision());
+ }
+
+ private void validateDateVector(ValueVector vector, DateUnit expectedDateUnit) {
+ validateOrThrow(vector.getField().getFieldType().getType() instanceof ArrowType.Date,
+ "Vector %s is not a date vector", vector.getClass());
+ ArrowType.Date dateType = (ArrowType.Date) vector.getField().getFieldType().getType();
+ validateOrThrow(dateType.getUnit() == expectedDateUnit,
+ "Expecting date unit %s, actual date unit %s.", expectedDateUnit, dateType.getUnit());
+ }
+
+ private void validateTimeVector(ValueVector vector, TimeUnit expectedTimeUnit, int expectedBitWidth) {
+ validateOrThrow(vector.getField().getFieldType().getType() instanceof ArrowType.Time,
+ "Vector %s is not a time vector.", vector.getClass());
+ ArrowType.Time timeType = (ArrowType.Time) vector.getField().getFieldType().getType();
+ validateOrThrow(timeType.getUnit() == expectedTimeUnit,
+ "Expecting time unit %s, actual time unit %s.", expectedTimeUnit, timeType.getUnit());
+ validateOrThrow(timeType.getBitWidth() == expectedBitWidth,
+ "Expecting bit width %s, actual bit width %s.", expectedBitWidth, timeType.getBitWidth());
+ }
+
+ private void validateIntervalVector(ValueVector vector, IntervalUnit expectedIntervalUnit) {
+ validateOrThrow(vector.getField().getFieldType().getType() instanceof ArrowType.Interval,
+ "Vector %s is not an interval vector.", vector.getClass());
+ ArrowType.Interval intervalType = (ArrowType.Interval) vector.getField().getFieldType().getType();
+ validateOrThrow(intervalType.getUnit() == expectedIntervalUnit,
+ "Expecting interval unit %s, actual date unit %s.", expectedIntervalUnit, intervalType.getUnit());
+ }
+
+ private void validateTimeStampVector(ValueVector vector, TimeUnit expectedTimeUnit, boolean expectTZ) {
+ validateOrThrow(vector.getField().getFieldType().getType() instanceof ArrowType.Timestamp,
+ "Vector %s is not a time stamp vector.", vector.getClass());
+ ArrowType.Timestamp timestampType = (ArrowType.Timestamp) vector.getField().getFieldType().getType();
+ validateOrThrow(timestampType.getUnit() == expectedTimeUnit,
+ "Expecting time stamp unit %s, actual time stamp unit %s.", expectedTimeUnit, timestampType.getUnit());
+ if (expectTZ) {
+ validateOrThrow(timestampType.getTimezone() != null, "The time zone should not be null");
+ } else {
+ validateOrThrow(timestampType.getTimezone() == null, "The time zone should be null");
+ }
+ }
+
+ private void validateExtensionTypeVector(ExtensionTypeVector<?> vector) {
+ validateOrThrow(vector.getField().getFieldType().getType() instanceof ArrowType.ExtensionType,
+ "Vector %s is not an extension type vector.", vector.getClass());
+ validateOrThrow(vector.getField().getMetadata().containsKey(ArrowType.ExtensionType.EXTENSION_METADATA_KEY_NAME),
+ "Field %s does not have proper extension type metadata: %s",
+ vector.getField().getName(),
+ vector.getField().getMetadata());
+ // Validate the storage vector type
+ vector.getUnderlyingVector().accept(this, null);
+ }
+
+ @Override
+ public Void visit(BaseFixedWidthVector vector, Void value) {
+ if (vector instanceof TinyIntVector) {
+ validateVectorCommon(vector, ArrowType.Int.class);
+ validateIntVector(vector, 8, true);
+ } else if (vector instanceof SmallIntVector) {
+ validateVectorCommon(vector, ArrowType.Int.class);
+ validateIntVector(vector, 16, true);
+ } else if (vector instanceof IntVector) {
+ validateVectorCommon(vector, ArrowType.Int.class);
+ validateIntVector(vector, 32, true);
+ } else if (vector instanceof BigIntVector) {
+ validateVectorCommon(vector, ArrowType.Int.class);
+ validateIntVector(vector, 64, true);
+ } else if (vector instanceof UInt1Vector) {
+ validateVectorCommon(vector, ArrowType.Int.class);
+ validateIntVector(vector, 8, false);
+ } else if (vector instanceof UInt2Vector) {
+ validateVectorCommon(vector, ArrowType.Int.class);
+ validateIntVector(vector, 16, false);
+ } else if (vector instanceof UInt4Vector) {
+ validateVectorCommon(vector, ArrowType.Int.class);
+ validateIntVector(vector, 32, false);
+ } else if (vector instanceof UInt8Vector) {
+ validateVectorCommon(vector, ArrowType.Int.class);
+ validateIntVector(vector, 64, false);
+ } else if (vector instanceof BitVector) {
+ validateVectorCommon(vector, ArrowType.Bool.class);
+ } else if (vector instanceof DecimalVector || vector instanceof Decimal256Vector) {
+ validateVectorCommon(vector, ArrowType.Decimal.class);
+ ArrowType.Decimal arrowType = (ArrowType.Decimal) vector.getField().getType();
+ validateOrThrow(arrowType.getScale() > 0, "The scale of decimal %s is not positive.", arrowType.getScale());
+ validateOrThrow(arrowType.getPrecision() > 0, "The precision of decimal %S is not positive.",
+ arrowType.getPrecision());
+ } else if (vector instanceof DateDayVector) {
+ validateVectorCommon(vector, ArrowType.Date.class);
+ validateDateVector(vector, DateUnit.DAY);
+ } else if (vector instanceof DateMilliVector) {
+ validateVectorCommon(vector, ArrowType.Date.class);
+ validateDateVector(vector, DateUnit.MILLISECOND);
+ } else if (vector instanceof DurationVector) {
+ validateVectorCommon(vector, ArrowType.Duration.class);
+ ArrowType.Duration arrowType = (ArrowType.Duration) vector.getField().getType();
+ validateOrThrow(((DurationVector) vector).getUnit() == arrowType.getUnit(),
+ "Different duration time unit for vector and arrow type. Vector time unit %s, type time unit %s.",
+ ((DurationVector) vector).getUnit(), arrowType.getUnit());
+ } else if (vector instanceof Float4Vector) {
+ validateVectorCommon(vector, ArrowType.FloatingPoint.class);
+ validateFloatingPointVector(vector, FloatingPointPrecision.SINGLE);
+ } else if (vector instanceof Float8Vector) {
+ validateVectorCommon(vector, ArrowType.FloatingPoint.class);
+ validateFloatingPointVector(vector, FloatingPointPrecision.DOUBLE);
+ } else if (vector instanceof IntervalDayVector) {
+ validateVectorCommon(vector, ArrowType.Interval.class);
+ validateIntervalVector(vector, IntervalUnit.DAY_TIME);
+ } else if (vector instanceof IntervalMonthDayNanoVector) {
+ validateVectorCommon(vector, ArrowType.Interval.class);
+ validateIntervalVector(vector, IntervalUnit.MONTH_DAY_NANO);
+ } else if (vector instanceof IntervalYearVector) {
+ validateVectorCommon(vector, ArrowType.Interval.class);
+ validateIntervalVector(vector, IntervalUnit.YEAR_MONTH);
+ } else if (vector instanceof TimeMicroVector) {
+ validateVectorCommon(vector, ArrowType.Time.class);
+ validateTimeVector(vector, TimeUnit.MICROSECOND, 64);
+ } else if (vector instanceof TimeMilliVector) {
+ validateVectorCommon(vector, ArrowType.Time.class);
+ validateTimeVector(vector, TimeUnit.MILLISECOND, 32);
+ } else if (vector instanceof TimeNanoVector) {
+ validateVectorCommon(vector, ArrowType.Time.class);
+ validateTimeVector(vector, TimeUnit.NANOSECOND, 64);
+ } else if (vector instanceof TimeSecVector) {
+ validateVectorCommon(vector, ArrowType.Time.class);
+ validateTimeVector(vector, TimeUnit.SECOND, 32);
+ } else if (vector instanceof TimeStampMicroTZVector) {
+ validateVectorCommon(vector, ArrowType.Timestamp.class);
+ validateTimeStampVector(vector, TimeUnit.MICROSECOND, true);
+ } else if (vector instanceof TimeStampMicroVector) {
+ validateVectorCommon(vector, ArrowType.Timestamp.class);
+ validateTimeStampVector(vector, TimeUnit.MICROSECOND, false);
+ } else if (vector instanceof TimeStampMilliTZVector) {
+ validateVectorCommon(vector, ArrowType.Timestamp.class);
+ validateTimeStampVector(vector, TimeUnit.MILLISECOND, true);
+ } else if (vector instanceof TimeStampMilliVector) {
+ validateVectorCommon(vector, ArrowType.Timestamp.class);
+ validateTimeStampVector(vector, TimeUnit.MILLISECOND, false);
+ } else if (vector instanceof TimeStampNanoTZVector) {
+ validateVectorCommon(vector, ArrowType.Timestamp.class);
+ validateTimeStampVector(vector, TimeUnit.NANOSECOND, true);
+ } else if (vector instanceof TimeStampNanoVector) {
+ validateVectorCommon(vector, ArrowType.Timestamp.class);
+ validateTimeStampVector(vector, TimeUnit.NANOSECOND, false);
+ } else if (vector instanceof TimeStampSecTZVector) {
+ validateVectorCommon(vector, ArrowType.Timestamp.class);
+ validateTimeStampVector(vector, TimeUnit.SECOND, true);
+ } else if (vector instanceof TimeStampSecVector) {
+ validateVectorCommon(vector, ArrowType.Timestamp.class);
+ validateTimeStampVector(vector, TimeUnit.SECOND, false);
+ } else if (vector instanceof FixedSizeBinaryVector) {
+ validateVectorCommon(vector, ArrowType.FixedSizeBinary.class);
+ ArrowType.FixedSizeBinary arrowType = (ArrowType.FixedSizeBinary) vector.getField().getType();
+ validateOrThrow(arrowType.getByteWidth() > 0, "The byte width of a FixedSizeBinaryVector %s is not positive.",
+ arrowType.getByteWidth());
+ validateOrThrow(arrowType.getByteWidth() == vector.getTypeWidth(),
+ "Type width mismatch for FixedSizeBinaryVector. Vector type width %s, arrow type type width %s.",
+ vector.getTypeWidth(), arrowType.getByteWidth());
+ } else {
+ throw new IllegalArgumentException("Unknown type for fixed width vector " + vector.getClass());
+ }
+ return null;
+ }
+
+ @Override
+ public Void visit(BaseVariableWidthVector vector, Void value) {
+ if (vector instanceof VarCharVector) {
+ validateVectorCommon(vector, ArrowType.Utf8.class);
+ } else if (vector instanceof VarBinaryVector) {
+ validateVectorCommon(vector, ArrowType.Binary.class);
+ }
+ return null;
+ }
+
+ @Override
+ public Void visit(BaseLargeVariableWidthVector vector, Void value) {
+ if (vector instanceof LargeVarCharVector) {
+ validateVectorCommon(vector, ArrowType.LargeUtf8.class);
+ } else if (vector instanceof LargeVarBinaryVector) {
+ validateVectorCommon(vector, ArrowType.LargeBinary.class);
+ }
+ return null;
+ }
+
+ @Override
+ public Void visit(ListVector vector, Void value) {
+ validateVectorCommon(vector, ArrowType.List.class);
+ ValueVector innerVector = vector.getDataVector();
+ if (innerVector != null) {
+ innerVector.accept(this, null);
+ }
+ return null;
+ }
+
+ @Override
+ public Void visit(FixedSizeListVector vector, Void value) {
+ validateVectorCommon(vector, ArrowType.FixedSizeList.class);
+ ArrowType.FixedSizeList arrowType = (ArrowType.FixedSizeList) vector.getField().getType();
+ validateOrThrow(arrowType.getListSize() == vector.getListSize(),
+ "Inconsistent list size for FixedSizeListVector. Vector list size %s, arrow type list size %s.",
+ vector.getListSize(), arrowType.getListSize());
+ validateOrThrow(arrowType.getListSize() > 0, "The list size %s is not positive.", arrowType.getListSize());
+ ValueVector innerVector = vector.getDataVector();
+ if (innerVector != null) {
+ innerVector.accept(this, null);
+ }
+ return null;
+ }
+
+ @Override
+ public Void visit(LargeListVector vector, Void value) {
+ validateVectorCommon(vector, ArrowType.LargeList.class);
+ ValueVector innerVector = vector.getDataVector();
+ if (innerVector != null) {
+ innerVector.accept(this, null);
+ }
+ return null;
+ }
+
+ @Override
+ public Void visit(NonNullableStructVector vector, Void value) {
+ validateVectorCommon(vector, ArrowType.Struct.class);
+ validateOrThrow(vector.getField().getChildren().size() == vector.getChildrenFromFields().size(),
+ "Child field count and child vector count mismatch. Vector child count %s, field child count %s",
+ vector.getChildrenFromFields().size(), vector.getField().getChildren().size());
+ for (int i = 0; i < vector.getChildrenFromFields().size(); i++) {
+ ValueVector subVector = vector.getChildByOrdinal(i);
+ FieldType subType = vector.getField().getChildren().get(i).getFieldType();
+
+ validateOrThrow(subType.equals(subVector.getField().getFieldType()),
+ "Struct vector's field type not equal to the child vector's field type. " +
+ "Struct field type %s, sub-vector field type %s", subType, subVector.getField().getFieldType());
+ subVector.accept(this, null);
+ }
+ return null;
+ }
+
+ @Override
+ public Void visit(UnionVector vector, Void value) {
+ validateVectorCommon(vector, ArrowType.Union.class);
+ ArrowType.Union arrowType = (ArrowType.Union) vector.getField().getType();
+ validateOrThrow(arrowType.getMode() == UnionMode.Sparse, "The union mode of UnionVector must be sparse");
+ for (ValueVector subVector : vector.getChildrenFromFields()) {
+ subVector.accept(this, null);
+ }
+ return null;
+ }
+
+ @Override
+ public Void visit(DenseUnionVector vector, Void value) {
+ validateVectorCommon(vector, ArrowType.Union.class);
+ ArrowType.Union arrowType = (ArrowType.Union) vector.getField().getType();
+ validateOrThrow(arrowType.getMode() == UnionMode.Dense, "The union mode of DenseUnionVector must be dense");
+ for (ValueVector subVector : vector.getChildrenFromFields()) {
+ subVector.accept(this, null);
+ }
+ return null;
+ }
+
+ @Override
+ public Void visit(NullVector vector, Void value) {
+ validateVectorCommon(vector, ArrowType.Null.class);
+ return null;
+ }
+
+ @Override
+ public Void visit(ExtensionTypeVector<?> vector, Void value) {
+ validateExtensionTypeVector(vector);
+ return null;
+ }
+}
diff --git a/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorVisitor.java b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorVisitor.java
new file mode 100644
index 000000000..7e99b1f90
--- /dev/null
+++ b/src/arrow/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorVisitor.java
@@ -0,0 +1,273 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.validate;
+
+import java.util.List;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.vector.BaseFixedWidthVector;
+import org.apache.arrow.vector.BaseLargeVariableWidthVector;
+import org.apache.arrow.vector.BaseVariableWidthVector;
+import org.apache.arrow.vector.ExtensionTypeVector;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.NullVector;
+import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.compare.VectorVisitor;
+import org.apache.arrow.vector.complex.DenseUnionVector;
+import org.apache.arrow.vector.complex.FixedSizeListVector;
+import org.apache.arrow.vector.complex.LargeListVector;
+import org.apache.arrow.vector.complex.ListVector;
+import org.apache.arrow.vector.complex.NonNullableStructVector;
+import org.apache.arrow.vector.complex.UnionVector;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.util.ValueVectorUtility;
+
+/**
+ * Visitor to validate vector (without validating data).
+ * This visitor could be used for {@link ValueVector#accept(VectorVisitor, Object)} API,
+ * and also users could simply use {@link ValueVectorUtility#validate(ValueVector)}.
+ */
+public class ValidateVectorVisitor implements VectorVisitor<Void, Void> {
+
+ @Override
+ public Void visit(BaseFixedWidthVector vector, Void value) {
+ if (vector.getValueCount() > 0) {
+ if (vector.getDataBuffer() == null || vector.getDataBuffer().capacity() == 0) {
+ throw new IllegalArgumentException("valueBuffer is null or capacity is 0");
+ }
+ }
+ return null;
+ }
+
+ @Override
+ public Void visit(BaseVariableWidthVector vector, Void value) {
+
+ if (vector.getValueCount() > 0) {
+ if (vector.getDataBuffer() == null || vector.getDataBuffer().capacity() == 0) {
+ throw new IllegalArgumentException("valueBuffer is null or capacity is 0");
+ }
+
+ ArrowBuf offsetBuf = vector.getOffsetBuffer();
+ int minBufferSize = (vector.getValueCount() + 1) * BaseVariableWidthVector.OFFSET_WIDTH;
+
+ if (offsetBuf.capacity() < minBufferSize) {
+ throw new IllegalArgumentException(String.format("offsetBuffer too small in vector of type %s" +
+ " and valueCount %s : expected at least %s byte(s), got %s",
+ vector.getField().getType().toString(),
+ vector.getValueCount(), minBufferSize, offsetBuf.capacity()));
+ }
+
+ int firstOffset = vector.getOffsetBuffer().getInt(0);
+ int lastOffset = vector.getOffsetBuffer().getInt(vector.getValueCount() * BaseVariableWidthVector.OFFSET_WIDTH);
+
+ if (firstOffset < 0 || lastOffset < 0) {
+ throw new IllegalArgumentException("Negative offsets in vector");
+ }
+
+ int dataExtent = lastOffset - firstOffset;
+
+ if (dataExtent > 0 && (vector.getDataBuffer().capacity() == 0)) {
+ throw new IllegalArgumentException("dataBuffer capacity is 0");
+ }
+
+ if (dataExtent > vector.getDataBuffer().capacity()) {
+ throw new IllegalArgumentException(String.format("Length spanned by offsets %s larger than" +
+ " dataBuffer capacity %s", dataExtent, vector.getValueCount()));
+ }
+ }
+ return null;
+ }
+
+ @Override
+ public Void visit(BaseLargeVariableWidthVector left, Void value) {
+ return null;
+ }
+
+ @Override
+ public Void visit(ListVector vector, Void value) {
+
+ FieldVector dataVector = vector.getDataVector();
+
+ if (vector.getValueCount() > 0) {
+
+ ArrowBuf offsetBuf = vector.getOffsetBuffer();
+ int minBufferSize = (vector.getValueCount() + 1) * BaseVariableWidthVector.OFFSET_WIDTH;
+
+ if (offsetBuf.capacity() < minBufferSize) {
+ throw new IllegalArgumentException(String.format("offsetBuffer too small in vector of type %s" +
+ " and valueCount %s : expected at least %s byte(s), got %s",
+ vector.getField().getType().toString(),
+ vector.getValueCount(), minBufferSize, offsetBuf.capacity()));
+ }
+
+ int firstOffset = vector.getOffsetBuffer().getInt(0);
+ int lastOffset = vector.getOffsetBuffer().getInt(vector.getValueCount() * BaseVariableWidthVector.OFFSET_WIDTH);
+
+ if (firstOffset < 0 || lastOffset < 0) {
+ throw new IllegalArgumentException("Negative offsets in list vector");
+ }
+
+ int dataExtent = lastOffset - firstOffset;
+
+ if (dataExtent > 0 && (dataVector.getDataBuffer() == null || dataVector.getDataBuffer().capacity() == 0)) {
+ throw new IllegalArgumentException("valueBuffer is null or capacity is 0");
+ }
+
+ if (dataExtent > dataVector.getValueCount()) {
+ throw new IllegalArgumentException(String.format("Length spanned by list offsets (%s) larger than" +
+ " data vector valueCount (length %s)", dataExtent, dataVector.getValueCount()));
+ }
+ }
+
+ return dataVector.accept(this, null);
+ }
+
+ @Override
+ public Void visit(LargeListVector vector, Void value) {
+
+ FieldVector dataVector = vector.getDataVector();
+
+ if (vector.getValueCount() > 0) {
+
+ ArrowBuf offsetBuf = vector.getOffsetBuffer();
+ long minBufferSize = (vector.getValueCount() + 1) * LargeListVector.OFFSET_WIDTH;
+
+ if (offsetBuf.capacity() < minBufferSize) {
+ throw new IllegalArgumentException(String.format("offsetBuffer too small in vector of type %s" +
+ " and valueCount %s : expected at least %s byte(s), got %s",
+ vector.getField().getType().toString(),
+ vector.getValueCount(), minBufferSize, offsetBuf.capacity()));
+ }
+
+ long firstOffset = vector.getOffsetBuffer().getLong(0);
+ long lastOffset = vector.getOffsetBuffer().getLong(vector.getValueCount() * LargeListVector.OFFSET_WIDTH);
+
+ if (firstOffset < 0 || lastOffset < 0) {
+ throw new IllegalArgumentException("Negative offsets in list vector");
+ }
+
+ long dataExtent = lastOffset - firstOffset;
+
+ if (dataExtent > 0 && (dataVector.getDataBuffer() == null || dataVector.getDataBuffer().capacity() == 0)) {
+ throw new IllegalArgumentException("valueBuffer is null or capacity is 0");
+ }
+
+ if (dataExtent > dataVector.getValueCount()) {
+ throw new IllegalArgumentException(String.format("Length spanned by list offsets (%s) larger than" +
+ " data vector valueCount (length %s)", dataExtent, dataVector.getValueCount()));
+ }
+ }
+
+ return dataVector.accept(this, null);
+ }
+
+ @Override
+ public Void visit(FixedSizeListVector vector, Void value) {
+
+ FieldVector dataVector = vector.getDataVector();
+ int valueCount = vector.getValueCount();
+ int listSize = vector.getListSize();
+
+ if (valueCount > 0 && (dataVector.getDataBuffer() == null || dataVector.getDataBuffer().capacity() == 0)) {
+ throw new IllegalArgumentException("valueBuffer is null or capacity is 0");
+ }
+
+ if (valueCount * listSize != dataVector.getValueCount()) {
+ throw new IllegalArgumentException(String.format("data vector valueCount invalid, expect %s, " +
+ "actual is: %s", valueCount * listSize, dataVector.getValueCount()));
+ }
+
+ return null;
+ }
+
+ @Override
+ public Void visit(NonNullableStructVector vector, Void value) {
+
+ List<Field> childFields = vector.getField().getChildren();
+ final int valueCount = vector.getValueCount();
+
+ for (int i = 0; i < childFields.size(); i++) {
+ FieldVector child = vector.getChildrenFromFields().get(i);
+
+ if (child.getValueCount() != valueCount) {
+ throw new IllegalArgumentException(String.format("struct child vector #%s valueCount is not equals with " +
+ "struct vector, expect %s, actual %s", i, vector.getValueCount(), child.getValueCount()));
+ }
+
+ if (!childFields.get(i).getType().equals(child.getField().getType())) {
+ throw new IllegalArgumentException(String.format("struct child vector #%s does not match type: %s vs %s",
+ i, childFields.get(i).getType().toString(), child.getField().getType().toString()));
+ }
+
+ child.accept(this, null);
+ }
+ return null;
+ }
+
+ @Override
+ public Void visit(UnionVector vector, Void value) {
+
+ List<Field> childFields = vector.getField().getChildren();
+ final int valueCount = vector.getValueCount();
+
+ for (int i = 0; i < childFields.size(); i++) {
+ FieldVector child = vector.getChildrenFromFields().get(i);
+
+ if (child.getValueCount() != valueCount) {
+ throw new IllegalArgumentException(String.format("union child vector #%s valueCount is not equals with union" +
+ " vector, expect %s, actual %s", i, vector.getValueCount(), child.getValueCount()));
+ }
+
+ if (!childFields.get(i).getType().equals(child.getField().getType())) {
+ throw new IllegalArgumentException(String.format("union child vector #%s does not match type: %s vs %s",
+ i, childFields.get(i).getType().toString(), child.getField().getType().toString()));
+ }
+
+ child.accept(this, null);
+ }
+ return null;
+ }
+
+ @Override
+ public Void visit(DenseUnionVector vector, Void value) {
+
+ List<Field> childFields = vector.getField().getChildren();
+ for (int i = 0; i < childFields.size(); i++) {
+ FieldVector child = vector.getChildrenFromFields().get(i);
+
+ if (!childFields.get(i).getType().equals(child.getField().getType())) {
+ throw new IllegalArgumentException(String.format("union child vector #%s does not match type: %s vs %s",
+ i, childFields.get(i).getType().toString(), child.getField().getType().toString()));
+ }
+
+ child.accept(this, null);
+ }
+ return null;
+ }
+
+ @Override
+ public Void visit(NullVector vector, Void value) {
+ return null;
+ }
+
+ @Override
+ public Void visit(ExtensionTypeVector<?> vector, Void value) {
+ vector.getUnderlyingVector().accept(this, value);
+ return null;
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/util/TestSchemaUtil.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/util/TestSchemaUtil.java
new file mode 100644
index 000000000..cefff8382
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/util/TestSchemaUtil.java
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.util;
+
+import static java.util.Arrays.asList;
+import static org.junit.Assert.assertEquals;
+
+import java.io.IOException;
+
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.apache.arrow.vector.util.SchemaUtility;
+import org.junit.Test;
+
+public class TestSchemaUtil {
+
+ private static Field field(String name, boolean nullable, ArrowType type, Field... children) {
+ return new Field(name, new FieldType(nullable, type, null, null), asList(children));
+ }
+
+ @Test
+ public void testSerializationAndDeserialization() throws IOException {
+ Schema schema = new Schema(asList(
+ field("a", false, new ArrowType.Null()),
+ field("b", true, new ArrowType.Utf8()),
+ field("c", true, new ArrowType.Binary()))
+ );
+
+ byte[] serialized = SchemaUtility.serialize(schema);
+ Schema deserialized = SchemaUtility.deserialize(serialized, new RootAllocator(Long.MAX_VALUE));
+ assertEquals(schema, deserialized);
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/DirtyRootAllocator.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/DirtyRootAllocator.java
new file mode 100644
index 000000000..27b8ac752
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/DirtyRootAllocator.java
@@ -0,0 +1,52 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferManager;
+import org.apache.arrow.memory.RootAllocator;
+
+/**
+ * Root allocator that returns buffers pre-filled with a given value.<br>
+ * Useful for testing if value vectors are properly zeroing their buffers.
+ */
+public class DirtyRootAllocator extends RootAllocator {
+
+ private final byte fillValue;
+
+ public DirtyRootAllocator(final long limit, final byte fillValue) {
+ super(limit);
+ this.fillValue = fillValue;
+ }
+
+ @Override
+ public ArrowBuf buffer(long size) {
+ return buffer(size, null);
+ }
+
+ @Override
+ public ArrowBuf buffer(long size, BufferManager manager) {
+ ArrowBuf buffer = super.buffer(size, manager);
+ // contaminate the buffer
+ for (int i = 0; i < buffer.capacity(); i++) {
+ buffer.setByte(i, fillValue);
+ }
+
+ return buffer;
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/ITTestLargeVector.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/ITTestLargeVector.java
new file mode 100644
index 000000000..19648dc9e
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/ITTestLargeVector.java
@@ -0,0 +1,280 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.math.BigDecimal;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.holders.NullableDecimalHolder;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Integration test for a vector with a large (more than 2GB) {@link org.apache.arrow.memory.ArrowBuf} as
+ * the data buffer.
+ * To run this test, please make sure there is at least 4GB free memory in the system.
+ */
+public class ITTestLargeVector {
+ private static final Logger logger = LoggerFactory.getLogger(ITTestLargeVector.class);
+
+ @Test
+ public void testLargeLongVector() {
+ logger.trace("Testing large big int vector.");
+
+ final long bufSize = 4 * 1024 * 1024 * 1024L;
+ final int vecLength = (int) (bufSize / BigIntVector.TYPE_WIDTH);
+
+ try (BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE);
+ BigIntVector largeVec = new BigIntVector("vec", allocator)) {
+ largeVec.allocateNew(vecLength);
+
+ logger.trace("Successfully allocated a vector with capacity {}", vecLength);
+
+ for (int i = 0; i < vecLength; i++) {
+ largeVec.set(i, i * 10L);
+
+ if ((i + 1) % 10000 == 0) {
+ logger.trace("Successfully written {} values", i + 1);
+ }
+ }
+ logger.trace("Successfully written {} values", vecLength);
+
+ for (int i = 0; i < vecLength; i++) {
+ long val = largeVec.get(i);
+ assertEquals(i * 10L, val);
+
+ if ((i + 1) % 10000 == 0) {
+ logger.trace("Successfully read {} values", i + 1);
+ }
+ }
+ logger.trace("Successfully read {} values", vecLength);
+ }
+ logger.trace("Successfully released the large vector.");
+ }
+
+ @Test
+ public void testLargeIntVector() {
+ logger.trace("Testing large int vector.");
+
+ final long bufSize = 4 * 1024 * 1024 * 1024L;
+ final int vecLength = (int) (bufSize / IntVector.TYPE_WIDTH);
+
+ try (BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE);
+ IntVector largeVec = new IntVector("vec", allocator)) {
+ largeVec.allocateNew(vecLength);
+
+ logger.trace("Successfully allocated a vector with capacity {}", vecLength);
+
+ for (int i = 0; i < vecLength; i++) {
+ largeVec.set(i, i);
+
+ if ((i + 1) % 10000 == 0) {
+ logger.trace("Successfully written {} values", i + 1);
+ }
+ }
+ logger.trace("Successfully written {} values", vecLength);
+
+ for (int i = 0; i < vecLength; i++) {
+ long val = largeVec.get(i);
+ assertEquals(i, val);
+
+ if ((i + 1) % 10000 == 0) {
+ logger.trace("Successfully read {} values", i + 1);
+ }
+ }
+ logger.trace("Successfully read {} values", vecLength);
+ }
+ logger.trace("Successfully released the large vector.");
+ }
+
+ @Test
+ public void testLargeDecimalVector() {
+ logger.trace("Testing large decimal vector.");
+
+ final long bufSize = 4 * 1024 * 1024 * 1024L;
+ final int vecLength = (int) (bufSize / DecimalVector.TYPE_WIDTH);
+
+ try (BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE);
+ DecimalVector largeVec = new DecimalVector("vec", allocator, 38, 0)) {
+ largeVec.allocateNew(vecLength);
+
+ logger.trace("Successfully allocated a vector with capacity {}", vecLength);
+
+ for (int i = 0; i < vecLength; i++) {
+ largeVec.set(i, 0);
+
+ if ((i + 1) % 10000 == 0) {
+ logger.trace("Successfully written {} values", i + 1);
+ }
+ }
+ logger.trace("Successfully written {} values", vecLength);
+
+ for (int i = 0; i < vecLength; i++) {
+ ArrowBuf buf = largeVec.get(i);
+ assertEquals(buf.capacity(), DecimalVector.TYPE_WIDTH);
+ assertEquals(0, buf.getLong(0));
+ assertEquals(0, buf.getLong(8));
+
+ if ((i + 1) % 10000 == 0) {
+ logger.trace("Successfully read {} values", i + 1);
+ }
+ }
+ logger.trace("Successfully read {} values", vecLength);
+
+ // try setting values with a large offset in the buffer
+ largeVec.set(vecLength - 1, 12345L);
+ assertEquals(12345L, largeVec.getObject(vecLength - 1).longValue());
+
+ NullableDecimalHolder holder = new NullableDecimalHolder();
+ holder.buffer = largeVec.valueBuffer;
+ holder.isSet = 1;
+ holder.start = (long) (vecLength - 1) * largeVec.getTypeWidth();
+ assertTrue(holder.start > Integer.MAX_VALUE);
+ largeVec.set(0, holder);
+
+ BigDecimal decimal = largeVec.getObject(0);
+ assertEquals(12345L, decimal.longValue());
+
+ logger.trace("Successfully setting values from large offsets");
+ }
+ logger.trace("Successfully released the large vector.");
+ }
+
+ @Test
+ public void testLargeFixedSizeBinaryVector() {
+ logger.trace("Testing large fixed size binary vector.");
+
+ final long bufSize = 4 * 1024 * 1024 * 1024L;
+ final int typeWidth = 8;
+ final int vecLength = (int) (bufSize / typeWidth);
+
+ try (BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE);
+ FixedSizeBinaryVector largeVec = new FixedSizeBinaryVector("vec", allocator, typeWidth)) {
+ largeVec.allocateNew(vecLength);
+
+ logger.trace("Successfully allocated a vector with capacity {}", vecLength);
+
+ byte[] value = new byte[] {'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h'};
+ for (int i = 0; i < vecLength; i++) {
+ largeVec.set(i, value);
+
+ if ((i + 1) % 10000 == 0) {
+ logger.trace("Successfully written {} values", i + 1);
+ }
+ }
+ logger.trace("Successfully written {} values", vecLength);
+
+ for (int i = 0; i < vecLength; i++) {
+ byte[] buf = largeVec.get(i);
+ assertEquals(typeWidth, buf.length);
+ assertArrayEquals(buf, value);
+
+ if ((i + 1) % 10000 == 0) {
+ logger.trace("Successfully read {} values", i + 1);
+ }
+ }
+ logger.trace("Successfully read {} values", vecLength);
+ }
+ logger.trace("Successfully released the large vector.");
+ }
+
+ @Test
+ public void testLargeVarCharVector() {
+ logger.trace("Testing large var char vector.");
+
+ final long bufSize = 4 * 1024 * 1024 * 1024L;
+ final int vecLength = (int) (bufSize / BaseVariableWidthVector.OFFSET_WIDTH);
+ final String strElement = "a";
+
+ try (BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE);
+ VarCharVector largeVec = new VarCharVector("vec", allocator)) {
+ largeVec.allocateNew(vecLength);
+
+ logger.trace("Successfully allocated a vector with capacity " + vecLength);
+
+ for (int i = 0; i < vecLength; i++) {
+ largeVec.setSafe(i, strElement.getBytes());
+
+ if ((i + 1) % 10000 == 0) {
+ logger.trace("Successfully written " + (i + 1) + " values");
+ }
+ }
+ largeVec.setValueCount(vecLength);
+ assertTrue(largeVec.getOffsetBuffer().readableBytes() > Integer.MAX_VALUE);
+ assertTrue(largeVec.getDataBuffer().readableBytes() < Integer.MAX_VALUE);
+ logger.trace("Successfully written " + vecLength + " values");
+
+ for (int i = 0; i < vecLength; i++) {
+ byte[] val = largeVec.get(i);
+ assertEquals(strElement, new String(val));
+
+ if ((i + 1) % 10000 == 0) {
+ logger.trace("Successfully read " + (i + 1) + " values");
+ }
+ }
+ logger.trace("Successfully read " + vecLength + " values");
+ }
+ logger.trace("Successfully released the large vector.");
+ }
+
+ @Test
+ public void testLargeLargeVarCharVector() {
+ logger.trace("Testing large large var char vector.");
+
+ final long bufSize = 4 * 1024 * 1024 * 1024L;
+ final int vecLength = (int) (bufSize / BaseLargeVariableWidthVector.OFFSET_WIDTH);
+ final String strElement = "9876543210";
+
+ try (BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE);
+ LargeVarCharVector largeVec = new LargeVarCharVector("vec", allocator)) {
+ largeVec.allocateNew(vecLength);
+
+ logger.trace("Successfully allocated a vector with capacity " + vecLength);
+
+ for (int i = 0; i < vecLength; i++) {
+ largeVec.setSafe(i, strElement.getBytes());
+
+ if ((i + 1) % 10000 == 0) {
+ logger.trace("Successfully written " + (i + 1) + " values");
+ }
+ }
+ largeVec.setValueCount(vecLength);
+ assertTrue(largeVec.getOffsetBuffer().readableBytes() > Integer.MAX_VALUE);
+ assertTrue(largeVec.getDataBuffer().readableBytes() > Integer.MAX_VALUE);
+ logger.trace("Successfully written " + vecLength + " values");
+
+ for (int i = 0; i < vecLength; i++) {
+ byte[] val = largeVec.get(i);
+ assertEquals(strElement, new String(val));
+
+ if ((i + 1) % 10000 == 0) {
+ logger.trace("Successfully read " + (i + 1) + " values");
+ }
+ }
+ logger.trace("Successfully read " + vecLength + " values");
+ }
+ logger.trace("Successfully released the large vector.");
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestBitVector.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestBitVector.java
new file mode 100644
index 000000000..28d56e342
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestBitVector.java
@@ -0,0 +1,543 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+import java.util.stream.IntStream;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.memory.util.hash.MurmurHasher;
+import org.apache.arrow.vector.testing.ValueVectorDataPopulator;
+import org.apache.arrow.vector.util.TransferPair;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestBitVector {
+ private static final String EMPTY_SCHEMA_PATH = "";
+
+ private BufferAllocator allocator;
+
+ @Before
+ public void init() {
+ allocator = new RootAllocator(Long.MAX_VALUE);
+ }
+
+ @After
+ public void terminate() throws Exception {
+ allocator.close();
+ }
+
+ @Test
+ public void testBitVectorCopyFromSafe() {
+ final int size = 20;
+ try (final BitVector src = new BitVector(EMPTY_SCHEMA_PATH, allocator);
+ final BitVector dst = new BitVector(EMPTY_SCHEMA_PATH, allocator)) {
+ src.allocateNew(size);
+ dst.allocateNew(10);
+
+ for (int i = 0; i < size; i++) {
+ src.set(i, i % 2);
+ }
+ src.setValueCount(size);
+
+ for (int i = 0; i < size; i++) {
+ dst.copyFromSafe(i, i, src);
+ }
+ dst.setValueCount(size);
+
+ for (int i = 0; i < size; i++) {
+ assertEquals(src.getObject(i), dst.getObject(i));
+ }
+ }
+ }
+
+ @Test
+ public void testSplitAndTransfer() throws Exception {
+
+ try (final BitVector sourceVector = new BitVector("bitvector", allocator)) {
+
+ sourceVector.allocateNew(40);
+
+ /* populate the bitvector -- 010101010101010101010101..... */
+ for (int i = 0; i < 40; i++) {
+ if ((i & 1) == 1) {
+ sourceVector.set(i, 1);
+ } else {
+ sourceVector.set(i, 0);
+ }
+ }
+
+ sourceVector.setValueCount(40);
+
+ /* check the vector output */
+ for (int i = 0; i < 40; i++) {
+ int result = sourceVector.get(i);
+ if ((i & 1) == 1) {
+ assertEquals(Integer.toString(1), Integer.toString(result));
+ } else {
+ assertEquals(Integer.toString(0), Integer.toString(result));
+ }
+ }
+
+ try (final BitVector toVector = new BitVector("toVector", allocator)) {
+ final TransferPair transferPair = sourceVector.makeTransferPair(toVector);
+
+ /*
+ * form test cases such that we cover:
+ *
+ * (1) the start index is exactly where a particular byte starts in the source bit vector
+ * (2) the start index is randomly positioned within a byte in the source bit vector
+ * (2.1) the length is a multiple of 8
+ * (2.2) the length is not a multiple of 8
+ */
+ final int[][] transferLengths = {{0, 8}, {8, 10}, {18, 0}, {18, 8}, {26, 0}, {26, 14}};
+
+ for (final int[] transferLength : transferLengths) {
+ final int start = transferLength[0];
+ final int length = transferLength[1];
+
+ transferPair.splitAndTransfer(start, length);
+
+ /* check the toVector output after doing splitAndTransfer */
+ for (int i = 0; i < length; i++) {
+ int actual = toVector.get(i);
+ int expected = sourceVector.get(start + i);
+ assertEquals("different data values not expected --> sourceVector index: " + (start + i) +
+ " toVector index: " + i, expected, actual);
+ }
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testSplitAndTransfer1() throws Exception {
+
+ try (final BitVector sourceVector = new BitVector("bitvector", allocator)) {
+
+ sourceVector.allocateNew(8190);
+
+ /* populate the bitvector */
+ for (int i = 0; i < 8190; i++) {
+ sourceVector.set(i, 1);
+ }
+
+ sourceVector.setValueCount(8190);
+
+ /* check the vector output */
+ for (int i = 0; i < 8190; i++) {
+ int result = sourceVector.get(i);
+ assertEquals(Integer.toString(1), Integer.toString(result));
+ }
+
+ try (final BitVector toVector = new BitVector("toVector", allocator)) {
+ final TransferPair transferPair = sourceVector.makeTransferPair(toVector);
+
+ final int[][] transferLengths = {{0, 4095}, {4095, 4095}};
+
+ for (final int[] transferLength : transferLengths) {
+ final int start = transferLength[0];
+ final int length = transferLength[1];
+
+ transferPair.splitAndTransfer(start, length);
+
+ /* check the toVector output after doing splitAndTransfer */
+ for (int i = 0; i < length; i++) {
+ int actual = toVector.get(i);
+ int expected = sourceVector.get(start + i);
+ assertEquals("different data values not expected --> sourceVector index: " + (start + i) +
+ " toVector index: " + i, expected, actual);
+ }
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testSplitAndTransfer2() throws Exception {
+
+ try (final BitVector sourceVector = new BitVector("bitvector", allocator)) {
+
+ sourceVector.allocateNew(32);
+
+ /* populate the bitvector */
+ for (int i = 0; i < 32; i++) {
+ if ((i & 1) == 1) {
+ sourceVector.set(i, 1);
+ } else {
+ sourceVector.set(i, 0);
+ }
+ }
+
+ sourceVector.setValueCount(32);
+
+ /* check the vector output */
+ for (int i = 0; i < 32; i++) {
+ int result = sourceVector.get(i);
+ if ((i & 1) == 1) {
+ assertEquals(Integer.toString(1), Integer.toString(result));
+ } else {
+ assertEquals(Integer.toString(0), Integer.toString(result));
+ }
+ }
+
+ try (final BitVector toVector = new BitVector("toVector", allocator)) {
+ final TransferPair transferPair = sourceVector.makeTransferPair(toVector);
+
+ final int[][] transferLengths = {{5, 22}, {5, 24}, {5, 25}, {5, 27}, {0, 31}, {5, 7}, {2, 3}};
+
+ for (final int[] transferLength : transferLengths) {
+ final int start = transferLength[0];
+ final int length = transferLength[1];
+
+ transferPair.splitAndTransfer(start, length);
+
+ /* check the toVector output after doing splitAndTransfer */
+ for (int i = 0; i < length; i++) {
+ int actual = toVector.get(i);
+ int expected = sourceVector.get(start + i);
+ assertEquals("different data values not expected --> sourceVector index: " + (start + i) +
+ " toVector index: " + i, expected, actual);
+ }
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testReallocAfterVectorTransfer1() {
+ try (final BitVector vector = new BitVector(EMPTY_SCHEMA_PATH, allocator)) {
+ vector.allocateNew(4096);
+ int valueCapacity = vector.getValueCapacity();
+ assertEquals(4096, valueCapacity);
+
+ for (int i = 0; i < valueCapacity; i++) {
+ if ((i & 1) == 1) {
+ vector.setToOne(i);
+ }
+ }
+
+ for (int i = 0; i < valueCapacity; i++) {
+ if ((i & 1) == 1) {
+ assertEquals("unexpected cleared bit at index: " + i, 1, vector.get(i));
+ } else {
+ assertTrue("unexpected set bit at index: " + i, vector.isNull(i));
+ }
+ }
+
+ /* trigger first realloc */
+ vector.setSafeToOne(valueCapacity);
+ assertEquals(valueCapacity * 2, vector.getValueCapacity());
+
+ for (int i = valueCapacity; i < valueCapacity * 2; i++) {
+ if ((i & 1) == 1) {
+ vector.setToOne(i);
+ }
+ }
+
+ for (int i = 0; i < valueCapacity * 2; i++) {
+ if (((i & 1) == 1) || (i == valueCapacity)) {
+ assertEquals("unexpected cleared bit at index: " + i, 1, vector.get(i));
+ } else {
+ assertTrue("unexpected set bit at index: " + i, vector.isNull(i));
+ }
+ }
+
+ /* trigger second realloc */
+ vector.setSafeToOne(valueCapacity * 2);
+ assertEquals(valueCapacity * 4, vector.getValueCapacity());
+
+ for (int i = valueCapacity * 2; i < valueCapacity * 4; i++) {
+ if ((i & 1) == 1) {
+ vector.setToOne(i);
+ }
+ }
+
+ for (int i = 0; i < valueCapacity * 4; i++) {
+ if (((i & 1) == 1) || (i == valueCapacity) || (i == valueCapacity * 2)) {
+ assertEquals("unexpected cleared bit at index: " + i, 1, vector.get(i));
+ } else {
+ assertTrue("unexpected set bit at index: " + i, vector.isNull(i));
+ }
+ }
+
+ /* now transfer the vector */
+ TransferPair transferPair = vector.getTransferPair(allocator);
+ transferPair.transfer();
+ final BitVector toVector = (BitVector) transferPair.getTo();
+
+ assertEquals(valueCapacity * 4, toVector.getValueCapacity());
+
+ /* realloc the toVector */
+ toVector.setSafeToOne(valueCapacity * 4);
+
+ for (int i = 0; i < toVector.getValueCapacity(); i++) {
+ if (i <= valueCapacity * 4) {
+ if (((i & 1) == 1) || (i == valueCapacity) ||
+ (i == valueCapacity * 2) || (i == valueCapacity * 4)) {
+ assertEquals("unexpected cleared bit at index: " + i, 1, toVector.get(i));
+ } else {
+ assertTrue("unexpected set bit at index: " + i, toVector.isNull(i));
+ }
+ } else {
+ assertTrue("unexpected set bit at index: " + i, toVector.isNull(i));
+ }
+ }
+
+ toVector.close();
+ }
+ }
+
+ @Test
+ public void testReallocAfterVectorTransfer2() {
+ try (final BitVector vector = new BitVector(EMPTY_SCHEMA_PATH, allocator)) {
+ vector.allocateNew(4096);
+ int valueCapacity = vector.getValueCapacity();
+ assertEquals(4096, valueCapacity);
+
+ for (int i = 0; i < valueCapacity; i++) {
+ if ((i & 1) == 1) {
+ vector.set(i, 1);
+ }
+ }
+
+ for (int i = 0; i < valueCapacity; i++) {
+ if ((i & 1) == 1) {
+ assertFalse("unexpected cleared bit at index: " + i, vector.isNull(i));
+ } else {
+ assertTrue("unexpected set bit at index: " + i, vector.isNull(i));
+ }
+ }
+
+ /* trigger first realloc */
+ vector.setSafe(valueCapacity, 1, 1);
+ assertEquals(valueCapacity * 2, vector.getValueCapacity());
+
+ for (int i = valueCapacity; i < valueCapacity * 2; i++) {
+ if ((i & 1) == 1) {
+ vector.set(i, 1);
+ }
+ }
+
+ for (int i = 0; i < valueCapacity * 2; i++) {
+ if (((i & 1) == 1) || (i == valueCapacity)) {
+ assertFalse("unexpected cleared bit at index: " + i, vector.isNull(i));
+ } else {
+ assertTrue("unexpected set bit at index: " + i, vector.isNull(i));
+ }
+ }
+
+ /* trigger second realloc */
+ vector.setSafe(valueCapacity * 2, 1, 1);
+ assertEquals(valueCapacity * 4, vector.getValueCapacity());
+
+ for (int i = valueCapacity * 2; i < valueCapacity * 4; i++) {
+ if ((i & 1) == 1) {
+ vector.set(i, 1);
+ }
+ }
+
+ for (int i = 0; i < valueCapacity * 4; i++) {
+ if (((i & 1) == 1) || (i == valueCapacity) || (i == valueCapacity * 2)) {
+ assertFalse("unexpected cleared bit at index: " + i, vector.isNull(i));
+ } else {
+ assertTrue("unexpected set bit at index: " + i, vector.isNull(i));
+ }
+ }
+
+ /* now transfer the vector */
+ TransferPair transferPair = vector.getTransferPair(allocator);
+ transferPair.transfer();
+ final BitVector toVector = (BitVector) transferPair.getTo();
+
+ assertEquals(valueCapacity * 4, toVector.getValueCapacity());
+
+ /* realloc the toVector */
+ toVector.setSafe(valueCapacity * 4, 1, 1);
+
+ for (int i = 0; i < toVector.getValueCapacity(); i++) {
+ if (i <= valueCapacity * 4) {
+ if (((i & 1) == 1) || (i == valueCapacity) ||
+ (i == valueCapacity * 2) || (i == valueCapacity * 4)) {
+ assertFalse("unexpected cleared bit at index: " + i, toVector.isNull(i));
+ } else {
+ assertTrue("unexpected set bit at index: " + i, toVector.isNull(i));
+ }
+ } else {
+ assertTrue("unexpected set bit at index: " + i, toVector.isNull(i));
+ }
+ }
+
+ toVector.close();
+ }
+ }
+
+ @Test
+ public void testBitVector() {
+ // Create a new value vector for 1024 integers
+ try (final BitVector vector = new BitVector(EMPTY_SCHEMA_PATH, allocator)) {
+ vector.allocateNew(1024);
+ vector.setValueCount(1024);
+
+ // Put and set a few values
+ vector.set(0, 1);
+ vector.set(1, 0);
+ vector.set(100, 0);
+ vector.set(1022, 1);
+
+ vector.setValueCount(1024);
+
+ assertEquals(1, vector.get(0));
+ assertEquals(0, vector.get(1));
+ assertEquals(0, vector.get(100));
+ assertEquals(1, vector.get(1022));
+
+ assertEquals(1020, vector.getNullCount());
+
+ // test setting the same value twice
+ vector.set(0, 1);
+ vector.set(0, 1);
+ vector.set(1, 0);
+ vector.set(1, 0);
+ assertEquals(1, vector.get(0));
+ assertEquals(0, vector.get(1));
+
+ // test toggling the values
+ vector.set(0, 0);
+ vector.set(1, 1);
+ assertEquals(0, vector.get(0));
+ assertEquals(1, vector.get(1));
+
+ // should not change
+ assertEquals(1020, vector.getNullCount());
+
+ // Ensure null value
+ assertTrue(vector.isNull(3));
+
+ // unset the previously set bits
+ vector.setNull(0);
+ vector.setNull(1);
+ vector.setNull(100);
+ vector.setNull(1022);
+ // this should set all the array to 0
+ assertEquals(1024, vector.getNullCount());
+
+ // set all the array to 1
+ for (int i = 0; i < 1024; ++i) {
+ assertEquals(1024 - i, vector.getNullCount());
+ vector.set(i, 1);
+ }
+
+ assertEquals(0, vector.getNullCount());
+
+ vector.allocateNew(1015);
+ vector.setValueCount(1015);
+
+ // ensure it has been zeroed
+ assertEquals(1015, vector.getNullCount());
+
+ vector.set(0, 1);
+ vector.set(1014, 1); // ensure that the last item of the last byte is allocated
+
+ assertEquals(1013, vector.getNullCount());
+
+ vector.zeroVector();
+ assertEquals(1015, vector.getNullCount());
+
+ // set all the array to 1
+ for (int i = 0; i < 1015; ++i) {
+ assertEquals(1015 - i, vector.getNullCount());
+ vector.set(i, 1);
+ }
+
+ assertEquals(0, vector.getNullCount());
+ }
+ }
+
+ @Test
+ public void testBitVectorRangeSetAllOnes() {
+ validateRange(1000, 0, 1000);
+ validateRange(1000, 0, 1);
+ validateRange(1000, 1, 2);
+ validateRange(1000, 5, 6);
+ validateRange(1000, 5, 10);
+ validateRange(1000, 5, 150);
+ validateRange(1000, 5, 27);
+ for (int i = 0; i < 8; i++) {
+ for (int j = 0; j < 8; j++) {
+ validateRange(1000, 10 + i, 27 + j);
+ validateRange(1000, i, j);
+ }
+ }
+ }
+
+ private void validateRange(int length, int start, int count) {
+ String desc = "[" + start + ", " + (start + count) + ") ";
+ try (BitVector bitVector = new BitVector("bits", allocator)) {
+ bitVector.reset();
+ bitVector.allocateNew(length);
+ bitVector.setRangeToOne(start, count);
+ for (int i = 0; i < start; i++) {
+ Assert.assertTrue(desc + i, bitVector.isNull(i));
+ }
+ for (int i = start; i < start + count; i++) {
+ Assert.assertEquals(desc + i, 1, bitVector.get(i));
+ }
+ for (int i = start + count; i < length; i++) {
+ Assert.assertTrue(desc + i, bitVector.isNull(i));
+ }
+ }
+ }
+
+ @Test
+ public void testBitVectorHashCode() {
+ final int size = 6;
+ try (final BitVector vector = new BitVector(EMPTY_SCHEMA_PATH, allocator)) {
+ ValueVectorDataPopulator.setVector(vector, 0, 1, null, 0, 1, null);
+
+ int[] hashCodes = new int[size];
+ IntStream.range(0, size).forEach(i -> hashCodes[i] = vector.hashCode(i));
+
+ assertTrue(hashCodes[0] == hashCodes[3]);
+ assertTrue(hashCodes[1] == hashCodes[4]);
+ assertTrue(hashCodes[2] == hashCodes[5]);
+
+ assertFalse(hashCodes[0] == hashCodes[1]);
+ assertFalse(hashCodes[0] == hashCodes[2]);
+ assertFalse(hashCodes[1] == hashCodes[2]);
+
+ MurmurHasher hasher = new MurmurHasher();
+
+ IntStream.range(0, size).forEach(i -> hashCodes[i] = vector.hashCode(i, hasher));
+
+ assertTrue(hashCodes[0] == hashCodes[3]);
+ assertTrue(hashCodes[1] == hashCodes[4]);
+ assertTrue(hashCodes[2] == hashCodes[5]);
+
+ assertFalse(hashCodes[0] == hashCodes[1]);
+ assertFalse(hashCodes[0] == hashCodes[2]);
+ assertFalse(hashCodes[1] == hashCodes[2]);
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestBitVectorHelper.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestBitVectorHelper.java
new file mode 100644
index 000000000..9c7e1979d
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestBitVectorHelper.java
@@ -0,0 +1,235 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.junit.Test;
+
+import io.netty.util.internal.PlatformDependent;
+
+public class TestBitVectorHelper {
+ @Test
+ public void testGetNullCount() throws Exception {
+ try (BufferAllocator root = new RootAllocator()) {
+ // test case 1, 1 null value for 0b110
+ ArrowBuf validityBuffer = root.buffer(3);
+ // we set validity buffer to be 0b10110, but only have 3 items with 1st item is null
+ validityBuffer.setByte(0, 0b10110);
+
+ // we will only consider 0b110 here, since we only 3 items and only one is null
+ int count = BitVectorHelper.getNullCount(validityBuffer, 3);
+ assertEquals(count, 1);
+ validityBuffer.close();
+
+ // test case 2, no null value for 0xFF
+ validityBuffer = root.buffer(8);
+ validityBuffer.setByte(0, 0xFF);
+
+ count = BitVectorHelper.getNullCount(validityBuffer, 8);
+ assertEquals(count, 0);
+ validityBuffer.close();
+
+ // test case 3, 1 null value for 0x7F
+ validityBuffer = root.buffer(8);
+ validityBuffer.setByte(0, 0x7F);
+
+ count = BitVectorHelper.getNullCount(validityBuffer, 8);
+ assertEquals(count, 1);
+ validityBuffer.close();
+
+ // test case 4, validity buffer has multiple bytes, 11 items
+ validityBuffer = root.buffer(11);
+ validityBuffer.setByte(0, 0b10101010);
+ validityBuffer.setByte(1, 0b01010101);
+
+ count = BitVectorHelper.getNullCount(validityBuffer, 11);
+ assertEquals(count, 5);
+ validityBuffer.close();
+ }
+ }
+
+ @Test
+ public void testAllBitsNull() {
+ final int bufferLength = 32 * 1024;
+ try (RootAllocator allocator = new RootAllocator(bufferLength);
+ ArrowBuf validityBuffer = allocator.buffer(bufferLength)) {
+
+ validityBuffer.setZero(0, bufferLength);
+ int bitLength = 1024;
+ assertTrue(BitVectorHelper.checkAllBitsEqualTo(validityBuffer, bitLength, false));
+
+ bitLength = 1027;
+ assertTrue(BitVectorHelper.checkAllBitsEqualTo(validityBuffer, bitLength, false));
+
+ validityBuffer.setZero(0, bufferLength);
+ bitLength = 1025;
+ BitVectorHelper.setBit(validityBuffer, 12);
+ assertFalse(BitVectorHelper.checkAllBitsEqualTo(validityBuffer, bitLength, false));
+
+ validityBuffer.setZero(0, bufferLength);
+ bitLength = 1025;
+ BitVectorHelper.setBit(validityBuffer, 1024);
+ assertFalse(BitVectorHelper.checkAllBitsEqualTo(validityBuffer, bitLength, false));
+
+ validityBuffer.setZero(0, bufferLength);
+ bitLength = 1026;
+ BitVectorHelper.setBit(validityBuffer, 1024);
+ assertFalse(BitVectorHelper.checkAllBitsEqualTo(validityBuffer, bitLength, false));
+
+ validityBuffer.setZero(0, bufferLength);
+ bitLength = 1027;
+ BitVectorHelper.setBit(validityBuffer, 1025);
+ assertFalse(BitVectorHelper.checkAllBitsEqualTo(validityBuffer, bitLength, false));
+
+ validityBuffer.setZero(0, bufferLength);
+ bitLength = 1031;
+ BitVectorHelper.setBit(validityBuffer, 1029);
+ BitVectorHelper.setBit(validityBuffer, 1030);
+ assertFalse(BitVectorHelper.checkAllBitsEqualTo(validityBuffer, bitLength, false));
+ }
+ }
+
+ @Test
+ public void testAllBitsSet() {
+ final int bufferLength = 32 * 1024;
+ try (RootAllocator allocator = new RootAllocator(bufferLength);
+ ArrowBuf validityBuffer = allocator.buffer(bufferLength)) {
+
+ PlatformDependent.setMemory(validityBuffer.memoryAddress(), bufferLength, (byte) -1);
+ int bitLength = 1024;
+ assertTrue(BitVectorHelper.checkAllBitsEqualTo(validityBuffer, bitLength, true));
+
+ bitLength = 1028;
+ assertTrue(BitVectorHelper.checkAllBitsEqualTo(validityBuffer, bitLength, true));
+
+ PlatformDependent.setMemory(validityBuffer.memoryAddress(), bufferLength, (byte) -1);
+ bitLength = 1025;
+ BitVectorHelper.unsetBit(validityBuffer, 12);
+ assertFalse(BitVectorHelper.checkAllBitsEqualTo(validityBuffer, bitLength, true));
+
+ PlatformDependent.setMemory(validityBuffer.memoryAddress(), bufferLength, (byte) -1);
+ bitLength = 1025;
+ BitVectorHelper.unsetBit(validityBuffer, 1024);
+ assertFalse(BitVectorHelper.checkAllBitsEqualTo(validityBuffer, bitLength, true));
+
+ PlatformDependent.setMemory(validityBuffer.memoryAddress(), bufferLength, (byte) -1);
+ bitLength = 1026;
+ BitVectorHelper.unsetBit(validityBuffer, 1024);
+ assertFalse(BitVectorHelper.checkAllBitsEqualTo(validityBuffer, bitLength, true));
+
+ PlatformDependent.setMemory(validityBuffer.memoryAddress(), bufferLength, (byte) -1);
+ bitLength = 1027;
+ BitVectorHelper.unsetBit(validityBuffer, 1025);
+ assertFalse(BitVectorHelper.checkAllBitsEqualTo(validityBuffer, bitLength, true));
+
+ PlatformDependent.setMemory(validityBuffer.memoryAddress(), bufferLength, (byte) -1);
+ bitLength = 1031;
+ BitVectorHelper.unsetBit(validityBuffer, 1029);
+ BitVectorHelper.unsetBit(validityBuffer, 1030);
+ assertFalse(BitVectorHelper.checkAllBitsEqualTo(validityBuffer, bitLength, true));
+ }
+ }
+
+ @Test
+ public void testConcatBits() {
+ try (RootAllocator allocator = new RootAllocator(1024 * 1024)) {
+ try (ArrowBuf buf1 = allocator.buffer(1024);
+ ArrowBuf buf2 = allocator.buffer(1024);
+ ArrowBuf output = allocator.buffer(1024)) {
+
+ buf1.setZero(0, buf1.capacity());
+ buf2.setZero(0, buf2.capacity());
+
+ final int maxCount = 100;
+ for (int i = 0; i < maxCount; i++) {
+ if (i % 3 == 0) {
+ BitVectorHelper.setBit(buf1, i);
+ BitVectorHelper.setBit(buf2, i);
+ }
+ }
+
+ // test the case where the number of bits for both sets are multiples of 8.
+ concatAndVerify(buf1, 40, buf2, 48, output);
+
+ // only the number of bits in the first set is a multiple of 8
+ concatAndVerify(buf1, 32, buf2, 47, output);
+
+ // only the number of bits in the second set is a multiple of 8
+ concatAndVerify(buf1, 31, buf2, 48, output);
+
+ // neither set has a size that is a multiple of 8
+ concatAndVerify(buf1, 27, buf2, 52, output);
+
+ // the remaining bits in the second set is spread in two bytes
+ concatAndVerify(buf1, 31, buf2, 55, output);
+ }
+ }
+ }
+
+ @Test
+ public void testConcatBitsInPlace() {
+ try (RootAllocator allocator = new RootAllocator(1024 * 1024)) {
+ try (ArrowBuf buf1 = allocator.buffer(1024);
+ ArrowBuf buf2 = allocator.buffer(1024)) {
+
+ buf1.setZero(0, buf1.capacity());
+ buf2.setZero(0, buf2.capacity());
+
+ final int maxCount = 100;
+ for (int i = 0; i < maxCount; i++) {
+ if (i % 3 == 0) {
+ BitVectorHelper.setBit(buf1, i);
+ BitVectorHelper.setBit(buf2, i);
+ }
+ }
+
+ // test the case where the number of bits for both sets are multiples of 8.
+ concatAndVerify(buf1, 40, buf2, 48, buf1);
+
+ // only the number of bits in the first set is a multiple of 8
+ concatAndVerify(buf1, 32, buf2, 47, buf1);
+
+ // only the number of bits in the second set is a multiple of 8
+ concatAndVerify(buf1, 31, buf2, 48, buf1);
+
+ // neither set has a size that is a multiple of 8
+ concatAndVerify(buf1, 27, buf2, 52, buf1);
+
+ // the remaining bits in the second set is spread in two bytes
+ concatAndVerify(buf1, 31, buf2, 55, buf1);
+ }
+ }
+ }
+
+ private void concatAndVerify(ArrowBuf buf1, int count1, ArrowBuf buf2, int count2, ArrowBuf output) {
+ BitVectorHelper.concatBits(buf1, count1, buf2, count2, output);
+ int outputIdx = 0;
+ for (int i = 0; i < count1; i++, outputIdx++) {
+ assertEquals(BitVectorHelper.get(output, outputIdx), BitVectorHelper.get(buf1, i));
+ }
+ for (int i = 0; i < count2; i++, outputIdx++) {
+ assertEquals(BitVectorHelper.get(output, outputIdx), BitVectorHelper.get(buf2, i));
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestBufferOwnershipTransfer.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestBufferOwnershipTransfer.java
new file mode 100644
index 000000000..8efadad9b
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestBufferOwnershipTransfer.java
@@ -0,0 +1,131 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.ReferenceManager;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.complex.ListVector;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.CallBack;
+import org.junit.Test;
+
+public class TestBufferOwnershipTransfer {
+
+ @Test
+ public void testTransferFixedWidth() {
+ BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE);
+ BufferAllocator childAllocator1 = allocator.newChildAllocator("child1", 100000, 100000);
+ BufferAllocator childAllocator2 = allocator.newChildAllocator("child2", 100000, 100000);
+
+ IntVector v1 = new IntVector("v1", childAllocator1);
+ v1.allocateNew();
+ v1.setValueCount(4095);
+ long totalAllocatedMemory = childAllocator1.getAllocatedMemory();
+
+ IntVector v2 = new IntVector("v2", childAllocator2);
+
+ v1.makeTransferPair(v2).transfer();
+
+ assertEquals(0, childAllocator1.getAllocatedMemory());
+ assertEquals(totalAllocatedMemory, childAllocator2.getAllocatedMemory());
+
+ v1.close();
+ v2.close();
+ childAllocator1.close();
+ childAllocator2.close();
+ allocator.close();
+ }
+
+ @Test
+ public void testTransferVariableWidth() {
+ BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE);
+ BufferAllocator childAllocator1 = allocator.newChildAllocator("child1", 100000, 100000);
+ BufferAllocator childAllocator2 = allocator.newChildAllocator("child2", 100000, 100000);
+
+ VarCharVector v1 = new VarCharVector("v1", childAllocator1);
+ v1.allocateNew();
+ v1.setSafe(4094, "hello world".getBytes(), 0, 11);
+ v1.setValueCount(4001);
+
+ VarCharVector v2 = new VarCharVector("v2", childAllocator2);
+ long memoryBeforeTransfer = childAllocator1.getAllocatedMemory();
+
+ v1.makeTransferPair(v2).transfer();
+
+ assertEquals(0, childAllocator1.getAllocatedMemory());
+ assertEquals(memoryBeforeTransfer, childAllocator2.getAllocatedMemory());
+
+ v1.close();
+ v2.close();
+ childAllocator1.close();
+ childAllocator2.close();
+ allocator.close();
+ }
+
+ private static class Pointer<T> {
+ T value;
+ }
+
+ private static CallBack newTriggerCallback(final Pointer<Boolean> trigger) {
+ trigger.value = false;
+ return new CallBack() {
+ @Override
+ public void doWork() {
+ trigger.value = true;
+ }
+ };
+ }
+
+ @Test
+ public void emptyListTransferShouldNotTriggerSchemaChange() {
+ final BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE);
+
+ final Pointer<Boolean> trigger1 = new Pointer<>();
+ final Pointer<Boolean> trigger2 = new Pointer<>();
+ final ListVector v1 = new ListVector("v1", allocator,
+ FieldType.nullable(ArrowType.Null.INSTANCE),
+ newTriggerCallback(trigger1));
+ final ListVector v2 = new ListVector("v2", allocator,
+ FieldType.nullable(ArrowType.Null.INSTANCE),
+ newTriggerCallback(trigger2));
+
+ try {
+ // since we are working with empty vectors, their internal
+ // buffers will be allocator.EMPTY which use
+ // ReferenceManager.NO_OP instance and transfer() is not
+ // supported
+ v1.makeTransferPair(v2).transfer();
+ } catch (Exception e) {
+ assertTrue(e instanceof UnsupportedOperationException);
+ assertTrue(e.getMessage().contains(ReferenceManager.NO_OP_ERROR_MESSAGE));
+ }
+
+ assertFalse(trigger1.value);
+ assertFalse(trigger2.value);
+
+ v1.close();
+ v2.close();
+ allocator.close();
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestCopyFrom.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestCopyFrom.java
new file mode 100644
index 000000000..3786f63c3
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestCopyFrom.java
@@ -0,0 +1,1104 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.apache.arrow.vector.TestUtils.newVector;
+import static org.junit.Assert.*;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
+
+import java.math.BigDecimal;
+import java.nio.charset.Charset;
+import java.time.Duration;
+import java.time.Period;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+/*
+ * Tested field types:
+ *
+ * NullableInt
+ * NullableBigInt
+ * NullableFloat4
+ * NullableFloat8
+ * NullableBit
+ * NullableDecimal
+ * NullableIntervalDay
+ * NullableIntervalYear
+ * NullableSmallInt
+ * NullableTinyInt
+ * NullableVarChar
+ * NullableTimeMicro
+ * NullableTimeMilli
+ * NullableTimeStamp*
+ */
+
+public class TestCopyFrom {
+
+ private static final String EMPTY_SCHEMA_PATH = "";
+
+ private BufferAllocator allocator;
+
+ @Before
+ public void init() {
+ allocator = new RootAllocator(Long.MAX_VALUE);
+ }
+
+ @After
+ public void terminate() throws Exception {
+ allocator.close();
+ }
+
+ @Test /* NullableVarChar */
+ public void testCopyFromWithNulls() {
+ try (final VarCharVector vector =
+ newVector(VarCharVector.class, EMPTY_SCHEMA_PATH, MinorType.VARCHAR, allocator);
+ final VarCharVector vector2 =
+ newVector(VarCharVector.class, EMPTY_SCHEMA_PATH, MinorType.VARCHAR, allocator)) {
+ vector.allocateNew();
+ assertTrue(vector.getValueCapacity() >= 1);
+ assertEquals(0, vector.getValueCount());
+ int initialCapacity = vector.getValueCapacity();
+
+ for (int i = 0; i < initialCapacity; i++) {
+ if (i % 3 == 0) {
+ continue;
+ }
+ byte[] b = Integer.toString(i).getBytes();
+ vector.setSafe(i, b, 0, b.length);
+ }
+
+ /* NO reAlloc() should have happened in setSafe() */
+ int capacity = vector.getValueCapacity();
+ assertEquals(initialCapacity, capacity);
+
+ vector.setValueCount(initialCapacity);
+
+ for (int i = 0; i < initialCapacity; i++) {
+ if (i % 3 == 0) {
+ assertNull(vector.getObject(i));
+ } else {
+ assertEquals(
+ "unexpected value at index: " + i,
+ Integer.toString(i),
+ vector.getObject(i).toString());
+ }
+ }
+
+ vector2.setInitialCapacity(initialCapacity);
+ vector2.allocateNew();
+ capacity = vector2.getValueCapacity();
+ assertEquals(initialCapacity, capacity);
+
+ for (int i = 0; i < initialCapacity; i++) {
+ vector2.copyFromSafe(i, i, vector);
+ if (i % 3 == 0) {
+ assertNull(vector2.getObject(i));
+ } else {
+ assertEquals(
+ "unexpected value at index: " + i,
+ Integer.toString(i),
+ vector2.getObject(i).toString());
+ }
+ }
+
+ /* NO reAlloc() should have happened in copyFrom */
+ capacity = vector2.getValueCapacity();
+ assertEquals(initialCapacity, capacity);
+
+ vector2.setValueCount(initialCapacity);
+
+ for (int i = 0; i < initialCapacity; i++) {
+ if (i % 3 == 0) {
+ assertNull(vector2.getObject(i));
+ } else {
+ assertEquals(
+ "unexpected value at index: " + i,
+ Integer.toString(i),
+ vector2.getObject(i).toString());
+ }
+ }
+ }
+ }
+
+ @Test /* NullableVarChar */
+ public void testCopyFromWithNulls1() {
+ try (final VarCharVector vector =
+ newVector(VarCharVector.class, EMPTY_SCHEMA_PATH, MinorType.VARCHAR, allocator);
+ final VarCharVector vector2 =
+ newVector(VarCharVector.class, EMPTY_SCHEMA_PATH, MinorType.VARCHAR, allocator)) {
+ vector.allocateNew();
+ assertTrue(vector.getValueCapacity() >= 1);
+ assertEquals(0, vector.getValueCount());
+ int initialCapacity = vector.getValueCapacity();
+
+ for (int i = 0; i < initialCapacity; i++) {
+ if (i % 3 == 0) {
+ continue;
+ }
+ byte[] b = Integer.toString(i).getBytes();
+ vector.setSafe(i, b, 0, b.length);
+ }
+
+ /* NO reAlloc() should have happened in setSafe() */
+ int capacity = vector.getValueCapacity();
+ assertEquals(initialCapacity, capacity);
+
+ vector.setValueCount(initialCapacity);
+
+ for (int i = 0; i < initialCapacity; i++) {
+ if (i % 3 == 0) {
+ assertNull(vector.getObject(i));
+ } else {
+ assertEquals(
+ "unexpected value at index: " + i,
+ Integer.toString(i),
+ vector.getObject(i).toString());
+ }
+ }
+
+ /* set lesser initial capacity than actually needed
+ * to trigger reallocs in copyFromSafe()
+ */
+ vector2.allocateNew((initialCapacity / 4) * 10, initialCapacity / 4);
+
+ capacity = vector2.getValueCapacity();
+ assertTrue(capacity >= initialCapacity / 4);
+ assertTrue(capacity < initialCapacity / 2);
+
+ for (int i = 0; i < initialCapacity; i++) {
+ vector2.copyFromSafe(i, i, vector);
+ if (i % 3 == 0) {
+ assertNull(vector2.getObject(i));
+ } else {
+ assertEquals(
+ "unexpected value at index: " + i,
+ Integer.toString(i),
+ vector2.getObject(i).toString());
+ }
+ }
+
+ /* 2 reAllocs should have happened in copyFromSafe() */
+ capacity = vector2.getValueCapacity();
+ assertTrue(capacity >= initialCapacity);
+
+ vector2.setValueCount(initialCapacity);
+
+ for (int i = 0; i < initialCapacity; i++) {
+ if (i % 3 == 0) {
+ assertNull(vector2.getObject(i));
+ } else {
+ assertEquals(
+ "unexpected value at index: " + i,
+ Integer.toString(i),
+ vector2.getObject(i).toString());
+ }
+ }
+ }
+ }
+
+ @Test /* IntVector */
+ public void testCopyFromWithNulls2() {
+ try (final IntVector vector1 = new IntVector(EMPTY_SCHEMA_PATH, allocator);
+ final IntVector vector2 = new IntVector(EMPTY_SCHEMA_PATH, allocator)) {
+
+ vector1.allocateNew();
+ assertTrue(vector1.getValueCapacity() >= vector1.INITIAL_VALUE_ALLOCATION);
+ assertEquals(0, vector1.getValueCount());
+ int initialCapacity = vector1.getValueCapacity();
+
+ for (int i = 0; i < initialCapacity; i++) {
+ if ((i & 1) == 0) {
+ continue;
+ }
+ vector1.setSafe(i, 1000 + i);
+ }
+
+ vector1.setValueCount(initialCapacity);
+
+ /* No realloc should have happened in setSafe or
+ * setValueCount
+ */
+ assertEquals(initialCapacity, vector1.getValueCapacity());
+ assertEquals(initialCapacity, vector1.getValueCount());
+
+ for (int i = 0; i < initialCapacity; i++) {
+ if ((i & 1) == 0) {
+ assertNull(vector1.getObject(i));
+ } else {
+ assertEquals("unexpected value at index: " + i, 1000 + i, vector1.get(i));
+ }
+ }
+
+ /* set lesser initial capacity than actually needed
+ * to trigger reallocs in copyFromSafe()
+ */
+ vector2.allocateNew(initialCapacity / 4);
+ assertTrue(vector2.getValueCapacity() >= initialCapacity / 4);
+ assertTrue(vector2.getValueCapacity() < initialCapacity / 2);
+
+ for (int i = 0; i < initialCapacity; i++) {
+ vector2.copyFromSafe(i, i, vector1);
+ }
+
+ /* 2 realloc should have happened in copyFromSafe() */
+ assertTrue(vector2.getValueCapacity() >= initialCapacity);
+ vector2.setValueCount(initialCapacity * 2);
+ /* setValueCount() should have done another realloc */
+ assertEquals(initialCapacity * 2, vector2.getValueCount());
+ assertTrue(vector2.getValueCapacity() >= initialCapacity * 2);
+
+ /* check vector data after copy and realloc */
+ for (int i = 0; i < initialCapacity * 2; i++) {
+ if (((i & 1) == 0) || (i >= initialCapacity)) {
+ assertNull(vector2.getObject(i));
+ } else {
+ assertEquals("unexpected value at index: " + i, 1000 + i, vector2.get(i));
+ }
+ }
+ }
+ }
+
+ @Test /* BigIntVector */
+ public void testCopyFromWithNulls3() {
+ try (final BigIntVector vector1 = new BigIntVector(EMPTY_SCHEMA_PATH, allocator);
+ final BigIntVector vector2 = new BigIntVector(EMPTY_SCHEMA_PATH, allocator)) {
+
+ vector1.allocateNew();
+ assertTrue(vector1.getValueCapacity() >= vector1.INITIAL_VALUE_ALLOCATION);
+ assertEquals(0, vector1.getValueCount());
+ int initialCapacity = vector1.getValueCapacity();
+
+ for (int i = 0; i < initialCapacity; i++) {
+ if ((i & 1) == 0) {
+ continue;
+ }
+ vector1.setSafe(i, 10000000000L + (long) i);
+ }
+
+ vector1.setValueCount(initialCapacity);
+
+ /* No realloc should have happened in setSafe or
+ * setValueCount
+ */
+ assertEquals(initialCapacity, vector1.getValueCapacity());
+ assertEquals(initialCapacity, vector1.getValueCount());
+
+ for (int i = 0; i < initialCapacity; i++) {
+ if ((i & 1) == 0) {
+ assertNull(vector1.getObject(i));
+ } else {
+ assertEquals("unexpected value at index: " + i, 10000000000L + (long) i, vector1.get(i));
+ }
+ }
+
+ /* set lesser initial capacity than actually needed
+ * to trigger reallocs in copyFromSafe()
+ */
+ vector2.allocateNew(initialCapacity / 4);
+ assertTrue(vector2.getValueCapacity() >= initialCapacity / 4);
+ assertTrue(vector2.getValueCapacity() < initialCapacity / 2);
+
+ for (int i = 0; i < initialCapacity; i++) {
+ vector2.copyFromSafe(i, i, vector1);
+ }
+
+ /* 2 realloc should have happened in copyFromSafe() */
+ assertTrue(vector2.getValueCapacity() >= initialCapacity);
+ vector2.setValueCount(initialCapacity * 2);
+ /* setValueCount() should have done another realloc */
+ assertEquals(initialCapacity * 2, vector2.getValueCount());
+ assertTrue(vector2.getValueCapacity() >= initialCapacity * 2);
+
+ /* check vector data after copy and realloc */
+ for (int i = 0; i < initialCapacity * 2; i++) {
+ if (((i & 1) == 0) || (i >= initialCapacity)) {
+ assertNull(vector2.getObject(i));
+ } else {
+ assertEquals("unexpected value at index: " + i, 10000000000L + (long) i, vector2.get(i));
+ }
+ }
+ }
+ }
+
+ @Test /* BitVector */
+ public void testCopyFromWithNulls4() {
+ try (final BitVector vector1 = new BitVector(EMPTY_SCHEMA_PATH, allocator);
+ final BitVector vector2 = new BitVector(EMPTY_SCHEMA_PATH, allocator)) {
+
+ vector1.setInitialCapacity(4096);
+ vector1.allocateNew();
+ assertEquals(4096, vector1.getValueCapacity());
+ assertEquals(0, vector1.getValueCount());
+
+ int counter = 0;
+ for (int i = 0; i < 4096; i++) {
+ if ((i & 1) == 0) {
+ continue;
+ }
+ if ((counter & 1) == 0) {
+ vector1.setSafe(i, 1);
+ } else {
+ vector1.setSafe(i, 0);
+ }
+ counter++;
+ }
+
+ vector1.setValueCount(4096);
+
+ /* No realloc should have happened in setSafe or
+ * setValueCount
+ */
+ assertEquals(4096, vector1.getValueCapacity());
+ assertEquals(4096, vector1.getValueCount());
+
+ counter = 0;
+ for (int i = 0; i < 4096; i++) {
+ if ((i & 1) == 0) {
+ assertNull(vector1.getObject(i));
+ } else {
+ if ((counter & 1) == 0) {
+ assertTrue(vector1.getObject(i));
+ } else {
+ assertFalse(vector1.getObject(i));
+ }
+ counter++;
+ }
+ }
+
+ /* set lesser initial capacity than actually needed
+ * to trigger reallocs in copyFromSafe()
+ */
+ vector2.allocateNew(1024);
+ assertEquals(1024, vector2.getValueCapacity());
+
+ for (int i = 0; i < 4096; i++) {
+ vector2.copyFromSafe(i, i, vector1);
+ }
+
+ /* 2 realloc should have happened in copyFromSafe() */
+ assertEquals(4096, vector2.getValueCapacity());
+ vector2.setValueCount(8192);
+ /* setValueCount() should have done another realloc */
+ assertEquals(8192, vector2.getValueCount());
+ assertEquals(8192, vector2.getValueCapacity());
+
+ /* check vector data after copy and realloc */
+ counter = 0;
+ for (int i = 0; i < 8192; i++) {
+ if (((i & 1) == 0) || (i >= 4096)) {
+ assertNull(vector2.getObject(i));
+ } else {
+ if ((counter & 1) == 0) {
+ assertTrue(vector2.getObject(i));
+ } else {
+ assertFalse(vector2.getObject(i));
+ }
+ counter++;
+ }
+ }
+ }
+ }
+
+ @Test /* Float4Vector */
+ public void testCopyFromWithNulls5() {
+ try (final Float4Vector vector1 = new Float4Vector(EMPTY_SCHEMA_PATH, allocator);
+ final Float4Vector vector2 = new Float4Vector(EMPTY_SCHEMA_PATH, allocator)) {
+
+ vector1.allocateNew();
+ assertTrue(vector1.getValueCapacity() >= vector1.INITIAL_VALUE_ALLOCATION);
+ assertEquals(0, vector1.getValueCount());
+ int initialCapacity = vector1.getValueCapacity();
+
+ for (int i = 0; i < initialCapacity; i++) {
+ if ((i & 1) == 0) {
+ continue;
+ }
+ vector1.setSafe(i, 100.25f + (float) i);
+ }
+
+ vector1.setValueCount(initialCapacity);
+
+ /* No realloc should have happened in setSafe or
+ * setValueCount
+ */
+ assertEquals(initialCapacity, vector1.getValueCapacity());
+ assertEquals(initialCapacity, vector1.getValueCount());
+
+ for (int i = 0; i < initialCapacity; i++) {
+ if ((i & 1) == 0) {
+ assertNull(vector1.getObject(i));
+ } else {
+ assertEquals("unexpected value at index: " + i, 100.25f + (float) i, vector1.get(i), 0);
+ }
+ }
+
+ /* set lesser initial capacity than actually needed
+ * to trigger reallocs in copyFromSafe()
+ */
+ vector2.allocateNew(initialCapacity / 4);
+ assertTrue(vector2.getValueCapacity() >= initialCapacity / 4);
+ assertTrue(vector2.getValueCapacity() < initialCapacity / 2);
+
+ for (int i = 0; i < initialCapacity; i++) {
+ vector2.copyFromSafe(i, i, vector1);
+ }
+
+ /* 2 realloc should have happened in copyFromSafe() */
+ assertTrue(vector2.getValueCapacity() >= initialCapacity);
+ vector2.setValueCount(initialCapacity * 2);
+ /* setValueCount() should have done another realloc */
+ assertEquals(initialCapacity * 2, vector2.getValueCount());
+ assertTrue(vector2.getValueCapacity() >= initialCapacity * 2);
+
+ /* check vector data after copy and realloc */
+ for (int i = 0; i < initialCapacity * 2; i++) {
+ if (((i & 1) == 0) || (i >= initialCapacity)) {
+ assertNull(vector2.getObject(i));
+ } else {
+ assertEquals("unexpected value at index: " + i, 100.25f + i * 1.0f, vector2.get(i), 0);
+ }
+ }
+ }
+ }
+
+ @Test /* Float8Vector */
+ public void testCopyFromWithNulls6() {
+ try (final Float8Vector vector1 = new Float8Vector(EMPTY_SCHEMA_PATH, allocator);
+ final Float8Vector vector2 = new Float8Vector(EMPTY_SCHEMA_PATH, allocator)) {
+
+ vector1.allocateNew();
+ assertTrue(vector1.getValueCapacity() >= vector1.INITIAL_VALUE_ALLOCATION);
+ assertEquals(0, vector1.getValueCount());
+ int initialCapacity = vector1.getValueCapacity();
+
+ for (int i = 0; i < initialCapacity; i++) {
+ if ((i & 1) == 0) {
+ continue;
+ }
+ vector1.setSafe(i, 123456.7865 + (double) i);
+ }
+
+ vector1.setValueCount(initialCapacity);
+
+ /* No realloc should have happened in setSafe or
+ * setValueCount
+ */
+ assertEquals(initialCapacity, vector1.getValueCapacity());
+ assertEquals(initialCapacity, vector1.getValueCount());
+
+ for (int i = 0; i < initialCapacity; i++) {
+ if ((i & 1) == 0) {
+ assertNull(vector1.getObject(i));
+ } else {
+ assertEquals(
+ "unexpected value at index: " + i, 123456.7865 + (double) i, vector1.get(i), 0);
+ }
+ }
+
+ /* set lesser initial capacity than actually needed
+ * to trigger reallocs in copyFromSafe()
+ */
+ vector2.allocateNew(initialCapacity / 4);
+ assertTrue(vector2.getValueCapacity() >= initialCapacity / 4);
+ assertTrue(vector2.getValueCapacity() < initialCapacity / 2);
+
+ for (int i = 0; i < initialCapacity; i++) {
+ vector2.copyFromSafe(i, i, vector1);
+ }
+
+ /* 2 realloc should have happened in copyFromSafe() */
+ assertTrue(vector2.getValueCapacity() >= initialCapacity);
+ vector2.setValueCount(initialCapacity * 2);
+ /* setValueCount() should have done another realloc */
+ assertEquals(initialCapacity * 2, vector2.getValueCount());
+ assertTrue(vector2.getValueCapacity() >= initialCapacity * 2);
+
+ /* check vector data after copy and realloc */
+ for (int i = 0; i < initialCapacity * 2; i++) {
+ if (((i & 1) == 0) || (i >= initialCapacity)) {
+ assertNull(vector2.getObject(i));
+ } else {
+ assertEquals(
+ "unexpected value at index: " + i, 123456.7865 + (double) i, vector2.get(i), 0);
+ }
+ }
+ }
+ }
+
+ @Test /* IntervalDayVector */
+ public void testCopyFromWithNulls7() {
+ try (final IntervalDayVector vector1 = new IntervalDayVector(EMPTY_SCHEMA_PATH, allocator);
+ final IntervalDayVector vector2 = new IntervalDayVector(EMPTY_SCHEMA_PATH, allocator)) {
+
+ vector1.allocateNew();
+ assertTrue(vector1.getValueCapacity() >= vector1.INITIAL_VALUE_ALLOCATION);
+ assertEquals(0, vector1.getValueCount());
+ int initialCapacity = vector1.getValueCapacity();
+
+ final int days = 10;
+ final int milliseconds = 10000;
+ for (int i = 0; i < initialCapacity; i++) {
+ if ((i & 1) == 0) {
+ continue;
+ }
+ vector1.setSafe(i, days + i, milliseconds + i);
+ }
+
+ vector1.setValueCount(initialCapacity);
+
+ /* No realloc should have happened in setSafe or
+ * setValueCount
+ */
+ assertEquals(initialCapacity, vector1.getValueCapacity());
+ assertEquals(initialCapacity, vector1.getValueCount());
+
+ for (int i = 0; i < initialCapacity; i++) {
+ if ((i & 1) == 0) {
+ assertNull(vector1.getObject(i));
+ } else {
+ final Duration d = vector1.getObject(i);
+ assertEquals(days + i, d.toDays());
+ assertEquals(milliseconds + i, d.minusDays(days + i).toMillis());
+ }
+ }
+
+ /* set lesser initial capacity than actually needed
+ * to trigger reallocs in copyFromSafe()
+ */
+ vector2.allocateNew(initialCapacity / 4);
+ assertTrue(vector2.getValueCapacity() >= initialCapacity / 4);
+ assertTrue(vector2.getValueCapacity() < initialCapacity / 2);
+
+ for (int i = 0; i < initialCapacity; i++) {
+ vector2.copyFromSafe(i, i, vector1);
+ }
+
+ /* 2 realloc should have happened in copyFromSafe() */
+ assertTrue(vector2.getValueCapacity() >= initialCapacity);
+ vector2.setValueCount(initialCapacity * 2);
+ /* setValueCount() should have done another realloc */
+ assertEquals(initialCapacity * 2, vector2.getValueCount());
+ assertTrue(vector2.getValueCapacity() >= initialCapacity * 2);
+
+ /* check vector data after copy and realloc */
+ for (int i = 0; i < initialCapacity * 2; i++) {
+ if (((i & 1) == 0) || (i >= initialCapacity)) {
+ assertNull(vector2.getObject(i));
+ } else {
+ final Duration d = vector2.getObject(i);
+ assertEquals(days + i, d.toDays());
+ assertEquals(milliseconds + i, d.minusDays(days + i).toMillis());
+ }
+ }
+ }
+ }
+
+ @Test /* IntervalYearVector */
+ public void testCopyFromWithNulls8() {
+ try (final IntervalYearVector vector1 = new IntervalYearVector(EMPTY_SCHEMA_PATH, allocator);
+ final IntervalYearVector vector2 = new IntervalYearVector(EMPTY_SCHEMA_PATH, allocator)) {
+
+ vector1.allocateNew();
+ assertTrue(vector1.getValueCapacity() >= vector1.INITIAL_VALUE_ALLOCATION);
+ assertEquals(0, vector1.getValueCount());
+ int initialCapacity = vector1.getValueCapacity();
+
+ final int interval = 30; /* 2 years 6 months */
+ final Period[] periods = new Period[4096];
+ for (int i = 0; i < initialCapacity; i++) {
+ if ((i & 1) == 0) {
+ continue;
+ }
+ vector1.setSafe(i, interval + i);
+ final int years = (interval + i) / org.apache.arrow.vector.util.DateUtility.yearsToMonths;
+ final int months = (interval + i) % org.apache.arrow.vector.util.DateUtility.yearsToMonths;
+ periods[i] = Period.ofYears(years).plusMonths(months).normalized();
+ }
+
+ vector1.setValueCount(initialCapacity);
+
+ /* No realloc should have happened in setSafe or
+ * setValueCount
+ */
+ assertEquals(initialCapacity, vector1.getValueCapacity());
+ assertEquals(initialCapacity, vector1.getValueCount());
+
+ for (int i = 0; i < initialCapacity; i++) {
+ if ((i & 1) == 0) {
+ assertNull(vector1.getObject(i));
+ } else {
+ final Period p = vector1.getObject(i).normalized();
+ assertEquals(interval + i, vector1.get(i));
+ assertEquals(periods[i], p);
+ }
+ }
+
+ /* set lesser initial capacity than actually needed
+ * to trigger reallocs in copyFromSafe()
+ */
+ vector2.allocateNew(initialCapacity / 4);
+ assertTrue(vector2.getValueCapacity() >= initialCapacity / 4);
+ assertTrue(vector2.getValueCapacity() < initialCapacity / 2);
+
+ for (int i = 0; i < initialCapacity; i++) {
+ vector2.copyFromSafe(i, i, vector1);
+ }
+
+ /* 2 realloc should have happened in copyFromSafe() */
+ assertTrue(vector2.getValueCapacity() >= initialCapacity);
+ vector2.setValueCount(initialCapacity * 2);
+ /* setValueCount() should have done another realloc */
+ assertEquals(initialCapacity * 2, vector2.getValueCount());
+ assertTrue(vector2.getValueCapacity() >= initialCapacity * 2);
+
+ /* check vector data after copy and realloc */
+ for (int i = 0; i < initialCapacity * 2; i++) {
+ if (((i & 1) == 0) || (i >= initialCapacity)) {
+ assertNull(vector2.getObject(i));
+ } else {
+ final Period p = vector2.getObject(i).normalized();
+ assertEquals(periods[i], p);
+ }
+ }
+ }
+ }
+
+ @Test /* SmallIntVector */
+ public void testCopyFromWithNulls9() {
+ try (final SmallIntVector vector1 = new SmallIntVector(EMPTY_SCHEMA_PATH, allocator);
+ final SmallIntVector vector2 = new SmallIntVector(EMPTY_SCHEMA_PATH, allocator)) {
+
+ vector1.allocateNew();
+ assertTrue(vector1.getValueCapacity() >= vector1.INITIAL_VALUE_ALLOCATION);
+ assertEquals(0, vector1.getValueCount());
+ int initialCapacity = vector1.getValueCapacity();
+
+ final short val = 1000;
+ for (int i = 0; i < initialCapacity; i++) {
+ if ((i & 1) == 0) {
+ continue;
+ }
+ vector1.setSafe(i, val + (short) i);
+ }
+
+ vector1.setValueCount(initialCapacity);
+
+ /* No realloc should have happened in setSafe or
+ * setValueCount
+ */
+ assertEquals(initialCapacity, vector1.getValueCapacity());
+ assertEquals(initialCapacity, vector1.getValueCount());
+
+ for (int i = 0; i < initialCapacity; i++) {
+ if ((i & 1) == 0) {
+ assertNull(vector1.getObject(i));
+ } else {
+ assertEquals("unexpected value at index: " + i, val + (short) i, vector1.get(i));
+ }
+ }
+
+ /* set lesser initial capacity than actually needed
+ * to trigger reallocs in copyFromSafe()
+ */
+ vector2.allocateNew(initialCapacity / 4);
+ assertTrue(vector2.getValueCapacity() >= initialCapacity / 4);
+ assertTrue(vector2.getValueCapacity() < initialCapacity / 2);
+
+ for (int i = 0; i < initialCapacity; i++) {
+ vector2.copyFromSafe(i, i, vector1);
+ }
+
+ /* 2 realloc should have happened in copyFromSafe() */
+ assertTrue(vector2.getValueCapacity() >= initialCapacity);
+ vector2.setValueCount(initialCapacity * 2);
+ /* setValueCount() should have done another realloc */
+ assertEquals(initialCapacity * 2, vector2.getValueCount());
+ assertTrue(vector2.getValueCapacity() >= initialCapacity * 2);
+
+ /* check vector data after copy and realloc */
+ for (int i = 0; i < initialCapacity * 2; i++) {
+ if (((i & 1) == 0) || (i >= initialCapacity)) {
+ assertNull(vector2.getObject(i));
+ } else {
+ assertEquals("unexpected value at index: " + i, val + (short) i, vector2.get(i));
+ }
+ }
+ }
+ }
+
+ @Test /* TimeMicroVector */
+ public void testCopyFromWithNulls10() {
+ try (final TimeMicroVector vector1 = new TimeMicroVector(EMPTY_SCHEMA_PATH, allocator);
+ final TimeMicroVector vector2 = new TimeMicroVector(EMPTY_SCHEMA_PATH, allocator)) {
+
+ vector1.allocateNew();
+ assertTrue(vector1.getValueCapacity() >= vector1.INITIAL_VALUE_ALLOCATION);
+ assertEquals(0, vector1.getValueCount());
+ int initialCapacity = vector1.getValueCapacity();
+
+ final long val = 100485765432L;
+ for (int i = 0; i < initialCapacity; i++) {
+ if ((i & 1) == 0) {
+ continue;
+ }
+ vector1.setSafe(i, val + (long) i);
+ }
+
+ vector1.setValueCount(initialCapacity);
+
+ /* No realloc should have happened in setSafe or
+ * setValueCount
+ */
+ assertEquals(initialCapacity, vector1.getValueCapacity());
+ assertEquals(initialCapacity, vector1.getValueCount());
+
+ for (int i = 0; i < initialCapacity; i++) {
+ if ((i & 1) == 0) {
+ assertNull(vector1.getObject(i));
+ } else {
+ assertEquals("unexpected value at index: " + i, val + (long) i, vector1.get(i));
+ }
+ }
+
+ /* set lesser initial capacity than actually needed
+ * to trigger reallocs in copyFromSafe()
+ */
+ vector2.allocateNew(initialCapacity / 4);
+ assertTrue(vector2.getValueCapacity() >= initialCapacity / 4);
+ assertTrue(vector2.getValueCapacity() < initialCapacity / 2);
+
+ for (int i = 0; i < initialCapacity; i++) {
+ vector2.copyFromSafe(i, i, vector1);
+ }
+
+ /* 2 realloc should have happened in copyFromSafe() */
+ assertTrue(vector2.getValueCapacity() >= initialCapacity);
+ vector2.setValueCount(initialCapacity * 2);
+ /* setValueCount() should have done another realloc */
+ assertEquals(initialCapacity * 2, vector2.getValueCount());
+ assertTrue(vector2.getValueCapacity() >= initialCapacity * 2);
+
+ /* check vector data after copy and realloc */
+ for (int i = 0; i < initialCapacity * 2; i++) {
+ if (((i & 1) == 0) || (i >= initialCapacity)) {
+ assertNull(vector2.getObject(i));
+ } else {
+ assertEquals("unexpected value at index: " + i, val + (long) i, vector2.get(i));
+ }
+ }
+ }
+ }
+
+ @Test /* TimeMilliVector */
+ public void testCopyFromWithNulls11() {
+ try (final TimeMilliVector vector1 = new TimeMilliVector(EMPTY_SCHEMA_PATH, allocator);
+ final TimeMilliVector vector2 = new TimeMilliVector(EMPTY_SCHEMA_PATH, allocator)) {
+
+ vector1.allocateNew();
+ assertTrue(vector1.getValueCapacity() >= vector1.INITIAL_VALUE_ALLOCATION);
+ assertEquals(0, vector1.getValueCount());
+ int initialCapacity = vector1.getValueCapacity();
+
+ final int val = 1000;
+ for (int i = 0; i < initialCapacity; i++) {
+ if ((i & 1) == 0) {
+ continue;
+ }
+ vector1.setSafe(i, val + i);
+ }
+
+ vector1.setValueCount(initialCapacity);
+
+ /* No realloc should have happened in setSafe or
+ * setValueCount
+ */
+ assertEquals(initialCapacity, vector1.getValueCapacity());
+ assertEquals(initialCapacity, vector1.getValueCount());
+
+ for (int i = 0; i < initialCapacity; i++) {
+ if ((i & 1) == 0) {
+ assertNull(vector1.getObject(i));
+ } else {
+ assertEquals("unexpected value at index: " + i, val + i, vector1.get(i));
+ }
+ }
+
+ /* set lesser initial capacity than actually needed
+ * to trigger reallocs in copyFromSafe()
+ */
+ vector2.allocateNew(initialCapacity / 4);
+ assertTrue(vector2.getValueCapacity() >= initialCapacity / 4);
+ assertTrue(vector2.getValueCapacity() < initialCapacity / 2);
+
+ for (int i = 0; i < initialCapacity; i++) {
+ vector2.copyFromSafe(i, i, vector1);
+ }
+
+ /* 2 realloc should have happened in copyFromSafe() */
+ assertTrue(vector2.getValueCapacity() >= initialCapacity);
+ vector2.setValueCount(initialCapacity * 2);
+ /* setValueCount() should have done another realloc */
+ assertEquals(initialCapacity * 2, vector2.getValueCount());
+ assertTrue(vector2.getValueCapacity() >= initialCapacity * 2);
+
+ /* check vector data after copy and realloc */
+ for (int i = 0; i < initialCapacity * 2; i++) {
+ if (((i & 1) == 0) || (i >= initialCapacity)) {
+ assertNull(vector2.getObject(i));
+ } else {
+ assertEquals("unexpected value at index: " + i, val + i, vector2.get(i));
+ }
+ }
+ }
+ }
+
+ @Test /* TinyIntVector */
+ public void testCopyFromWithNulls12() {
+ try (final TinyIntVector vector1 = new TinyIntVector(EMPTY_SCHEMA_PATH, allocator);
+ final TinyIntVector vector2 = new TinyIntVector(EMPTY_SCHEMA_PATH, allocator)) {
+
+ vector1.allocateNew();
+ assertTrue(vector1.getValueCapacity() >= vector1.INITIAL_VALUE_ALLOCATION);
+ assertEquals(0, vector1.getValueCount());
+ int initialCapacity = vector1.getValueCapacity();
+
+ byte val = -128;
+ for (int i = 0; i < initialCapacity; i++) {
+ if ((i & 1) == 0) {
+ continue;
+ }
+ vector1.setSafe(i, val);
+ val++;
+ }
+
+ vector1.setValueCount(initialCapacity);
+
+ /* No realloc should have happened in setSafe or
+ * setValueCount
+ */
+ assertEquals(initialCapacity, vector1.getValueCapacity());
+ assertEquals(initialCapacity, vector1.getValueCount());
+
+ val = -128;
+ for (int i = 0; i < initialCapacity; i++) {
+ if ((i & 1) == 0) {
+ assertNull(vector1.getObject(i));
+ } else {
+ assertEquals("unexpected value at index: " + i, val, vector1.get(i));
+ val++;
+ }
+ }
+
+ /* set lesser initial capacity than actually needed
+ * to trigger reallocs in copyFromSafe()
+ */
+ vector2.allocateNew(initialCapacity / 4);
+ assertTrue(vector2.getValueCapacity() >= initialCapacity / 4);
+
+ for (int i = 0; i < initialCapacity; i++) {
+ vector2.copyFromSafe(i, i, vector1);
+ }
+
+ /* 2 realloc should have happened in copyFromSafe() */
+ assertTrue(vector2.getValueCapacity() >= initialCapacity);
+ vector2.setValueCount(initialCapacity * 2);
+ /* setValueCount() should have done another realloc */
+ assertEquals(initialCapacity * 2, vector2.getValueCount());
+ assertTrue(vector2.getValueCapacity() >= initialCapacity * 2);
+
+ /* check vector data after copy and realloc */
+ val = -128;
+ for (int i = 0; i < initialCapacity * 2; i++) {
+ if (((i & 1) == 0) || (i >= initialCapacity)) {
+ assertNull(vector2.getObject(i));
+ } else {
+ assertEquals("unexpected value at index: " + i, val, vector2.get(i));
+ val++;
+ }
+ }
+ }
+ }
+
+ @Test /* DecimalVector */
+ public void testCopyFromWithNulls13() {
+ try (final DecimalVector vector1 = new DecimalVector(EMPTY_SCHEMA_PATH, allocator, 30, 16);
+ final DecimalVector vector2 = new DecimalVector(EMPTY_SCHEMA_PATH, allocator, 30, 16)) {
+
+ vector1.allocateNew();
+ assertTrue(vector1.getValueCapacity() >= vector1.INITIAL_VALUE_ALLOCATION);
+ assertEquals(0, vector1.getValueCount());
+ int initialCapacity = vector1.getValueCapacity();
+
+ final double baseValue = 104567897654.876543654;
+ final BigDecimal[] decimals = new BigDecimal[4096];
+ for (int i = 0; i < initialCapacity; i++) {
+ if ((i & 1) == 0) {
+ continue;
+ }
+ BigDecimal decimal = new BigDecimal(baseValue + (double) i);
+ vector1.setSafe(i, decimal);
+ decimals[i] = decimal;
+ }
+
+ vector1.setValueCount(initialCapacity);
+
+ /* No realloc should have happened in setSafe or
+ * setValueCount
+ */
+ assertEquals(initialCapacity, vector1.getValueCapacity());
+ assertEquals(initialCapacity, vector1.getValueCount());
+
+ for (int i = 0; i < initialCapacity; i++) {
+ if ((i & 1) == 0) {
+ assertNull(vector1.getObject(i));
+ } else {
+ final BigDecimal decimal = vector1.getObject(i);
+ assertEquals(decimals[i], decimal);
+ }
+ }
+
+ /* set lesser initial capacity than actually needed
+ * to trigger reallocs in copyFromSafe()
+ */
+ vector2.allocateNew(initialCapacity / 4);
+ assertTrue(vector2.getValueCapacity() >= initialCapacity / 4);
+ assertTrue(vector2.getValueCapacity() < initialCapacity / 2);
+
+ for (int i = 0; i < initialCapacity; i++) {
+ vector2.copyFromSafe(i, i, vector1);
+ }
+
+ /* 2 realloc should have happened in copyFromSafe() */
+ assertTrue(vector2.getValueCapacity() >= initialCapacity);
+ vector2.setValueCount(initialCapacity * 2);
+ /* setValueCount() should have done another realloc */
+ assertEquals(initialCapacity * 2, vector2.getValueCount());
+ assertTrue(vector2.getValueCapacity() >= initialCapacity * 2);
+
+ /* check vector data after copy and realloc */
+ for (int i = 0; i < initialCapacity * 2; i++) {
+ if (((i & 1) == 0) || (i >= initialCapacity)) {
+ assertNull(vector2.getObject(i));
+ } else {
+ final BigDecimal decimal = vector2.getObject(i);
+ assertEquals(decimals[i], decimal);
+ }
+ }
+ }
+ }
+
+ @Test /* TimeStampVector */
+ public void testCopyFromWithNulls14() {
+ try (final TimeStampVector vector1 = new TimeStampMicroVector(EMPTY_SCHEMA_PATH, allocator);
+ final TimeStampVector vector2 = new TimeStampMicroVector(EMPTY_SCHEMA_PATH, allocator)) {
+
+ vector1.allocateNew();
+ assertTrue(vector1.getValueCapacity() >= vector1.INITIAL_VALUE_ALLOCATION);
+ assertEquals(0, vector1.getValueCount());
+ int initialCapacity = vector1.getValueCapacity();
+
+ final long val = 20145678912L;
+ for (int i = 0; i < initialCapacity; i++) {
+ if ((i & 1) == 0) {
+ continue;
+ }
+ vector1.setSafe(i, val + (long) i);
+ }
+
+ vector1.setValueCount(initialCapacity);
+
+ /* No realloc should have happened in setSafe or
+ * setValueCount
+ */
+ assertEquals(initialCapacity, vector1.getValueCapacity());
+ assertEquals(initialCapacity, vector1.getValueCount());
+
+ for (int i = 0; i < initialCapacity; i++) {
+ if ((i & 1) == 0) {
+ assertNull(vector1.getObject(i));
+ } else {
+ assertEquals("unexpected value at index: " + i, val + (long) i, vector1.get(i));
+ }
+ }
+
+ /* set lesser initial capacity than actually needed
+ * to trigger reallocs in copyFromSafe()
+ */
+ vector2.allocateNew(initialCapacity / 4);
+ assertTrue(vector2.getValueCapacity() >= initialCapacity / 4);
+ assertTrue(vector2.getValueCapacity() < initialCapacity / 2);
+
+ for (int i = 0; i < initialCapacity; i++) {
+ vector2.copyFromSafe(i, i, vector1);
+ }
+
+ /* 2 realloc should have happened in copyFromSafe() */
+ assertTrue(vector2.getValueCapacity() >= initialCapacity);
+ vector2.setValueCount(initialCapacity * 2);
+ /* setValueCount() should have done another realloc */
+ assertEquals(initialCapacity * 2, vector2.getValueCount());
+ assertTrue(vector2.getValueCapacity() >= initialCapacity * 2);
+
+ /* check vector data after copy and realloc */
+ for (int i = 0; i < initialCapacity * 2; i++) {
+ if (((i & 1) == 0) || (i >= initialCapacity)) {
+ assertNull(vector2.getObject(i));
+ } else {
+ assertEquals("unexpected value at index: " + i, val + (long) i, vector2.get(i));
+ }
+ }
+ }
+ }
+
+ @Test //https://issues.apache.org/jira/browse/ARROW-7837
+ public void testCopySafeArrow7837() {
+ // this test exposes a bug in `handleSafe` where
+ // it reads a stale index and as a result missed a required resize of the value vector.
+ try (VarCharVector vc1 = new VarCharVector("vc1", allocator);
+ VarCharVector vc2 = new VarCharVector("vc2", allocator);
+ ) {
+ //initial size is carefully set in order to force the second 'copyFromSafe' operation
+ // to trigger a reallocation of the vector.
+ vc2.setInitialCapacity(/*valueCount*/20, /*density*/0.5);
+
+ vc1.setSafe(0, "1234567890".getBytes(Charset.forName("utf-8")));
+ assertFalse(vc1.isNull(0));
+ assertEquals(vc1.getObject(0).toString(), "1234567890");
+
+ vc2.copyFromSafe(0, 0, vc1);
+ assertFalse(vc2.isNull(0));
+ assertEquals(vc2.getObject(0).toString(), "1234567890");
+
+ vc2.copyFromSafe(0, 5, vc1);
+ assertTrue(vc2.isNull(1));
+ assertTrue(vc2.isNull(2));
+ assertTrue(vc2.isNull(3));
+ assertTrue(vc2.isNull(4));
+ assertFalse(vc2.isNull(5));
+ assertEquals(vc2.getObject(5).toString(), "1234567890");
+ }
+ }
+
+
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestDecimal256Vector.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestDecimal256Vector.java
new file mode 100644
index 000000000..82c912cef
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestDecimal256Vector.java
@@ -0,0 +1,357 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+
+import java.math.BigDecimal;
+import java.math.BigInteger;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestDecimal256Vector {
+
+ private static long[] intValues;
+
+ static {
+ intValues = new long[60];
+ for (int i = 0; i < intValues.length / 2; i++) {
+ intValues[i] = 1 << i + 1;
+ intValues[2 * i] = -1 * (1 << i + 1);
+ }
+ }
+
+ private int scale = 3;
+
+ private BufferAllocator allocator;
+
+ @Before
+ public void init() {
+ allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100);
+ }
+
+ @After
+ public void terminate() throws Exception {
+ allocator.close();
+ }
+
+ @Test
+ public void testValuesWriteRead() {
+ try (Decimal256Vector decimalVector = TestUtils.newVector(Decimal256Vector.class, "decimal",
+ new ArrowType.Decimal(10, scale, 256), allocator);) {
+
+ try (Decimal256Vector oldConstructor = new Decimal256Vector("decimal", allocator, 10, scale);) {
+ assertEquals(decimalVector.getField().getType(), oldConstructor.getField().getType());
+ }
+
+ decimalVector.allocateNew();
+ BigDecimal[] values = new BigDecimal[intValues.length];
+ for (int i = 0; i < intValues.length; i++) {
+ BigDecimal decimal = new BigDecimal(BigInteger.valueOf(intValues[i]), scale);
+ values[i] = decimal;
+ decimalVector.setSafe(i, decimal);
+ }
+
+ decimalVector.setValueCount(intValues.length);
+
+ for (int i = 0; i < intValues.length; i++) {
+ BigDecimal value = decimalVector.getObject(i);
+ assertEquals("unexpected data at index: " + i, values[i], value);
+ }
+ }
+ }
+
+ @Test
+ public void testDecimal256DifferentScaleAndPrecision() {
+ try (Decimal256Vector decimalVector = TestUtils.newVector(Decimal256Vector.class, "decimal",
+ new ArrowType.Decimal(4, 2, 256), allocator)) {
+ decimalVector.allocateNew();
+
+ // test Decimal256 with different scale
+ {
+ BigDecimal decimal = new BigDecimal(BigInteger.valueOf(0), 3);
+ UnsupportedOperationException ue =
+ assertThrows(UnsupportedOperationException.class, () -> decimalVector.setSafe(0, decimal));
+ assertEquals("BigDecimal scale must equal that in the Arrow vector: 3 != 2", ue.getMessage());
+ }
+
+ // test BigDecimal with larger precision than initialized
+ {
+ BigDecimal decimal = new BigDecimal(BigInteger.valueOf(12345), 2);
+ UnsupportedOperationException ue =
+ assertThrows(UnsupportedOperationException.class, () -> decimalVector.setSafe(0, decimal));
+ assertEquals("BigDecimal precision can not be greater than that in the Arrow vector: 5 > 4", ue.getMessage());
+ }
+ }
+ }
+
+ @Test
+ public void testWriteBigEndian() {
+ try (Decimal256Vector decimalVector = TestUtils.newVector(Decimal256Vector.class, "decimal",
+ new ArrowType.Decimal(38, 18, 256), allocator);) {
+ decimalVector.allocateNew();
+ BigDecimal decimal1 = new BigDecimal("123456789.000000000000000000");
+ BigDecimal decimal2 = new BigDecimal("11.123456789123456789");
+ BigDecimal decimal3 = new BigDecimal("1.000000000000000000");
+ BigDecimal decimal4 = new BigDecimal("0.111111111000000000");
+ BigDecimal decimal5 = new BigDecimal("987654321.123456789000000000");
+ BigDecimal decimal6 = new BigDecimal("222222222222.222222222000000000");
+ BigDecimal decimal7 = new BigDecimal("7777777777777.666666667000000000");
+ BigDecimal decimal8 = new BigDecimal("1212121212.343434343000000000");
+
+ byte[] decimalValue1 = decimal1.unscaledValue().toByteArray();
+ byte[] decimalValue2 = decimal2.unscaledValue().toByteArray();
+ byte[] decimalValue3 = decimal3.unscaledValue().toByteArray();
+ byte[] decimalValue4 = decimal4.unscaledValue().toByteArray();
+ byte[] decimalValue5 = decimal5.unscaledValue().toByteArray();
+ byte[] decimalValue6 = decimal6.unscaledValue().toByteArray();
+ byte[] decimalValue7 = decimal7.unscaledValue().toByteArray();
+ byte[] decimalValue8 = decimal8.unscaledValue().toByteArray();
+
+ decimalVector.setBigEndian(0, decimalValue1);
+ decimalVector.setBigEndian(1, decimalValue2);
+ decimalVector.setBigEndian(2, decimalValue3);
+ decimalVector.setBigEndian(3, decimalValue4);
+ decimalVector.setBigEndian(4, decimalValue5);
+ decimalVector.setBigEndian(5, decimalValue6);
+ decimalVector.setBigEndian(6, decimalValue7);
+ decimalVector.setBigEndian(7, decimalValue8);
+
+ decimalVector.setValueCount(8);
+ assertEquals(8, decimalVector.getValueCount());
+ assertEquals(decimal1, decimalVector.getObject(0));
+ assertEquals(decimal2, decimalVector.getObject(1));
+ assertEquals(decimal3, decimalVector.getObject(2));
+ assertEquals(decimal4, decimalVector.getObject(3));
+ assertEquals(decimal5, decimalVector.getObject(4));
+ assertEquals(decimal6, decimalVector.getObject(5));
+ assertEquals(decimal7, decimalVector.getObject(6));
+ assertEquals(decimal8, decimalVector.getObject(7));
+ }
+ }
+
+ @Test
+ public void testLongReadWrite() {
+ try (Decimal256Vector decimalVector = TestUtils.newVector(Decimal256Vector.class, "decimal",
+ new ArrowType.Decimal(38, 0, 256), allocator)) {
+ decimalVector.allocateNew();
+
+ long[] longValues = {0L, -2L, Long.MAX_VALUE, Long.MIN_VALUE, 187L};
+
+ for (int i = 0; i < longValues.length; ++i) {
+ decimalVector.set(i, longValues[i]);
+ }
+
+ decimalVector.setValueCount(longValues.length);
+
+ for (int i = 0; i < longValues.length; ++i) {
+ assertEquals(new BigDecimal(longValues[i]), decimalVector.getObject(i));
+ }
+ }
+ }
+
+
+ @Test
+ public void testBigDecimalReadWrite() {
+ try (Decimal256Vector decimalVector = TestUtils.newVector(Decimal256Vector.class, "decimal",
+ new ArrowType.Decimal(38, 9, 256), allocator);) {
+ decimalVector.allocateNew();
+ BigDecimal decimal1 = new BigDecimal("123456789.000000000");
+ BigDecimal decimal2 = new BigDecimal("11.123456789");
+ BigDecimal decimal3 = new BigDecimal("1.000000000");
+ BigDecimal decimal4 = new BigDecimal("-0.111111111");
+ BigDecimal decimal5 = new BigDecimal("-987654321.123456789");
+ BigDecimal decimal6 = new BigDecimal("-222222222222.222222222");
+ BigDecimal decimal7 = new BigDecimal("7777777777777.666666667");
+ BigDecimal decimal8 = new BigDecimal("1212121212.343434343");
+
+ decimalVector.set(0, decimal1);
+ decimalVector.set(1, decimal2);
+ decimalVector.set(2, decimal3);
+ decimalVector.set(3, decimal4);
+ decimalVector.set(4, decimal5);
+ decimalVector.set(5, decimal6);
+ decimalVector.set(6, decimal7);
+ decimalVector.set(7, decimal8);
+
+ decimalVector.setValueCount(8);
+ assertEquals(8, decimalVector.getValueCount());
+ assertEquals(decimal1, decimalVector.getObject(0));
+ assertEquals(decimal2, decimalVector.getObject(1));
+ assertEquals(decimal3, decimalVector.getObject(2));
+ assertEquals(decimal4, decimalVector.getObject(3));
+ assertEquals(decimal5, decimalVector.getObject(4));
+ assertEquals(decimal6, decimalVector.getObject(5));
+ assertEquals(decimal7, decimalVector.getObject(6));
+ assertEquals(decimal8, decimalVector.getObject(7));
+ }
+ }
+
+ /**
+ * Test {@link Decimal256Vector#setBigEndian(int, byte[])} which takes BE layout input and stores in native-endian
+ * (NE) layout.
+ * Cases to cover: input byte array in different lengths in range [1-16] and negative values.
+ */
+ @Test
+ public void decimalBE2NE() {
+ try (Decimal256Vector decimalVector = TestUtils.newVector(Decimal256Vector.class, "decimal",
+ new ArrowType.Decimal(23, 2, 256), allocator)) {
+ decimalVector.allocateNew();
+
+ BigInteger[] testBigInts = new BigInteger[] {
+ new BigInteger("0"),
+ new BigInteger("-1"),
+ new BigInteger("23"),
+ new BigInteger("234234"),
+ new BigInteger("-234234234"),
+ new BigInteger("234234234234"),
+ new BigInteger("-56345345345345"),
+ new BigInteger("2982346298346289346293467923465345634500"), // converts to 16+ byte array
+ new BigInteger("-389457298347598237459832459823434653600"), // converts to 16+ byte array
+ new BigInteger("-345345"),
+ new BigInteger("754533")
+ };
+
+ int insertionIdx = 0;
+ insertionIdx++; // insert a null
+ for (BigInteger val : testBigInts) {
+ decimalVector.setBigEndian(insertionIdx++, val.toByteArray());
+ }
+ insertionIdx++; // insert a null
+ // insert a zero length buffer
+ decimalVector.setBigEndian(insertionIdx++, new byte[0]);
+
+ // Try inserting a buffer larger than 33 bytes and expect a failure
+ final int insertionIdxCapture = insertionIdx;
+ IllegalArgumentException ex = assertThrows(IllegalArgumentException.class,
+ () -> decimalVector.setBigEndian(insertionIdxCapture, new byte[33]));
+ assertTrue(ex.getMessage().equals("Invalid decimal value length. Valid length in [1 - 32], got 33"));
+ decimalVector.setValueCount(insertionIdx);
+
+ // retrieve values and check if they are correct
+ int outputIdx = 0;
+ assertTrue(decimalVector.isNull(outputIdx++));
+ for (BigInteger expected : testBigInts) {
+ final BigDecimal actual = decimalVector.getObject(outputIdx++);
+ assertEquals(expected, actual.unscaledValue());
+ }
+ assertTrue(decimalVector.isNull(outputIdx++));
+ assertEquals(BigInteger.valueOf(0), decimalVector.getObject(outputIdx).unscaledValue());
+ }
+ }
+
+ @Test
+ public void setUsingArrowBufOfLEInts() {
+ try (Decimal256Vector decimalVector = TestUtils.newVector(Decimal256Vector.class, "decimal",
+ new ArrowType.Decimal(5, 2, 256), allocator);
+ ArrowBuf buf = allocator.buffer(8);) {
+ decimalVector.allocateNew();
+
+ // add a positive value equivalent to 705.32
+ int val = 70532;
+ buf.setInt(0, val);
+ decimalVector.setSafe(0, 0, buf, 4);
+
+ // add a -ve value equivalent to -705.32
+ val = -70532;
+ buf.setInt(4, val);
+ decimalVector.setSafe(1, 4, buf, 4);
+
+ decimalVector.setValueCount(2);
+
+ BigDecimal [] expectedValues = new BigDecimal[] {BigDecimal.valueOf(705.32), BigDecimal
+ .valueOf(-705.32)};
+ for (int i = 0; i < 2; i ++) {
+ BigDecimal value = decimalVector.getObject(i);
+ assertEquals(expectedValues[i], value);
+ }
+ }
+
+ }
+
+ @Test
+ public void setUsingArrowLongLEBytes() {
+ try (Decimal256Vector decimalVector = TestUtils.newVector(Decimal256Vector.class, "decimal",
+ new ArrowType.Decimal(18, 0, 256), allocator);
+ ArrowBuf buf = allocator.buffer(16);) {
+ decimalVector.allocateNew();
+
+ long val = Long.MAX_VALUE;
+ buf.setLong(0, val);
+ decimalVector.setSafe(0, 0, buf, 8);
+
+ val = Long.MIN_VALUE;
+ buf.setLong(8, val);
+ decimalVector.setSafe(1, 8, buf, 8);
+
+ decimalVector.setValueCount(2);
+
+ BigDecimal [] expectedValues = new BigDecimal[] {BigDecimal.valueOf(Long.MAX_VALUE), BigDecimal
+ .valueOf(Long.MIN_VALUE)};
+ for (int i = 0; i < 2; i ++) {
+ BigDecimal value = decimalVector.getObject(i);
+ assertEquals(expectedValues[i], value);
+ }
+ }
+ }
+
+ @Test
+ public void setUsingArrowBufOfBEBytes() {
+ try (Decimal256Vector decimalVector = TestUtils.newVector(Decimal256Vector.class, "decimal",
+ new ArrowType.Decimal(5, 2, 256), allocator);
+ ArrowBuf buf = allocator.buffer(9);) {
+ BigDecimal [] expectedValues = new BigDecimal[] {BigDecimal.valueOf(705.32), BigDecimal
+ .valueOf(-705.32), BigDecimal.valueOf(705.32)};
+ verifyWritingArrowBufWithBigEndianBytes(decimalVector, buf, expectedValues, 3);
+ }
+
+ try (Decimal256Vector decimalVector = TestUtils.newVector(Decimal256Vector.class, "decimal",
+ new ArrowType.Decimal(43, 2, 256), allocator);
+ ArrowBuf buf = allocator.buffer(45);) {
+ BigDecimal[] expectedValues = new BigDecimal[] {new BigDecimal("29823462983462893462934679234653450000000.63"),
+ new BigDecimal("-2982346298346289346293467923465345.63"),
+ new BigDecimal("2982346298346289346293467923465345.63")};
+ verifyWritingArrowBufWithBigEndianBytes(decimalVector, buf, expectedValues, 15);
+ }
+ }
+
+ private void verifyWritingArrowBufWithBigEndianBytes(Decimal256Vector decimalVector,
+ ArrowBuf buf, BigDecimal[] expectedValues,
+ int length) {
+ decimalVector.allocateNew();
+ for (int i = 0; i < expectedValues.length; i++) {
+ byte[] bigEndianBytes = expectedValues[i].unscaledValue().toByteArray();
+ buf.setBytes(length * i , bigEndianBytes, 0 , bigEndianBytes.length);
+ decimalVector.setBigEndianSafe(i, length * i, buf, bigEndianBytes.length);
+ }
+
+ decimalVector.setValueCount(3);
+
+ for (int i = 0; i < expectedValues.length; i ++) {
+ BigDecimal value = decimalVector.getObject(i);
+ assertEquals(expectedValues[i], value);
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestDecimalVector.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestDecimalVector.java
new file mode 100644
index 000000000..c7e3e436e
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestDecimalVector.java
@@ -0,0 +1,365 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import java.math.BigDecimal;
+import java.math.BigInteger;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestDecimalVector {
+
+ private static long[] intValues;
+
+ static {
+ intValues = new long[60];
+ for (int i = 0; i < intValues.length / 2; i++) {
+ intValues[i] = 1 << i + 1;
+ intValues[2 * i] = -1 * (1 << i + 1);
+ }
+ }
+
+ private int scale = 3;
+
+ private BufferAllocator allocator;
+
+ @Before
+ public void init() {
+ allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100);
+ }
+
+ @After
+ public void terminate() throws Exception {
+ allocator.close();
+ }
+
+ @Test
+ public void testValuesWriteRead() {
+ try (DecimalVector decimalVector = TestUtils.newVector(DecimalVector.class, "decimal",
+ new ArrowType.Decimal(10, scale, 128), allocator);) {
+
+ try (DecimalVector oldConstructor = new DecimalVector("decimal", allocator, 10, scale);) {
+ assertEquals(decimalVector.getField().getType(), oldConstructor.getField().getType());
+ }
+
+ decimalVector.allocateNew();
+ BigDecimal[] values = new BigDecimal[intValues.length];
+ for (int i = 0; i < intValues.length; i++) {
+ BigDecimal decimal = new BigDecimal(BigInteger.valueOf(intValues[i]), scale);
+ values[i] = decimal;
+ decimalVector.setSafe(i, decimal);
+ }
+
+ decimalVector.setValueCount(intValues.length);
+
+ for (int i = 0; i < intValues.length; i++) {
+ BigDecimal value = decimalVector.getObject(i);
+ assertEquals("unexpected data at index: " + i, values[i], value);
+ }
+ }
+ }
+
+ @Test
+ public void testBigDecimalDifferentScaleAndPrecision() {
+ try (DecimalVector decimalVector = TestUtils.newVector(DecimalVector.class, "decimal",
+ new ArrowType.Decimal(4, 2, 128), allocator);) {
+ decimalVector.allocateNew();
+
+ // test BigDecimal with different scale
+ boolean hasError = false;
+ try {
+ BigDecimal decimal = new BigDecimal(BigInteger.valueOf(0), 3);
+ decimalVector.setSafe(0, decimal);
+ } catch (UnsupportedOperationException ue) {
+ hasError = true;
+ } finally {
+ assertTrue(hasError);
+ }
+
+ // test BigDecimal with larger precision than initialized
+ hasError = false;
+ try {
+ BigDecimal decimal = new BigDecimal(BigInteger.valueOf(12345), 2);
+ decimalVector.setSafe(0, decimal);
+ } catch (UnsupportedOperationException ue) {
+ hasError = true;
+ } finally {
+ assertTrue(hasError);
+ }
+ }
+ }
+
+ @Test
+ public void testWriteBigEndian() {
+ try (DecimalVector decimalVector = TestUtils.newVector(DecimalVector.class, "decimal",
+ new ArrowType.Decimal(38, 9, 128), allocator);) {
+ decimalVector.allocateNew();
+ BigDecimal decimal1 = new BigDecimal("123456789.000000000");
+ BigDecimal decimal2 = new BigDecimal("11.123456789");
+ BigDecimal decimal3 = new BigDecimal("1.000000000");
+ BigDecimal decimal4 = new BigDecimal("0.111111111");
+ BigDecimal decimal5 = new BigDecimal("987654321.123456789");
+ BigDecimal decimal6 = new BigDecimal("222222222222.222222222");
+ BigDecimal decimal7 = new BigDecimal("7777777777777.666666667");
+ BigDecimal decimal8 = new BigDecimal("1212121212.343434343");
+
+ byte[] decimalValue1 = decimal1.unscaledValue().toByteArray();
+ byte[] decimalValue2 = decimal2.unscaledValue().toByteArray();
+ byte[] decimalValue3 = decimal3.unscaledValue().toByteArray();
+ byte[] decimalValue4 = decimal4.unscaledValue().toByteArray();
+ byte[] decimalValue5 = decimal5.unscaledValue().toByteArray();
+ byte[] decimalValue6 = decimal6.unscaledValue().toByteArray();
+ byte[] decimalValue7 = decimal7.unscaledValue().toByteArray();
+ byte[] decimalValue8 = decimal8.unscaledValue().toByteArray();
+
+ decimalVector.setBigEndian(0, decimalValue1);
+ decimalVector.setBigEndian(1, decimalValue2);
+ decimalVector.setBigEndian(2, decimalValue3);
+ decimalVector.setBigEndian(3, decimalValue4);
+ decimalVector.setBigEndian(4, decimalValue5);
+ decimalVector.setBigEndian(5, decimalValue6);
+ decimalVector.setBigEndian(6, decimalValue7);
+ decimalVector.setBigEndian(7, decimalValue8);
+
+ decimalVector.setValueCount(8);
+ assertEquals(8, decimalVector.getValueCount());
+ assertEquals(decimal1, decimalVector.getObject(0));
+ assertEquals(decimal2, decimalVector.getObject(1));
+ assertEquals(decimal3, decimalVector.getObject(2));
+ assertEquals(decimal4, decimalVector.getObject(3));
+ assertEquals(decimal5, decimalVector.getObject(4));
+ assertEquals(decimal6, decimalVector.getObject(5));
+ assertEquals(decimal7, decimalVector.getObject(6));
+ assertEquals(decimal8, decimalVector.getObject(7));
+ }
+ }
+
+ @Test
+ public void testLongReadWrite() {
+ try (DecimalVector decimalVector = TestUtils.newVector(DecimalVector.class, "decimal",
+ new ArrowType.Decimal(38, 0, 128), allocator)) {
+ decimalVector.allocateNew();
+
+ long[] longValues = {0L, -2L, Long.MAX_VALUE, Long.MIN_VALUE, 187L};
+
+ for (int i = 0; i < longValues.length; ++i) {
+ decimalVector.set(i, longValues[i]);
+ }
+
+ decimalVector.setValueCount(longValues.length);
+
+ for (int i = 0; i < longValues.length; ++i) {
+ assertEquals(new BigDecimal(longValues[i]), decimalVector.getObject(i));
+ }
+ }
+ }
+
+
+ @Test
+ public void testBigDecimalReadWrite() {
+ try (DecimalVector decimalVector = TestUtils.newVector(DecimalVector.class, "decimal",
+ new ArrowType.Decimal(38, 9, 128), allocator);) {
+ decimalVector.allocateNew();
+ BigDecimal decimal1 = new BigDecimal("123456789.000000000");
+ BigDecimal decimal2 = new BigDecimal("11.123456789");
+ BigDecimal decimal3 = new BigDecimal("1.000000000");
+ BigDecimal decimal4 = new BigDecimal("-0.111111111");
+ BigDecimal decimal5 = new BigDecimal("-987654321.123456789");
+ BigDecimal decimal6 = new BigDecimal("-222222222222.222222222");
+ BigDecimal decimal7 = new BigDecimal("7777777777777.666666667");
+ BigDecimal decimal8 = new BigDecimal("1212121212.343434343");
+
+ decimalVector.set(0, decimal1);
+ decimalVector.set(1, decimal2);
+ decimalVector.set(2, decimal3);
+ decimalVector.set(3, decimal4);
+ decimalVector.set(4, decimal5);
+ decimalVector.set(5, decimal6);
+ decimalVector.set(6, decimal7);
+ decimalVector.set(7, decimal8);
+
+ decimalVector.setValueCount(8);
+ assertEquals(8, decimalVector.getValueCount());
+ assertEquals(decimal1, decimalVector.getObject(0));
+ assertEquals(decimal2, decimalVector.getObject(1));
+ assertEquals(decimal3, decimalVector.getObject(2));
+ assertEquals(decimal4, decimalVector.getObject(3));
+ assertEquals(decimal5, decimalVector.getObject(4));
+ assertEquals(decimal6, decimalVector.getObject(5));
+ assertEquals(decimal7, decimalVector.getObject(6));
+ assertEquals(decimal8, decimalVector.getObject(7));
+ }
+ }
+
+ /**
+ * Test {@link DecimalVector#setBigEndian(int, byte[])} which takes BE layout input and stores in native-endian (NE)
+ * layout.
+ * Cases to cover: input byte array in different lengths in range [1-16] and negative values.
+ */
+ @Test
+ public void decimalBE2NE() {
+ try (DecimalVector decimalVector = TestUtils.newVector(DecimalVector.class, "decimal",
+ new ArrowType.Decimal(21, 2, 128), allocator)) {
+ decimalVector.allocateNew();
+
+ BigInteger[] testBigInts = new BigInteger[] {
+ new BigInteger("0"),
+ new BigInteger("-1"),
+ new BigInteger("23"),
+ new BigInteger("234234"),
+ new BigInteger("-234234234"),
+ new BigInteger("234234234234"),
+ new BigInteger("-56345345345345"),
+ new BigInteger("29823462983462893462934679234653456345"), // converts to 16 byte array
+ new BigInteger("-3894572983475982374598324598234346536"), // converts to 16 byte array
+ new BigInteger("-345345"),
+ new BigInteger("754533")
+ };
+
+ int insertionIdx = 0;
+ insertionIdx++; // insert a null
+ for (BigInteger val : testBigInts) {
+ decimalVector.setBigEndian(insertionIdx++, val.toByteArray());
+ }
+ insertionIdx++; // insert a null
+ // insert a zero length buffer
+ decimalVector.setBigEndian(insertionIdx++, new byte[0]);
+
+ // Try inserting a buffer larger than 16bytes and expect a failure
+ try {
+ decimalVector.setBigEndian(insertionIdx, new byte[17]);
+ fail("above statement should have failed");
+ } catch (IllegalArgumentException ex) {
+ assertTrue(ex.getMessage().equals("Invalid decimal value length. Valid length in [1 - 16], got 17"));
+ }
+ decimalVector.setValueCount(insertionIdx);
+
+ // retrieve values and check if they are correct
+ int outputIdx = 0;
+ assertTrue(decimalVector.isNull(outputIdx++));
+ for (BigInteger expected : testBigInts) {
+ final BigDecimal actual = decimalVector.getObject(outputIdx++);
+ assertEquals(expected, actual.unscaledValue());
+ }
+ assertTrue(decimalVector.isNull(outputIdx++));
+ assertEquals(BigInteger.valueOf(0), decimalVector.getObject(outputIdx).unscaledValue());
+ }
+ }
+
+ @Test
+ public void setUsingArrowBufOfInts() {
+ try (DecimalVector decimalVector = TestUtils.newVector(DecimalVector.class, "decimal",
+ new ArrowType.Decimal(5, 2, 128), allocator);
+ ArrowBuf buf = allocator.buffer(8);) {
+ decimalVector.allocateNew();
+
+ // add a positive value equivalent to 705.32
+ int val = 70532;
+ buf.setInt(0, val);
+ decimalVector.setSafe(0, 0, buf, 4);
+
+ // add a -ve value equivalent to -705.32
+ val = -70532;
+ buf.setInt(4, val);
+ decimalVector.setSafe(1, 4, buf, 4);
+
+ decimalVector.setValueCount(2);
+
+ BigDecimal [] expectedValues = new BigDecimal[] {BigDecimal.valueOf(705.32), BigDecimal
+ .valueOf(-705.32)};
+ for (int i = 0; i < 2; i ++) {
+ BigDecimal value = decimalVector.getObject(i);
+ assertEquals(expectedValues[i], value);
+ }
+ }
+
+ }
+
+ @Test
+ public void setUsingArrowLongBytes() {
+ try (DecimalVector decimalVector = TestUtils.newVector(DecimalVector.class, "decimal",
+ new ArrowType.Decimal(18, 0, 128), allocator);
+ ArrowBuf buf = allocator.buffer(16);) {
+ decimalVector.allocateNew();
+
+ long val = Long.MAX_VALUE;
+ buf.setLong(0, val);
+ decimalVector.setSafe(0, 0, buf, 8);
+
+ val = Long.MIN_VALUE;
+ buf.setLong(8, val);
+ decimalVector.setSafe(1, 8, buf, 8);
+
+ decimalVector.setValueCount(2);
+
+ BigDecimal [] expectedValues = new BigDecimal[] {BigDecimal.valueOf(Long.MAX_VALUE), BigDecimal
+ .valueOf(Long.MIN_VALUE)};
+ for (int i = 0; i < 2; i ++) {
+ BigDecimal value = decimalVector.getObject(i);
+ assertEquals(expectedValues[i], value);
+ }
+ }
+ }
+
+ @Test
+ public void setUsingArrowBufOfBEBytes() {
+ try (DecimalVector decimalVector = TestUtils.newVector(DecimalVector.class, "decimal",
+ new ArrowType.Decimal(5, 2, 128), allocator);
+ ArrowBuf buf = allocator.buffer(9);) {
+ BigDecimal [] expectedValues = new BigDecimal[] {BigDecimal.valueOf(705.32), BigDecimal
+ .valueOf(-705.32), BigDecimal.valueOf(705.32)};
+ verifyWritingArrowBufWithBigEndianBytes(decimalVector, buf, expectedValues, 3);
+ }
+
+ try (DecimalVector decimalVector = TestUtils.newVector(DecimalVector.class, "decimal",
+ new ArrowType.Decimal(36, 2, 128), allocator);
+ ArrowBuf buf = allocator.buffer(45);) {
+ BigDecimal[] expectedValues = new BigDecimal[] {new BigDecimal("2982346298346289346293467923465345.63"),
+ new BigDecimal("-2982346298346289346293467923465345.63"),
+ new BigDecimal("2982346298346289346293467923465345.63")};
+ verifyWritingArrowBufWithBigEndianBytes(decimalVector, buf, expectedValues, 15);
+ }
+ }
+
+ private void verifyWritingArrowBufWithBigEndianBytes(DecimalVector decimalVector,
+ ArrowBuf buf, BigDecimal[] expectedValues,
+ int length) {
+ decimalVector.allocateNew();
+ for (int i = 0; i < expectedValues.length; i++) {
+ byte []bigEndianBytes = expectedValues[i].unscaledValue().toByteArray();
+ buf.setBytes(length * i , bigEndianBytes, 0 , bigEndianBytes.length);
+ decimalVector.setBigEndianSafe(i, length * i, buf, bigEndianBytes.length);
+ }
+
+ decimalVector.setValueCount(3);
+
+ for (int i = 0; i < expectedValues.length; i ++) {
+ BigDecimal value = decimalVector.getObject(i);
+ assertEquals(expectedValues[i], value);
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestDenseUnionVector.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestDenseUnionVector.java
new file mode 100644
index 000000000..01becf007
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestDenseUnionVector.java
@@ -0,0 +1,639 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.complex.DenseUnionVector;
+import org.apache.arrow.vector.complex.StructVector;
+import org.apache.arrow.vector.complex.VectorWithOrdinal;
+import org.apache.arrow.vector.holders.NullableBigIntHolder;
+import org.apache.arrow.vector.holders.NullableBitHolder;
+import org.apache.arrow.vector.holders.NullableFloat4Holder;
+import org.apache.arrow.vector.holders.NullableIntHolder;
+import org.apache.arrow.vector.holders.NullableUInt4Holder;
+import org.apache.arrow.vector.testing.ValueVectorDataPopulator;
+import org.apache.arrow.vector.types.Types;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.UnionMode;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.JsonStringHashMap;
+import org.apache.arrow.vector.util.Text;
+import org.apache.arrow.vector.util.TransferPair;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestDenseUnionVector {
+ private static final String EMPTY_SCHEMA_PATH = "";
+
+ private BufferAllocator allocator;
+
+ @Before
+ public void init() {
+ allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100);
+ }
+
+ @After
+ public void terminate() throws Exception {
+ allocator.close();
+ }
+
+ @Test
+ public void testDenseUnionVector() throws Exception {
+
+ final NullableUInt4Holder uInt4Holder = new NullableUInt4Holder();
+ uInt4Holder.value = 100;
+ uInt4Holder.isSet = 1;
+
+ try (DenseUnionVector unionVector = new DenseUnionVector(EMPTY_SCHEMA_PATH, allocator, null, null)) {
+ unionVector.allocateNew();
+
+ // write some data
+ byte uint4TypeId = unionVector.registerNewTypeId(Field.nullable("", MinorType.UINT4.getType()));
+ unionVector.setTypeId(0, uint4TypeId);
+ unionVector.setSafe(0, uInt4Holder);
+ unionVector.setTypeId(2, uint4TypeId);
+ unionVector.setSafe(2, uInt4Holder);
+ unionVector.setValueCount(4);
+
+ // check that what we wrote is correct
+ assertEquals(4, unionVector.getValueCount());
+
+ assertEquals(false, unionVector.isNull(0));
+ assertEquals(100, unionVector.getObject(0));
+
+ assertNull(unionVector.getObject(1));
+
+ assertEquals(false, unionVector.isNull(2));
+ assertEquals(100, unionVector.getObject(2));
+
+ assertNull(unionVector.getObject(3));
+ }
+ }
+
+ @Test
+ public void testTransfer() throws Exception {
+ try (DenseUnionVector srcVector = new DenseUnionVector(EMPTY_SCHEMA_PATH, allocator, null, null)) {
+ srcVector.allocateNew();
+
+ // write some data
+ byte intTypeId = srcVector.registerNewTypeId(Field.nullable("", MinorType.INT.getType()));
+ srcVector.setTypeId(0, intTypeId);
+ srcVector.setSafe(0, newIntHolder(5));
+ byte bitTypeId = srcVector.registerNewTypeId(Field.nullable("", MinorType.BIT.getType()));
+ srcVector.setTypeId(1, bitTypeId);
+ srcVector.setSafe(1, newBitHolder(false));
+ srcVector.setTypeId(3, intTypeId);
+ srcVector.setSafe(3, newIntHolder(10));
+ srcVector.setTypeId(5, bitTypeId);
+ srcVector.setSafe(5, newBitHolder(false));
+ srcVector.setValueCount(6);
+
+ try (DenseUnionVector destVector = new DenseUnionVector(EMPTY_SCHEMA_PATH, allocator, null, null)) {
+ TransferPair pair = srcVector.makeTransferPair(destVector);
+
+ // Creating the transfer should transfer the type of the field at least.
+ assertEquals(srcVector.getField(), destVector.getField());
+
+ // transfer
+ pair.transfer();
+
+ assertEquals(srcVector.getField(), destVector.getField());
+
+ // now check the values are transferred
+ assertEquals(6, destVector.getValueCount());
+
+ assertFalse(destVector.isNull(0));
+ assertEquals(5, destVector.getObject(0));
+
+ assertFalse(destVector.isNull(1));
+ assertEquals(false, destVector.getObject(1));
+
+ assertNull(destVector.getObject(2));
+
+ assertFalse(destVector.isNull(3));
+ assertEquals(10, destVector.getObject(3));
+
+ assertNull(destVector.getObject(4));
+
+ assertFalse(destVector.isNull(5));
+ assertEquals(false, destVector.getObject(5));
+ }
+ }
+ }
+
+ @Test
+ public void testSplitAndTransfer() throws Exception {
+ try (DenseUnionVector sourceVector = new DenseUnionVector(EMPTY_SCHEMA_PATH, allocator, null, null)) {
+
+ sourceVector.allocateNew();
+
+ /* populate the UnionVector */
+ byte intTypeId = sourceVector.registerNewTypeId(Field.nullable("", MinorType.INT.getType()));
+ sourceVector.setTypeId(0, intTypeId);
+ sourceVector.setSafe(0, newIntHolder(5));
+ sourceVector.setTypeId(1, intTypeId);
+ sourceVector.setSafe(1, newIntHolder(10));
+ sourceVector.setTypeId(2, intTypeId);
+ sourceVector.setSafe(2, newIntHolder(15));
+ sourceVector.setTypeId(3, intTypeId);
+ sourceVector.setSafe(3, newIntHolder(20));
+ sourceVector.setTypeId(4, intTypeId);
+ sourceVector.setSafe(4, newIntHolder(25));
+ sourceVector.setTypeId(5, intTypeId);
+ sourceVector.setSafe(5, newIntHolder(30));
+ sourceVector.setTypeId(6, intTypeId);
+ sourceVector.setSafe(6, newIntHolder(35));
+ sourceVector.setTypeId(7, intTypeId);
+ sourceVector.setSafe(7, newIntHolder(40));
+ sourceVector.setTypeId(8, intTypeId);
+ sourceVector.setSafe(8, newIntHolder(45));
+ sourceVector.setTypeId(9, intTypeId);
+ sourceVector.setSafe(9, newIntHolder(50));
+ sourceVector.setValueCount(10);
+
+ /* check the vector output */
+ assertEquals(10, sourceVector.getValueCount());
+ assertEquals(false, sourceVector.isNull(0));
+ assertEquals(5, sourceVector.getObject(0));
+ assertEquals(false, sourceVector.isNull(1));
+ assertEquals(10, sourceVector.getObject(1));
+ assertEquals(false, sourceVector.isNull(2));
+ assertEquals(15, sourceVector.getObject(2));
+ assertEquals(false, sourceVector.isNull(3));
+ assertEquals(20, sourceVector.getObject(3));
+ assertEquals(false, sourceVector.isNull(4));
+ assertEquals(25, sourceVector.getObject(4));
+ assertEquals(false, sourceVector.isNull(5));
+ assertEquals(30, sourceVector.getObject(5));
+ assertEquals(false, sourceVector.isNull(6));
+ assertEquals(35, sourceVector.getObject(6));
+ assertEquals(false, sourceVector.isNull(7));
+ assertEquals(40, sourceVector.getObject(7));
+ assertEquals(false, sourceVector.isNull(8));
+ assertEquals(45, sourceVector.getObject(8));
+ assertEquals(false, sourceVector.isNull(9));
+ assertEquals(50, sourceVector.getObject(9));
+
+ try (DenseUnionVector toVector = new DenseUnionVector(EMPTY_SCHEMA_PATH, allocator, null, null)) {
+ toVector.registerNewTypeId(Field.nullable("", MinorType.INT.getType()));
+
+ final TransferPair transferPair = sourceVector.makeTransferPair(toVector);
+
+ final int[][] transferLengths = {{0, 3},
+ {3, 1},
+ {4, 2},
+ {6, 1},
+ {7, 1},
+ {8, 2}
+ };
+
+ for (final int[] transferLength : transferLengths) {
+ final int start = transferLength[0];
+ final int length = transferLength[1];
+
+ transferPair.splitAndTransfer(start, length);
+
+ /* check the toVector output after doing the splitAndTransfer */
+ for (int i = 0; i < length; i++) {
+ assertEquals("Different data at indexes: " + (start + i) + "and " + i, sourceVector.getObject(start + i),
+ toVector.getObject(i));
+ }
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testSplitAndTransferWithMixedVectors() throws Exception {
+ try (DenseUnionVector sourceVector = new DenseUnionVector(EMPTY_SCHEMA_PATH, allocator, null, null)) {
+
+ sourceVector.allocateNew();
+
+ /* populate the UnionVector */
+ byte intTypeId = sourceVector.registerNewTypeId(Field.nullable("", MinorType.INT.getType()));
+
+ sourceVector.setTypeId(0, intTypeId);
+ sourceVector.setSafe(0, newIntHolder(5));
+
+ byte float4TypeId = sourceVector.registerNewTypeId(Field.nullable("", MinorType.FLOAT4.getType()));
+
+ sourceVector.setTypeId(1, float4TypeId);
+ sourceVector.setSafe(1, newFloat4Holder(5.5f));
+
+ sourceVector.setTypeId(2, intTypeId);
+ sourceVector.setSafe(2, newIntHolder(10));
+
+ sourceVector.setTypeId(3, float4TypeId);
+ sourceVector.setSafe(3, newFloat4Holder(10.5f));
+
+ sourceVector.setTypeId(4, intTypeId);
+ sourceVector.setSafe(4, newIntHolder(15));
+
+ sourceVector.setTypeId(5, float4TypeId);
+ sourceVector.setSafe(5, newFloat4Holder(15.5f));
+
+ sourceVector.setTypeId(6, intTypeId);
+ sourceVector.setSafe(6, newIntHolder(20));
+
+ sourceVector.setTypeId(7, float4TypeId);
+ sourceVector.setSafe(7, newFloat4Holder(20.5f));
+
+ sourceVector.setTypeId(8, intTypeId);
+ sourceVector.setSafe(8, newIntHolder(30));
+
+ sourceVector.setTypeId(9, float4TypeId);
+ sourceVector.setSafe(9, newFloat4Holder(30.5f));
+ sourceVector.setValueCount(10);
+
+ /* check the vector output */
+ assertEquals(10, sourceVector.getValueCount());
+ assertEquals(false, sourceVector.isNull(0));
+ assertEquals(5, sourceVector.getObject(0));
+ assertEquals(false, sourceVector.isNull(1));
+ assertEquals(5.5f, sourceVector.getObject(1));
+ assertEquals(false, sourceVector.isNull(2));
+ assertEquals(10, sourceVector.getObject(2));
+ assertEquals(false, sourceVector.isNull(3));
+ assertEquals(10.5f, sourceVector.getObject(3));
+ assertEquals(false, sourceVector.isNull(4));
+ assertEquals(15, sourceVector.getObject(4));
+ assertEquals(false, sourceVector.isNull(5));
+ assertEquals(15.5f, sourceVector.getObject(5));
+ assertEquals(false, sourceVector.isNull(6));
+ assertEquals(20, sourceVector.getObject(6));
+ assertEquals(false, sourceVector.isNull(7));
+ assertEquals(20.5f, sourceVector.getObject(7));
+ assertEquals(false, sourceVector.isNull(8));
+ assertEquals(30, sourceVector.getObject(8));
+ assertEquals(false, sourceVector.isNull(9));
+ assertEquals(30.5f, sourceVector.getObject(9));
+
+ try (DenseUnionVector toVector = new DenseUnionVector(EMPTY_SCHEMA_PATH, allocator, null, null)) {
+ toVector.registerNewTypeId(Field.nullable("", MinorType.INT.getType()));
+ toVector.registerNewTypeId(Field.nullable("", MinorType.FLOAT4.getType()));
+
+ final TransferPair transferPair = sourceVector.makeTransferPair(toVector);
+
+ final int[][] transferLengths = {{0, 2},
+ {2, 1},
+ {3, 2},
+ {5, 3},
+ {8, 2}
+ };
+
+ for (final int[] transferLength : transferLengths) {
+ final int start = transferLength[0];
+ final int length = transferLength[1];
+
+ transferPair.splitAndTransfer(start, length);
+
+ /* check the toVector output after doing the splitAndTransfer */
+ for (int i = 0; i < length; i++) {
+ assertEquals("Different values at index: " + i, sourceVector.getObject(start + i), toVector.getObject(i));
+ }
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testGetFieldTypeInfo() throws Exception {
+ Map<String, String> metadata = new HashMap<>();
+ metadata.put("key1", "value1");
+
+ int[] typeIds = new int[2];
+ typeIds[0] = 0;
+ typeIds[1] = 1;
+
+ List<Field> children = new ArrayList<>();
+ children.add(new Field("int", FieldType.nullable(MinorType.INT.getType()), null));
+ children.add(new Field("varchar", FieldType.nullable(MinorType.VARCHAR.getType()), null));
+
+ final FieldType fieldType = new FieldType(false, new ArrowType.Union(UnionMode.Dense, typeIds),
+ /*dictionary=*/null, metadata);
+ final Field field = new Field("union", fieldType, children);
+
+ MinorType minorType = MinorType.DENSEUNION;
+ DenseUnionVector vector = (DenseUnionVector) minorType.getNewVector(field, allocator, null);
+ vector.initializeChildrenFromFields(children);
+
+ assertEquals(vector.getField(), field);
+
+ // Union has 2 child vectors
+ assertEquals(vector.size(), 2);
+
+ // Check child field 0
+ VectorWithOrdinal intChild = vector.getChildVectorWithOrdinal("int");
+ assertEquals(intChild.ordinal, 0);
+ assertEquals(intChild.vector.getField(), children.get(0));
+
+ // Check child field 1
+ VectorWithOrdinal varcharChild = vector.getChildVectorWithOrdinal("varchar");
+ assertEquals(varcharChild.ordinal, 1);
+ assertEquals(varcharChild.vector.getField(), children.get(1));
+ }
+
+ @Test
+ public void testGetBufferAddress() throws Exception {
+ try (DenseUnionVector vector = new DenseUnionVector(EMPTY_SCHEMA_PATH, allocator, null, null)) {
+ boolean error = false;
+
+ vector.allocateNew();
+
+ /* populate the UnionVector */
+ byte intTypeId = vector.registerNewTypeId(Field.nullable("", MinorType.INT.getType()));
+ vector.setTypeId(0, intTypeId);
+ vector.setSafe(0, newIntHolder(5));
+
+ byte float4TypeId = vector.registerNewTypeId(Field.nullable("", MinorType.INT.getType()));
+ vector.setTypeId(1, float4TypeId);
+ vector.setSafe(1, newFloat4Holder(5.5f));
+
+ vector.setTypeId(2, intTypeId);
+ vector.setSafe(2, newIntHolder(10));
+
+ vector.setTypeId(3, float4TypeId);
+ vector.setSafe(3, newFloat4Holder(10.5f));
+
+ vector.setValueCount(10);
+
+ /* check the vector output */
+ assertEquals(10, vector.getValueCount());
+ assertEquals(false, vector.isNull(0));
+ assertEquals(5, vector.getObject(0));
+ assertEquals(false, vector.isNull(1));
+ assertEquals(5.5f, vector.getObject(1));
+ assertEquals(false, vector.isNull(2));
+ assertEquals(10, vector.getObject(2));
+ assertEquals(false, vector.isNull(3));
+ assertEquals(10.5f, vector.getObject(3));
+
+ List<ArrowBuf> buffers = vector.getFieldBuffers();
+
+ long offsetAddress = vector.getOffsetBufferAddress();
+
+ try {
+ vector.getDataBufferAddress();
+ } catch (UnsupportedOperationException ue) {
+ error = true;
+ } finally {
+ assertTrue(error);
+ }
+
+ assertEquals(2, buffers.size());
+ assertEquals(offsetAddress, buffers.get(1).memoryAddress());
+ }
+ }
+
+ /**
+ * Test adding two struct vectors to the dense union vector.
+ */
+ @Test
+ public void testMultipleStructs() {
+ FieldType type = new FieldType(true, ArrowType.Struct.INSTANCE, null, null);
+ try (StructVector structVector1 = new StructVector("struct1", allocator, type, null);
+ StructVector structVector2 = new StructVector("struct2", allocator, type, null);
+ DenseUnionVector unionVector = DenseUnionVector.empty("union", allocator)) {
+
+ // prepare sub vectors
+
+ // first struct vector: (int, int)
+ IntVector subVector11 = structVector1
+ .addOrGet("sub11", FieldType.nullable(MinorType.INT.getType()), IntVector.class);
+ subVector11.allocateNew();
+ ValueVectorDataPopulator.setVector(subVector11, 0, 1);
+
+ IntVector subVector12 = structVector1
+ .addOrGet("sub12", FieldType.nullable(MinorType.INT.getType()), IntVector.class);
+ subVector12.allocateNew();
+ ValueVectorDataPopulator.setVector(subVector12, 0, 10);
+
+ structVector1.setIndexDefined(0);
+ structVector1.setIndexDefined(1);
+ structVector1.setValueCount(2);
+
+ // second struct vector: (string, string)
+ VarCharVector subVector21 = structVector2
+ .addOrGet("sub21", FieldType.nullable(MinorType.VARCHAR.getType()), VarCharVector.class);
+ subVector21.allocateNew();
+ ValueVectorDataPopulator.setVector(subVector21, "a0");
+
+ VarCharVector subVector22 = structVector2
+ .addOrGet("sub22", FieldType.nullable(MinorType.VARCHAR.getType()), VarCharVector.class);
+ subVector22.allocateNew();
+ ValueVectorDataPopulator.setVector(subVector22, "b0");
+
+ structVector2.setIndexDefined(0);
+ structVector2.setValueCount(1);
+
+ // register relative types
+ byte typeId1 = unionVector.registerNewTypeId(structVector1.getField());
+ byte typeId2 = unionVector.registerNewTypeId(structVector2.getField());
+ assertEquals(typeId1, 0);
+ assertEquals(typeId2, 1);
+
+ // add two struct vectors to union vector
+ unionVector.addVector(typeId1, structVector1);
+ unionVector.addVector(typeId2, structVector2);
+
+ while (unionVector.getValueCapacity() < 3) {
+ unionVector.reAlloc();
+ }
+
+ ArrowBuf offsetBuf = unionVector.getOffsetBuffer();
+
+ unionVector.setTypeId(0, typeId1);
+ offsetBuf.setInt(0, 0);
+
+ unionVector.setTypeId(1, typeId2);
+ offsetBuf.setInt(DenseUnionVector.OFFSET_WIDTH, 0);
+
+ unionVector.setTypeId(2, typeId1);
+ offsetBuf.setInt(DenseUnionVector.OFFSET_WIDTH * 2, 1);
+
+ unionVector.setValueCount(3);
+
+ Map<String, Integer> value0 = new JsonStringHashMap<>();
+ value0.put("sub11", 0);
+ value0.put("sub12", 0);
+
+ assertEquals(value0, unionVector.getObject(0));
+
+ Map<String, Text> value1 = new JsonStringHashMap<>();
+ value1.put("sub21", new Text("a0"));
+ value1.put("sub22", new Text("b0"));
+
+ assertEquals(value1, unionVector.getObject(1));
+
+ Map<String, Integer> value2 = new JsonStringHashMap<>();
+ value2.put("sub11", 1);
+ value2.put("sub12", 10);
+
+ assertEquals(value2, unionVector.getObject(2));
+ }
+ }
+
+ /**
+ * Test adding two varchar vectors to the dense union vector.
+ */
+ @Test
+ public void testMultipleVarChars() {
+ try (VarCharVector childVector1 = new VarCharVector("child1", allocator);
+ VarCharVector childVector2 = new VarCharVector("child2", allocator);
+ DenseUnionVector unionVector = DenseUnionVector.empty("union", allocator)) {
+
+ // prepare sub vectors
+ ValueVectorDataPopulator.setVector(childVector1, "a0", "a4");
+ ValueVectorDataPopulator.setVector(childVector2, "b1", "b2");
+
+ // register relative types
+ byte typeId1 = unionVector.registerNewTypeId(childVector1.getField());
+ byte typeId2 = unionVector.registerNewTypeId(childVector2.getField());
+
+ assertEquals(typeId1, 0);
+ assertEquals(typeId2, 1);
+
+ while (unionVector.getValueCapacity() < 5) {
+ unionVector.reAlloc();
+ }
+
+ // add two struct vectors to union vector
+ unionVector.addVector(typeId1, childVector1);
+ unionVector.addVector(typeId2, childVector2);
+
+ ArrowBuf offsetBuf = unionVector.getOffsetBuffer();
+
+ // slot 0 points to child1
+ unionVector.setTypeId(0, typeId1);
+ offsetBuf.setInt(0, 0);
+
+ // slot 1 points to child2
+ unionVector.setTypeId(1, typeId2);
+ offsetBuf.setInt(DenseUnionVector.OFFSET_WIDTH, 0);
+
+ // slot 2 points to child2
+ unionVector.setTypeId(2, typeId2);
+ offsetBuf.setInt(DenseUnionVector.OFFSET_WIDTH * 2, 1);
+
+
+ // slot 4 points to child1
+ unionVector.setTypeId(4, typeId1);
+ offsetBuf.setInt(DenseUnionVector.OFFSET_WIDTH * 4, 1);
+
+ unionVector.setValueCount(5);
+
+ assertEquals(new Text("a0"), unionVector.getObject(0));
+ assertEquals(new Text("b1"), unionVector.getObject(1));
+ assertEquals(new Text("b2"), unionVector.getObject(2));
+ assertNull(unionVector.getObject(3));
+ assertEquals(new Text("a4"), unionVector.getObject(4));
+ }
+ }
+
+ @Test
+ public void testChildVectorValueCounts() {
+ final NullableIntHolder intHolder = new NullableIntHolder();
+ intHolder.isSet = 1;
+
+ final NullableBigIntHolder longHolder = new NullableBigIntHolder();
+ longHolder.isSet = 1;
+
+ final NullableFloat4Holder floatHolder = new NullableFloat4Holder();
+ floatHolder.isSet = 1;
+
+ try (DenseUnionVector vector = new DenseUnionVector("vector", allocator, null, null)) {
+ vector.allocateNew();
+
+ // populate the delta vector with values {7, null, 8L, 9.0f, 10, 12L}
+ while (vector.getValueCapacity() < 6) {
+ vector.reAlloc();
+ }
+ byte intTypeId = vector.registerNewTypeId(Field.nullable("", Types.MinorType.INT.getType()));
+ vector.setTypeId(0, intTypeId);
+ intHolder.value = 7;
+ vector.setSafe(0, intHolder);
+ byte longTypeId = vector.registerNewTypeId(Field.nullable("", Types.MinorType.BIGINT.getType()));
+ vector.setTypeId(2, longTypeId);
+ longHolder.value = 8L;
+ vector.setSafe(2, longHolder);
+ byte floatTypeId = vector.registerNewTypeId(Field.nullable("", Types.MinorType.FLOAT4.getType()));
+ vector.setTypeId(3, floatTypeId);
+ floatHolder.value = 9.0f;
+ vector.setSafe(3, floatHolder);
+
+ vector.setTypeId(4, intTypeId);
+ intHolder.value = 10;
+ vector.setSafe(4, intHolder);
+ vector.setTypeId(5, longTypeId);
+ longHolder.value = 12L;
+ vector.setSafe(5, longHolder);
+
+ vector.setValueCount(6);
+
+ // verify results
+ IntVector intVector = (IntVector) vector.getVectorByType(intTypeId);
+ assertEquals(2, intVector.getValueCount());
+ assertEquals(7, intVector.get(0));
+ assertEquals(10, intVector.get(1));
+
+ BigIntVector longVector = (BigIntVector) vector.getVectorByType(longTypeId);
+ assertEquals(2, longVector.getValueCount());
+ assertEquals(8L, longVector.get(0));
+ assertEquals(12L, longVector.get(1));
+
+ Float4Vector floagVector = (Float4Vector) vector.getVectorByType(floatTypeId);
+ assertEquals(1, floagVector.getValueCount());
+ assertEquals(9.0f, floagVector.get(0), 0);
+ }
+ }
+
+ private static NullableIntHolder newIntHolder(int value) {
+ final NullableIntHolder holder = new NullableIntHolder();
+ holder.isSet = 1;
+ holder.value = value;
+ return holder;
+ }
+
+ private static NullableBitHolder newBitHolder(boolean value) {
+ final NullableBitHolder holder = new NullableBitHolder();
+ holder.isSet = 1;
+ holder.value = value ? 1 : 0;
+ return holder;
+ }
+
+ private static NullableFloat4Holder newFloat4Holder(float value) {
+ final NullableFloat4Holder holder = new NullableFloat4Holder();
+ holder.isSet = 1;
+ holder.value = value;
+ return holder;
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestDictionaryVector.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestDictionaryVector.java
new file mode 100644
index 000000000..bc6cddf36
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestDictionaryVector.java
@@ -0,0 +1,1032 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.apache.arrow.vector.TestUtils.newVarBinaryVector;
+import static org.apache.arrow.vector.TestUtils.newVarCharVector;
+import static org.apache.arrow.vector.testing.ValueVectorDataPopulator.setVector;
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+import java.nio.charset.StandardCharsets;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.function.ToIntBiFunction;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.complex.FixedSizeListVector;
+import org.apache.arrow.vector.complex.ListVector;
+import org.apache.arrow.vector.complex.StructVector;
+import org.apache.arrow.vector.complex.UnionVector;
+import org.apache.arrow.vector.complex.impl.NullableStructWriter;
+import org.apache.arrow.vector.complex.impl.UnionListWriter;
+import org.apache.arrow.vector.dictionary.Dictionary;
+import org.apache.arrow.vector.dictionary.DictionaryEncoder;
+import org.apache.arrow.vector.dictionary.DictionaryProvider;
+import org.apache.arrow.vector.dictionary.ListSubfieldEncoder;
+import org.apache.arrow.vector.dictionary.StructSubfieldEncoder;
+import org.apache.arrow.vector.holders.NullableIntHolder;
+import org.apache.arrow.vector.holders.NullableUInt4Holder;
+import org.apache.arrow.vector.types.Types;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.DictionaryEncoding;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.Text;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestDictionaryVector {
+
+ private BufferAllocator allocator;
+
+ byte[] zero = "foo".getBytes(StandardCharsets.UTF_8);
+ byte[] one = "bar".getBytes(StandardCharsets.UTF_8);
+ byte[] two = "baz".getBytes(StandardCharsets.UTF_8);
+
+ byte[][] data = new byte[][] {zero, one, two};
+
+ @Before
+ public void init() {
+ allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100);
+ }
+
+ @After
+ public void terminate() throws Exception {
+ allocator.close();
+ }
+
+ @Test
+ public void testEncodeStrings() {
+ // Create a new value vector
+ try (final VarCharVector vector = newVarCharVector("foo", allocator);
+ final VarCharVector dictionaryVector = newVarCharVector("dict", allocator);) {
+
+ setVector(vector, zero, one, one, two, zero);
+ setVector(dictionaryVector, zero, one, two);
+
+ Dictionary dictionary =
+ new Dictionary(dictionaryVector, new DictionaryEncoding(1L, false, null));
+
+ try (final ValueVector encoded = DictionaryEncoder.encode(vector, dictionary)) {
+ // verify indices
+ assertEquals(IntVector.class, encoded.getClass());
+
+ IntVector index = ((IntVector) encoded);
+ assertEquals(5, index.getValueCount());
+ assertEquals(0, index.get(0));
+ assertEquals(1, index.get(1));
+ assertEquals(1, index.get(2));
+ assertEquals(2, index.get(3));
+ assertEquals(0, index.get(4));
+
+ // now run through the decoder and verify we get the original back
+ try (ValueVector decoded = DictionaryEncoder.decode(encoded, dictionary)) {
+ assertEquals(vector.getClass(), decoded.getClass());
+ assertEquals(vector.getValueCount(), ((VarCharVector) decoded).getValueCount());
+ for (int i = 0; i < 5; i++) {
+ assertEquals(vector.getObject(i), ((VarCharVector) decoded).getObject(i));
+ }
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testEncodeLargeVector() {
+ // Create a new value vector
+ try (final VarCharVector vector = newVarCharVector("foo", allocator);
+ final VarCharVector dictionaryVector = newVarCharVector("dict", allocator);) {
+ vector.allocateNew();
+
+ int count = 10000;
+
+ for (int i = 0; i < 10000; ++i) {
+ vector.setSafe(i, data[i % 3], 0, data[i % 3].length);
+ }
+ vector.setValueCount(count);
+
+ setVector(dictionaryVector, zero, one, two);
+
+ Dictionary dictionary =
+ new Dictionary(dictionaryVector, new DictionaryEncoding(1L, false, null));
+
+ try (final ValueVector encoded = DictionaryEncoder.encode(vector, dictionary)) {
+ // verify indices
+ assertEquals(IntVector.class, encoded.getClass());
+
+ IntVector index = ((IntVector) encoded);
+ assertEquals(count, index.getValueCount());
+ for (int i = 0; i < count; ++i) {
+ assertEquals(i % 3, index.get(i));
+ }
+
+ // now run through the decoder and verify we get the original back
+ try (ValueVector decoded = DictionaryEncoder.decode(encoded, dictionary)) {
+ assertEquals(vector.getClass(), decoded.getClass());
+ assertEquals(vector.getValueCount(), decoded.getValueCount());
+ for (int i = 0; i < count; ++i) {
+ assertEquals(vector.getObject(i), decoded.getObject(i));
+ }
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testEncodeList() {
+ // Create a new value vector
+ try (final ListVector vector = ListVector.empty("vector", allocator);
+ final ListVector dictionaryVector = ListVector.empty("dict", allocator);) {
+
+ UnionListWriter writer = vector.getWriter();
+ writer.allocate();
+
+ //set some values
+ writeListVector(writer, new int[]{10, 20});
+ writeListVector(writer, new int[]{10, 20});
+ writeListVector(writer, new int[]{10, 20});
+ writeListVector(writer, new int[]{30, 40, 50});
+ writeListVector(writer, new int[]{30, 40, 50});
+ writeListVector(writer, new int[]{10, 20});
+
+ writer.setValueCount(6);
+
+ UnionListWriter dictWriter = dictionaryVector.getWriter();
+ dictWriter.allocate();
+
+ writeListVector(dictWriter, new int[]{10, 20});
+ writeListVector(dictWriter, new int[]{30, 40, 50});
+
+ dictWriter.setValueCount(2);
+
+ Dictionary dictionary = new Dictionary(dictionaryVector, new DictionaryEncoding(1L, false, null));
+
+ try (final ValueVector encoded = DictionaryEncoder.encode(vector, dictionary)) {
+ // verify indices
+ assertEquals(IntVector.class, encoded.getClass());
+
+ IntVector index = ((IntVector) encoded);
+ assertEquals(6, index.getValueCount());
+ assertEquals(0, index.get(0));
+ assertEquals(0, index.get(1));
+ assertEquals(0, index.get(2));
+ assertEquals(1, index.get(3));
+ assertEquals(1, index.get(4));
+ assertEquals(0, index.get(5));
+
+ // now run through the decoder and verify we get the original back
+ try (ValueVector decoded = DictionaryEncoder.decode(encoded, dictionary)) {
+ assertEquals(vector.getClass(), decoded.getClass());
+ assertEquals(vector.getValueCount(), decoded.getValueCount());
+ for (int i = 0; i < 5; i++) {
+ assertEquals(vector.getObject(i), decoded.getObject(i));
+ }
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testEncodeStruct() {
+ // Create a new value vector
+ try (final StructVector vector = StructVector.empty("vector", allocator);
+ final StructVector dictionaryVector = StructVector.empty("dict", allocator);) {
+ vector.addOrGet("f0", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class);
+ vector.addOrGet("f1", FieldType.nullable(new ArrowType.Int(64, true)), BigIntVector.class);
+ dictionaryVector.addOrGet("f0", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class);
+ dictionaryVector.addOrGet("f1", FieldType.nullable(new ArrowType.Int(64, true)), BigIntVector.class);
+
+ NullableStructWriter writer = vector.getWriter();
+ writer.allocate();
+
+ writeStructVector(writer, 1, 10L);
+ writeStructVector(writer, 1, 10L);
+ writeStructVector(writer, 1, 10L);
+ writeStructVector(writer, 2, 20L);
+ writeStructVector(writer, 2, 20L);
+ writeStructVector(writer, 2, 20L);
+ writeStructVector(writer, 1, 10L);
+
+ writer.setValueCount(7);
+
+ NullableStructWriter dictWriter = dictionaryVector.getWriter();
+ dictWriter.allocate();
+
+ writeStructVector(dictWriter, 1, 10L);
+ writeStructVector(dictWriter, 2, 20L);
+
+
+ dictionaryVector.setValueCount(2);
+
+ Dictionary dictionary = new Dictionary(dictionaryVector, new DictionaryEncoding(1L, false, null));
+
+ try (final ValueVector encoded = DictionaryEncoder.encode(vector, dictionary)) {
+ // verify indices
+ assertEquals(IntVector.class, encoded.getClass());
+
+ IntVector index = ((IntVector) encoded);
+ assertEquals(7, index.getValueCount());
+ assertEquals(0, index.get(0));
+ assertEquals(0, index.get(1));
+ assertEquals(0, index.get(2));
+ assertEquals(1, index.get(3));
+ assertEquals(1, index.get(4));
+ assertEquals(1, index.get(5));
+ assertEquals(0, index.get(6));
+
+ // now run through the decoder and verify we get the original back
+ try (ValueVector decoded = DictionaryEncoder.decode(encoded, dictionary)) {
+ assertEquals(vector.getClass(), decoded.getClass());
+ assertEquals(vector.getValueCount(), decoded.getValueCount());
+ for (int i = 0; i < 5; i++) {
+ assertEquals(vector.getObject(i), decoded.getObject(i));
+ }
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testEncodeBinaryVector() {
+ // Create a new value vector
+ try (final VarBinaryVector vector = newVarBinaryVector("foo", allocator);
+ final VarBinaryVector dictionaryVector = newVarBinaryVector("dict", allocator)) {
+
+ setVector(vector, zero, one, one, two, zero);
+ setVector(dictionaryVector, zero, one, two);
+
+ Dictionary dictionary = new Dictionary(dictionaryVector, new DictionaryEncoding(1L, false, null));
+
+ try (final ValueVector encoded = DictionaryEncoder.encode(vector, dictionary)) {
+ // verify indices
+ assertEquals(IntVector.class, encoded.getClass());
+
+ IntVector index = ((IntVector) encoded);
+ assertEquals(5, index.getValueCount());
+ assertEquals(0, index.get(0));
+ assertEquals(1, index.get(1));
+ assertEquals(1, index.get(2));
+ assertEquals(2, index.get(3));
+ assertEquals(0, index.get(4));
+
+ // now run through the decoder and verify we get the original back
+ try (VarBinaryVector decoded = (VarBinaryVector) DictionaryEncoder.decode(encoded, dictionary)) {
+ assertEquals(vector.getClass(), decoded.getClass());
+ assertEquals(vector.getValueCount(), decoded.getValueCount());
+ for (int i = 0; i < 5; i++) {
+ assertTrue(Arrays.equals(vector.getObject(i), decoded.getObject(i)));
+ }
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testEncodeUnion() {
+ // Create a new value vector
+ try (final UnionVector vector = new UnionVector("vector", allocator, /* field type */ null, /* call-back */ null);
+ final UnionVector dictionaryVector =
+ new UnionVector("dict", allocator, /* field type */ null, /* call-back */ null);) {
+
+ final NullableUInt4Holder uintHolder1 = new NullableUInt4Holder();
+ uintHolder1.value = 10;
+ uintHolder1.isSet = 1;
+
+ final NullableIntHolder intHolder1 = new NullableIntHolder();
+ intHolder1.value = 10;
+ intHolder1.isSet = 1;
+
+ final NullableIntHolder intHolder2 = new NullableIntHolder();
+ intHolder2.value = 20;
+ intHolder2.isSet = 1;
+
+ //write data
+ vector.setType(0, Types.MinorType.UINT4);
+ vector.setSafe(0, uintHolder1);
+
+ vector.setType(1, Types.MinorType.INT);
+ vector.setSafe(1, intHolder1);
+
+ vector.setType(2, Types.MinorType.INT);
+ vector.setSafe(2, intHolder1);
+
+ vector.setType(3, Types.MinorType.INT);
+ vector.setSafe(3, intHolder2);
+
+ vector.setType(4, Types.MinorType.INT);
+ vector.setSafe(4, intHolder2);
+
+ vector.setValueCount(5);
+
+ //write dictionary
+ dictionaryVector.setType(0, Types.MinorType.UINT4);
+ dictionaryVector.setSafe(0, uintHolder1);
+
+ dictionaryVector.setType(1, Types.MinorType.INT);
+ dictionaryVector.setSafe(1, intHolder1);
+
+ dictionaryVector.setType(2, Types.MinorType.INT);
+ dictionaryVector.setSafe(2, intHolder2);
+
+ dictionaryVector.setValueCount(3);
+
+ Dictionary dictionary = new Dictionary(dictionaryVector, new DictionaryEncoding(1L, false, null));
+
+ try (final ValueVector encoded = DictionaryEncoder.encode(vector, dictionary)) {
+ // verify indices
+ assertEquals(IntVector.class, encoded.getClass());
+
+ IntVector index = ((IntVector) encoded);
+ assertEquals(5, index.getValueCount());
+ assertEquals(0, index.get(0));
+ assertEquals(1, index.get(1));
+ assertEquals(1, index.get(2));
+ assertEquals(2, index.get(3));
+ assertEquals(2, index.get(4));
+
+ // now run through the decoder and verify we get the original back
+ try (ValueVector decoded = DictionaryEncoder.decode(encoded, dictionary)) {
+ assertEquals(vector.getClass(), decoded.getClass());
+ assertEquals(vector.getValueCount(), decoded.getValueCount());
+ for (int i = 0; i < 5; i++) {
+ assertEquals(vector.getObject(i), decoded.getObject(i));
+ }
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testIntEquals() {
+ //test Int
+ try (final IntVector vector1 = new IntVector("int", allocator);
+ final IntVector vector2 = new IntVector("int", allocator)) {
+
+ Dictionary dict1 = new Dictionary(vector1, new DictionaryEncoding(1L, false, null));
+ Dictionary dict2 = new Dictionary(vector2, new DictionaryEncoding(1L, false, null));
+
+ setVector(vector1, 1, 2, 3);
+ setVector(vector2, 1, 2, 0);
+
+ assertFalse(dict1.equals(dict2));
+
+ vector2.setSafe(2, 3);
+ assertTrue(dict1.equals(dict2));
+ }
+ }
+
+ @Test
+ public void testVarcharEquals() {
+ try (final VarCharVector vector1 = new VarCharVector("varchar", allocator);
+ final VarCharVector vector2 = new VarCharVector("varchar", allocator)) {
+
+ Dictionary dict1 = new Dictionary(vector1, new DictionaryEncoding(1L, false, null));
+ Dictionary dict2 = new Dictionary(vector2, new DictionaryEncoding(1L, false, null));
+
+ setVector(vector1, zero, one, two);
+ setVector(vector2, zero, one, one);
+
+ assertFalse(dict1.equals(dict2));
+
+ vector2.setSafe(2, two, 0, two.length);
+ assertTrue(dict1.equals(dict2));
+ }
+ }
+
+ @Test
+ public void testVarBinaryEquals() {
+ try (final VarBinaryVector vector1 = new VarBinaryVector("binary", allocator);
+ final VarBinaryVector vector2 = new VarBinaryVector("binary", allocator)) {
+
+ Dictionary dict1 = new Dictionary(vector1, new DictionaryEncoding(1L, false, null));
+ Dictionary dict2 = new Dictionary(vector2, new DictionaryEncoding(1L, false, null));
+
+ setVector(vector1, zero, one, two);
+ setVector(vector2, zero, one, one);
+
+ assertFalse(dict1.equals(dict2));
+
+ vector2.setSafe(2, two, 0, two.length);
+ assertTrue(dict1.equals(dict2));
+ }
+ }
+
+ @Test
+ public void testListEquals() {
+ try (final ListVector vector1 = ListVector.empty("list", allocator);
+ final ListVector vector2 = ListVector.empty("list", allocator);) {
+
+ Dictionary dict1 = new Dictionary(vector1, new DictionaryEncoding(1L, false, null));
+ Dictionary dict2 = new Dictionary(vector2, new DictionaryEncoding(1L, false, null));
+
+ UnionListWriter writer1 = vector1.getWriter();
+ writer1.allocate();
+
+ //set some values
+ writeListVector(writer1, new int[] {1, 2});
+ writeListVector(writer1, new int[] {3, 4});
+ writeListVector(writer1, new int[] {5, 6});
+ writer1.setValueCount(3);
+
+ UnionListWriter writer2 = vector2.getWriter();
+ writer2.allocate();
+
+ //set some values
+ writeListVector(writer2, new int[] {1, 2});
+ writeListVector(writer2, new int[] {3, 4});
+ writeListVector(writer2, new int[] {5, 6});
+ writer2.setValueCount(3);
+
+ assertTrue(dict1.equals(dict2));
+ }
+ }
+
+ @Test
+ public void testStructEquals() {
+ try (final StructVector vector1 = StructVector.empty("struct", allocator);
+ final StructVector vector2 = StructVector.empty("struct", allocator);) {
+ vector1.addOrGet("f0", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class);
+ vector1.addOrGet("f1", FieldType.nullable(new ArrowType.Int(64, true)), BigIntVector.class);
+ vector2.addOrGet("f0", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class);
+ vector2.addOrGet("f1", FieldType.nullable(new ArrowType.Int(64, true)), BigIntVector.class);
+
+ Dictionary dict1 = new Dictionary(vector1, new DictionaryEncoding(1L, false, null));
+ Dictionary dict2 = new Dictionary(vector2, new DictionaryEncoding(1L, false, null));
+
+ NullableStructWriter writer1 = vector1.getWriter();
+ writer1.allocate();
+
+ writeStructVector(writer1, 1, 10L);
+ writeStructVector(writer1, 2, 20L);
+ writer1.setValueCount(2);
+
+ NullableStructWriter writer2 = vector2.getWriter();
+ writer2.allocate();
+
+ writeStructVector(writer2, 1, 10L);
+ writeStructVector(writer2, 2, 20L);
+ writer2.setValueCount(2);
+
+ assertTrue(dict1.equals(dict2));
+ }
+ }
+
+ @Test
+ public void testUnionEquals() {
+ try (final UnionVector vector1 = new UnionVector("union", allocator, /* field type */ null, /* call-back */ null);
+ final UnionVector vector2 =
+ new UnionVector("union", allocator, /* field type */ null, /* call-back */ null);) {
+
+ final NullableUInt4Holder uInt4Holder = new NullableUInt4Holder();
+ uInt4Holder.value = 10;
+ uInt4Holder.isSet = 1;
+
+ final NullableIntHolder intHolder = new NullableIntHolder();
+ uInt4Holder.value = 20;
+ uInt4Holder.isSet = 1;
+
+ vector1.setType(0, Types.MinorType.UINT4);
+ vector1.setSafe(0, uInt4Holder);
+
+ vector1.setType(2, Types.MinorType.INT);
+ vector1.setSafe(2, intHolder);
+ vector1.setValueCount(3);
+
+ vector2.setType(0, Types.MinorType.UINT4);
+ vector2.setSafe(0, uInt4Holder);
+
+ vector2.setType(2, Types.MinorType.INT);
+ vector2.setSafe(2, intHolder);
+ vector2.setValueCount(3);
+
+ Dictionary dict1 = new Dictionary(vector1, new DictionaryEncoding(1L, false, null));
+ Dictionary dict2 = new Dictionary(vector2, new DictionaryEncoding(1L, false, null));
+
+ assertTrue(dict1.equals(dict2));
+ }
+ }
+
+ @Test
+ public void testEncodeWithEncoderInstance() {
+ // Create a new value vector
+ try (final VarCharVector vector = newVarCharVector("vector", allocator);
+ final VarCharVector dictionaryVector = newVarCharVector("dict", allocator);) {
+
+ setVector(vector, zero, one, one, two, zero);
+ setVector(dictionaryVector, zero, one, two);
+
+ Dictionary dictionary =
+ new Dictionary(dictionaryVector, new DictionaryEncoding(1L, false, null));
+ DictionaryEncoder encoder = new DictionaryEncoder(dictionary, allocator);
+
+ try (final ValueVector encoded = encoder.encode(vector)) {
+ // verify indices
+ assertEquals(IntVector.class, encoded.getClass());
+
+ IntVector index = ((IntVector) encoded);
+ assertEquals(5, index.getValueCount());
+ assertEquals(0, index.get(0));
+ assertEquals(1, index.get(1));
+ assertEquals(1, index.get(2));
+ assertEquals(2, index.get(3));
+ assertEquals(0, index.get(4));
+
+ // now run through the decoder and verify we get the original back
+ try (ValueVector decoded = encoder.decode(encoded)) {
+ assertEquals(vector.getClass(), decoded.getClass());
+ assertEquals(vector.getValueCount(), (decoded).getValueCount());
+ for (int i = 0; i < 5; i++) {
+ assertEquals(vector.getObject(i), ((VarCharVector) decoded).getObject(i));
+ }
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testEncodeMultiVectors() {
+ // Create a new value vector
+ try (final VarCharVector vector1 = newVarCharVector("vector1", allocator);
+ final VarCharVector vector2 = newVarCharVector("vector2", allocator);
+ final VarCharVector dictionaryVector = newVarCharVector("dict", allocator);) {
+
+ setVector(vector1, zero, one, one, two, zero);
+ setVector(vector2, zero, one, one);
+ setVector(dictionaryVector, zero, one, two);
+
+ Dictionary dictionary =
+ new Dictionary(dictionaryVector, new DictionaryEncoding(1L, false, null));
+ DictionaryEncoder encoder = new DictionaryEncoder(dictionary, allocator);
+
+ try (final ValueVector encoded = encoder.encode(vector1)) {
+ // verify indices
+ assertEquals(IntVector.class, encoded.getClass());
+
+ IntVector index = ((IntVector) encoded);
+ assertEquals(5, index.getValueCount());
+ assertEquals(0, index.get(0));
+ assertEquals(1, index.get(1));
+ assertEquals(1, index.get(2));
+ assertEquals(2, index.get(3));
+ assertEquals(0, index.get(4));
+
+ // now run through the decoder and verify we get the original back
+ try (ValueVector decoded = encoder.decode(encoded)) {
+ assertEquals(vector1.getClass(), decoded.getClass());
+ assertEquals(vector1.getValueCount(), (decoded).getValueCount());
+ for (int i = 0; i < 5; i++) {
+ assertEquals(vector1.getObject(i), ((VarCharVector) decoded).getObject(i));
+ }
+ }
+ }
+
+ try (final ValueVector encoded = encoder.encode(vector2)) {
+ // verify indices
+ assertEquals(IntVector.class, encoded.getClass());
+
+ IntVector index = ((IntVector) encoded);
+ assertEquals(3, index.getValueCount());
+ assertEquals(0, index.get(0));
+ assertEquals(1, index.get(1));
+ assertEquals(1, index.get(2));
+
+ // now run through the decoder and verify we get the original back
+ try (ValueVector decoded = encoder.decode(encoded)) {
+ assertEquals(vector2.getClass(), decoded.getClass());
+ assertEquals(vector2.getValueCount(), (decoded).getValueCount());
+ for (int i = 0; i < 3; i++) {
+ assertEquals(vector2.getObject(i), ((VarCharVector) decoded).getObject(i));
+ }
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testEncodeListSubField() {
+ // Create a new value vector
+ try (final ListVector vector = ListVector.empty("vector", allocator);
+ final ListVector dictionaryVector = ListVector.empty("dict", allocator);) {
+
+ UnionListWriter writer = vector.getWriter();
+ writer.allocate();
+
+ //set some values
+ writeListVector(writer, new int[]{10, 20});
+ writeListVector(writer, new int[]{10, 20});
+ writeListVector(writer, new int[]{10, 20});
+ writeListVector(writer, new int[]{30, 40, 50});
+ writeListVector(writer, new int[]{30, 40, 50});
+ writeListVector(writer, new int[]{10, 20});
+ writer.setValueCount(6);
+
+ UnionListWriter dictWriter = dictionaryVector.getWriter();
+ dictWriter.allocate();
+ writeListVector(dictWriter, new int[]{10, 20, 30, 40, 50});
+ dictionaryVector.setValueCount(1);
+
+ Dictionary dictionary = new Dictionary(dictionaryVector, new DictionaryEncoding(1L, false, null));
+ ListSubfieldEncoder encoder = new ListSubfieldEncoder(dictionary, allocator);
+
+ try (final ListVector encoded = (ListVector) encoder.encodeListSubField(vector)) {
+ // verify indices
+ assertEquals(ListVector.class, encoded.getClass());
+
+ assertEquals(6, encoded.getValueCount());
+ int[] realValue1 = convertListToIntArray(encoded.getObject(0));
+ assertTrue(Arrays.equals(new int[] {0, 1}, realValue1));
+ int[] realValue2 = convertListToIntArray(encoded.getObject(1));
+ assertTrue(Arrays.equals(new int[] {0, 1}, realValue2));
+ int[] realValue3 = convertListToIntArray(encoded.getObject(2));
+ assertTrue(Arrays.equals(new int[] {0, 1}, realValue3));
+ int[] realValue4 = convertListToIntArray(encoded.getObject(3));
+ assertTrue(Arrays.equals(new int[] {2, 3, 4}, realValue4));
+ int[] realValue5 = convertListToIntArray(encoded.getObject(4));
+ assertTrue(Arrays.equals(new int[] {2, 3, 4}, realValue5));
+ int[] realValue6 = convertListToIntArray(encoded.getObject(5));
+ assertTrue(Arrays.equals(new int[] {0, 1}, realValue6));
+
+ // now run through the decoder and verify we get the original back
+ try (ValueVector decoded = encoder.decodeListSubField(encoded)) {
+ assertEquals(vector.getClass(), decoded.getClass());
+ assertEquals(vector.getValueCount(), decoded.getValueCount());
+ for (int i = 0; i < 5; i++) {
+ assertEquals(vector.getObject(i), decoded.getObject(i));
+ }
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testEncodeFixedSizeListSubField() {
+ // Create a new value vector
+ try (final FixedSizeListVector vector = FixedSizeListVector.empty("vector", 2, allocator);
+ final FixedSizeListVector dictionaryVector = FixedSizeListVector.empty("dict", 2, allocator)) {
+
+ vector.allocateNew();
+ vector.setValueCount(4);
+
+ IntVector dataVector =
+ (IntVector) vector.addOrGetVector(FieldType.nullable(Types.MinorType.INT.getType())).getVector();
+ dataVector.allocateNew(8);
+ dataVector.setValueCount(8);
+ // set value at index 0
+ vector.setNotNull(0);
+ dataVector.set(0, 10);
+ dataVector.set(1, 20);
+ // set value at index 1
+ vector.setNotNull(1);
+ dataVector.set(2, 10);
+ dataVector.set(3, 20);
+ // set value at index 2
+ vector.setNotNull(2);
+ dataVector.set(4, 30);
+ dataVector.set(5, 40);
+ // set value at index 3
+ vector.setNotNull(3);
+ dataVector.set(6, 10);
+ dataVector.set(7, 20);
+
+ dictionaryVector.allocateNew();
+ dictionaryVector.setValueCount(2);
+ IntVector dictDataVector =
+ (IntVector) dictionaryVector.addOrGetVector(FieldType.nullable(Types.MinorType.INT.getType())).getVector();
+ dictDataVector.allocateNew(4);
+ dictDataVector.setValueCount(4);
+
+ dictionaryVector.setNotNull(0);
+ dictDataVector.set(0, 10);
+ dictDataVector.set(1, 20);
+ dictionaryVector.setNotNull(1);
+ dictDataVector.set(2, 30);
+ dictDataVector.set(3, 40);
+
+ Dictionary dictionary = new Dictionary(dictionaryVector, new DictionaryEncoding(1L, false, null));
+ ListSubfieldEncoder encoder = new ListSubfieldEncoder(dictionary, allocator);
+
+ try (final FixedSizeListVector encoded =
+ (FixedSizeListVector) encoder.encodeListSubField(vector)) {
+ // verify indices
+ assertEquals(FixedSizeListVector.class, encoded.getClass());
+
+ assertEquals(4, encoded.getValueCount());
+ int[] realValue1 = convertListToIntArray(encoded.getObject(0));
+ assertTrue(Arrays.equals(new int[] {0, 1}, realValue1));
+ int[] realValue2 = convertListToIntArray(encoded.getObject(1));
+ assertTrue(Arrays.equals(new int[] {0, 1}, realValue2));
+ int[] realValue3 = convertListToIntArray(encoded.getObject(2));
+ assertTrue(Arrays.equals(new int[] {2, 3}, realValue3));
+ int[] realValue4 = convertListToIntArray(encoded.getObject(3));
+ assertTrue(Arrays.equals(new int[] {0, 1}, realValue4));
+
+ // now run through the decoder and verify we get the original back
+ try (ValueVector decoded = encoder.decodeListSubField(encoded)) {
+ assertEquals(vector.getClass(), decoded.getClass());
+ assertEquals(vector.getValueCount(), decoded.getValueCount());
+ for (int i = 0; i < 5; i++) {
+ assertEquals(vector.getObject(i), decoded.getObject(i));
+ }
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testEncodeStructSubField() {
+ try (final StructVector vector = StructVector.empty("vector", allocator);
+ final VarCharVector dictVector1 = new VarCharVector("f0", allocator);
+ final VarCharVector dictVector2 = new VarCharVector("f1", allocator)) {
+
+ vector.addOrGet("f0", FieldType.nullable(ArrowType.Utf8.INSTANCE), VarCharVector.class);
+ vector.addOrGet("f1", FieldType.nullable(ArrowType.Utf8.INSTANCE), VarCharVector.class);
+
+ NullableStructWriter writer = vector.getWriter();
+ writer.allocate();
+ //set some values
+ writeStructVector(writer, "aa", "baz");
+ writeStructVector(writer, "bb", "bar");
+ writeStructVector(writer, "cc", "foo");
+ writeStructVector(writer, "aa", "foo");
+ writeStructVector(writer, "dd", "foo");
+ writer.setValueCount(5);
+
+ // initialize dictionaries
+ DictionaryProvider.MapDictionaryProvider provider = new DictionaryProvider.MapDictionaryProvider();
+
+
+ setVector(dictVector1,
+ "aa".getBytes(StandardCharsets.UTF_8),
+ "bb".getBytes(StandardCharsets.UTF_8),
+ "cc".getBytes(StandardCharsets.UTF_8),
+ "dd".getBytes(StandardCharsets.UTF_8));
+ setVector(dictVector2,
+ "foo".getBytes(StandardCharsets.UTF_8),
+ "baz".getBytes(StandardCharsets.UTF_8),
+ "bar".getBytes(StandardCharsets.UTF_8));
+
+ provider.put(new Dictionary(dictVector1, new DictionaryEncoding(1L, false, null)));
+ provider.put(new Dictionary(dictVector2, new DictionaryEncoding(2L, false, null)));
+
+ StructSubfieldEncoder encoder = new StructSubfieldEncoder(allocator, provider);
+ Map<Integer, Long> columnToDictionaryId = new HashMap<>();
+ columnToDictionaryId.put(0, 1L);
+ columnToDictionaryId.put(1, 2L);
+
+ try (final StructVector encoded = (StructVector) encoder.encode(vector, columnToDictionaryId)) {
+ // verify indices
+ assertEquals(StructVector.class, encoded.getClass());
+
+ assertEquals(5, encoded.getValueCount());
+ Object[] realValue1 = convertMapValuesToArray(encoded.getObject(0));
+ assertTrue(Arrays.equals(new Object[] {0, 1}, realValue1));
+ Object[] realValue2 = convertMapValuesToArray(encoded.getObject(1));
+ assertTrue(Arrays.equals(new Object[] {1, 2}, realValue2));
+ Object[] realValue3 = convertMapValuesToArray(encoded.getObject(2));
+ assertTrue(Arrays.equals(new Object[] {2, 0}, realValue3));
+ Object[] realValue4 = convertMapValuesToArray(encoded.getObject(3));
+ assertTrue(Arrays.equals(new Object[] {0, 0}, realValue4));
+ Object[] realValue5 = convertMapValuesToArray(encoded.getObject(4));
+ assertTrue(Arrays.equals(new Object[] {3, 0}, realValue5));
+
+ // now run through the decoder and verify we get the original back
+ try (ValueVector decoded = encoder.decode(encoded)) {
+ assertEquals(vector.getClass(), decoded.getClass());
+ assertEquals(vector.getValueCount(), decoded.getValueCount());
+ for (int i = 0; i < 5; i++) {
+ assertEquals(vector.getObject(i), decoded.getObject(i));
+ }
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testEncodeStructSubFieldWithCertainColumns() {
+ // in this case, some child vector is encoded and others are not
+ try (final StructVector vector = StructVector.empty("vector", allocator);
+ final VarCharVector dictVector1 = new VarCharVector("f0", allocator)) {
+
+ vector.addOrGet("f0", FieldType.nullable(ArrowType.Utf8.INSTANCE), VarCharVector.class);
+ vector.addOrGet("f1", FieldType.nullable(ArrowType.Utf8.INSTANCE), VarCharVector.class);
+
+ NullableStructWriter writer = vector.getWriter();
+ writer.allocate();
+ //set some values
+ writeStructVector(writer, "aa", "baz");
+ writeStructVector(writer, "bb", "bar");
+ writeStructVector(writer, "cc", "foo");
+ writeStructVector(writer, "aa", "foo");
+ writeStructVector(writer, "dd", "foo");
+ writer.setValueCount(5);
+
+ // initialize dictionaries
+ DictionaryProvider.MapDictionaryProvider provider = new DictionaryProvider.MapDictionaryProvider();
+
+ setVector(dictVector1, "aa".getBytes(), "bb".getBytes(), "cc".getBytes(), "dd".getBytes());
+
+ provider.put(new Dictionary(dictVector1, new DictionaryEncoding(1L, false, null)));
+ StructSubfieldEncoder encoder = new StructSubfieldEncoder(allocator, provider);
+ Map<Integer, Long> columnToDictionaryId = new HashMap<>();
+ columnToDictionaryId.put(0, 1L);
+
+ try (final StructVector encoded = (StructVector) encoder.encode(vector, columnToDictionaryId)) {
+ // verify indices
+ assertEquals(StructVector.class, encoded.getClass());
+
+ assertEquals(5, encoded.getValueCount());
+ Object[] realValue1 = convertMapValuesToArray(encoded.getObject(0));
+ assertTrue(Arrays.equals(new Object[] {0, new Text("baz")}, realValue1));
+ Object[] realValue2 = convertMapValuesToArray(encoded.getObject(1));
+ assertTrue(Arrays.equals(new Object[] {1, new Text("bar")}, realValue2));
+ Object[] realValue3 = convertMapValuesToArray(encoded.getObject(2));
+ assertTrue(Arrays.equals(new Object[] {2, new Text("foo")}, realValue3));
+ Object[] realValue4 = convertMapValuesToArray(encoded.getObject(3));
+ assertTrue(Arrays.equals(new Object[] {0, new Text("foo")}, realValue4));
+ Object[] realValue5 = convertMapValuesToArray(encoded.getObject(4));
+ assertTrue(Arrays.equals(new Object[] {3, new Text("foo")}, realValue5));
+
+ // now run through the decoder and verify we get the original back
+ try (ValueVector decoded = encoder.decode(encoded)) {
+ assertEquals(vector.getClass(), decoded.getClass());
+ assertEquals(vector.getValueCount(), decoded.getValueCount());
+ for (int i = 0; i < 5; i++) {
+ assertEquals(vector.getObject(i), decoded.getObject(i));
+ }
+ }
+ }
+
+ }
+ }
+
+ private void testDictionary(Dictionary dictionary, ToIntBiFunction<ValueVector, Integer> valGetter) {
+ try (VarCharVector vector = new VarCharVector("vector", allocator)) {
+ setVector(vector, "1", "3", "5", "7", "9");
+ try (ValueVector encodedVector = DictionaryEncoder.encode(vector, dictionary)) {
+
+ // verify encoded result
+ assertEquals(vector.getValueCount(), encodedVector.getValueCount());
+ assertEquals(valGetter.applyAsInt(encodedVector, 0), 1);
+ assertEquals(valGetter.applyAsInt(encodedVector, 1), 3);
+ assertEquals(valGetter.applyAsInt(encodedVector, 2), 5);
+ assertEquals(valGetter.applyAsInt(encodedVector, 3), 7);
+ assertEquals(valGetter.applyAsInt(encodedVector, 4), 9);
+
+ try (ValueVector decodedVector = DictionaryEncoder.decode(encodedVector, dictionary)) {
+ assertTrue(decodedVector instanceof VarCharVector);
+ assertEquals(vector.getValueCount(), decodedVector.getValueCount());
+ assertArrayEquals("1".getBytes(), ((VarCharVector) decodedVector).get(0));
+ assertArrayEquals("3".getBytes(), ((VarCharVector) decodedVector).get(1));
+ assertArrayEquals("5".getBytes(), ((VarCharVector) decodedVector).get(2));
+ assertArrayEquals("7".getBytes(), ((VarCharVector) decodedVector).get(3));
+ assertArrayEquals("9".getBytes(), ((VarCharVector) decodedVector).get(4));
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testDictionaryUInt1() {
+ try (VarCharVector dictionaryVector = new VarCharVector("dict vector", allocator)) {
+ setVector(dictionaryVector, "0", "1", "2", "3", "4", "5", "6", "7", "8", "9");
+ Dictionary dictionary1 = new Dictionary(dictionaryVector,
+ new DictionaryEncoding(/*id=*/10L, /*ordered=*/false,
+ /*indexType=*/new ArrowType.Int(/*bitWidth*/8, /*isSigned*/false)));
+ testDictionary(dictionary1, (vector, index) -> ((UInt1Vector) vector).get(index));
+ }
+ }
+
+ @Test
+ public void testDictionaryUInt2() {
+ try (VarCharVector dictionaryVector = new VarCharVector("dict vector", allocator)) {
+ setVector(dictionaryVector, "0", "1", "2", "3", "4", "5", "6", "7", "8", "9");
+ Dictionary dictionary2 = new Dictionary(dictionaryVector,
+ new DictionaryEncoding(/*id=*/20L, /*ordered=*/false,
+ /*indexType=*/new ArrowType.Int(/*indexType=*/16, /*isSigned*/false)));
+ testDictionary(dictionary2, (vector, index) -> ((UInt2Vector) vector).get(index));
+ }
+ }
+
+ @Test
+ public void testDictionaryUInt4() {
+ try (VarCharVector dictionaryVector = new VarCharVector("dict vector", allocator)) {
+ setVector(dictionaryVector, "0", "1", "2", "3", "4", "5", "6", "7", "8", "9");
+ Dictionary dictionary4 = new Dictionary(dictionaryVector,
+ new DictionaryEncoding(/*id=*/30L, /*ordered=*/false,
+ /*indexType=*/new ArrowType.Int(/*indexType=*/32, /*isSigned*/false)));
+ testDictionary(dictionary4, (vector, index) -> ((UInt4Vector) vector).get(index));
+ }
+ }
+
+ @Test
+ public void testDictionaryUInt8() {
+ try (VarCharVector dictionaryVector = new VarCharVector("dict vector", allocator)) {
+ setVector(dictionaryVector, "0", "1", "2", "3", "4", "5", "6", "7", "8", "9");
+ Dictionary dictionary8 = new Dictionary(dictionaryVector,
+ new DictionaryEncoding(/*id=*/40L, /*ordered=*/false,
+ /*indexType=*/new ArrowType.Int(/*indexType=*/64, /*isSigned*/false)));
+ testDictionary(dictionary8, (vector, index) -> (int) ((UInt8Vector) vector).get(index));
+ }
+ }
+
+ @Test
+ public void testDictionaryUIntOverflow() {
+ // the size is within the range of UInt1, but outside the range of TinyInt.
+ final int vecLength = 256;
+ try (VarCharVector dictionaryVector = new VarCharVector("dict vector", allocator)) {
+ dictionaryVector.allocateNew(vecLength * 3, vecLength);
+ for (int i = 0; i < vecLength; i++) {
+ dictionaryVector.set(i, String.valueOf(i).getBytes());
+ }
+ dictionaryVector.setValueCount(vecLength);
+
+ Dictionary dictionary = new Dictionary(dictionaryVector,
+ new DictionaryEncoding(/*id=*/10L, /*ordered=*/false,
+ /*indexType=*/new ArrowType.Int(/*indexType=*/8, /*isSigned*/false)));
+
+ try (VarCharVector vector = new VarCharVector("vector", allocator)) {
+ setVector(vector, "255");
+ try (UInt1Vector encodedVector = (UInt1Vector) DictionaryEncoder.encode(vector, dictionary)) {
+
+ // verify encoded result
+ assertEquals(1, encodedVector.getValueCount());
+ assertEquals(255, encodedVector.getValueAsLong(0));
+
+ try (VarCharVector decodedVector = (VarCharVector) DictionaryEncoder.decode(encodedVector, dictionary)) {
+ assertEquals(1, decodedVector.getValueCount());
+ assertArrayEquals("255".getBytes(), decodedVector.get(0));
+ }
+ }
+ }
+ }
+ }
+
+ private int[] convertListToIntArray(List list) {
+ int[] values = new int[list.size()];
+ for (int i = 0; i < list.size(); i++) {
+ values[i] = (int) list.get(i);
+ }
+ return values;
+ }
+
+ private Object[] convertMapValuesToArray(Map map) {
+ Object[] values = new Object[map.size()];
+ Iterator valueIterator = map.values().iterator();
+ for (int i = 0; i < map.size(); i++) {
+ values[i] = valueIterator.next();
+ }
+ return values;
+ }
+
+ private void writeStructVector(NullableStructWriter writer, String value1, String value2) {
+
+ byte[] bytes1 = value1.getBytes(StandardCharsets.UTF_8);
+ byte[] bytes2 = value2.getBytes(StandardCharsets.UTF_8);
+ ArrowBuf temp = allocator.buffer(bytes1.length > bytes2.length ? bytes1.length : bytes2.length);
+
+ writer.start();
+ temp.setBytes(0, bytes1);
+ writer.varChar("f0").writeVarChar(0, bytes1.length, temp);
+ temp.setBytes(0, bytes2);
+ writer.varChar("f1").writeVarChar(0, bytes2.length, temp);
+ writer.end();
+ temp.close();
+ }
+
+ private void writeStructVector(NullableStructWriter writer, int value1, long value2) {
+ writer.start();
+ writer.integer("f0").writeInt(value1);
+ writer.bigInt("f1").writeBigInt(value2);
+ writer.end();
+ }
+
+ private void writeListVector(UnionListWriter writer, int[] values) {
+ writer.startList();
+ for (int v: values) {
+ writer.integer().writeInt(v);
+ }
+ writer.endList();
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestDurationVector.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestDurationVector.java
new file mode 100644
index 000000000..8ae876f20
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestDurationVector.java
@@ -0,0 +1,137 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
+
+import java.time.Duration;
+
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.holders.NullableDurationHolder;
+import org.apache.arrow.vector.types.TimeUnit;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestDurationVector {
+ RootAllocator allocator;
+
+ @Before
+ public void init() {
+ allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100);
+ }
+
+ @After
+ public void terminate() {
+ allocator.close();
+ }
+
+ @Test
+ public void testSecBasics() {
+ try (DurationVector secVector = TestUtils.newVector(DurationVector.class, "second",
+ new ArrowType.Duration(TimeUnit.SECOND), allocator)) {
+
+ secVector.allocateNew();
+ secVector.setNull(0);
+ secVector.setSafe(1, 1000);
+ secVector.setValueCount(2);
+ assertNull(secVector.getObject(0));
+ assertEquals(Duration.ofSeconds(1000), secVector.getObject(1));
+ assertNull(secVector.getAsStringBuilder(0));
+ assertEquals("PT16M40S", secVector.getAsStringBuilder(1).toString());
+ // Holder
+ NullableDurationHolder holder = new NullableDurationHolder();
+ secVector.get(0, holder);
+ assertEquals(0, holder.isSet);
+ secVector.get(1, holder);
+ assertEquals(1 , holder.isSet);
+ assertEquals(1000 , holder.value);
+ }
+ }
+
+ @Test
+ public void testMilliBasics() {
+ try (DurationVector milliVector = TestUtils.newVector(DurationVector.class, "nanos",
+ new ArrowType.Duration(TimeUnit.MILLISECOND), allocator)) {
+
+ milliVector.allocateNew();
+ milliVector.setNull(0);
+ milliVector.setSafe(1, 1000);
+ milliVector.setValueCount(2);
+ assertNull(milliVector.getObject(0));
+ assertEquals(Duration.ofSeconds(1), milliVector.getObject(1));
+ assertNull(milliVector.getAsStringBuilder(0));
+ assertEquals("PT1S", milliVector.getAsStringBuilder(1).toString());
+ // Holder
+ NullableDurationHolder holder = new NullableDurationHolder();
+ milliVector.get(0, holder);
+ assertEquals(0, holder.isSet);
+ milliVector.get(1, holder);
+ assertEquals(1 , holder.isSet);
+ assertEquals(1000 , holder.value);
+ }
+ }
+
+ @Test
+ public void testMicroBasics() {
+ try (DurationVector microVector = TestUtils.newVector(DurationVector.class, "micro",
+ new ArrowType.Duration(TimeUnit.MICROSECOND), allocator)) {
+
+ microVector.allocateNew();
+ microVector.setNull(0);
+ microVector.setSafe(1, 1000);
+ microVector.setValueCount(2);
+ assertNull(microVector.getObject(0));
+ assertEquals(Duration.ofMillis(1), microVector.getObject(1));
+ assertNull(microVector.getAsStringBuilder(0));
+ assertEquals("PT0.001S", microVector.getAsStringBuilder(1).toString());
+ // Holder
+ NullableDurationHolder holder = new NullableDurationHolder();
+ microVector.get(0, holder);
+ assertEquals(0, holder.isSet);
+ microVector.get(1, holder);
+ assertEquals(1 , holder.isSet);
+ assertEquals(1000 , holder.value);
+ }
+ }
+
+ @Test
+ public void testNanosBasics() {
+ try (DurationVector nanoVector = TestUtils.newVector(DurationVector.class, "nanos",
+ new ArrowType.Duration(TimeUnit.NANOSECOND), allocator)) {
+
+ nanoVector.allocateNew();
+ nanoVector.setNull(0);
+ nanoVector.setSafe(1, 1000000);
+ nanoVector.setValueCount(2);
+ assertNull(nanoVector.getObject(0));
+ assertEquals(Duration.ofMillis(1), nanoVector.getObject(1));
+ assertNull(nanoVector.getAsStringBuilder(0));
+ assertEquals("PT0.001S", nanoVector.getAsStringBuilder(1).toString());
+ // Holder
+ NullableDurationHolder holder = new NullableDurationHolder();
+ nanoVector.get(0, holder);
+ assertEquals(0, holder.isSet);
+ nanoVector.get(1, holder);
+ assertEquals(1 , holder.isSet);
+ assertEquals(1000000 , holder.value);
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestFixedSizeBinaryVector.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestFixedSizeBinaryVector.java
new file mode 100644
index 000000000..363821e98
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestFixedSizeBinaryVector.java
@@ -0,0 +1,279 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.junit.Assert.*;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.holders.FixedSizeBinaryHolder;
+import org.apache.arrow.vector.holders.NullableFixedSizeBinaryHolder;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestFixedSizeBinaryVector {
+ private static final int numValues = 123;
+ private static final int typeWidth = 9;
+ private static final int smallDataSize = 6;
+ private static final int largeDataSize = 12;
+
+ private static byte[][] values;
+
+ static {
+ values = new byte[numValues][typeWidth];
+ for (int i = 0; i < numValues; i++) {
+ for (int j = 0; j < typeWidth; j++) {
+ values[i][j] = ((byte) i);
+ }
+ }
+ }
+
+ private ArrowBuf[] bufs = new ArrowBuf[numValues];
+ private FixedSizeBinaryHolder[] holders = new FixedSizeBinaryHolder[numValues];
+ private NullableFixedSizeBinaryHolder[] nullableHolders = new NullableFixedSizeBinaryHolder[numValues];
+
+ private static byte[] smallValue;
+
+ static {
+ smallValue = new byte[smallDataSize];
+ for (int i = 0; i < smallDataSize; i++) {
+ smallValue[i] = ((byte) i);
+ }
+ }
+
+ private ArrowBuf smallBuf;
+ private FixedSizeBinaryHolder smallHolder;
+ private NullableFixedSizeBinaryHolder smallNullableHolder;
+
+ private static byte[] largeValue;
+
+ static {
+ largeValue = new byte[largeDataSize];
+ for (int i = 0; i < largeDataSize; i++) {
+ largeValue[i] = ((byte) i);
+ }
+ }
+
+ private ArrowBuf largeBuf;
+ private FixedSizeBinaryHolder largeHolder;
+ private NullableFixedSizeBinaryHolder largeNullableHolder;
+
+ private BufferAllocator allocator;
+ private FixedSizeBinaryVector vector;
+
+ private static void failWithException(String message) throws Exception {
+ throw new Exception(message);
+ }
+
+
+ @Before
+ public void init() throws Exception {
+ allocator = new DirtyRootAllocator(Integer.MAX_VALUE, (byte) 100);
+ vector = new FixedSizeBinaryVector("fixedSizeBinary", allocator, typeWidth);
+ vector.allocateNew();
+
+ for (int i = 0; i < numValues; i++) {
+ bufs[i] = allocator.buffer(typeWidth);
+ bufs[i].setBytes(0, values[i]);
+
+ holders[i] = new FixedSizeBinaryHolder();
+ holders[i].byteWidth = typeWidth;
+ holders[i].buffer = bufs[i];
+
+ nullableHolders[i] = new NullableFixedSizeBinaryHolder();
+ nullableHolders[i].byteWidth = typeWidth;
+ nullableHolders[i].buffer = bufs[i];
+ nullableHolders[i].isSet = 1;
+ }
+
+ smallBuf = allocator.buffer(smallDataSize);
+ smallBuf.setBytes(0, smallValue);
+
+ smallHolder = new FixedSizeBinaryHolder();
+ smallHolder.byteWidth = smallDataSize;
+ smallHolder.buffer = smallBuf;
+
+ smallNullableHolder = new NullableFixedSizeBinaryHolder();
+ smallNullableHolder.byteWidth = smallDataSize;
+ smallNullableHolder.buffer = smallBuf;
+
+ largeBuf = allocator.buffer(largeDataSize);
+ largeBuf.setBytes(0, largeValue);
+
+ largeHolder = new FixedSizeBinaryHolder();
+ largeHolder.byteWidth = typeWidth;
+ largeHolder.buffer = largeBuf;
+
+ largeNullableHolder = new NullableFixedSizeBinaryHolder();
+ largeNullableHolder.byteWidth = typeWidth;
+ largeNullableHolder.buffer = largeBuf;
+ }
+
+ @After
+ public void terminate() throws Exception {
+ for (int i = 0; i < numValues; i++) {
+ bufs[i].close();
+ }
+ smallBuf.close();
+ largeBuf.close();
+
+ vector.close();
+ allocator.close();
+ }
+
+ @Test
+ public void testSetUsingByteArray() {
+ for (int i = 0; i < numValues; i++) {
+ vector.set(i, values[i]);
+ }
+ vector.setValueCount(numValues);
+ for (int i = 0; i < numValues; i++) {
+ assertArrayEquals(values[i], vector.getObject(i));
+ }
+ }
+
+ @Test
+ public void testSetUsingNull() {
+ final byte[] value = null;
+ for (int i = 0; i < numValues; i++) {
+ final int index = i;
+ Exception e = assertThrows(NullPointerException.class, () -> {
+ vector.set(index, value);
+ });
+ assertEquals("expecting a valid byte array", e.getMessage());
+ }
+ }
+
+ @Test
+ public void testSetUsingHolder() {
+ for (int i = 0; i < numValues; i++) {
+ vector.set(i, holders[i]);
+ }
+ vector.setValueCount(numValues);
+ for (int i = 0; i < numValues; i++) {
+ assertArrayEquals(values[i], vector.getObject(i));
+ }
+ }
+
+ @Test
+ public void testSetUsingNullableHolder() {
+ for (int i = 0; i < numValues; i++) {
+ vector.set(i, nullableHolders[i]);
+ }
+ vector.setValueCount(numValues);
+ for (int i = 0; i < numValues; i++) {
+ assertArrayEquals(values[i], vector.getObject(i));
+ }
+ }
+
+ @Test
+ public void testGetUsingNullableHolder() {
+ for (int i = 0; i < numValues; i++) {
+ vector.set(i, holders[i]);
+ }
+ vector.setValueCount(numValues);
+ for (int i = 0; i < numValues; i++) {
+ vector.get(i, nullableHolders[i]);
+ assertEquals(typeWidth, nullableHolders[i].byteWidth);
+ assertTrue(nullableHolders[i].isSet > 0);
+ byte[] actual = new byte[typeWidth];
+ nullableHolders[i].buffer.getBytes(0, actual, 0, typeWidth);
+ assertArrayEquals(values[i], actual);
+ }
+ }
+
+ @Test
+ public void testSetWithInvalidInput() throws Exception {
+ String errorMsg = "input data needs to be at least " + typeWidth + " bytes";
+
+ // test small inputs, byteWidth matches but value or buffer is too small
+ try {
+ vector.set(0, smallValue);
+ failWithException(errorMsg);
+ } catch (AssertionError ignore) {
+ }
+
+ try {
+ vector.set(0, smallHolder);
+ failWithException(errorMsg);
+ } catch (AssertionError ignore) {
+ }
+
+ try {
+ vector.set(0, smallNullableHolder);
+ failWithException(errorMsg);
+ } catch (AssertionError ignore) {
+ }
+
+ try {
+ vector.set(0, smallBuf);
+ failWithException(errorMsg);
+ } catch (AssertionError ignore) {
+ }
+
+ // test large inputs, byteWidth matches but value or buffer is bigger than byteWidth
+ vector.set(0, largeValue);
+ vector.set(0, largeHolder);
+ vector.set(0, largeNullableHolder);
+ vector.set(0, largeBuf);
+ }
+
+ @Test
+ public void setSetSafeWithInvalidInput() throws Exception {
+ String errorMsg = "input data needs to be at least " + typeWidth + " bytes";
+
+ // test small inputs, byteWidth matches but value or buffer is too small
+ try {
+ vector.setSafe(0, smallValue);
+ failWithException(errorMsg);
+ } catch (AssertionError ignore) {
+ }
+
+ try {
+ vector.setSafe(0, smallHolder);
+ failWithException(errorMsg);
+ } catch (AssertionError ignore) {
+ }
+
+ try {
+ vector.setSafe(0, smallNullableHolder);
+ failWithException(errorMsg);
+ } catch (AssertionError ignore) {
+ }
+
+ try {
+ vector.setSafe(0, smallBuf);
+ failWithException(errorMsg);
+ } catch (AssertionError ignore) {
+ }
+
+ // test large inputs, byteWidth matches but value or buffer is bigger than byteWidth
+ vector.setSafe(0, largeValue);
+ vector.setSafe(0, largeHolder);
+ vector.setSafe(0, largeNullableHolder);
+ vector.setSafe(0, largeBuf);
+ }
+
+ @Test
+ public void testGetNull() {
+ vector.setNull(0);
+ assertNull(vector.get(0));
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestFixedSizeListVector.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestFixedSizeListVector.java
new file mode 100644
index 000000000..9d7e413a7
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestFixedSizeListVector.java
@@ -0,0 +1,507 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertNull;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+
+import java.math.BigDecimal;
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.complex.FixedSizeListVector;
+import org.apache.arrow.vector.complex.ListVector;
+import org.apache.arrow.vector.complex.impl.UnionFixedSizeListReader;
+import org.apache.arrow.vector.complex.impl.UnionFixedSizeListWriter;
+import org.apache.arrow.vector.complex.impl.UnionListReader;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.TransferPair;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestFixedSizeListVector {
+
+ private BufferAllocator allocator;
+
+ @Before
+ public void init() {
+ allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100);
+ }
+
+ @After
+ public void terminate() throws Exception {
+ allocator.close();
+ }
+
+ @Test
+ public void testIntType() {
+ try (FixedSizeListVector vector = FixedSizeListVector.empty("list", 2, allocator)) {
+ IntVector nested = (IntVector) vector.addOrGetVector(FieldType.nullable(MinorType.INT.getType())).getVector();
+ vector.allocateNew();
+
+ for (int i = 0; i < 10; i++) {
+ vector.setNotNull(i);
+ nested.set(i * 2, i);
+ nested.set(i * 2 + 1, i + 10);
+ }
+ vector.setValueCount(10);
+
+ UnionFixedSizeListReader reader = vector.getReader();
+ for (int i = 0; i < 10; i++) {
+ reader.setPosition(i);
+ Assert.assertTrue(reader.isSet());
+ Assert.assertTrue(reader.next());
+ assertEquals(i, reader.reader().readInteger().intValue());
+ Assert.assertTrue(reader.next());
+ assertEquals(i + 10, reader.reader().readInteger().intValue());
+ Assert.assertFalse(reader.next());
+ assertEquals(Arrays.asList(i, i + 10), reader.readObject());
+ }
+ }
+ }
+
+ @Test
+ public void testFloatTypeNullable() {
+ try (FixedSizeListVector vector = FixedSizeListVector.empty("list", 2, allocator)) {
+ Float4Vector nested = (Float4Vector) vector.addOrGetVector(FieldType.nullable(MinorType.FLOAT4.getType()))
+ .getVector();
+ vector.allocateNew();
+
+ for (int i = 0; i < 10; i++) {
+ if (i % 2 == 0) {
+ vector.setNotNull(i);
+ nested.set(i * 2, i + 0.1f);
+ nested.set(i * 2 + 1, i + 10.1f);
+ }
+ }
+ vector.setValueCount(10);
+
+ UnionFixedSizeListReader reader = vector.getReader();
+ for (int i = 0; i < 10; i++) {
+ reader.setPosition(i);
+ if (i % 2 == 0) {
+ Assert.assertTrue(reader.isSet());
+ Assert.assertTrue(reader.next());
+ assertEquals(i + 0.1f, reader.reader().readFloat(), 0.00001);
+ Assert.assertTrue(reader.next());
+ assertEquals(i + 10.1f, reader.reader().readFloat(), 0.00001);
+ Assert.assertFalse(reader.next());
+ assertEquals(Arrays.asList(i + 0.1f, i + 10.1f), reader.readObject());
+ } else {
+ Assert.assertFalse(reader.isSet());
+ Assert.assertNull(reader.readObject());
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testNestedInList() {
+ try (ListVector vector = ListVector.empty("list", allocator)) {
+ FixedSizeListVector tuples = (FixedSizeListVector) vector.addOrGetVector(
+ FieldType.nullable(new ArrowType.FixedSizeList(2))).getVector();
+ IntVector innerVector = (IntVector) tuples.addOrGetVector(FieldType.nullable(MinorType.INT.getType()))
+ .getVector();
+ vector.allocateNew();
+
+ for (int i = 0; i < 10; i++) {
+ if (i % 2 == 0) {
+ int position = vector.startNewValue(i);
+ for (int j = 0; j < i % 7; j++) {
+ tuples.setNotNull(position + j);
+ innerVector.set((position + j) * 2, j);
+ innerVector.set((position + j) * 2 + 1, j + 1);
+ }
+ vector.endValue(i, i % 7);
+ }
+ }
+ vector.setValueCount(10);
+
+ UnionListReader reader = vector.getReader();
+ for (int i = 0; i < 10; i++) {
+ reader.setPosition(i);
+ if (i % 2 == 0) {
+ for (int j = 0; j < i % 7; j++) {
+ Assert.assertTrue(reader.next());
+ FieldReader innerListReader = reader.reader();
+ for (int k = 0; k < 2; k++) {
+ Assert.assertTrue(innerListReader.next());
+ assertEquals(k + j, innerListReader.reader().readInteger().intValue());
+ }
+ Assert.assertFalse(innerListReader.next());
+ }
+ Assert.assertFalse(reader.next());
+ } else {
+ Assert.assertFalse(reader.isSet());
+ Assert.assertNull(reader.readObject());
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testTransferPair() {
+ try (FixedSizeListVector from = new FixedSizeListVector(
+ "from", allocator, new FieldType(true, new ArrowType.FixedSizeList(2), null), null);
+ FixedSizeListVector to = new FixedSizeListVector(
+ "to", allocator, new FieldType(true, new ArrowType.FixedSizeList(2), null), null)) {
+ Float4Vector nested = (Float4Vector) from.addOrGetVector(FieldType.nullable(MinorType.FLOAT4.getType()))
+ .getVector();
+ from.allocateNew();
+
+ for (int i = 0; i < 10; i++) {
+ if (i % 2 == 0) {
+ from.setNotNull(i);
+ nested.set(i * 2, i + 0.1f);
+ nested.set(i * 2 + 1, i + 10.1f);
+ }
+ }
+ from.setValueCount(10);
+
+ TransferPair pair = from.makeTransferPair(to);
+
+ pair.copyValueSafe(0, 1);
+ pair.copyValueSafe(2, 2);
+ to.copyFromSafe(4, 3, from);
+
+ to.setValueCount(10);
+
+ UnionFixedSizeListReader reader = to.getReader();
+
+ reader.setPosition(0);
+ Assert.assertFalse(reader.isSet());
+ Assert.assertNull(reader.readObject());
+
+ reader.setPosition(1);
+ Assert.assertTrue(reader.isSet());
+ Assert.assertTrue(reader.next());
+ assertEquals(0.1f, reader.reader().readFloat(), 0.00001);
+ Assert.assertTrue(reader.next());
+ assertEquals(10.1f, reader.reader().readFloat(), 0.00001);
+ Assert.assertFalse(reader.next());
+ assertEquals(Arrays.asList(0.1f, 10.1f), reader.readObject());
+
+ reader.setPosition(2);
+ Assert.assertTrue(reader.isSet());
+ Assert.assertTrue(reader.next());
+ assertEquals(2.1f, reader.reader().readFloat(), 0.00001);
+ Assert.assertTrue(reader.next());
+ assertEquals(12.1f, reader.reader().readFloat(), 0.00001);
+ Assert.assertFalse(reader.next());
+ assertEquals(Arrays.asList(2.1f, 12.1f), reader.readObject());
+
+ reader.setPosition(3);
+ Assert.assertTrue(reader.isSet());
+ Assert.assertTrue(reader.next());
+ assertEquals(4.1f, reader.reader().readFloat(), 0.00001);
+ Assert.assertTrue(reader.next());
+ assertEquals(14.1f, reader.reader().readFloat(), 0.00001);
+ Assert.assertFalse(reader.next());
+ assertEquals(Arrays.asList(4.1f, 14.1f), reader.readObject());
+
+ for (int i = 4; i < 10; i++) {
+ reader.setPosition(i);
+ Assert.assertFalse(reader.isSet());
+ Assert.assertNull(reader.readObject());
+ }
+ }
+ }
+
+ @Test
+ public void testConsistentChildName() throws Exception {
+ try (FixedSizeListVector listVector = FixedSizeListVector.empty("sourceVector", 2, allocator)) {
+ String emptyListStr = listVector.getField().toString();
+ Assert.assertTrue(emptyListStr.contains(ListVector.DATA_VECTOR_NAME));
+
+ listVector.addOrGetVector(FieldType.nullable(MinorType.INT.getType()));
+ String emptyVectorStr = listVector.getField().toString();
+ Assert.assertTrue(emptyVectorStr.contains(ListVector.DATA_VECTOR_NAME));
+ }
+ }
+
+ @Test
+ public void testUnionFixedSizeListWriterWithNulls() throws Exception {
+ /* Write to a decimal list vector
+ * each list of size 3 and having its data values alternating between null and a non-null.
+ * Read and verify
+ */
+ try (final FixedSizeListVector vector = FixedSizeListVector.empty("vector", /*listSize=*/3, allocator)) {
+
+ UnionFixedSizeListWriter writer = vector.getWriter();
+ writer.allocate();
+
+ final int valueCount = 100;
+
+ for (int i = 0; i < valueCount; i++) {
+ writer.startList();
+ writer.decimal().writeDecimal(new BigDecimal(i));
+ writer.writeNull();
+ writer.decimal().writeDecimal(new BigDecimal(i * 3));
+ writer.endList();
+ }
+ vector.setValueCount(valueCount);
+
+ for (int i = 0; i < valueCount; i++) {
+ List<BigDecimal> values = (List<BigDecimal>) vector.getObject(i);
+ assertEquals(3, values.size());
+ assertEquals(new BigDecimal(i), values.get(0));
+ assertEquals(null, values.get(1));
+ assertEquals(new BigDecimal(i * 3), values.get(2));
+ }
+ }
+ }
+
+ @Test
+ public void testUnionFixedSizeListWriter() throws Exception {
+ try (final FixedSizeListVector vector1 = FixedSizeListVector.empty("vector", 3, allocator)) {
+
+ UnionFixedSizeListWriter writer1 = vector1.getWriter();
+ writer1.allocate();
+
+ int[] values1 = new int[] {1, 2, 3};
+ int[] values2 = new int[] {4, 5, 6};
+ int[] values3 = new int[] {7, 8, 9};
+
+ //set some values
+ writeListVector(vector1, writer1, values1);
+ writeListVector(vector1, writer1, values2);
+ writeListVector(vector1, writer1, values3);
+ writer1.setValueCount(3);
+
+ assertEquals(3, vector1.getValueCount());
+
+ int[] realValue1 = convertListToIntArray(vector1.getObject(0));
+ assertTrue(Arrays.equals(values1, realValue1));
+ int[] realValue2 = convertListToIntArray(vector1.getObject(1));
+ assertTrue(Arrays.equals(values2, realValue2));
+ int[] realValue3 = convertListToIntArray(vector1.getObject(2));
+ assertTrue(Arrays.equals(values3, realValue3));
+ }
+ }
+
+ @Test
+ public void testWriteDecimal() throws Exception {
+ try (final FixedSizeListVector vector = FixedSizeListVector.empty("vector", /*listSize=*/3, allocator)) {
+
+ UnionFixedSizeListWriter writer = vector.getWriter();
+ writer.allocate();
+
+ final int valueCount = 100;
+
+ for (int i = 0; i < valueCount; i++) {
+ writer.startList();
+ writer.decimal().writeDecimal(new BigDecimal(i));
+ writer.decimal().writeDecimal(new BigDecimal(i * 2));
+ writer.decimal().writeDecimal(new BigDecimal(i * 3));
+ writer.endList();
+ }
+ vector.setValueCount(valueCount);
+
+ for (int i = 0; i < valueCount; i++) {
+ List<BigDecimal> values = (List<BigDecimal>) vector.getObject(i);
+ assertEquals(3, values.size());
+ assertEquals(new BigDecimal(i), values.get(0));
+ assertEquals(new BigDecimal(i * 2), values.get(1));
+ assertEquals(new BigDecimal(i * 3), values.get(2));
+ }
+ }
+ }
+
+ @Test
+ public void testDecimalIndexCheck() throws Exception {
+ try (final FixedSizeListVector vector = FixedSizeListVector.empty("vector", /*listSize=*/3, allocator)) {
+
+ UnionFixedSizeListWriter writer = vector.getWriter();
+ writer.allocate();
+
+ IllegalStateException e = assertThrows(IllegalStateException.class, () -> {
+ writer.startList();
+ writer.decimal().writeDecimal(new BigDecimal(1));
+ writer.decimal().writeDecimal(new BigDecimal(2));
+ writer.decimal().writeDecimal(new BigDecimal(3));
+ writer.decimal().writeDecimal(new BigDecimal(4));
+ writer.endList();
+ });
+ assertEquals("values at index 0 is greater than listSize 3", e.getMessage());
+ }
+ }
+
+
+ @Test(expected = IllegalStateException.class)
+ public void testWriteIllegalData() throws Exception {
+ try (final FixedSizeListVector vector1 = FixedSizeListVector.empty("vector", 3, allocator)) {
+
+ UnionFixedSizeListWriter writer1 = vector1.getWriter();
+ writer1.allocate();
+
+ int[] values1 = new int[] {1, 2, 3};
+ int[] values2 = new int[] {4, 5, 6, 7, 8};
+
+ //set some values
+ writeListVector(vector1, writer1, values1);
+ writeListVector(vector1, writer1, values2);
+ writer1.setValueCount(3);
+
+ assertEquals(3, vector1.getValueCount());
+ int[] realValue1 = convertListToIntArray(vector1.getObject(0));
+ assertTrue(Arrays.equals(values1, realValue1));
+ int[] realValue2 = convertListToIntArray(vector1.getObject(1));
+ assertTrue(Arrays.equals(values2, realValue2));
+ }
+ }
+
+ @Test
+ public void testSplitAndTransfer() throws Exception {
+ try (final FixedSizeListVector vector1 = FixedSizeListVector.empty("vector", 3, allocator)) {
+
+ UnionFixedSizeListWriter writer1 = vector1.getWriter();
+ writer1.allocate();
+
+ int[] values1 = new int[] {1, 2, 3};
+ int[] values2 = new int[] {4, 5, 6};
+ int[] values3 = new int[] {7, 8, 9};
+
+ //set some values
+ writeListVector(vector1, writer1, values1);
+ writeListVector(vector1, writer1, values2);
+ writeListVector(vector1, writer1, values3);
+ writer1.setValueCount(3);
+
+ TransferPair transferPair = vector1.getTransferPair(allocator);
+ transferPair.splitAndTransfer(0, 2);
+ FixedSizeListVector targetVector = (FixedSizeListVector) transferPair.getTo();
+
+ assertEquals(2, targetVector.getValueCount());
+ int[] realValue1 = convertListToIntArray(targetVector.getObject(0));
+ assertTrue(Arrays.equals(values1, realValue1));
+ int[] realValue2 = convertListToIntArray(targetVector.getObject(1));
+ assertTrue(Arrays.equals(values2, realValue2));
+
+ targetVector.clear();
+ }
+ }
+
+ @Test
+ public void testZeroWidthVector() {
+ try (final FixedSizeListVector vector1 = FixedSizeListVector.empty("vector", 0, allocator)) {
+
+ UnionFixedSizeListWriter writer1 = vector1.getWriter();
+ writer1.allocate();
+
+ int[] values1 = new int[] {};
+ int[] values2 = new int[] {};
+ int[] values3 = null;
+ int[] values4 = new int[] {};
+
+ //set some values
+ writeListVector(vector1, writer1, values1);
+ writeListVector(vector1, writer1, values2);
+ writeListVector(vector1, writer1, values3);
+ writeListVector(vector1, writer1, values4);
+ writer1.setValueCount(4);
+
+ assertEquals(4, vector1.getValueCount());
+
+ int[] realValue1 = convertListToIntArray(vector1.getObject(0));
+ assertArrayEquals(values1, realValue1);
+ int[] realValue2 = convertListToIntArray(vector1.getObject(1));
+ assertArrayEquals(values2, realValue2);
+ assertNull(vector1.getObject(2));
+ int[] realValue4 = convertListToIntArray(vector1.getObject(3));
+ assertArrayEquals(values4, realValue4);
+ }
+ }
+
+ @Test
+ public void testVectorWithNulls() {
+ try (final FixedSizeListVector vector1 = FixedSizeListVector.empty("vector", 4, allocator)) {
+
+ UnionFixedSizeListWriter writer1 = vector1.getWriter();
+ writer1.allocate();
+
+ List<Integer> values1 = Arrays.asList(null, 1, 2, 3);
+ List<Integer> values2 = Arrays.asList(4, null, 5, 6);
+ List<Integer> values3 = null;
+ List<Integer> values4 = Arrays.asList(7, 8, null, 9);
+
+ //set some values
+ writeListVector(vector1, writer1, values1);
+ writeListVector(vector1, writer1, values2);
+ writeListVector(vector1, writer1, values3);
+ writeListVector(vector1, writer1, values4);
+ writer1.setValueCount(4);
+
+ assertEquals(4, vector1.getValueCount());
+
+ List realValue1 = vector1.getObject(0);
+ assertEquals(values1, realValue1);
+ List realValue2 = vector1.getObject(1);
+ assertEquals(values2, realValue2);
+ List realValue3 = vector1.getObject(2);
+ assertEquals(values3, realValue3);
+ List realValue4 = vector1.getObject(3);
+ assertEquals(values4, realValue4);
+ }
+ }
+
+ private int[] convertListToIntArray(List list) {
+ int[] values = new int[list.size()];
+ for (int i = 0; i < list.size(); i++) {
+ values[i] = (int) list.get(i);
+ }
+ return values;
+ }
+
+ private void writeListVector(FixedSizeListVector vector, UnionFixedSizeListWriter writer, int[] values) {
+ writer.startList();
+ if (values != null) {
+ for (int v : values) {
+ writer.integer().writeInt(v);
+ }
+ } else {
+ vector.setNull(writer.getPosition());
+ }
+ writer.endList();
+ }
+
+ private void writeListVector(FixedSizeListVector vector, UnionFixedSizeListWriter writer, List<Integer> values) {
+ writer.startList();
+ if (values != null) {
+ for (Integer v : values) {
+ if (v == null) {
+ writer.writeNull();
+ } else {
+ writer.integer().writeInt(v);
+ }
+ }
+ } else {
+ vector.setNull(writer.getPosition());
+ }
+ writer.endList();
+ }
+
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestIntervalMonthDayNanoVector.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestIntervalMonthDayNanoVector.java
new file mode 100644
index 000000000..93d6fab70
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestIntervalMonthDayNanoVector.java
@@ -0,0 +1,99 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.junit.Assert.assertEquals;
+
+
+import java.time.Duration;
+import java.time.Period;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.holders.IntervalMonthDayNanoHolder;
+import org.apache.arrow.vector.holders.NullableIntervalMonthDayNanoHolder;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestIntervalMonthDayNanoVector {
+
+ private BufferAllocator allocator;
+
+ @Before
+ public void init() {
+ allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100);
+ }
+
+ @After
+ public void terminate() throws Exception {
+ allocator.close();
+ }
+
+ @Test
+ public void testBasics() {
+ try (final IntervalMonthDayNanoVector vector = new IntervalMonthDayNanoVector(/*name=*/"", allocator)) {
+ int valueCount = 100;
+ vector.setInitialCapacity(valueCount);
+ vector.allocateNew();
+ NullableIntervalMonthDayNanoHolder nullableHolder = new NullableIntervalMonthDayNanoHolder();
+ nullableHolder.isSet = 1;
+ nullableHolder.months = 2;
+ nullableHolder.days = 20;
+ nullableHolder.nanoseconds = 123;
+ IntervalMonthDayNanoHolder holder = new IntervalMonthDayNanoHolder();
+ holder.months = Integer.MIN_VALUE;
+ holder.days = Integer.MIN_VALUE;
+ holder.nanoseconds = Long.MIN_VALUE;
+
+
+ vector.set(0, /*months=*/1, /*days=*/2, /*nanoseconds=*/-2);
+ vector.setSafe(2, /*months=*/1, /*days=*/2, /*nanoseconds=*/-3);
+ vector.setSafe(/*index=*/4, nullableHolder);
+ vector.set(3, holder);
+ nullableHolder.isSet = 0;
+ vector.setSafe(/*index=*/5, nullableHolder);
+ vector.setValueCount(5);
+
+ assertEquals("P1M2D PT-0.000000002S ", vector.getAsStringBuilder(0).toString());
+ assertEquals(null, vector.getAsStringBuilder(1));
+ assertEquals("P1M2D PT-0.000000003S ", vector.getAsStringBuilder(2).toString());
+ assertEquals(new PeriodDuration(Period.of(0, Integer.MIN_VALUE, Integer.MIN_VALUE),
+ Duration.ofNanos(Long.MIN_VALUE)), vector.getObject(3));
+ assertEquals("P2M20D PT0.000000123S ", vector.getAsStringBuilder(4).toString());
+
+ assertEquals(null, vector.getObject(5));
+
+ vector.get(1, nullableHolder);
+ assertEquals(0, nullableHolder.isSet);
+
+ vector.get(2, nullableHolder);
+ assertEquals(1, nullableHolder.isSet);
+ assertEquals(1, nullableHolder.months);
+ assertEquals(2, nullableHolder.days);
+ assertEquals(-3, nullableHolder.nanoseconds);
+
+ IntervalMonthDayNanoVector.getDays(vector.valueBuffer, 2);
+ assertEquals(1, IntervalMonthDayNanoVector.getMonths(vector.valueBuffer, 2));
+ assertEquals(2, IntervalMonthDayNanoVector.getDays(vector.valueBuffer, 2));
+ assertEquals(-3, IntervalMonthDayNanoVector.getNanoseconds(vector.valueBuffer, 2));
+
+ assertEquals(0, vector.isSet(1));
+ assertEquals(1, vector.isSet(2));
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestIntervalYearVector.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestIntervalYearVector.java
new file mode 100644
index 000000000..5ea48b485
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestIntervalYearVector.java
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.junit.Assert.assertEquals;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestIntervalYearVector {
+
+ private BufferAllocator allocator;
+
+ @Before
+ public void init() {
+ allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100);
+ }
+
+ @After
+ public void terminate() throws Exception {
+ allocator.close();
+ }
+
+ @Test
+ public void testGetAsStringBuilder() {
+ try (final IntervalYearVector vector = new IntervalYearVector("", allocator)) {
+ int valueCount = 100;
+ vector.setInitialCapacity(valueCount);
+ vector.allocateNew();
+ for (int i = 0; i < valueCount; i++) {
+ vector.set(i, i);
+ }
+
+ assertEquals("0 years 1 month ", vector.getAsStringBuilder(1).toString());
+ assertEquals("0 years 10 months ", vector.getAsStringBuilder(10).toString());
+ assertEquals("1 year 8 months ", vector.getAsStringBuilder(20).toString());
+ assertEquals("2 years 6 months ", vector.getAsStringBuilder(30).toString());
+
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestLargeListVector.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestLargeListVector.java
new file mode 100644
index 000000000..c1d60da4d
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestLargeListVector.java
@@ -0,0 +1,982 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.complex.BaseRepeatedValueVector;
+import org.apache.arrow.vector.complex.LargeListVector;
+import org.apache.arrow.vector.complex.ListVector;
+import org.apache.arrow.vector.complex.impl.UnionLargeListWriter;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.TransferPair;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestLargeListVector {
+
+ private BufferAllocator allocator;
+
+ @Before
+ public void init() {
+ allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100);
+ }
+
+ @After
+ public void terminate() throws Exception {
+ allocator.close();
+ }
+
+ @Test
+ public void testCopyFrom() throws Exception {
+ try (LargeListVector inVector = LargeListVector.empty("input", allocator);
+ LargeListVector outVector = LargeListVector.empty("output", allocator)) {
+ UnionLargeListWriter writer = inVector.getWriter();
+ writer.allocate();
+
+ // populate input vector with the following records
+ // [1, 2, 3]
+ // null
+ // []
+ writer.setPosition(0); // optional
+ writer.startList();
+ writer.bigInt().writeBigInt(1);
+ writer.bigInt().writeBigInt(2);
+ writer.bigInt().writeBigInt(3);
+ writer.endList();
+
+ writer.setPosition(2);
+ writer.startList();
+ writer.endList();
+
+ writer.setValueCount(3);
+
+ // copy values from input to output
+ outVector.allocateNew();
+ for (int i = 0; i < 3; i++) {
+ outVector.copyFrom(i, i, inVector);
+ }
+ outVector.setValueCount(3);
+
+ // assert the output vector is correct
+ FieldReader reader = outVector.getReader();
+ Assert.assertTrue("shouldn't be null", reader.isSet());
+ reader.setPosition(1);
+ Assert.assertFalse("should be null", reader.isSet());
+ reader.setPosition(2);
+ Assert.assertTrue("shouldn't be null", reader.isSet());
+
+
+ /* index 0 */
+ Object result = outVector.getObject(0);
+ ArrayList<Long> resultSet = (ArrayList<Long>) result;
+ assertEquals(3, resultSet.size());
+ assertEquals(new Long(1), resultSet.get(0));
+ assertEquals(new Long(2), resultSet.get(1));
+ assertEquals(new Long(3), resultSet.get(2));
+
+ /* index 1 */
+ result = outVector.getObject(1);
+ assertNull(result);
+
+ /* index 2 */
+ result = outVector.getObject(2);
+ resultSet = (ArrayList<Long>) result;
+ assertEquals(0, resultSet.size());
+
+ /* 3+0+0/3 */
+ assertEquals(1.0D, inVector.getDensity(), 0);
+ }
+ }
+
+ @Test
+ public void testSetLastSetUsage() throws Exception {
+ try (LargeListVector listVector = LargeListVector.empty("input", allocator)) {
+
+ /* Explicitly add the dataVector */
+ MinorType type = MinorType.BIGINT;
+ listVector.addOrGetVector(FieldType.nullable(type.getType()));
+
+ /* allocate memory */
+ listVector.allocateNew();
+
+ /* get inner buffers; validityBuffer and offsetBuffer */
+
+ ArrowBuf validityBuffer = listVector.getValidityBuffer();
+ ArrowBuf offsetBuffer = listVector.getOffsetBuffer();
+
+ /* get the underlying data vector -- BigIntVector */
+ BigIntVector dataVector = (BigIntVector) listVector.getDataVector();
+
+ /* check current lastSet */
+ assertEquals(-1L, listVector.getLastSet());
+
+ int index = 0;
+ int offset = 0;
+
+ /* write [10, 11, 12] to the list vector at index 0 */
+ BitVectorHelper.setBit(validityBuffer, index);
+ dataVector.setSafe(0, 1, 10);
+ dataVector.setSafe(1, 1, 11);
+ dataVector.setSafe(2, 1, 12);
+ offsetBuffer.setLong((index + 1) * LargeListVector.OFFSET_WIDTH, 3);
+
+ index += 1;
+
+ /* write [13, 14] to the list vector at index 1 */
+ BitVectorHelper.setBit(validityBuffer, index);
+ dataVector.setSafe(3, 1, 13);
+ dataVector.setSafe(4, 1, 14);
+ offsetBuffer.setLong((index + 1) * LargeListVector.OFFSET_WIDTH, 5);
+
+ index += 1;
+
+ /* write [15, 16, 17] to the list vector at index 2 */
+ BitVectorHelper.setBit(validityBuffer, index);
+ dataVector.setSafe(5, 1, 15);
+ dataVector.setSafe(6, 1, 16);
+ dataVector.setSafe(7, 1, 17);
+ offsetBuffer.setLong((index + 1) * LargeListVector.OFFSET_WIDTH, 8);
+
+ /* check current lastSet */
+ assertEquals(-1L, listVector.getLastSet());
+
+ /* set lastset and arbitrary valuecount for list vector.
+ *
+ * NOTE: if we don't execute setLastSet() before setLastValueCount(), then
+ * the latter will corrupt the offsetBuffer and thus the accessor will not
+ * retrieve the correct values from underlying dataBuffer. Run the test
+ * by commenting out next line and we should see failures from 5th assert
+ * onwards. This is why doing setLastSet() is important before setValueCount()
+ * once the vector has been loaded.
+ *
+ * Another important thing to remember is the value of lastSet itself.
+ * Even though the listVector has elements till index 2 only, the lastSet should
+ * be set as 3. This is because the offsetBuffer has valid offsets filled till index 3.
+ * If we do setLastSet(2), the offsetBuffer at index 3 will contain incorrect value
+ * after execution of setValueCount().
+ *
+ * correct state of the listVector
+ * bitvector {1, 1, 1, 0, 0.... }
+ * offsetvector {0, 3, 5, 8, 8, 8.....}
+ * datavector { [10, 11, 12],
+ * [13, 14],
+ * [15, 16, 17]
+ * }
+ *
+ * if we don't do setLastSet() before setValueCount --> incorrect state
+ * bitvector {1, 1, 1, 0, 0.... }
+ * offsetvector {0, 0, 0, 0, 0, 0.....}
+ * datavector { [10, 11, 12],
+ * [13, 14],
+ * [15, 16, 17]
+ * }
+ *
+ * if we do setLastSet(2) before setValueCount --> incorrect state
+ * bitvector {1, 1, 1, 0, 0.... }
+ * offsetvector {0, 3, 5, 5, 5, 5.....}
+ * datavector { [10, 11, 12],
+ * [13, 14],
+ * [15, 16, 17]
+ * }
+ */
+ listVector.setLastSet(2);
+ listVector.setValueCount(10);
+
+ /* (3+2+3)/10 */
+ assertEquals(0.8D, listVector.getDensity(), 0);
+
+ index = 0;
+ offset = (int) offsetBuffer.getLong(index * LargeListVector.OFFSET_WIDTH);
+ assertEquals(Integer.toString(0), Integer.toString(offset));
+
+ Long actual = dataVector.getObject(offset);
+ assertEquals(new Long(10), actual);
+ offset++;
+ actual = dataVector.getObject(offset);
+ assertEquals(new Long(11), actual);
+ offset++;
+ actual = dataVector.getObject(offset);
+ assertEquals(new Long(12), actual);
+
+ index++;
+ offset = (int) offsetBuffer.getLong(index * LargeListVector.OFFSET_WIDTH);
+ assertEquals(Integer.toString(3), Integer.toString(offset));
+
+ actual = dataVector.getObject(offset);
+ assertEquals(new Long(13), actual);
+ offset++;
+ actual = dataVector.getObject(offset);
+ assertEquals(new Long(14), actual);
+
+ index++;
+ offset = (int) offsetBuffer.getLong(index * LargeListVector.OFFSET_WIDTH);
+ assertEquals(Integer.toString(5), Integer.toString(offset));
+
+ actual = dataVector.getObject(offset);
+ assertEquals(new Long(15), actual);
+ offset++;
+ actual = dataVector.getObject(offset);
+ assertEquals(new Long(16), actual);
+ offset++;
+ actual = dataVector.getObject(offset);
+ assertEquals(new Long(17), actual);
+
+ index++;
+ offset = (int) offsetBuffer.getLong(index * LargeListVector.OFFSET_WIDTH);
+ assertEquals(Integer.toString(8), Integer.toString(offset));
+
+ actual = dataVector.getObject(offset);
+ assertNull(actual);
+ }
+ }
+
+ @Test
+ public void testSplitAndTransfer() throws Exception {
+ try (LargeListVector listVector = LargeListVector.empty("sourceVector", allocator)) {
+
+ /* Explicitly add the dataVector */
+ MinorType type = MinorType.BIGINT;
+ listVector.addOrGetVector(FieldType.nullable(type.getType()));
+
+ UnionLargeListWriter listWriter = listVector.getWriter();
+
+ /* allocate memory */
+ listWriter.allocate();
+
+ /* populate data */
+ listWriter.setPosition(0);
+ listWriter.startList();
+ listWriter.bigInt().writeBigInt(10);
+ listWriter.bigInt().writeBigInt(11);
+ listWriter.bigInt().writeBigInt(12);
+ listWriter.endList();
+
+ listWriter.setPosition(1);
+ listWriter.startList();
+ listWriter.bigInt().writeBigInt(13);
+ listWriter.bigInt().writeBigInt(14);
+ listWriter.endList();
+
+ listWriter.setPosition(2);
+ listWriter.startList();
+ listWriter.bigInt().writeBigInt(15);
+ listWriter.bigInt().writeBigInt(16);
+ listWriter.bigInt().writeBigInt(17);
+ listWriter.bigInt().writeBigInt(18);
+ listWriter.endList();
+
+ listWriter.setPosition(3);
+ listWriter.startList();
+ listWriter.bigInt().writeBigInt(19);
+ listWriter.endList();
+
+ listWriter.setPosition(4);
+ listWriter.startList();
+ listWriter.bigInt().writeBigInt(20);
+ listWriter.bigInt().writeBigInt(21);
+ listWriter.bigInt().writeBigInt(22);
+ listWriter.bigInt().writeBigInt(23);
+ listWriter.endList();
+
+ listVector.setValueCount(5);
+
+ assertEquals(4, listVector.getLastSet());
+
+ /* get offset buffer */
+ final ArrowBuf offsetBuffer = listVector.getOffsetBuffer();
+
+ /* get dataVector */
+ BigIntVector dataVector = (BigIntVector) listVector.getDataVector();
+
+ /* check the vector output */
+
+ int index = 0;
+ int offset = 0;
+ Long actual = null;
+
+ /* index 0 */
+ assertFalse(listVector.isNull(index));
+ offset = (int) offsetBuffer.getLong(index * LargeListVector.OFFSET_WIDTH);
+ assertEquals(Integer.toString(0), Integer.toString(offset));
+
+ actual = dataVector.getObject(offset);
+ assertEquals(new Long(10), actual);
+ offset++;
+ actual = dataVector.getObject(offset);
+ assertEquals(new Long(11), actual);
+ offset++;
+ actual = dataVector.getObject(offset);
+ assertEquals(new Long(12), actual);
+
+ /* index 1 */
+ index++;
+ assertFalse(listVector.isNull(index));
+ offset = (int) offsetBuffer.getLong(index * LargeListVector.OFFSET_WIDTH);
+ assertEquals(Integer.toString(3), Integer.toString(offset));
+
+ actual = dataVector.getObject(offset);
+ assertEquals(new Long(13), actual);
+ offset++;
+ actual = dataVector.getObject(offset);
+ assertEquals(new Long(14), actual);
+
+ /* index 2 */
+ index++;
+ assertFalse(listVector.isNull(index));
+ offset = (int) offsetBuffer.getLong(index * LargeListVector.OFFSET_WIDTH);
+ assertEquals(Integer.toString(5), Integer.toString(offset));
+
+ actual = dataVector.getObject(offset);
+ assertEquals(new Long(15), actual);
+ offset++;
+ actual = dataVector.getObject(offset);
+ assertEquals(new Long(16), actual);
+ offset++;
+ actual = dataVector.getObject(offset);
+ assertEquals(new Long(17), actual);
+ offset++;
+ actual = dataVector.getObject(offset);
+ assertEquals(new Long(18), actual);
+
+ /* index 3 */
+ index++;
+ assertFalse(listVector.isNull(index));
+ offset = (int) offsetBuffer.getLong(index * LargeListVector.OFFSET_WIDTH);
+ assertEquals(Integer.toString(9), Integer.toString(offset));
+
+ actual = dataVector.getObject(offset);
+ assertEquals(new Long(19), actual);
+
+ /* index 4 */
+ index++;
+ assertFalse(listVector.isNull(index));
+ offset = (int) offsetBuffer.getLong(index * LargeListVector.OFFSET_WIDTH);
+ assertEquals(Integer.toString(10), Integer.toString(offset));
+
+ actual = dataVector.getObject(offset);
+ assertEquals(new Long(20), actual);
+ offset++;
+ actual = dataVector.getObject(offset);
+ assertEquals(new Long(21), actual);
+ offset++;
+ actual = dataVector.getObject(offset);
+ assertEquals(new Long(22), actual);
+ offset++;
+ actual = dataVector.getObject(offset);
+ assertEquals(new Long(23), actual);
+
+ /* index 5 */
+ index++;
+ assertTrue(listVector.isNull(index));
+ offset = (int) offsetBuffer.getLong(index * LargeListVector.OFFSET_WIDTH);
+ assertEquals(Integer.toString(14), Integer.toString(offset));
+
+ /* do split and transfer */
+ try (LargeListVector toVector = LargeListVector.empty("toVector", allocator)) {
+
+ TransferPair transferPair = listVector.makeTransferPair(toVector);
+
+ int[][] transferLengths = {{0, 2}, {3, 1}, {4, 1}};
+
+ for (final int[] transferLength : transferLengths) {
+ int start = transferLength[0];
+ int splitLength = transferLength[1];
+
+ int dataLength1 = 0;
+ int dataLength2 = 0;
+
+ int offset1 = 0;
+ int offset2 = 0;
+
+ transferPair.splitAndTransfer(start, splitLength);
+
+ /* get offsetBuffer of toVector */
+ final ArrowBuf toOffsetBuffer = toVector.getOffsetBuffer();
+
+ /* get dataVector of toVector */
+ BigIntVector dataVector1 = (BigIntVector) toVector.getDataVector();
+
+ for (int i = 0; i < splitLength; i++) {
+ dataLength1 = (int) offsetBuffer.getLong((start + i + 1) * LargeListVector.OFFSET_WIDTH) -
+ (int) offsetBuffer.getLong((start + i) * LargeListVector.OFFSET_WIDTH);
+ dataLength2 = (int) toOffsetBuffer.getLong((i + 1) * LargeListVector.OFFSET_WIDTH) -
+ (int) toOffsetBuffer.getLong(i * LargeListVector.OFFSET_WIDTH);
+
+ assertEquals("Different data lengths at index: " + i + " and start: " + start,
+ dataLength1, dataLength2);
+
+ offset1 = (int) offsetBuffer.getLong((start + i) * LargeListVector.OFFSET_WIDTH);
+ offset2 = (int) toOffsetBuffer.getLong(i * LargeListVector.OFFSET_WIDTH);
+
+ for (int j = 0; j < dataLength1; j++) {
+ assertEquals("Different data at indexes: " + offset1 + " and " + offset2,
+ dataVector.getObject(offset1), dataVector1.getObject(offset2));
+
+ offset1++;
+ offset2++;
+ }
+ }
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testNestedLargeListVector() throws Exception {
+ try (LargeListVector listVector = LargeListVector.empty("sourceVector", allocator)) {
+
+ UnionLargeListWriter listWriter = listVector.getWriter();
+
+ /* allocate memory */
+ listWriter.allocate();
+
+ /* the dataVector that backs a listVector will also be a
+ * listVector for this test.
+ */
+
+ /* write one or more inner lists at index 0 */
+ listWriter.setPosition(0);
+ listWriter.startList();
+
+ listWriter.list().startList();
+ listWriter.list().bigInt().writeBigInt(50);
+ listWriter.list().bigInt().writeBigInt(100);
+ listWriter.list().bigInt().writeBigInt(200);
+ listWriter.list().endList();
+
+ listWriter.list().startList();
+ listWriter.list().bigInt().writeBigInt(75);
+ listWriter.list().bigInt().writeBigInt(125);
+ listWriter.list().bigInt().writeBigInt(150);
+ listWriter.list().bigInt().writeBigInt(175);
+ listWriter.list().endList();
+
+ listWriter.endList();
+
+ /* write one or more inner lists at index 1 */
+ listWriter.setPosition(1);
+ listWriter.startList();
+
+ listWriter.list().startList();
+ listWriter.list().bigInt().writeBigInt(10);
+ listWriter.list().endList();
+
+ listWriter.list().startList();
+ listWriter.list().bigInt().writeBigInt(15);
+ listWriter.list().bigInt().writeBigInt(20);
+ listWriter.list().endList();
+
+ listWriter.list().startList();
+ listWriter.list().bigInt().writeBigInt(25);
+ listWriter.list().bigInt().writeBigInt(30);
+ listWriter.list().bigInt().writeBigInt(35);
+ listWriter.list().endList();
+
+ listWriter.endList();
+
+ assertEquals(1, listVector.getLastSet());
+
+ listVector.setValueCount(2);
+
+ assertEquals(2, listVector.getValueCount());
+
+ /* get listVector value at index 0 -- the value itself is a listvector */
+ Object result = listVector.getObject(0);
+ ArrayList<ArrayList<Long>> resultSet = (ArrayList<ArrayList<Long>>) result;
+ ArrayList<Long> list;
+
+ assertEquals(2, resultSet.size()); /* 2 inner lists at index 0 */
+ assertEquals(3, resultSet.get(0).size()); /* size of first inner list */
+ assertEquals(4, resultSet.get(1).size()); /* size of second inner list */
+
+ list = resultSet.get(0);
+ assertEquals(new Long(50), list.get(0));
+ assertEquals(new Long(100), list.get(1));
+ assertEquals(new Long(200), list.get(2));
+
+ list = resultSet.get(1);
+ assertEquals(new Long(75), list.get(0));
+ assertEquals(new Long(125), list.get(1));
+ assertEquals(new Long(150), list.get(2));
+ assertEquals(new Long(175), list.get(3));
+
+ /* get listVector value at index 1 -- the value itself is a listvector */
+ result = listVector.getObject(1);
+ resultSet = (ArrayList<ArrayList<Long>>) result;
+
+ assertEquals(3, resultSet.size()); /* 3 inner lists at index 1 */
+ assertEquals(1, resultSet.get(0).size()); /* size of first inner list */
+ assertEquals(2, resultSet.get(1).size()); /* size of second inner list */
+ assertEquals(3, resultSet.get(2).size()); /* size of third inner list */
+
+ list = resultSet.get(0);
+ assertEquals(new Long(10), list.get(0));
+
+ list = resultSet.get(1);
+ assertEquals(new Long(15), list.get(0));
+ assertEquals(new Long(20), list.get(1));
+
+ list = resultSet.get(2);
+ assertEquals(new Long(25), list.get(0));
+ assertEquals(new Long(30), list.get(1));
+ assertEquals(new Long(35), list.get(2));
+
+ /* check underlying bitVector */
+ assertFalse(listVector.isNull(0));
+ assertFalse(listVector.isNull(1));
+
+ /* check underlying offsets */
+ final ArrowBuf offsetBuffer = listVector.getOffsetBuffer();
+
+ /* listVector has 2 lists at index 0 and 3 lists at index 1 */
+ assertEquals(0, offsetBuffer.getLong(0 * LargeListVector.OFFSET_WIDTH));
+ assertEquals(2, offsetBuffer.getLong(1 * LargeListVector.OFFSET_WIDTH));
+ assertEquals(5, offsetBuffer.getLong(2 * LargeListVector.OFFSET_WIDTH));
+ }
+ }
+
+ @Test
+ public void testNestedLargeListVector1() throws Exception {
+ try (LargeListVector listVector = LargeListVector.empty("sourceVector", allocator)) {
+
+ MinorType listType = MinorType.LIST;
+ MinorType scalarType = MinorType.BIGINT;
+
+ listVector.addOrGetVector(FieldType.nullable(listType.getType()));
+
+ ListVector innerList1 = (ListVector) listVector.getDataVector();
+ innerList1.addOrGetVector(FieldType.nullable(listType.getType()));
+
+ ListVector innerList2 = (ListVector) innerList1.getDataVector();
+ innerList2.addOrGetVector(FieldType.nullable(listType.getType()));
+
+ ListVector innerList3 = (ListVector) innerList2.getDataVector();
+ innerList3.addOrGetVector(FieldType.nullable(listType.getType()));
+
+ ListVector innerList4 = (ListVector) innerList3.getDataVector();
+ innerList4.addOrGetVector(FieldType.nullable(listType.getType()));
+
+ ListVector innerList5 = (ListVector) innerList4.getDataVector();
+ innerList5.addOrGetVector(FieldType.nullable(listType.getType()));
+
+ ListVector innerList6 = (ListVector) innerList5.getDataVector();
+ innerList6.addOrGetVector(FieldType.nullable(scalarType.getType()));
+
+ listVector.setInitialCapacity(128);
+ }
+ }
+
+ @Test
+ public void testNestedLargeListVector2() throws Exception {
+ try (LargeListVector listVector = LargeListVector.empty("sourceVector", allocator)) {
+ listVector.setInitialCapacity(1);
+ UnionLargeListWriter listWriter = listVector.getWriter();
+ /* allocate memory */
+ listWriter.allocate();
+
+ /* write one or more inner lists at index 0 */
+ listWriter.setPosition(0);
+ listWriter.startList();
+
+ listWriter.list().startList();
+ listWriter.list().bigInt().writeBigInt(50);
+ listWriter.list().bigInt().writeBigInt(100);
+ listWriter.list().bigInt().writeBigInt(200);
+ listWriter.list().endList();
+
+ listWriter.list().startList();
+ listWriter.list().bigInt().writeBigInt(75);
+ listWriter.list().bigInt().writeBigInt(125);
+ listWriter.list().endList();
+
+ listWriter.endList();
+
+ /* write one or more inner lists at index 1 */
+ listWriter.setPosition(1);
+ listWriter.startList();
+
+ listWriter.list().startList();
+ listWriter.list().bigInt().writeBigInt(15);
+ listWriter.list().bigInt().writeBigInt(20);
+ listWriter.list().endList();
+
+ listWriter.list().startList();
+ listWriter.list().bigInt().writeBigInt(25);
+ listWriter.list().bigInt().writeBigInt(30);
+ listWriter.list().bigInt().writeBigInt(35);
+ listWriter.list().endList();
+
+ listWriter.endList();
+
+ assertEquals(1, listVector.getLastSet());
+
+ listVector.setValueCount(2);
+
+ assertEquals(2, listVector.getValueCount());
+
+ /* get listVector value at index 0 -- the value itself is a listvector */
+ Object result = listVector.getObject(0);
+ ArrayList<ArrayList<Long>> resultSet = (ArrayList<ArrayList<Long>>) result;
+ ArrayList<Long> list;
+
+ assertEquals(2, resultSet.size()); /* 2 inner lists at index 0 */
+ assertEquals(3, resultSet.get(0).size()); /* size of first inner list */
+ assertEquals(2, resultSet.get(1).size()); /* size of second inner list */
+
+ list = resultSet.get(0);
+ assertEquals(new Long(50), list.get(0));
+ assertEquals(new Long(100), list.get(1));
+ assertEquals(new Long(200), list.get(2));
+
+ list = resultSet.get(1);
+ assertEquals(new Long(75), list.get(0));
+ assertEquals(new Long(125), list.get(1));
+
+ /* get listVector value at index 1 -- the value itself is a listvector */
+ result = listVector.getObject(1);
+ resultSet = (ArrayList<ArrayList<Long>>) result;
+
+ assertEquals(2, resultSet.size()); /* 3 inner lists at index 1 */
+ assertEquals(2, resultSet.get(0).size()); /* size of first inner list */
+ assertEquals(3, resultSet.get(1).size()); /* size of second inner list */
+
+ list = resultSet.get(0);
+ assertEquals(new Long(15), list.get(0));
+ assertEquals(new Long(20), list.get(1));
+
+ list = resultSet.get(1);
+ assertEquals(new Long(25), list.get(0));
+ assertEquals(new Long(30), list.get(1));
+ assertEquals(new Long(35), list.get(2));
+
+ /* check underlying bitVector */
+ assertFalse(listVector.isNull(0));
+ assertFalse(listVector.isNull(1));
+
+ /* check underlying offsets */
+ final ArrowBuf offsetBuffer = listVector.getOffsetBuffer();
+
+ /* listVector has 2 lists at index 0 and 3 lists at index 1 */
+ assertEquals(0, offsetBuffer.getLong(0 * LargeListVector.OFFSET_WIDTH));
+ assertEquals(2, offsetBuffer.getLong(1 * LargeListVector.OFFSET_WIDTH));
+ assertEquals(4, offsetBuffer.getLong(2 * LargeListVector.OFFSET_WIDTH));
+ }
+ }
+
+ @Test
+ public void testGetBufferAddress() throws Exception {
+ try (LargeListVector listVector = LargeListVector.empty("vector", allocator)) {
+
+ UnionLargeListWriter listWriter = listVector.getWriter();
+ boolean error = false;
+
+ listWriter.allocate();
+
+ listWriter.setPosition(0);
+ listWriter.startList();
+ listWriter.bigInt().writeBigInt(50);
+ listWriter.bigInt().writeBigInt(100);
+ listWriter.bigInt().writeBigInt(200);
+ listWriter.endList();
+
+ listWriter.setPosition(1);
+ listWriter.startList();
+ listWriter.bigInt().writeBigInt(250);
+ listWriter.bigInt().writeBigInt(300);
+ listWriter.endList();
+
+ listVector.setValueCount(2);
+
+ /* check listVector contents */
+ Object result = listVector.getObject(0);
+ ArrayList<Long> resultSet = (ArrayList<Long>) result;
+ assertEquals(3, resultSet.size());
+ assertEquals(new Long(50), resultSet.get(0));
+ assertEquals(new Long(100), resultSet.get(1));
+ assertEquals(new Long(200), resultSet.get(2));
+
+ result = listVector.getObject(1);
+ resultSet = (ArrayList<Long>) result;
+ assertEquals(2, resultSet.size());
+ assertEquals(new Long(250), resultSet.get(0));
+ assertEquals(new Long(300), resultSet.get(1));
+
+ List<ArrowBuf> buffers = listVector.getFieldBuffers();
+
+ long bitAddress = listVector.getValidityBufferAddress();
+ long offsetAddress = listVector.getOffsetBufferAddress();
+
+ try {
+ long dataAddress = listVector.getDataBufferAddress();
+ } catch (UnsupportedOperationException ue) {
+ error = true;
+ } finally {
+ assertTrue(error);
+ }
+
+ assertEquals(2, buffers.size());
+ assertEquals(bitAddress, buffers.get(0).memoryAddress());
+ assertEquals(offsetAddress, buffers.get(1).memoryAddress());
+
+ /* (3+2)/2 */
+ assertEquals(2.5, listVector.getDensity(), 0);
+ }
+ }
+
+ @Test
+ public void testConsistentChildName() throws Exception {
+ try (LargeListVector listVector = LargeListVector.empty("sourceVector", allocator)) {
+ String emptyListStr = listVector.getField().toString();
+ assertTrue(emptyListStr.contains(LargeListVector.DATA_VECTOR_NAME));
+
+ listVector.addOrGetVector(FieldType.nullable(MinorType.INT.getType()));
+ String emptyVectorStr = listVector.getField().toString();
+ assertTrue(emptyVectorStr.contains(LargeListVector.DATA_VECTOR_NAME));
+ }
+ }
+
+ @Test
+ public void testSetInitialCapacity() {
+ try (final LargeListVector vector = LargeListVector.empty("", allocator)) {
+ vector.addOrGetVector(FieldType.nullable(MinorType.INT.getType()));
+
+ /**
+ * use the default multiplier of 5,
+ * 512 * 5 => 2560 * 4 => 10240 bytes => 16KB => 4096 value capacity.
+ */
+ vector.setInitialCapacity(512);
+ vector.allocateNew();
+ assertEquals(512, vector.getValueCapacity());
+ assertTrue(vector.getDataVector().getValueCapacity() >= 512 * 5);
+
+ /* use density as 4 */
+ vector.setInitialCapacity(512, 4);
+ vector.allocateNew();
+ assertEquals(512, vector.getValueCapacity());
+ assertTrue(vector.getDataVector().getValueCapacity() >= 512 * 4);
+
+ /**
+ * inner value capacity we pass to data vector is 512 * 0.1 => 51
+ * For an int vector this is 204 bytes of memory for data buffer
+ * and 7 bytes for validity buffer.
+ * and with power of 2 allocation, we allocate 256 bytes and 8 bytes
+ * for the data buffer and validity buffer of the inner vector. Thus
+ * value capacity of inner vector is 64
+ */
+ vector.setInitialCapacity(512, 0.1);
+ vector.allocateNew();
+ assertEquals(512, vector.getValueCapacity());
+ assertTrue(vector.getDataVector().getValueCapacity() >= 51);
+
+ /**
+ * inner value capacity we pass to data vector is 512 * 0.01 => 5
+ * For an int vector this is 20 bytes of memory for data buffer
+ * and 1 byte for validity buffer.
+ * and with power of 2 allocation, we allocate 32 bytes and 1 bytes
+ * for the data buffer and validity buffer of the inner vector. Thus
+ * value capacity of inner vector is 8
+ */
+ vector.setInitialCapacity(512, 0.01);
+ vector.allocateNew();
+ assertEquals(512, vector.getValueCapacity());
+ assertTrue(vector.getDataVector().getValueCapacity() >= 5);
+
+ /**
+ * inner value capacity we pass to data vector is 5 * 0.1 => 0
+ * which is then rounded off to 1. So we pass value count as 1
+ * to the inner int vector.
+ * the offset buffer of the list vector is allocated for 6 values
+ * which is 24 bytes and then rounded off to 32 bytes (8 values)
+ * the validity buffer of the list vector is allocated for 5
+ * values which is 1 byte. This is why value capacity of the list
+ * vector is 7 as we take the min of validity buffer value capacity
+ * and offset buffer value capacity.
+ */
+ vector.setInitialCapacity(5, 0.1);
+ vector.allocateNew();
+ assertEquals(7, vector.getValueCapacity());
+ assertTrue(vector.getDataVector().getValueCapacity() >= 1);
+ }
+ }
+
+ @Test
+ public void testClearAndReuse() {
+ try (final LargeListVector vector = LargeListVector.empty("list", allocator)) {
+ BigIntVector bigIntVector =
+ (BigIntVector) vector.addOrGetVector(FieldType.nullable(MinorType.BIGINT.getType())).getVector();
+ vector.setInitialCapacity(10);
+ vector.allocateNew();
+
+ vector.startNewValue(0);
+ bigIntVector.setSafe(0, 7);
+ vector.endValue(0, 1);
+ vector.startNewValue(1);
+ bigIntVector.setSafe(1, 8);
+ vector.endValue(1, 1);
+ vector.setValueCount(2);
+
+ Object result = vector.getObject(0);
+ ArrayList<Long> resultSet = (ArrayList<Long>) result;
+ assertEquals(new Long(7), resultSet.get(0));
+
+ result = vector.getObject(1);
+ resultSet = (ArrayList<Long>) result;
+ assertEquals(new Long(8), resultSet.get(0));
+
+ // Clear and release the buffers to trigger a realloc when adding next value
+ vector.clear();
+
+ // The list vector should reuse a buffer when reallocating the offset buffer
+ vector.startNewValue(0);
+ bigIntVector.setSafe(0, 7);
+ vector.endValue(0, 1);
+ vector.startNewValue(1);
+ bigIntVector.setSafe(1, 8);
+ vector.endValue(1, 1);
+ vector.setValueCount(2);
+
+ result = vector.getObject(0);
+ resultSet = (ArrayList<Long>) result;
+ assertEquals(new Long(7), resultSet.get(0));
+
+ result = vector.getObject(1);
+ resultSet = (ArrayList<Long>) result;
+ assertEquals(new Long(8), resultSet.get(0));
+ }
+ }
+
+ @Test
+ public void testWriterGetField() {
+ try (final LargeListVector vector = LargeListVector.empty("list", allocator)) {
+
+ UnionLargeListWriter writer = vector.getWriter();
+ writer.allocate();
+
+ //set some values
+ writer.startList();
+ writer.integer().writeInt(1);
+ writer.integer().writeInt(2);
+ writer.endList();
+ vector.setValueCount(2);
+
+ Field expectedDataField = new Field(BaseRepeatedValueVector.DATA_VECTOR_NAME,
+ FieldType.nullable(new ArrowType.Int(32, true)), null);
+ Field expectedField = new Field(vector.getName(), FieldType.nullable(ArrowType.LargeList.INSTANCE),
+ Arrays.asList(expectedDataField));
+
+ assertEquals(expectedField, writer.getField());
+ }
+ }
+
+ @Test
+ public void testClose() throws Exception {
+ try (final LargeListVector vector = LargeListVector.empty("list", allocator)) {
+
+ UnionLargeListWriter writer = vector.getWriter();
+ writer.allocate();
+
+ //set some values
+ writer.startList();
+ writer.integer().writeInt(1);
+ writer.integer().writeInt(2);
+ writer.endList();
+ vector.setValueCount(2);
+
+ assertTrue(vector.getBufferSize() > 0);
+ assertTrue(vector.getDataVector().getBufferSize() > 0);
+
+ writer.close();
+ assertEquals(0, vector.getBufferSize());
+ assertEquals(0, vector.getDataVector().getBufferSize());
+ }
+ }
+
+ @Test
+ public void testGetBufferSizeFor() {
+ try (final LargeListVector vector = LargeListVector.empty("list", allocator)) {
+
+ UnionLargeListWriter writer = vector.getWriter();
+ writer.allocate();
+
+ //set some values
+ writeIntValues(writer, new int[] {1, 2});
+ writeIntValues(writer, new int[] {3, 4});
+ writeIntValues(writer, new int[] {5, 6});
+ writeIntValues(writer, new int[] {7, 8, 9, 10});
+ writeIntValues(writer, new int[] {11, 12, 13, 14});
+ writer.setValueCount(5);
+
+ IntVector dataVector = (IntVector) vector.getDataVector();
+ int[] indices = new int[] {0, 2, 4, 6, 10, 14};
+
+ for (int valueCount = 1; valueCount <= 5; valueCount++) {
+ int validityBufferSize = BitVectorHelper.getValidityBufferSize(valueCount);
+ int offsetBufferSize = (valueCount + 1) * LargeListVector.OFFSET_WIDTH;
+
+ int expectedSize = validityBufferSize + offsetBufferSize + dataVector.getBufferSizeFor(indices[valueCount]);
+ assertEquals(expectedSize, vector.getBufferSizeFor(valueCount));
+ }
+ }
+ }
+
+ @Test
+ public void testIsEmpty() {
+ try (final LargeListVector vector = LargeListVector.empty("list", allocator)) {
+ UnionLargeListWriter writer = vector.getWriter();
+ writer.allocate();
+
+ // set values [1,2], null, [], [5,6]
+ writeIntValues(writer, new int[] {1, 2});
+ writer.setPosition(2);
+ writeIntValues(writer, new int[] {});
+ writeIntValues(writer, new int[] {5, 6});
+ writer.setValueCount(4);
+
+ assertFalse(vector.isEmpty(0));
+ assertTrue(vector.isNull(1));
+ assertTrue(vector.isEmpty(1));
+ assertFalse(vector.isNull(2));
+ assertTrue(vector.isEmpty(2));
+ assertFalse(vector.isEmpty(3));
+ }
+ }
+
+ private void writeIntValues(UnionLargeListWriter writer, int[] values) {
+ writer.startList();
+ for (int v: values) {
+ writer.integer().writeInt(v);
+ }
+ writer.endList();
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestLargeVarBinaryVector.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestLargeVarBinaryVector.java
new file mode 100644
index 000000000..644827ce9
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestLargeVarBinaryVector.java
@@ -0,0 +1,104 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.holders.NullableLargeVarBinaryHolder;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestLargeVarBinaryVector {
+
+ private BufferAllocator allocator;
+
+ @Before
+ public void prepare() {
+ allocator = new RootAllocator(Integer.MAX_VALUE);
+ }
+
+ @After
+ public void shutdown() {
+ allocator.close();
+ }
+
+ @Test
+ public void testSetNullableLargeVarBinaryHolder() {
+ try (LargeVarBinaryVector vector = new LargeVarBinaryVector("", allocator)) {
+ vector.allocateNew(100, 10);
+
+ NullableLargeVarBinaryHolder nullHolder = new NullableLargeVarBinaryHolder();
+ nullHolder.isSet = 0;
+
+ NullableLargeVarBinaryHolder binHolder = new NullableLargeVarBinaryHolder();
+ binHolder.isSet = 1;
+
+ String str = "hello";
+ ArrowBuf buf = allocator.buffer(16);
+ buf.setBytes(0, str.getBytes());
+
+ binHolder.start = 0;
+ binHolder.end = str.length();
+ binHolder.buffer = buf;
+
+ vector.set(0, nullHolder);
+ vector.set(1, binHolder);
+
+ // verify results
+ assertTrue(vector.isNull(0));
+ assertEquals(str, new String(vector.get(1)));
+
+ buf.close();
+ }
+ }
+
+ @Test
+ public void testSetNullableLargeVarBinaryHolderSafe() {
+ try (LargeVarBinaryVector vector = new LargeVarBinaryVector("", allocator)) {
+ vector.allocateNew(5, 1);
+
+ NullableLargeVarBinaryHolder nullHolder = new NullableLargeVarBinaryHolder();
+ nullHolder.isSet = 0;
+
+ NullableLargeVarBinaryHolder binHolder = new NullableLargeVarBinaryHolder();
+ binHolder.isSet = 1;
+
+ String str = "hello world";
+ ArrowBuf buf = allocator.buffer(16);
+ buf.setBytes(0, str.getBytes());
+
+ binHolder.start = 0;
+ binHolder.end = str.length();
+ binHolder.buffer = buf;
+
+ vector.setSafe(0, binHolder);
+ vector.setSafe(1, nullHolder);
+
+ // verify results
+ assertEquals(str, new String(vector.get(0)));
+ assertTrue(vector.isNull(1));
+
+ buf.close();
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestLargeVarCharVector.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestLargeVarCharVector.java
new file mode 100644
index 000000000..1b81c6b20
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestLargeVarCharVector.java
@@ -0,0 +1,816 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.OutOfMemoryException;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.holders.NullableLargeVarCharHolder;
+import org.apache.arrow.vector.ipc.message.ArrowRecordBatch;
+import org.apache.arrow.vector.testing.ValueVectorDataPopulator;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.apache.arrow.vector.util.OversizedAllocationException;
+import org.apache.arrow.vector.util.TransferPair;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.jupiter.api.Assertions;
+
+public class TestLargeVarCharVector {
+
+ private static final byte[] STR1 = "AAAAA1".getBytes();
+ private static final byte[] STR2 = "BBBBBBBBB2".getBytes();
+ private static final byte[] STR3 = "CCCC3".getBytes();
+ private static final byte[] STR4 = "DDDDDDDD4".getBytes();
+ private static final byte[] STR5 = "EEE5".getBytes();
+ private static final byte[] STR6 = "FFFFF6".getBytes();
+
+ private BufferAllocator allocator;
+
+ @Before
+ public void prepare() {
+ allocator = new RootAllocator(Integer.MAX_VALUE);
+ }
+
+ @After
+ public void shutdown() {
+ allocator.close();
+ }
+
+ @Test
+ public void testTransfer() {
+ try (BufferAllocator childAllocator1 = allocator.newChildAllocator("child1", 1000000, 1000000);
+ BufferAllocator childAllocator2 = allocator.newChildAllocator("child2", 1000000, 1000000);
+ LargeVarCharVector v1 = new LargeVarCharVector("v1", childAllocator1);
+ LargeVarCharVector v2 = new LargeVarCharVector("v2", childAllocator2);) {
+ v1.allocateNew();
+ v1.setSafe(4094, "hello world".getBytes(), 0, 11);
+ v1.setValueCount(4001);
+
+ long memoryBeforeTransfer = childAllocator1.getAllocatedMemory();
+
+ v1.makeTransferPair(v2).transfer();
+
+ assertEquals(0, childAllocator1.getAllocatedMemory());
+ assertEquals(memoryBeforeTransfer, childAllocator2.getAllocatedMemory());
+ }
+ }
+
+ @Test
+ public void testCopyValueSafe() {
+ try (final LargeVarCharVector largeVarCharVector = new LargeVarCharVector("myvector", allocator);
+ final LargeVarCharVector newLargeVarCharVector = new LargeVarCharVector("newvector", allocator)) {
+ largeVarCharVector.allocateNew(10000, 1000);
+
+ final int valueCount = 500;
+ populateLargeVarcharVector(largeVarCharVector, valueCount, null);
+
+ final TransferPair tp = largeVarCharVector.makeTransferPair(newLargeVarCharVector);
+
+ // new vector memory is not pre-allocated, we expect copyValueSafe work fine.
+ for (int i = 0; i < valueCount; i++) {
+ tp.copyValueSafe(i, i);
+ }
+ newLargeVarCharVector.setValueCount(valueCount);
+
+ for (int i = 0; i < valueCount; i++) {
+ final boolean expectedSet = (i % 3) == 0;
+ if (expectedSet) {
+ assertFalse(largeVarCharVector.isNull(i));
+ assertFalse(newLargeVarCharVector.isNull(i));
+ assertArrayEquals(largeVarCharVector.get(i), newLargeVarCharVector.get(i));
+ } else {
+ assertTrue(newLargeVarCharVector.isNull(i));
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testSplitAndTransferNon() {
+ try (final LargeVarCharVector largeVarCharVector = new LargeVarCharVector("myvector", allocator)) {
+
+ largeVarCharVector.allocateNew(10000, 1000);
+ final int valueCount = 500;
+ populateLargeVarcharVector(largeVarCharVector, valueCount, null);
+
+ final TransferPair tp = largeVarCharVector.getTransferPair(allocator);
+ try (LargeVarCharVector newLargeVarCharVector = (LargeVarCharVector) tp.getTo()) {
+
+ tp.splitAndTransfer(0, 0);
+ assertEquals(0, newLargeVarCharVector.getValueCount());
+ }
+ }
+ }
+
+ @Test
+ public void testSplitAndTransferAll() {
+ try (final LargeVarCharVector largeVarCharVector = new LargeVarCharVector("myvector", allocator)) {
+
+ largeVarCharVector.allocateNew(10000, 1000);
+ final int valueCount = 500;
+ populateLargeVarcharVector(largeVarCharVector, valueCount, null);
+
+ final TransferPair tp = largeVarCharVector.getTransferPair(allocator);
+ try (LargeVarCharVector newLargeVarCharVector = (LargeVarCharVector) tp.getTo()) {
+
+ tp.splitAndTransfer(0, valueCount);
+ assertEquals(valueCount, newLargeVarCharVector.getValueCount());
+ }
+ }
+ }
+
+ @Test
+ public void testInvalidStartIndex() {
+ try (final LargeVarCharVector largeVarCharVector = new LargeVarCharVector("myvector", allocator);
+ final LargeVarCharVector newLargeVarCharVector = new LargeVarCharVector("newvector", allocator)) {
+
+ largeVarCharVector.allocateNew(10000, 1000);
+ final int valueCount = 500;
+ populateLargeVarcharVector(largeVarCharVector, valueCount, null);
+
+ final TransferPair tp = largeVarCharVector.makeTransferPair(newLargeVarCharVector);
+
+ IllegalArgumentException e = Assertions.assertThrows(
+ IllegalArgumentException.class,
+ () -> tp.splitAndTransfer(valueCount, 10));
+
+ assertEquals("Invalid startIndex: 500", e.getMessage());
+ }
+ }
+
+ @Test
+ public void testInvalidLength() {
+ try (final LargeVarCharVector largeVarCharVector = new LargeVarCharVector("myvector", allocator);
+ final LargeVarCharVector newLargeVarCharVector = new LargeVarCharVector("newvector", allocator)) {
+
+ largeVarCharVector.allocateNew(10000, 1000);
+ final int valueCount = 500;
+ populateLargeVarcharVector(largeVarCharVector, valueCount, null);
+
+ final TransferPair tp = largeVarCharVector.makeTransferPair(newLargeVarCharVector);
+
+ IllegalArgumentException e = Assertions.assertThrows(
+ IllegalArgumentException.class,
+ () -> tp.splitAndTransfer(0, valueCount * 2));
+
+ assertEquals("Invalid length: 1000", e.getMessage());
+ }
+ }
+
+ @Test /* LargeVarCharVector */
+ public void testSizeOfValueBuffer() {
+ try (final LargeVarCharVector vector = new LargeVarCharVector("", allocator)) {
+ int valueCount = 100;
+ int currentSize = 0;
+ vector.setInitialCapacity(valueCount);
+ vector.allocateNew();
+ vector.setValueCount(valueCount);
+ for (int i = 0; i < valueCount; i++) {
+ currentSize += i;
+ vector.setSafe(i, new byte[i]);
+ }
+
+ assertEquals(currentSize, vector.sizeOfValueBuffer());
+ }
+ }
+
+ @Test
+ public void testSetLastSetUsage() {
+ final byte[] STR1 = "AAAAA1".getBytes();
+ final byte[] STR2 = "BBBBBBBBB2".getBytes();
+ final byte[] STR3 = "CCCC3".getBytes();
+ final byte[] STR4 = "DDDDDDDD4".getBytes();
+ final byte[] STR5 = "EEE5".getBytes();
+ final byte[] STR6 = "FFFFF6".getBytes();
+
+ try (final LargeVarCharVector vector = new LargeVarCharVector("myvector", allocator)) {
+ vector.allocateNew(1024 * 10, 1024);
+
+ setBytes(0, STR1, vector);
+ setBytes(1, STR2, vector);
+ setBytes(2, STR3, vector);
+ setBytes(3, STR4, vector);
+ setBytes(4, STR5, vector);
+ setBytes(5, STR6, vector);
+
+ /* Check current lastSet */
+ assertEquals(-1, vector.getLastSet());
+
+ /* Check the vector output */
+ assertArrayEquals(STR1, vector.get(0));
+ assertArrayEquals(STR2, vector.get(1));
+ assertArrayEquals(STR3, vector.get(2));
+ assertArrayEquals(STR4, vector.get(3));
+ assertArrayEquals(STR5, vector.get(4));
+ assertArrayEquals(STR6, vector.get(5));
+
+ /*
+ * If we don't do setLastSe(5) before setValueCount(), then the latter will corrupt
+ * the value vector by filling in all positions [0,valuecount-1] will empty byte arrays.
+ * Run the test by commenting out next line and we should see incorrect vector output.
+ */
+ vector.setLastSet(5);
+ vector.setValueCount(20);
+
+ /* Check current lastSet */
+ assertEquals(19, vector.getLastSet());
+
+ /* Check the vector output again */
+ assertArrayEquals(STR1, vector.get(0));
+ assertArrayEquals(STR2, vector.get(1));
+ assertArrayEquals(STR3, vector.get(2));
+ assertArrayEquals(STR4, vector.get(3));
+ assertArrayEquals(STR5, vector.get(4));
+ assertArrayEquals(STR6, vector.get(5));
+ assertEquals(0, vector.getValueLength(6));
+ assertEquals(0, vector.getValueLength(7));
+ assertEquals(0, vector.getValueLength(8));
+ assertEquals(0, vector.getValueLength(9));
+ assertEquals(0, vector.getValueLength(10));
+ assertEquals(0, vector.getValueLength(11));
+ assertEquals(0, vector.getValueLength(12));
+ assertEquals(0, vector.getValueLength(13));
+ assertEquals(0, vector.getValueLength(14));
+ assertEquals(0, vector.getValueLength(15));
+ assertEquals(0, vector.getValueLength(16));
+ assertEquals(0, vector.getValueLength(17));
+ assertEquals(0, vector.getValueLength(18));
+ assertEquals(0, vector.getValueLength(19));
+
+ /* Check offsets */
+ assertEquals(0, vector.offsetBuffer.getLong(0 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(6, vector.offsetBuffer.getLong(1 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(16, vector.offsetBuffer.getLong(2 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(21, vector.offsetBuffer.getLong(3 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(30, vector.offsetBuffer.getLong(4 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(34, vector.offsetBuffer.getLong(5 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(40, vector.offsetBuffer.getLong(6 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(40, vector.offsetBuffer.getLong(7 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(40, vector.offsetBuffer.getLong(8 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(40, vector.offsetBuffer.getLong(9 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(40, vector.offsetBuffer.getLong(10 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(40, vector.offsetBuffer.getLong(11 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(40, vector.offsetBuffer.getLong(12 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(40, vector.offsetBuffer.getLong(13 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(40, vector.offsetBuffer.getLong(14 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(40, vector.offsetBuffer.getLong(15 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(40, vector.offsetBuffer.getLong(16 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(40, vector.offsetBuffer.getLong(17 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(40, vector.offsetBuffer.getLong(18 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(40, vector.offsetBuffer.getLong(19 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
+
+ vector.set(19, STR6);
+ assertArrayEquals(STR6, vector.get(19));
+ assertEquals(40, vector.offsetBuffer.getLong(19 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(46, vector.offsetBuffer.getLong(20 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
+ }
+ }
+
+ @Test(expected = OutOfMemoryException.class)
+ public void testVectorAllocateNew() {
+ try (RootAllocator smallAllocator = new RootAllocator(200);
+ LargeVarCharVector vector = new LargeVarCharVector("vec", smallAllocator)) {
+ vector.allocateNew();
+ }
+ }
+
+ @Test(expected = OversizedAllocationException.class)
+ public void testLargeVariableVectorReallocation() {
+ final LargeVarCharVector vector = new LargeVarCharVector("vector", allocator);
+ // edge case 1: value count = MAX_VALUE_ALLOCATION
+ final long expectedAllocationInBytes = BaseValueVector.MAX_ALLOCATION_SIZE;
+ final int expectedOffsetSize = 10;
+ try {
+ vector.allocateNew(expectedAllocationInBytes, 10);
+ assertTrue(expectedOffsetSize <= vector.getValueCapacity());
+ assertTrue(expectedAllocationInBytes <= vector.getDataBuffer().capacity());
+ vector.reAlloc();
+ assertTrue(expectedOffsetSize * 2 <= vector.getValueCapacity());
+ assertTrue(expectedAllocationInBytes * 2 <= vector.getDataBuffer().capacity());
+ } finally {
+ vector.close();
+ }
+
+ // common: value count < MAX_VALUE_ALLOCATION
+ try {
+ vector.allocateNew(BaseValueVector.MAX_ALLOCATION_SIZE / 2, 0);
+ vector.reAlloc(); // value allocation reaches to MAX_VALUE_ALLOCATION
+ vector.reAlloc(); // this tests if it overflows
+ } finally {
+ vector.close();
+ }
+ }
+
+ @Test
+ public void testSplitAndTransfer() {
+ try (final LargeVarCharVector largeVarCharVector = new LargeVarCharVector("myvector", allocator)) {
+ largeVarCharVector.allocateNew(10000, 1000);
+
+ final int valueCount = 500;
+ final String[] compareArray = new String[valueCount];
+
+ populateLargeVarcharVector(largeVarCharVector, valueCount, compareArray);
+
+ final TransferPair tp = largeVarCharVector.getTransferPair(allocator);
+ try (final LargeVarCharVector newLargeVarCharVector = (LargeVarCharVector) tp.getTo()) {
+ final int[][] startLengths = {{0, 201}, {201, 0}, {201, 200}, {401, 99}};
+
+ for (final int[] startLength : startLengths) {
+ final int start = startLength[0];
+ final int length = startLength[1];
+ tp.splitAndTransfer(start, length);
+ for (int i = 0; i < length; i++) {
+ final boolean expectedSet = ((start + i) % 3) == 0;
+ if (expectedSet) {
+ final byte[] expectedValue = compareArray[start + i].getBytes();
+ assertFalse(newLargeVarCharVector.isNull(i));
+ assertArrayEquals(expectedValue, newLargeVarCharVector.get(i));
+ } else {
+ assertTrue(newLargeVarCharVector.isNull(i));
+ }
+ }
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testReallocAfterVectorTransfer() {
+ final byte[] STR1 = "AAAAA1".getBytes();
+ final byte[] STR2 = "BBBBBBBBB2".getBytes();
+
+ try (final LargeVarCharVector vector = new LargeVarCharVector("vector", allocator)) {
+ /* 4096 values with 10 byte per record */
+ vector.allocateNew(4096 * 10, 4096);
+ int valueCapacity = vector.getValueCapacity();
+ assertTrue(valueCapacity >= 4096);
+
+ /* populate the vector */
+ for (int i = 0; i < valueCapacity; i++) {
+ if ((i & 1) == 1) {
+ vector.set(i, STR1);
+ } else {
+ vector.set(i, STR2);
+ }
+ }
+
+ /* Check the vector output */
+ for (int i = 0; i < valueCapacity; i++) {
+ if ((i & 1) == 1) {
+ assertArrayEquals(STR1, vector.get(i));
+ } else {
+ assertArrayEquals(STR2, vector.get(i));
+ }
+ }
+
+ /* trigger first realloc */
+ vector.setSafe(valueCapacity, STR2, 0, STR2.length);
+ assertTrue(vector.getValueCapacity() >= 2 * valueCapacity);
+ while (vector.getByteCapacity() < 10 * vector.getValueCapacity()) {
+ vector.reallocDataBuffer();
+ }
+
+ /* populate the remaining vector */
+ for (int i = valueCapacity; i < vector.getValueCapacity(); i++) {
+ if ((i & 1) == 1) {
+ vector.set(i, STR1);
+ } else {
+ vector.set(i, STR2);
+ }
+ }
+
+ /* Check the vector output */
+ valueCapacity = vector.getValueCapacity();
+ for (int i = 0; i < valueCapacity; i++) {
+ if ((i & 1) == 1) {
+ assertArrayEquals(STR1, vector.get(i));
+ } else {
+ assertArrayEquals(STR2, vector.get(i));
+ }
+ }
+
+ /* trigger second realloc */
+ vector.setSafe(valueCapacity + 10, STR2, 0, STR2.length);
+ assertTrue(vector.getValueCapacity() >= 2 * valueCapacity);
+ while (vector.getByteCapacity() < 10 * vector.getValueCapacity()) {
+ vector.reallocDataBuffer();
+ }
+
+ /* populate the remaining vector */
+ for (int i = valueCapacity; i < vector.getValueCapacity(); i++) {
+ if ((i & 1) == 1) {
+ vector.set(i, STR1);
+ } else {
+ vector.set(i, STR2);
+ }
+ }
+
+ /* Check the vector output */
+ valueCapacity = vector.getValueCapacity();
+ for (int i = 0; i < valueCapacity; i++) {
+ if ((i & 1) == 1) {
+ assertArrayEquals(STR1, vector.get(i));
+ } else {
+ assertArrayEquals(STR2, vector.get(i));
+ }
+ }
+
+ /* we are potentially working with 4x the size of vector buffer
+ * that we initially started with. Now let's transfer the vector.
+ */
+
+ TransferPair transferPair = vector.getTransferPair(allocator);
+ transferPair.transfer();
+ try (LargeVarCharVector toVector = (LargeVarCharVector) transferPair.getTo()) {
+ valueCapacity = toVector.getValueCapacity();
+
+ for (int i = 0; i < valueCapacity; i++) {
+ if ((i & 1) == 1) {
+ assertArrayEquals(STR1, toVector.get(i));
+ } else {
+ assertArrayEquals(STR2, toVector.get(i));
+ }
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testVectorLoadUnload() {
+ try (final LargeVarCharVector vector1 = new LargeVarCharVector("myvector", allocator)) {
+
+ ValueVectorDataPopulator.setVector(vector1, STR1, STR2, STR3, STR4, STR5, STR6);
+
+ assertEquals(5, vector1.getLastSet());
+ vector1.setValueCount(15);
+ assertEquals(14, vector1.getLastSet());
+
+ /* Check the vector output */
+ assertArrayEquals(STR1, vector1.get(0));
+ assertArrayEquals(STR2, vector1.get(1));
+ assertArrayEquals(STR3, vector1.get(2));
+ assertArrayEquals(STR4, vector1.get(3));
+ assertArrayEquals(STR5, vector1.get(4));
+ assertArrayEquals(STR6, vector1.get(5));
+
+ Field field = vector1.getField();
+ String fieldName = field.getName();
+
+ List<Field> fields = new ArrayList<>();
+ List<FieldVector> fieldVectors = new ArrayList<>();
+
+ fields.add(field);
+ fieldVectors.add(vector1);
+
+ Schema schema = new Schema(fields);
+
+ VectorSchemaRoot schemaRoot1 = new VectorSchemaRoot(schema, fieldVectors, vector1.getValueCount());
+ VectorUnloader vectorUnloader = new VectorUnloader(schemaRoot1);
+
+ try (
+ ArrowRecordBatch recordBatch = vectorUnloader.getRecordBatch();
+ VectorSchemaRoot schemaRoot2 = VectorSchemaRoot.create(schema, allocator);
+ ) {
+
+ VectorLoader vectorLoader = new VectorLoader(schemaRoot2);
+ vectorLoader.load(recordBatch);
+
+ LargeVarCharVector vector2 = (LargeVarCharVector) schemaRoot2.getVector(fieldName);
+ /*
+ * lastSet would have internally been set by VectorLoader.load() when it invokes
+ * loadFieldBuffers.
+ */
+ assertEquals(14, vector2.getLastSet());
+ vector2.setValueCount(25);
+ assertEquals(24, vector2.getLastSet());
+
+ /* Check the vector output */
+ assertArrayEquals(STR1, vector2.get(0));
+ assertArrayEquals(STR2, vector2.get(1));
+ assertArrayEquals(STR3, vector2.get(2));
+ assertArrayEquals(STR4, vector2.get(3));
+ assertArrayEquals(STR5, vector2.get(4));
+ assertArrayEquals(STR6, vector2.get(5));
+ }
+ }
+ }
+
+ @Test
+ public void testFillEmptiesUsage() {
+ try (final LargeVarCharVector vector = new LargeVarCharVector("myvector", allocator)) {
+
+ vector.allocateNew(1024 * 10, 1024);
+
+ setBytes(0, STR1, vector);
+ setBytes(1, STR2, vector);
+ setBytes(2, STR3, vector);
+ setBytes(3, STR4, vector);
+ setBytes(4, STR5, vector);
+ setBytes(5, STR6, vector);
+
+ /* Check current lastSet */
+ assertEquals(-1, vector.getLastSet());
+
+ /* Check the vector output */
+ assertArrayEquals(STR1, vector.get(0));
+ assertArrayEquals(STR2, vector.get(1));
+ assertArrayEquals(STR3, vector.get(2));
+ assertArrayEquals(STR4, vector.get(3));
+ assertArrayEquals(STR5, vector.get(4));
+ assertArrayEquals(STR6, vector.get(5));
+
+ vector.setLastSet(5);
+ /* fill empty byte arrays from index [6, 9] */
+ vector.fillEmpties(10);
+
+ /* Check current lastSet */
+ assertEquals(9, vector.getLastSet());
+
+ /* Check the vector output */
+ assertArrayEquals(STR1, vector.get(0));
+ assertArrayEquals(STR2, vector.get(1));
+ assertArrayEquals(STR3, vector.get(2));
+ assertArrayEquals(STR4, vector.get(3));
+ assertArrayEquals(STR5, vector.get(4));
+ assertArrayEquals(STR6, vector.get(5));
+ assertEquals(0, vector.getValueLength(6));
+ assertEquals(0, vector.getValueLength(7));
+ assertEquals(0, vector.getValueLength(8));
+ assertEquals(0, vector.getValueLength(9));
+
+ setBytes(10, STR1, vector);
+ setBytes(11, STR2, vector);
+
+ vector.setLastSet(11);
+ /* fill empty byte arrays from index [12, 14] */
+ vector.setValueCount(15);
+
+ /* Check current lastSet */
+ assertEquals(14, vector.getLastSet());
+
+ /* Check the vector output */
+ assertArrayEquals(STR1, vector.get(0));
+ assertArrayEquals(STR2, vector.get(1));
+ assertArrayEquals(STR3, vector.get(2));
+ assertArrayEquals(STR4, vector.get(3));
+ assertArrayEquals(STR5, vector.get(4));
+ assertArrayEquals(STR6, vector.get(5));
+ assertEquals(0, vector.getValueLength(6));
+ assertEquals(0, vector.getValueLength(7));
+ assertEquals(0, vector.getValueLength(8));
+ assertEquals(0, vector.getValueLength(9));
+ assertArrayEquals(STR1, vector.get(10));
+ assertArrayEquals(STR2, vector.get(11));
+ assertEquals(0, vector.getValueLength(12));
+ assertEquals(0, vector.getValueLength(13));
+ assertEquals(0, vector.getValueLength(14));
+
+ /* Check offsets */
+ assertEquals(0,
+ vector.offsetBuffer.getLong(0 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(6,
+ vector.offsetBuffer.getLong(1 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(16,
+ vector.offsetBuffer.getLong(2 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(21,
+ vector.offsetBuffer.getLong(3 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(30,
+ vector.offsetBuffer.getLong(4 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(34,
+ vector.offsetBuffer.getLong(5 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
+
+ assertEquals(40,
+ vector.offsetBuffer.getLong(6 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(40,
+ vector.offsetBuffer.getLong(7 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(40,
+ vector.offsetBuffer.getLong(8 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(40,
+ vector.offsetBuffer.getLong(9 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(40,
+ vector.offsetBuffer.getLong(10 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
+
+ assertEquals(46,
+ vector.offsetBuffer.getLong(11 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(56,
+ vector.offsetBuffer.getLong(12 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
+
+ assertEquals(56,
+ vector.offsetBuffer.getLong(13 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(56,
+ vector.offsetBuffer.getLong(14 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(56,
+ vector.offsetBuffer.getLong(15 * BaseLargeVariableWidthVector.OFFSET_WIDTH));
+ }
+ }
+
+ @Test
+ public void testGetBufferAddress1() {
+ try (final LargeVarCharVector vector = new LargeVarCharVector("myvector", allocator)) {
+
+ ValueVectorDataPopulator.setVector(vector, STR1, STR2, STR3, STR4, STR5, STR6);
+ vector.setValueCount(15);
+
+ /* check the vector output */
+ assertArrayEquals(STR1, vector.get(0));
+ assertArrayEquals(STR2, vector.get(1));
+ assertArrayEquals(STR3, vector.get(2));
+ assertArrayEquals(STR4, vector.get(3));
+ assertArrayEquals(STR5, vector.get(4));
+ assertArrayEquals(STR6, vector.get(5));
+
+ List<ArrowBuf> buffers = vector.getFieldBuffers();
+ long bitAddress = vector.getValidityBufferAddress();
+ long offsetAddress = vector.getOffsetBufferAddress();
+ long dataAddress = vector.getDataBufferAddress();
+
+ assertEquals(3, buffers.size());
+ assertEquals(bitAddress, buffers.get(0).memoryAddress());
+ assertEquals(offsetAddress, buffers.get(1).memoryAddress());
+ assertEquals(dataAddress, buffers.get(2).memoryAddress());
+ }
+ }
+
+ @Test
+ public void testSetNullableLargeVarCharHolder() {
+ try (LargeVarCharVector vector = new LargeVarCharVector("", allocator)) {
+ vector.allocateNew(100, 10);
+
+ NullableLargeVarCharHolder nullHolder = new NullableLargeVarCharHolder();
+ nullHolder.isSet = 0;
+
+ NullableLargeVarCharHolder stringHolder = new NullableLargeVarCharHolder();
+ stringHolder.isSet = 1;
+
+ String str = "hello";
+ ArrowBuf buf = allocator.buffer(16);
+ buf.setBytes(0, str.getBytes());
+
+ stringHolder.start = 0;
+ stringHolder.end = str.length();
+ stringHolder.buffer = buf;
+
+ vector.set(0, nullHolder);
+ vector.set(1, stringHolder);
+
+ // verify results
+ assertTrue(vector.isNull(0));
+ assertEquals(str, new String(vector.get(1)));
+
+ buf.close();
+ }
+ }
+
+ @Test
+ public void testSetNullableLargeVarCharHolderSafe() {
+ try (LargeVarCharVector vector = new LargeVarCharVector("", allocator)) {
+ vector.allocateNew(5, 1);
+
+ NullableLargeVarCharHolder nullHolder = new NullableLargeVarCharHolder();
+ nullHolder.isSet = 0;
+
+ NullableLargeVarCharHolder stringHolder = new NullableLargeVarCharHolder();
+ stringHolder.isSet = 1;
+
+ String str = "hello world";
+ ArrowBuf buf = allocator.buffer(16);
+ buf.setBytes(0, str.getBytes());
+
+ stringHolder.start = 0;
+ stringHolder.end = str.length();
+ stringHolder.buffer = buf;
+
+ vector.setSafe(0, stringHolder);
+ vector.setSafe(1, nullHolder);
+
+ // verify results
+ assertEquals(str, new String(vector.get(0)));
+ assertTrue(vector.isNull(1));
+
+ buf.close();
+ }
+ }
+
+ @Test
+ public void testGetNullFromLargeVariableWidthVector() {
+ try (final LargeVarCharVector largeVarCharVector = new LargeVarCharVector("largevarcharvec", allocator);
+ final LargeVarBinaryVector largeVarBinaryVector = new LargeVarBinaryVector("largevarbinary", allocator)) {
+ largeVarCharVector.allocateNew(10, 1);
+ largeVarBinaryVector.allocateNew(10, 1);
+
+ largeVarCharVector.setNull(0);
+ largeVarBinaryVector.setNull(0);
+
+ assertNull(largeVarCharVector.get(0));
+ assertNull(largeVarBinaryVector.get(0));
+ }
+ }
+
+ @Test
+ public void testLargeVariableWidthVectorNullHashCode() {
+ try (LargeVarCharVector largeVarChVec = new LargeVarCharVector("large var char vector", allocator)) {
+ largeVarChVec.allocateNew(100, 1);
+ largeVarChVec.setValueCount(1);
+
+ largeVarChVec.set(0, "abc".getBytes());
+ largeVarChVec.setNull(0);
+
+ assertEquals(0, largeVarChVec.hashCode(0));
+ }
+ }
+
+ @Test
+ public void testUnloadLargeVariableWidthVector() {
+ try (final LargeVarCharVector largeVarCharVector = new LargeVarCharVector("var char", allocator)) {
+ largeVarCharVector.allocateNew(5, 2);
+ largeVarCharVector.setValueCount(2);
+
+ largeVarCharVector.set(0, "abcd".getBytes());
+
+ List<ArrowBuf> bufs = largeVarCharVector.getFieldBuffers();
+ assertEquals(3, bufs.size());
+
+ ArrowBuf offsetBuf = bufs.get(1);
+ ArrowBuf dataBuf = bufs.get(2);
+
+ assertEquals(24, offsetBuf.writerIndex());
+ assertEquals(4, offsetBuf.getLong(8));
+ assertEquals(4, offsetBuf.getLong(16));
+
+ assertEquals(4, dataBuf.writerIndex());
+ }
+ }
+
+ @Test
+ public void testNullableType() {
+ try (final LargeVarCharVector vector = new LargeVarCharVector("", allocator)) {
+ vector.setInitialCapacity(512);
+ vector.allocateNew();
+
+ assertTrue(vector.getValueCapacity() >= 512);
+ int initialCapacity = vector.getValueCapacity();
+
+ try {
+ vector.set(initialCapacity, "foo".getBytes(StandardCharsets.UTF_8));
+ Assert.fail("Expected out of bounds exception");
+ } catch (Exception e) {
+ // ok
+ }
+
+ vector.reAlloc();
+ assertTrue(vector.getValueCapacity() >= 2 * initialCapacity);
+
+ vector.set(initialCapacity, "foo".getBytes(StandardCharsets.UTF_8));
+ assertEquals("foo", new String(vector.get(initialCapacity), StandardCharsets.UTF_8));
+ }
+ }
+
+ private void populateLargeVarcharVector(final LargeVarCharVector vector, int valueCount, String[] values) {
+ for (int i = 0; i < valueCount; i += 3) {
+ final String s = String.format("%010d", i);
+ vector.set(i, s.getBytes());
+ if (values != null) {
+ values[i] = s;
+ }
+ }
+ vector.setValueCount(valueCount);
+ }
+
+ public static void setBytes(int index, byte[] bytes, LargeVarCharVector vector) {
+ final long currentOffset = vector.offsetBuffer.getLong((long) index * BaseLargeVariableWidthVector.OFFSET_WIDTH);
+
+ BitVectorHelper.setBit(vector.validityBuffer, index);
+ vector.offsetBuffer.setLong(
+ (long) (index + 1) * BaseLargeVariableWidthVector.OFFSET_WIDTH, currentOffset + bytes.length);
+ vector.valueBuffer.setBytes(currentOffset, bytes, 0, bytes.length);
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestListVector.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestListVector.java
new file mode 100644
index 000000000..ffeedf04d
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestListVector.java
@@ -0,0 +1,981 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.complex.BaseRepeatedValueVector;
+import org.apache.arrow.vector.complex.ListVector;
+import org.apache.arrow.vector.complex.impl.UnionListWriter;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.TransferPair;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestListVector {
+
+ private BufferAllocator allocator;
+
+ @Before
+ public void init() {
+ allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100);
+ }
+
+ @After
+ public void terminate() throws Exception {
+ allocator.close();
+ }
+
+ @Test
+ public void testCopyFrom() throws Exception {
+ try (ListVector inVector = ListVector.empty("input", allocator);
+ ListVector outVector = ListVector.empty("output", allocator)) {
+ UnionListWriter writer = inVector.getWriter();
+ writer.allocate();
+
+ // populate input vector with the following records
+ // [1, 2, 3]
+ // null
+ // []
+ writer.setPosition(0); // optional
+ writer.startList();
+ writer.bigInt().writeBigInt(1);
+ writer.bigInt().writeBigInt(2);
+ writer.bigInt().writeBigInt(3);
+ writer.endList();
+
+ writer.setPosition(2);
+ writer.startList();
+ writer.endList();
+
+ writer.setValueCount(3);
+
+ // copy values from input to output
+ outVector.allocateNew();
+ for (int i = 0; i < 3; i++) {
+ outVector.copyFrom(i, i, inVector);
+ }
+ outVector.setValueCount(3);
+
+ // assert the output vector is correct
+ FieldReader reader = outVector.getReader();
+ Assert.assertTrue("shouldn't be null", reader.isSet());
+ reader.setPosition(1);
+ Assert.assertFalse("should be null", reader.isSet());
+ reader.setPosition(2);
+ Assert.assertTrue("shouldn't be null", reader.isSet());
+
+
+ /* index 0 */
+ Object result = outVector.getObject(0);
+ ArrayList<Long> resultSet = (ArrayList<Long>) result;
+ assertEquals(3, resultSet.size());
+ assertEquals(new Long(1), (Long) resultSet.get(0));
+ assertEquals(new Long(2), (Long) resultSet.get(1));
+ assertEquals(new Long(3), (Long) resultSet.get(2));
+
+ /* index 1 */
+ result = outVector.getObject(1);
+ assertNull(result);
+
+ /* index 2 */
+ result = outVector.getObject(2);
+ resultSet = (ArrayList<Long>) result;
+ assertEquals(0, resultSet.size());
+
+ /* 3+0+0/3 */
+ assertEquals(1.0D, inVector.getDensity(), 0);
+ }
+ }
+
+ @Test
+ public void testSetLastSetUsage() throws Exception {
+ try (ListVector listVector = ListVector.empty("input", allocator)) {
+
+ /* Explicitly add the dataVector */
+ MinorType type = MinorType.BIGINT;
+ listVector.addOrGetVector(FieldType.nullable(type.getType()));
+
+ /* allocate memory */
+ listVector.allocateNew();
+
+ /* get inner buffers; validityBuffer and offsetBuffer */
+
+ ArrowBuf validityBuffer = listVector.getValidityBuffer();
+ ArrowBuf offsetBuffer = listVector.getOffsetBuffer();
+
+ /* get the underlying data vector -- BigIntVector */
+ BigIntVector dataVector = (BigIntVector) listVector.getDataVector();
+
+ /* check current lastSet */
+ assertEquals(Integer.toString(-1), Integer.toString(listVector.getLastSet()));
+
+ int index = 0;
+ int offset = 0;
+
+ /* write [10, 11, 12] to the list vector at index 0 */
+ BitVectorHelper.setBit(validityBuffer, index);
+ dataVector.setSafe(0, 1, 10);
+ dataVector.setSafe(1, 1, 11);
+ dataVector.setSafe(2, 1, 12);
+ offsetBuffer.setInt((index + 1) * ListVector.OFFSET_WIDTH, 3);
+
+ index += 1;
+
+ /* write [13, 14] to the list vector at index 1 */
+ BitVectorHelper.setBit(validityBuffer, index);
+ dataVector.setSafe(3, 1, 13);
+ dataVector.setSafe(4, 1, 14);
+ offsetBuffer.setInt((index + 1) * ListVector.OFFSET_WIDTH, 5);
+
+ index += 1;
+
+ /* write [15, 16, 17] to the list vector at index 2 */
+ BitVectorHelper.setBit(validityBuffer, index);
+ dataVector.setSafe(5, 1, 15);
+ dataVector.setSafe(6, 1, 16);
+ dataVector.setSafe(7, 1, 17);
+ offsetBuffer.setInt((index + 1) * ListVector.OFFSET_WIDTH, 8);
+
+ /* check current lastSet */
+ assertEquals(Integer.toString(-1), Integer.toString(listVector.getLastSet()));
+
+ /* set lastset and arbitrary valuecount for list vector.
+ *
+ * NOTE: if we don't execute setLastSet() before setLastValueCount(), then
+ * the latter will corrupt the offsetBuffer and thus the accessor will not
+ * retrieve the correct values from underlying dataBuffer. Run the test
+ * by commenting out next line and we should see failures from 5th assert
+ * onwards. This is why doing setLastSet() is important before setValueCount()
+ * once the vector has been loaded.
+ *
+ * Another important thing to remember is the value of lastSet itself.
+ * Even though the listVector has elements till index 2 only, the lastSet should
+ * be set as 3. This is because the offsetBuffer has valid offsets filled till index 3.
+ * If we do setLastSet(2), the offsetBuffer at index 3 will contain incorrect value
+ * after execution of setValueCount().
+ *
+ * correct state of the listVector
+ * bitvector {1, 1, 1, 0, 0.... }
+ * offsetvector {0, 3, 5, 8, 8, 8.....}
+ * datavector { [10, 11, 12],
+ * [13, 14],
+ * [15, 16, 17]
+ * }
+ *
+ * if we don't do setLastSet() before setValueCount --> incorrect state
+ * bitvector {1, 1, 1, 0, 0.... }
+ * offsetvector {0, 0, 0, 0, 0, 0.....}
+ * datavector { [10, 11, 12],
+ * [13, 14],
+ * [15, 16, 17]
+ * }
+ *
+ * if we do setLastSet(2) before setValueCount --> incorrect state
+ * bitvector {1, 1, 1, 0, 0.... }
+ * offsetvector {0, 3, 5, 5, 5, 5.....}
+ * datavector { [10, 11, 12],
+ * [13, 14],
+ * [15, 16, 17]
+ * }
+ */
+ listVector.setLastSet(2);
+ listVector.setValueCount(10);
+
+ /* (3+2+3)/10 */
+ assertEquals(0.8D, listVector.getDensity(), 0);
+
+ index = 0;
+ offset = offsetBuffer.getInt(index * ListVector.OFFSET_WIDTH);
+ assertEquals(Integer.toString(0), Integer.toString(offset));
+
+ Long actual = dataVector.getObject(offset);
+ assertEquals(new Long(10), actual);
+ offset++;
+ actual = dataVector.getObject(offset);
+ assertEquals(new Long(11), actual);
+ offset++;
+ actual = dataVector.getObject(offset);
+ assertEquals(new Long(12), actual);
+
+ index++;
+ offset = offsetBuffer.getInt(index * ListVector.OFFSET_WIDTH);
+ assertEquals(Integer.toString(3), Integer.toString(offset));
+
+ actual = dataVector.getObject(offset);
+ assertEquals(new Long(13), actual);
+ offset++;
+ actual = dataVector.getObject(offset);
+ assertEquals(new Long(14), actual);
+
+ index++;
+ offset = offsetBuffer.getInt(index * ListVector.OFFSET_WIDTH);
+ assertEquals(Integer.toString(5), Integer.toString(offset));
+
+ actual = dataVector.getObject(offset);
+ assertEquals(new Long(15), actual);
+ offset++;
+ actual = dataVector.getObject(offset);
+ assertEquals(new Long(16), actual);
+ offset++;
+ actual = dataVector.getObject(offset);
+ assertEquals(new Long(17), actual);
+
+ index++;
+ offset = offsetBuffer.getInt(index * ListVector.OFFSET_WIDTH);
+ assertEquals(Integer.toString(8), Integer.toString(offset));
+
+ actual = dataVector.getObject(offset);
+ assertNull(actual);
+ }
+ }
+
+ @Test
+ public void testSplitAndTransfer() throws Exception {
+ try (ListVector listVector = ListVector.empty("sourceVector", allocator)) {
+
+ /* Explicitly add the dataVector */
+ MinorType type = MinorType.BIGINT;
+ listVector.addOrGetVector(FieldType.nullable(type.getType()));
+
+ UnionListWriter listWriter = listVector.getWriter();
+
+ /* allocate memory */
+ listWriter.allocate();
+
+ /* populate data */
+ listWriter.setPosition(0);
+ listWriter.startList();
+ listWriter.bigInt().writeBigInt(10);
+ listWriter.bigInt().writeBigInt(11);
+ listWriter.bigInt().writeBigInt(12);
+ listWriter.endList();
+
+ listWriter.setPosition(1);
+ listWriter.startList();
+ listWriter.bigInt().writeBigInt(13);
+ listWriter.bigInt().writeBigInt(14);
+ listWriter.endList();
+
+ listWriter.setPosition(2);
+ listWriter.startList();
+ listWriter.bigInt().writeBigInt(15);
+ listWriter.bigInt().writeBigInt(16);
+ listWriter.bigInt().writeBigInt(17);
+ listWriter.bigInt().writeBigInt(18);
+ listWriter.endList();
+
+ listWriter.setPosition(3);
+ listWriter.startList();
+ listWriter.bigInt().writeBigInt(19);
+ listWriter.endList();
+
+ listWriter.setPosition(4);
+ listWriter.startList();
+ listWriter.bigInt().writeBigInt(20);
+ listWriter.bigInt().writeBigInt(21);
+ listWriter.bigInt().writeBigInt(22);
+ listWriter.bigInt().writeBigInt(23);
+ listWriter.endList();
+
+ listVector.setValueCount(5);
+
+ assertEquals(4, listVector.getLastSet());
+
+ /* get offset buffer */
+ final ArrowBuf offsetBuffer = listVector.getOffsetBuffer();
+
+ /* get dataVector */
+ BigIntVector dataVector = (BigIntVector) listVector.getDataVector();
+
+ /* check the vector output */
+
+ int index = 0;
+ int offset = 0;
+ Long actual = null;
+
+ /* index 0 */
+ assertFalse(listVector.isNull(index));
+ offset = offsetBuffer.getInt(index * ListVector.OFFSET_WIDTH);
+ assertEquals(Integer.toString(0), Integer.toString(offset));
+
+ actual = dataVector.getObject(offset);
+ assertEquals(new Long(10), actual);
+ offset++;
+ actual = dataVector.getObject(offset);
+ assertEquals(new Long(11), actual);
+ offset++;
+ actual = dataVector.getObject(offset);
+ assertEquals(new Long(12), actual);
+
+ /* index 1 */
+ index++;
+ assertFalse(listVector.isNull(index));
+ offset = offsetBuffer.getInt(index * ListVector.OFFSET_WIDTH);
+ assertEquals(Integer.toString(3), Integer.toString(offset));
+
+ actual = dataVector.getObject(offset);
+ assertEquals(new Long(13), actual);
+ offset++;
+ actual = dataVector.getObject(offset);
+ assertEquals(new Long(14), actual);
+
+ /* index 2 */
+ index++;
+ assertFalse(listVector.isNull(index));
+ offset = offsetBuffer.getInt(index * ListVector.OFFSET_WIDTH);
+ assertEquals(Integer.toString(5), Integer.toString(offset));
+
+ actual = dataVector.getObject(offset);
+ assertEquals(new Long(15), actual);
+ offset++;
+ actual = dataVector.getObject(offset);
+ assertEquals(new Long(16), actual);
+ offset++;
+ actual = dataVector.getObject(offset);
+ assertEquals(new Long(17), actual);
+ offset++;
+ actual = dataVector.getObject(offset);
+ assertEquals(new Long(18), actual);
+
+ /* index 3 */
+ index++;
+ assertFalse(listVector.isNull(index));
+ offset = offsetBuffer.getInt(index * ListVector.OFFSET_WIDTH);
+ assertEquals(Integer.toString(9), Integer.toString(offset));
+
+ actual = dataVector.getObject(offset);
+ assertEquals(new Long(19), actual);
+
+ /* index 4 */
+ index++;
+ assertFalse(listVector.isNull(index));
+ offset = offsetBuffer.getInt(index * ListVector.OFFSET_WIDTH);
+ assertEquals(Integer.toString(10), Integer.toString(offset));
+
+ actual = dataVector.getObject(offset);
+ assertEquals(new Long(20), actual);
+ offset++;
+ actual = dataVector.getObject(offset);
+ assertEquals(new Long(21), actual);
+ offset++;
+ actual = dataVector.getObject(offset);
+ assertEquals(new Long(22), actual);
+ offset++;
+ actual = dataVector.getObject(offset);
+ assertEquals(new Long(23), actual);
+
+ /* index 5 */
+ index++;
+ assertTrue(listVector.isNull(index));
+ offset = offsetBuffer.getInt(index * ListVector.OFFSET_WIDTH);
+ assertEquals(Integer.toString(14), Integer.toString(offset));
+
+ /* do split and transfer */
+ try (ListVector toVector = ListVector.empty("toVector", allocator)) {
+
+ TransferPair transferPair = listVector.makeTransferPair(toVector);
+
+ int[][] transferLengths = {{0, 2}, {3, 1}, {4, 1}};
+
+ for (final int[] transferLength : transferLengths) {
+ int start = transferLength[0];
+ int splitLength = transferLength[1];
+
+ int dataLength1 = 0;
+ int dataLength2 = 0;
+
+ int offset1 = 0;
+ int offset2 = 0;
+
+ transferPair.splitAndTransfer(start, splitLength);
+
+ /* get offsetBuffer of toVector */
+ final ArrowBuf toOffsetBuffer = toVector.getOffsetBuffer();
+
+ /* get dataVector of toVector */
+ BigIntVector dataVector1 = (BigIntVector) toVector.getDataVector();
+
+ for (int i = 0; i < splitLength; i++) {
+ dataLength1 = offsetBuffer.getInt((start + i + 1) * ListVector.OFFSET_WIDTH) -
+ offsetBuffer.getInt((start + i) * ListVector.OFFSET_WIDTH);
+ dataLength2 = toOffsetBuffer.getInt((i + 1) * ListVector.OFFSET_WIDTH) -
+ toOffsetBuffer.getInt(i * ListVector.OFFSET_WIDTH);
+
+ assertEquals("Different data lengths at index: " + i + " and start: " + start,
+ dataLength1, dataLength2);
+
+ offset1 = offsetBuffer.getInt((start + i) * ListVector.OFFSET_WIDTH);
+ offset2 = toOffsetBuffer.getInt(i * ListVector.OFFSET_WIDTH);
+
+ for (int j = 0; j < dataLength1; j++) {
+ assertEquals("Different data at indexes: " + offset1 + " and " + offset2,
+ dataVector.getObject(offset1), dataVector1.getObject(offset2));
+
+ offset1++;
+ offset2++;
+ }
+ }
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testNestedListVector() throws Exception {
+ try (ListVector listVector = ListVector.empty("sourceVector", allocator)) {
+
+ UnionListWriter listWriter = listVector.getWriter();
+
+ /* allocate memory */
+ listWriter.allocate();
+
+ /* the dataVector that backs a listVector will also be a
+ * listVector for this test.
+ */
+
+ /* write one or more inner lists at index 0 */
+ listWriter.setPosition(0);
+ listWriter.startList();
+
+ listWriter.list().startList();
+ listWriter.list().bigInt().writeBigInt(50);
+ listWriter.list().bigInt().writeBigInt(100);
+ listWriter.list().bigInt().writeBigInt(200);
+ listWriter.list().endList();
+
+ listWriter.list().startList();
+ listWriter.list().bigInt().writeBigInt(75);
+ listWriter.list().bigInt().writeBigInt(125);
+ listWriter.list().bigInt().writeBigInt(150);
+ listWriter.list().bigInt().writeBigInt(175);
+ listWriter.list().endList();
+
+ listWriter.endList();
+
+ /* write one or more inner lists at index 1 */
+ listWriter.setPosition(1);
+ listWriter.startList();
+
+ listWriter.list().startList();
+ listWriter.list().bigInt().writeBigInt(10);
+ listWriter.list().endList();
+
+ listWriter.list().startList();
+ listWriter.list().bigInt().writeBigInt(15);
+ listWriter.list().bigInt().writeBigInt(20);
+ listWriter.list().endList();
+
+ listWriter.list().startList();
+ listWriter.list().bigInt().writeBigInt(25);
+ listWriter.list().bigInt().writeBigInt(30);
+ listWriter.list().bigInt().writeBigInt(35);
+ listWriter.list().endList();
+
+ listWriter.endList();
+
+ assertEquals(1, listVector.getLastSet());
+
+ listVector.setValueCount(2);
+
+ assertEquals(2, listVector.getValueCount());
+
+ /* get listVector value at index 0 -- the value itself is a listvector */
+ Object result = listVector.getObject(0);
+ ArrayList<ArrayList<Long>> resultSet = (ArrayList<ArrayList<Long>>) result;
+ ArrayList<Long> list;
+
+ assertEquals(2, resultSet.size()); /* 2 inner lists at index 0 */
+ assertEquals(3, resultSet.get(0).size()); /* size of first inner list */
+ assertEquals(4, resultSet.get(1).size()); /* size of second inner list */
+
+ list = resultSet.get(0);
+ assertEquals(new Long(50), list.get(0));
+ assertEquals(new Long(100), list.get(1));
+ assertEquals(new Long(200), list.get(2));
+
+ list = resultSet.get(1);
+ assertEquals(new Long(75), list.get(0));
+ assertEquals(new Long(125), list.get(1));
+ assertEquals(new Long(150), list.get(2));
+ assertEquals(new Long(175), list.get(3));
+
+ /* get listVector value at index 1 -- the value itself is a listvector */
+ result = listVector.getObject(1);
+ resultSet = (ArrayList<ArrayList<Long>>) result;
+
+ assertEquals(3, resultSet.size()); /* 3 inner lists at index 1 */
+ assertEquals(1, resultSet.get(0).size()); /* size of first inner list */
+ assertEquals(2, resultSet.get(1).size()); /* size of second inner list */
+ assertEquals(3, resultSet.get(2).size()); /* size of third inner list */
+
+ list = resultSet.get(0);
+ assertEquals(new Long(10), list.get(0));
+
+ list = resultSet.get(1);
+ assertEquals(new Long(15), list.get(0));
+ assertEquals(new Long(20), list.get(1));
+
+ list = resultSet.get(2);
+ assertEquals(new Long(25), list.get(0));
+ assertEquals(new Long(30), list.get(1));
+ assertEquals(new Long(35), list.get(2));
+
+ /* check underlying bitVector */
+ assertFalse(listVector.isNull(0));
+ assertFalse(listVector.isNull(1));
+
+ /* check underlying offsets */
+ final ArrowBuf offsetBuffer = listVector.getOffsetBuffer();
+
+ /* listVector has 2 lists at index 0 and 3 lists at index 1 */
+ assertEquals(0, offsetBuffer.getInt(0 * ListVector.OFFSET_WIDTH));
+ assertEquals(2, offsetBuffer.getInt(1 * ListVector.OFFSET_WIDTH));
+ assertEquals(5, offsetBuffer.getInt(2 * ListVector.OFFSET_WIDTH));
+ }
+ }
+
+ @Test
+ public void testNestedListVector1() throws Exception {
+ try (ListVector listVector = ListVector.empty("sourceVector", allocator)) {
+
+ MinorType listType = MinorType.LIST;
+ MinorType scalarType = MinorType.BIGINT;
+
+ listVector.addOrGetVector(FieldType.nullable(listType.getType()));
+
+ ListVector innerList1 = (ListVector) listVector.getDataVector();
+ innerList1.addOrGetVector(FieldType.nullable(listType.getType()));
+
+ ListVector innerList2 = (ListVector) innerList1.getDataVector();
+ innerList2.addOrGetVector(FieldType.nullable(listType.getType()));
+
+ ListVector innerList3 = (ListVector) innerList2.getDataVector();
+ innerList3.addOrGetVector(FieldType.nullable(listType.getType()));
+
+ ListVector innerList4 = (ListVector) innerList3.getDataVector();
+ innerList4.addOrGetVector(FieldType.nullable(listType.getType()));
+
+ ListVector innerList5 = (ListVector) innerList4.getDataVector();
+ innerList5.addOrGetVector(FieldType.nullable(listType.getType()));
+
+ ListVector innerList6 = (ListVector) innerList5.getDataVector();
+ innerList6.addOrGetVector(FieldType.nullable(scalarType.getType()));
+
+ listVector.setInitialCapacity(128);
+ }
+ }
+
+ @Test
+ public void testNestedListVector2() throws Exception {
+ try (ListVector listVector = ListVector.empty("sourceVector", allocator)) {
+ listVector.setInitialCapacity(1);
+ UnionListWriter listWriter = listVector.getWriter();
+ /* allocate memory */
+ listWriter.allocate();
+
+ /* write one or more inner lists at index 0 */
+ listWriter.setPosition(0);
+ listWriter.startList();
+
+ listWriter.list().startList();
+ listWriter.list().bigInt().writeBigInt(50);
+ listWriter.list().bigInt().writeBigInt(100);
+ listWriter.list().bigInt().writeBigInt(200);
+ listWriter.list().endList();
+
+ listWriter.list().startList();
+ listWriter.list().bigInt().writeBigInt(75);
+ listWriter.list().bigInt().writeBigInt(125);
+ listWriter.list().endList();
+
+ listWriter.endList();
+
+ /* write one or more inner lists at index 1 */
+ listWriter.setPosition(1);
+ listWriter.startList();
+
+ listWriter.list().startList();
+ listWriter.list().bigInt().writeBigInt(15);
+ listWriter.list().bigInt().writeBigInt(20);
+ listWriter.list().endList();
+
+ listWriter.list().startList();
+ listWriter.list().bigInt().writeBigInt(25);
+ listWriter.list().bigInt().writeBigInt(30);
+ listWriter.list().bigInt().writeBigInt(35);
+ listWriter.list().endList();
+
+ listWriter.endList();
+
+ assertEquals(1, listVector.getLastSet());
+
+ listVector.setValueCount(2);
+
+ assertEquals(2, listVector.getValueCount());
+
+ /* get listVector value at index 0 -- the value itself is a listvector */
+ Object result = listVector.getObject(0);
+ ArrayList<ArrayList<Long>> resultSet = (ArrayList<ArrayList<Long>>) result;
+ ArrayList<Long> list;
+
+ assertEquals(2, resultSet.size()); /* 2 inner lists at index 0 */
+ assertEquals(3, resultSet.get(0).size()); /* size of first inner list */
+ assertEquals(2, resultSet.get(1).size()); /* size of second inner list */
+
+ list = resultSet.get(0);
+ assertEquals(new Long(50), list.get(0));
+ assertEquals(new Long(100), list.get(1));
+ assertEquals(new Long(200), list.get(2));
+
+ list = resultSet.get(1);
+ assertEquals(new Long(75), list.get(0));
+ assertEquals(new Long(125), list.get(1));
+
+ /* get listVector value at index 1 -- the value itself is a listvector */
+ result = listVector.getObject(1);
+ resultSet = (ArrayList<ArrayList<Long>>) result;
+
+ assertEquals(2, resultSet.size()); /* 3 inner lists at index 1 */
+ assertEquals(2, resultSet.get(0).size()); /* size of first inner list */
+ assertEquals(3, resultSet.get(1).size()); /* size of second inner list */
+
+ list = resultSet.get(0);
+ assertEquals(new Long(15), list.get(0));
+ assertEquals(new Long(20), list.get(1));
+
+ list = resultSet.get(1);
+ assertEquals(new Long(25), list.get(0));
+ assertEquals(new Long(30), list.get(1));
+ assertEquals(new Long(35), list.get(2));
+
+ /* check underlying bitVector */
+ assertFalse(listVector.isNull(0));
+ assertFalse(listVector.isNull(1));
+
+ /* check underlying offsets */
+ final ArrowBuf offsetBuffer = listVector.getOffsetBuffer();
+
+ /* listVector has 2 lists at index 0 and 3 lists at index 1 */
+ assertEquals(0, offsetBuffer.getInt(0 * ListVector.OFFSET_WIDTH));
+ assertEquals(2, offsetBuffer.getInt(1 * ListVector.OFFSET_WIDTH));
+ assertEquals(4, offsetBuffer.getInt(2 * ListVector.OFFSET_WIDTH));
+ }
+ }
+
+ @Test
+ public void testGetBufferAddress() throws Exception {
+ try (ListVector listVector = ListVector.empty("vector", allocator)) {
+
+ UnionListWriter listWriter = listVector.getWriter();
+ boolean error = false;
+
+ listWriter.allocate();
+
+ listWriter.setPosition(0);
+ listWriter.startList();
+ listWriter.bigInt().writeBigInt(50);
+ listWriter.bigInt().writeBigInt(100);
+ listWriter.bigInt().writeBigInt(200);
+ listWriter.endList();
+
+ listWriter.setPosition(1);
+ listWriter.startList();
+ listWriter.bigInt().writeBigInt(250);
+ listWriter.bigInt().writeBigInt(300);
+ listWriter.endList();
+
+ listVector.setValueCount(2);
+
+ /* check listVector contents */
+ Object result = listVector.getObject(0);
+ ArrayList<Long> resultSet = (ArrayList<Long>) result;
+ assertEquals(3, resultSet.size());
+ assertEquals(new Long(50), resultSet.get(0));
+ assertEquals(new Long(100), resultSet.get(1));
+ assertEquals(new Long(200), resultSet.get(2));
+
+ result = listVector.getObject(1);
+ resultSet = (ArrayList<Long>) result;
+ assertEquals(2, resultSet.size());
+ assertEquals(new Long(250), resultSet.get(0));
+ assertEquals(new Long(300), resultSet.get(1));
+
+ List<ArrowBuf> buffers = listVector.getFieldBuffers();
+
+ long bitAddress = listVector.getValidityBufferAddress();
+ long offsetAddress = listVector.getOffsetBufferAddress();
+
+ try {
+ long dataAddress = listVector.getDataBufferAddress();
+ } catch (UnsupportedOperationException ue) {
+ error = true;
+ } finally {
+ assertTrue(error);
+ }
+
+ assertEquals(2, buffers.size());
+ assertEquals(bitAddress, buffers.get(0).memoryAddress());
+ assertEquals(offsetAddress, buffers.get(1).memoryAddress());
+
+ /* (3+2)/2 */
+ assertEquals(2.5, listVector.getDensity(), 0);
+ }
+ }
+
+ @Test
+ public void testConsistentChildName() throws Exception {
+ try (ListVector listVector = ListVector.empty("sourceVector", allocator)) {
+ String emptyListStr = listVector.getField().toString();
+ assertTrue(emptyListStr.contains(ListVector.DATA_VECTOR_NAME));
+
+ listVector.addOrGetVector(FieldType.nullable(MinorType.INT.getType()));
+ String emptyVectorStr = listVector.getField().toString();
+ assertTrue(emptyVectorStr.contains(ListVector.DATA_VECTOR_NAME));
+ }
+ }
+
+ @Test
+ public void testSetInitialCapacity() {
+ try (final ListVector vector = ListVector.empty("", allocator)) {
+ vector.addOrGetVector(FieldType.nullable(MinorType.INT.getType()));
+
+ /**
+ * use the default multiplier of 5,
+ * 512 * 5 => 2560 * 4 => 10240 bytes => 16KB => 4096 value capacity.
+ */
+ vector.setInitialCapacity(512);
+ vector.allocateNew();
+ assertEquals(512, vector.getValueCapacity());
+ assertTrue(vector.getDataVector().getValueCapacity() >= 512 * 5);
+
+ /* use density as 4 */
+ vector.setInitialCapacity(512, 4);
+ vector.allocateNew();
+ assertEquals(512, vector.getValueCapacity());
+ assertTrue(vector.getDataVector().getValueCapacity() >= 512 * 4);
+
+ /**
+ * inner value capacity we pass to data vector is 512 * 0.1 => 51
+ * For an int vector this is 204 bytes of memory for data buffer
+ * and 7 bytes for validity buffer.
+ * and with power of 2 allocation, we allocate 256 bytes and 8 bytes
+ * for the data buffer and validity buffer of the inner vector. Thus
+ * value capacity of inner vector is 64
+ */
+ vector.setInitialCapacity(512, 0.1);
+ vector.allocateNew();
+ assertEquals(512, vector.getValueCapacity());
+ assertTrue(vector.getDataVector().getValueCapacity() >= 51);
+
+ /**
+ * inner value capacity we pass to data vector is 512 * 0.01 => 5
+ * For an int vector this is 20 bytes of memory for data buffer
+ * and 1 byte for validity buffer.
+ * and with power of 2 allocation, we allocate 32 bytes and 1 bytes
+ * for the data buffer and validity buffer of the inner vector. Thus
+ * value capacity of inner vector is 8
+ */
+ vector.setInitialCapacity(512, 0.01);
+ vector.allocateNew();
+ assertEquals(512, vector.getValueCapacity());
+ assertTrue(vector.getDataVector().getValueCapacity() >= 5);
+
+ /**
+ * inner value capacity we pass to data vector is 5 * 0.1 => 0
+ * which is then rounded off to 1. So we pass value count as 1
+ * to the inner int vector.
+ * the offset buffer of the list vector is allocated for 6 values
+ * which is 24 bytes and then rounded off to 32 bytes (8 values)
+ * the validity buffer of the list vector is allocated for 5
+ * values which is 1 byte. This is why value capacity of the list
+ * vector is 7 as we take the min of validity buffer value capacity
+ * and offset buffer value capacity.
+ */
+ vector.setInitialCapacity(5, 0.1);
+ vector.allocateNew();
+ assertEquals(7, vector.getValueCapacity());
+ assertTrue(vector.getDataVector().getValueCapacity() >= 1);
+ }
+ }
+
+ @Test
+ public void testClearAndReuse() {
+ try (final ListVector vector = ListVector.empty("list", allocator)) {
+ BigIntVector bigIntVector =
+ (BigIntVector) vector.addOrGetVector(FieldType.nullable(MinorType.BIGINT.getType())).getVector();
+ vector.setInitialCapacity(10);
+ vector.allocateNew();
+
+ vector.startNewValue(0);
+ bigIntVector.setSafe(0, 7);
+ vector.endValue(0, 1);
+ vector.startNewValue(1);
+ bigIntVector.setSafe(1, 8);
+ vector.endValue(1, 1);
+ vector.setValueCount(2);
+
+ Object result = vector.getObject(0);
+ ArrayList<Long> resultSet = (ArrayList<Long>) result;
+ assertEquals(new Long(7), resultSet.get(0));
+
+ result = vector.getObject(1);
+ resultSet = (ArrayList<Long>) result;
+ assertEquals(new Long(8), resultSet.get(0));
+
+ // Clear and release the buffers to trigger a realloc when adding next value
+ vector.clear();
+
+ // The list vector should reuse a buffer when reallocating the offset buffer
+ vector.startNewValue(0);
+ bigIntVector.setSafe(0, 7);
+ vector.endValue(0, 1);
+ vector.startNewValue(1);
+ bigIntVector.setSafe(1, 8);
+ vector.endValue(1, 1);
+ vector.setValueCount(2);
+
+ result = vector.getObject(0);
+ resultSet = (ArrayList<Long>) result;
+ assertEquals(new Long(7), resultSet.get(0));
+
+ result = vector.getObject(1);
+ resultSet = (ArrayList<Long>) result;
+ assertEquals(new Long(8), resultSet.get(0));
+ }
+ }
+
+ @Test
+ public void testWriterGetField() {
+ try (final ListVector vector = ListVector.empty("list", allocator)) {
+
+ UnionListWriter writer = vector.getWriter();
+ writer.allocate();
+
+ //set some values
+ writer.startList();
+ writer.integer().writeInt(1);
+ writer.integer().writeInt(2);
+ writer.endList();
+ vector.setValueCount(2);
+
+ Field expectedDataField = new Field(BaseRepeatedValueVector.DATA_VECTOR_NAME,
+ FieldType.nullable(new ArrowType.Int(32, true)), null);
+ Field expectedField = new Field(vector.getName(), FieldType.nullable(ArrowType.List.INSTANCE),
+ Arrays.asList(expectedDataField));
+
+ assertEquals(expectedField, writer.getField());
+ }
+ }
+
+ @Test
+ public void testClose() throws Exception {
+ try (final ListVector vector = ListVector.empty("list", allocator)) {
+
+ UnionListWriter writer = vector.getWriter();
+ writer.allocate();
+
+ //set some values
+ writer.startList();
+ writer.integer().writeInt(1);
+ writer.integer().writeInt(2);
+ writer.endList();
+ vector.setValueCount(2);
+
+ assertTrue(vector.getBufferSize() > 0);
+ assertTrue(vector.getDataVector().getBufferSize() > 0);
+
+ writer.close();
+ assertEquals(0, vector.getBufferSize());
+ assertEquals(0, vector.getDataVector().getBufferSize());
+ }
+ }
+
+ @Test
+ public void testGetBufferSizeFor() {
+ try (final ListVector vector = ListVector.empty("list", allocator)) {
+
+ UnionListWriter writer = vector.getWriter();
+ writer.allocate();
+
+ //set some values
+ writeIntValues(writer, new int[] {1, 2});
+ writeIntValues(writer, new int[] {3, 4});
+ writeIntValues(writer, new int[] {5, 6});
+ writeIntValues(writer, new int[] {7, 8, 9, 10});
+ writeIntValues(writer, new int[] {11, 12, 13, 14});
+ writer.setValueCount(5);
+
+ IntVector dataVector = (IntVector) vector.getDataVector();
+ int[] indices = new int[] {0, 2, 4, 6, 10, 14};
+
+ for (int valueCount = 1; valueCount <= 5; valueCount++) {
+ int validityBufferSize = BitVectorHelper.getValidityBufferSize(valueCount);
+ int offsetBufferSize = (valueCount + 1) * BaseRepeatedValueVector.OFFSET_WIDTH;
+
+ int expectedSize = validityBufferSize + offsetBufferSize + dataVector.getBufferSizeFor(indices[valueCount]);
+ assertEquals(expectedSize, vector.getBufferSizeFor(valueCount));
+ }
+ }
+ }
+
+ @Test
+ public void testIsEmpty() {
+ try (final ListVector vector = ListVector.empty("list", allocator)) {
+ UnionListWriter writer = vector.getWriter();
+ writer.allocate();
+
+ // set values [1,2], null, [], [5,6]
+ writeIntValues(writer, new int[] {1, 2});
+ writer.setPosition(2);
+ writeIntValues(writer, new int[] {});
+ writeIntValues(writer, new int[] {5, 6});
+ writer.setValueCount(4);
+
+ assertFalse(vector.isEmpty(0));
+ assertTrue(vector.isNull(1));
+ assertTrue(vector.isEmpty(1));
+ assertFalse(vector.isNull(2));
+ assertTrue(vector.isEmpty(2));
+ assertFalse(vector.isEmpty(3));
+ }
+ }
+
+ private void writeIntValues(UnionListWriter writer, int[] values) {
+ writer.startList();
+ for (int v: values) {
+ writer.integer().writeInt(v);
+ }
+ writer.endList();
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestMapVector.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestMapVector.java
new file mode 100644
index 000000000..9637021db
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestMapVector.java
@@ -0,0 +1,1113 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+
+import java.util.ArrayList;
+import java.util.Map;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.complex.MapVector;
+import org.apache.arrow.vector.complex.StructVector;
+import org.apache.arrow.vector.complex.impl.UnionMapReader;
+import org.apache.arrow.vector.complex.impl.UnionMapWriter;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.complex.writer.BaseWriter.ListWriter;
+import org.apache.arrow.vector.complex.writer.BaseWriter.MapWriter;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.JsonStringArrayList;
+import org.apache.arrow.vector.util.TransferPair;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestMapVector {
+
+ private BufferAllocator allocator;
+
+ @Before
+ public void init() {
+ allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100);
+ }
+
+ @After
+ public void terminate() throws Exception {
+ allocator.close();
+ }
+
+ public <T> T getResultKey(Map<?, T> resultStruct) {
+ assertTrue(resultStruct.containsKey(MapVector.KEY_NAME));
+ return resultStruct.get(MapVector.KEY_NAME);
+ }
+
+ public <T> T getResultValue(Map<?, T> resultStruct) {
+ assertTrue(resultStruct.containsKey(MapVector.VALUE_NAME));
+ return resultStruct.get(MapVector.VALUE_NAME);
+ }
+
+ @Test
+ public void testBasicOperation() {
+ int count = 5;
+ try (MapVector mapVector = MapVector.empty("map", allocator, false)) {
+ mapVector.allocateNew();
+ UnionMapWriter mapWriter = mapVector.getWriter();
+ for (int i = 0; i < count; i++) {
+ mapWriter.startMap();
+ for (int j = 0; j < i + 1; j++) {
+ mapWriter.startEntry();
+ mapWriter.key().bigInt().writeBigInt(j);
+ mapWriter.value().integer().writeInt(j);
+ mapWriter.endEntry();
+ }
+ mapWriter.endMap();
+ }
+ mapWriter.setValueCount(count);
+ UnionMapReader mapReader = mapVector.getReader();
+ for (int i = 0; i < count; i++) {
+ mapReader.setPosition(i);
+ for (int j = 0; j < i + 1; j++) {
+ mapReader.next();
+ assertEquals("record: " + i, j, mapReader.key().readLong().longValue());
+ assertEquals(j, mapReader.value().readInteger().intValue());
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testBasicOperationNulls() {
+ int count = 6;
+ try (MapVector mapVector = MapVector.empty("map", allocator, false)) {
+ mapVector.allocateNew();
+ UnionMapWriter mapWriter = mapVector.getWriter();
+ for (int i = 0; i < count; i++) {
+ // i == 1 is a NULL
+ if (i != 1) {
+ mapWriter.setPosition(i);
+ mapWriter.startMap();
+ // i == 3 is an empty map
+ if (i != 3) {
+ for (int j = 0; j < i + 1; j++) {
+ mapWriter.startEntry();
+ mapWriter.key().bigInt().writeBigInt(j);
+ // i == 5 maps to a NULL value
+ if (i != 5) {
+ mapWriter.value().integer().writeInt(j);
+ }
+ mapWriter.endEntry();
+ }
+ }
+ mapWriter.endMap();
+ }
+ }
+ mapWriter.setValueCount(count);
+ UnionMapReader mapReader = mapVector.getReader();
+ for (int i = 0; i < count; i++) {
+ mapReader.setPosition(i);
+ if (i == 1) {
+ assertFalse(mapReader.isSet());
+ } else {
+ if (i == 3) {
+ JsonStringArrayList<?> result = (JsonStringArrayList<?>) mapReader.readObject();
+ assertTrue(result.isEmpty());
+ } else {
+ for (int j = 0; j < i + 1; j++) {
+ mapReader.next();
+ assertEquals("record: " + i, j, mapReader.key().readLong().longValue());
+ if (i == 5) {
+ assertFalse(mapReader.value().isSet());
+ } else {
+ assertEquals(j, mapReader.value().readInteger().intValue());
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testCopyFrom() throws Exception {
+ try (MapVector inVector = MapVector.empty("input", allocator, false);
+ MapVector outVector = MapVector.empty("output", allocator, false)) {
+ UnionMapWriter writer = inVector.getWriter();
+ writer.allocate();
+
+ // populate input vector with the following records
+ // {1 -> 11, 2 -> 22, 3 -> 33}
+ // null
+ // {2 -> null}
+ writer.setPosition(0); // optional
+ writer.startMap();
+ writer.startEntry();
+ writer.key().bigInt().writeBigInt(1);
+ writer.value().bigInt().writeBigInt(11);
+ writer.endEntry();
+ writer.startEntry();
+ writer.key().bigInt().writeBigInt(2);
+ writer.value().bigInt().writeBigInt(22);
+ writer.endEntry();
+ writer.startEntry();
+ writer.key().bigInt().writeBigInt(3);
+ writer.value().bigInt().writeBigInt(33);
+ writer.endEntry();
+ writer.endMap();
+
+ writer.setPosition(2);
+ writer.startMap();
+ writer.startEntry();
+ writer.key().bigInt().writeBigInt(2);
+ writer.endEntry();
+ writer.endMap();
+
+ writer.setValueCount(3);
+
+ // copy values from input to output
+ outVector.allocateNew();
+ for (int i = 0; i < 3; i++) {
+ outVector.copyFrom(i, i, inVector);
+ }
+ outVector.setValueCount(3);
+
+ // assert the output vector is correct
+ FieldReader reader = outVector.getReader();
+ assertTrue("shouldn't be null", reader.isSet());
+ reader.setPosition(1);
+ assertFalse("should be null", reader.isSet());
+ reader.setPosition(2);
+ assertTrue("shouldn't be null", reader.isSet());
+
+
+ /* index 0 */
+ Object result = outVector.getObject(0);
+ ArrayList<?> resultSet = (ArrayList<?>) result;
+ assertEquals(3, resultSet.size());
+ Map<?, ?> resultStruct = (Map<?, ?>) resultSet.get(0);
+ assertEquals(1L, getResultKey(resultStruct));
+ assertEquals(11L, getResultValue(resultStruct));
+ resultStruct = (Map<?, ?>) resultSet.get(1);
+ assertEquals(2L, getResultKey(resultStruct));
+ assertEquals(22L, getResultValue(resultStruct));
+ resultStruct = (Map<?, ?>) resultSet.get(2);
+ assertEquals(3L, getResultKey(resultStruct));
+ assertEquals(33L, getResultValue(resultStruct));
+
+ /* index 1 */
+ result = outVector.getObject(1);
+ assertNull(result);
+
+ /* index 2 */
+ result = outVector.getObject(2);
+ resultSet = (ArrayList<?>) result;
+ assertEquals(1, resultSet.size());
+ resultStruct = (Map<?, ?>) resultSet.get(0);
+ assertEquals(2L, getResultKey(resultStruct));
+ assertFalse(resultStruct.containsKey(MapVector.VALUE_NAME));
+ }
+ }
+
+ @Test
+ public void testSplitAndTransfer() throws Exception {
+ try (MapVector mapVector = MapVector.empty("sourceVector", allocator, false)) {
+
+ /* Explicitly add the map child vectors */
+ FieldType type = new FieldType(false, ArrowType.Struct.INSTANCE, null, null);
+ AddOrGetResult<StructVector> addResult = mapVector.addOrGetVector(type);
+ FieldType keyType = new FieldType(false, MinorType.BIGINT.getType(), null, null);
+ FieldType valueType = FieldType.nullable(MinorType.FLOAT8.getType());
+ addResult.getVector().addOrGet(MapVector.KEY_NAME, keyType, BigIntVector.class);
+ addResult.getVector().addOrGet(MapVector.VALUE_NAME, valueType, Float8Vector.class);
+
+ UnionMapWriter mapWriter = mapVector.getWriter();
+
+ /* allocate memory */
+ mapWriter.allocate();
+
+ /* populate data */
+ mapWriter.setPosition(0);
+ mapWriter.startMap();
+ mapWriter.startEntry();
+ mapWriter.key().bigInt().writeBigInt(10);
+ mapWriter.value().float8().writeFloat8(1.0);
+ mapWriter.endEntry();
+ mapWriter.startEntry();
+ mapWriter.key().bigInt().writeBigInt(11);
+ mapWriter.value().float8().writeFloat8(1.1);
+ mapWriter.endEntry();
+ mapWriter.startEntry();
+ mapWriter.key().bigInt().writeBigInt(12);
+ mapWriter.value().float8().writeFloat8(1.2);
+ mapWriter.endEntry();
+ mapWriter.endMap();
+
+ mapWriter.setPosition(1);
+ mapWriter.startMap();
+ mapWriter.startEntry();
+ mapWriter.key().bigInt().writeBigInt(13);
+ mapWriter.value().float8().writeFloat8(1.3);
+ mapWriter.endEntry();
+ mapWriter.startEntry();
+ mapWriter.key().bigInt().writeBigInt(14);
+ mapWriter.value().float8().writeFloat8(1.4);
+ mapWriter.endEntry();
+ mapWriter.endMap();
+
+ mapWriter.setPosition(2);
+ mapWriter.startMap();
+ mapWriter.startEntry();
+ mapWriter.key().bigInt().writeBigInt(15);
+ mapWriter.value().float8().writeFloat8(1.5);
+ mapWriter.endEntry();
+ mapWriter.startEntry();
+ mapWriter.key().bigInt().writeBigInt(16);
+ mapWriter.value().float8().writeFloat8(1.6);
+ mapWriter.endEntry();
+ mapWriter.startEntry();
+ mapWriter.key().bigInt().writeBigInt(17);
+ mapWriter.value().float8().writeFloat8(1.7);
+ mapWriter.endEntry();
+ mapWriter.startEntry();
+ mapWriter.key().bigInt().writeBigInt(18);
+ mapWriter.value().float8().writeFloat8(1.8);
+ mapWriter.endEntry();
+ mapWriter.endMap();
+
+ mapWriter.setPosition(3);
+ mapWriter.startMap();
+ mapWriter.startEntry();
+ mapWriter.key().bigInt().writeBigInt(19);
+ mapWriter.value().float8().writeFloat8(1.9);
+ mapWriter.endEntry();
+ mapWriter.endMap();
+
+ mapWriter.setPosition(4);
+ mapWriter.startMap();
+ mapWriter.startEntry();
+ mapWriter.key().bigInt().writeBigInt(20);
+ mapWriter.value().float8().writeFloat8(2.0);
+ mapWriter.endEntry();
+ mapWriter.startEntry();
+ mapWriter.key().bigInt().writeBigInt(21);
+ mapWriter.value().float8().writeFloat8(2.1);
+ mapWriter.endEntry();
+ mapWriter.startEntry();
+ mapWriter.key().bigInt().writeBigInt(22);
+ mapWriter.value().float8().writeFloat8(2.2);
+ mapWriter.endEntry();
+ mapWriter.startEntry();
+ mapWriter.key().bigInt().writeBigInt(23);
+ mapWriter.value().float8().writeFloat8(2.3);
+ mapWriter.endEntry();
+ mapWriter.endMap();
+
+ mapVector.setValueCount(5);
+
+ assertEquals(4, mapVector.getLastSet());
+
+ /* get offset buffer */
+ final ArrowBuf offsetBuffer = mapVector.getOffsetBuffer();
+
+ /* get dataVector */
+ StructVector dataVector = (StructVector) mapVector.getDataVector();
+
+ /* check the vector output */
+ int index = 0;
+ int offset = 0;
+ Map<?, ?> result = null;
+
+ /* index 0 */
+ assertFalse(mapVector.isNull(index));
+ offset = offsetBuffer.getInt(index * MapVector.OFFSET_WIDTH);
+ assertEquals(Integer.toString(0), Integer.toString(offset));
+
+ result = dataVector.getObject(offset);
+ assertEquals(10L, getResultKey(result));
+ assertEquals(1.0, getResultValue(result));
+ offset++;
+ result = dataVector.getObject(offset);
+ assertEquals(11L, getResultKey(result));
+ assertEquals(1.1, getResultValue(result));
+ offset++;
+ result = dataVector.getObject(offset);
+ assertEquals(12L, getResultKey(result));
+ assertEquals(1.2, getResultValue(result));
+
+ /* index 1 */
+ index++;
+ assertFalse(mapVector.isNull(index));
+ offset = offsetBuffer.getInt(index * MapVector.OFFSET_WIDTH);
+ assertEquals(Integer.toString(3), Integer.toString(offset));
+
+ result = dataVector.getObject(offset);
+ assertEquals(13L, getResultKey(result));
+ assertEquals(1.3, getResultValue(result));
+ offset++;
+ result = dataVector.getObject(offset);
+ assertEquals(14L, getResultKey(result));
+ assertEquals(1.4, getResultValue(result));
+
+ /* index 2 */
+ index++;
+ assertFalse(mapVector.isNull(index));
+ offset = offsetBuffer.getInt(index * MapVector.OFFSET_WIDTH);
+ assertEquals(Integer.toString(5), Integer.toString(offset));
+
+ result = dataVector.getObject(offset);
+ assertEquals(15L, getResultKey(result));
+ assertEquals(1.5, getResultValue(result));
+ offset++;
+ result = dataVector.getObject(offset);
+ assertEquals(16L, getResultKey(result));
+ assertEquals(1.6, getResultValue(result));
+ offset++;
+ result = dataVector.getObject(offset);
+ assertEquals(17L, getResultKey(result));
+ assertEquals(1.7, getResultValue(result));
+ offset++;
+ result = dataVector.getObject(offset);
+ assertEquals(18L, getResultKey(result));
+ assertEquals(1.8, getResultValue(result));
+
+ /* index 3 */
+ index++;
+ assertFalse(mapVector.isNull(index));
+ offset = offsetBuffer.getInt(index * MapVector.OFFSET_WIDTH);
+ assertEquals(Integer.toString(9), Integer.toString(offset));
+
+ result = dataVector.getObject(offset);
+ assertEquals(19L, getResultKey(result));
+ assertEquals(1.9, getResultValue(result));
+
+ /* index 4 */
+ index++;
+ assertFalse(mapVector.isNull(index));
+ offset = offsetBuffer.getInt(index * MapVector.OFFSET_WIDTH);
+ assertEquals(Integer.toString(10), Integer.toString(offset));
+
+ result = dataVector.getObject(offset);
+ assertEquals(20L, getResultKey(result));
+ assertEquals(2.0, getResultValue(result));
+ offset++;
+ result = dataVector.getObject(offset);
+ assertEquals(21L, getResultKey(result));
+ assertEquals(2.1, getResultValue(result));
+ offset++;
+ result = dataVector.getObject(offset);
+ assertEquals(22L, getResultKey(result));
+ assertEquals(2.2, getResultValue(result));
+ offset++;
+ result = dataVector.getObject(offset);
+ assertEquals(23L, getResultKey(result));
+ assertEquals(2.3, getResultValue(result));
+
+ /* index 5 */
+ index++;
+ assertTrue(mapVector.isNull(index));
+ offset = offsetBuffer.getInt(index * MapVector.OFFSET_WIDTH);
+ assertEquals(Integer.toString(14), Integer.toString(offset));
+
+ /* do split and transfer */
+ try (MapVector toVector = MapVector.empty("toVector", allocator, false)) {
+
+ TransferPair transferPair = mapVector.makeTransferPair(toVector);
+
+ int[][] transferLengths = {{0, 2}, {3, 1}, {4, 1}};
+
+ for (final int[] transferLength : transferLengths) {
+ int start = transferLength[0];
+ int splitLength = transferLength[1];
+
+ int dataLength1 = 0;
+ int dataLength2 = 0;
+
+ int offset1 = 0;
+ int offset2 = 0;
+
+ transferPair.splitAndTransfer(start, splitLength);
+
+ /* get offsetBuffer of toVector */
+ final ArrowBuf toOffsetBuffer = toVector.getOffsetBuffer();
+
+ /* get dataVector of toVector */
+ StructVector dataVector1 = (StructVector) toVector.getDataVector();
+
+ for (int i = 0; i < splitLength; i++) {
+ dataLength1 = offsetBuffer.getInt((start + i + 1) * MapVector.OFFSET_WIDTH) -
+ offsetBuffer.getInt((start + i) * MapVector.OFFSET_WIDTH);
+ dataLength2 = toOffsetBuffer.getInt((i + 1) * MapVector.OFFSET_WIDTH) -
+ toOffsetBuffer.getInt(i * MapVector.OFFSET_WIDTH);
+
+ assertEquals("Different data lengths at index: " + i + " and start: " + start,
+ dataLength1, dataLength2);
+
+ offset1 = offsetBuffer.getInt((start + i) * MapVector.OFFSET_WIDTH);
+ offset2 = toOffsetBuffer.getInt(i * MapVector.OFFSET_WIDTH);
+
+ for (int j = 0; j < dataLength1; j++) {
+ assertEquals("Different data at indexes: " + offset1 + " and " + offset2,
+ dataVector.getObject(offset1), dataVector1.getObject(offset2));
+
+ offset1++;
+ offset2++;
+ }
+ }
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testMapWithListValue() throws Exception {
+ try (MapVector mapVector = MapVector.empty("sourceVector", allocator, false)) {
+
+ UnionMapWriter mapWriter = mapVector.getWriter();
+ ListWriter valueWriter;
+
+ /* allocate memory */
+ mapWriter.allocate();
+
+ /* the dataVector that backs a listVector will also be a
+ * listVector for this test.
+ */
+
+ /* write one or more maps index 0 */
+ mapWriter.setPosition(0);
+ mapWriter.startMap();
+
+ mapWriter.startEntry();
+ mapWriter.key().bigInt().writeBigInt(1);
+ valueWriter = mapWriter.value().list();
+ valueWriter.startList();
+ valueWriter.bigInt().writeBigInt(50);
+ valueWriter.bigInt().writeBigInt(100);
+ valueWriter.bigInt().writeBigInt(200);
+ valueWriter.endList();
+ mapWriter.endEntry();
+
+ mapWriter.startEntry();
+ mapWriter.key().bigInt().writeBigInt(2);
+ valueWriter = mapWriter.value().list();
+ valueWriter.startList();
+ valueWriter.bigInt().writeBigInt(75);
+ valueWriter.bigInt().writeBigInt(125);
+ valueWriter.bigInt().writeBigInt(150);
+ valueWriter.bigInt().writeBigInt(175);
+ valueWriter.endList();
+ mapWriter.endEntry();
+
+ mapWriter.endMap();
+
+ /* write one or more maps at index 1 */
+ mapWriter.setPosition(1);
+ mapWriter.startMap();
+
+ mapWriter.startEntry();
+ mapWriter.key().bigInt().writeBigInt(3);
+ valueWriter = mapWriter.value().list();
+ valueWriter.startList();
+ valueWriter.bigInt().writeBigInt(10);
+ valueWriter.endList();
+ mapWriter.endEntry();
+
+ mapWriter.startEntry();
+ mapWriter.key().bigInt().writeBigInt(4);
+ valueWriter = mapWriter.value().list();
+ valueWriter.startList();
+ valueWriter.bigInt().writeBigInt(15);
+ valueWriter.bigInt().writeBigInt(20);
+ valueWriter.endList();
+ mapWriter.endEntry();
+
+ mapWriter.startEntry();
+ mapWriter.key().bigInt().writeBigInt(5);
+ valueWriter = mapWriter.value().list();
+ valueWriter.startList();
+ valueWriter.bigInt().writeBigInt(25);
+ valueWriter.bigInt().writeBigInt(30);
+ valueWriter.bigInt().writeBigInt(35);
+ valueWriter.endList();
+ mapWriter.endEntry();
+
+ mapWriter.endMap();
+
+ assertEquals(1, mapVector.getLastSet());
+
+ mapWriter.setValueCount(2);
+
+ assertEquals(2, mapVector.getValueCount());
+
+ // Get mapVector element at index 0
+ Object result = mapVector.getObject(0);
+ ArrayList<?> resultSet = (ArrayList<?>) result;
+
+ // 2 map entries at index 0
+ assertEquals(2, resultSet.size());
+
+ // First Map entry
+ Map<?, ?> resultStruct = (Map<?, ?>) resultSet.get(0);
+ assertEquals(1L, getResultKey(resultStruct));
+ ArrayList<Long> list = (ArrayList<Long>) getResultValue(resultStruct);
+ assertEquals(3, list.size()); // value is a list with 3 elements
+ assertEquals(new Long(50), list.get(0));
+ assertEquals(new Long(100), list.get(1));
+ assertEquals(new Long(200), list.get(2));
+
+ // Second Map entry
+ resultStruct = (Map<?, ?>) resultSet.get(1);
+ list = (ArrayList<Long>) getResultValue(resultStruct);
+ assertEquals(4, list.size()); // value is a list with 4 elements
+ assertEquals(new Long(75), list.get(0));
+ assertEquals(new Long(125), list.get(1));
+ assertEquals(new Long(150), list.get(2));
+ assertEquals(new Long(175), list.get(3));
+
+ // Get mapVector element at index 1
+ result = mapVector.getObject(1);
+ resultSet = (ArrayList<?>) result;
+
+ // First Map entry
+ resultStruct = (Map<?, ?>) resultSet.get(0);
+ assertEquals(3L, getResultKey(resultStruct));
+ list = (ArrayList<Long>) getResultValue(resultStruct);
+ assertEquals(1, list.size()); // value is a list with 1 element
+ assertEquals(new Long(10), list.get(0));
+
+ // Second Map entry
+ resultStruct = (Map<?, ?>) resultSet.get(1);
+ assertEquals(4L, getResultKey(resultStruct));
+ list = (ArrayList<Long>) getResultValue(resultStruct);
+ assertEquals(2, list.size()); // value is a list with 1 element
+ assertEquals(new Long(15), list.get(0));
+ assertEquals(new Long(20), list.get(1));
+
+ // Third Map entry
+ resultStruct = (Map<?, ?>) resultSet.get(2);
+ assertEquals(5L, getResultKey(resultStruct));
+ list = (ArrayList<Long>) getResultValue(resultStruct);
+ assertEquals(3, list.size()); // value is a list with 1 element
+ assertEquals(new Long(25), list.get(0));
+ assertEquals(new Long(30), list.get(1));
+ assertEquals(new Long(35), list.get(2));
+
+ /* check underlying bitVector */
+ assertFalse(mapVector.isNull(0));
+ assertFalse(mapVector.isNull(1));
+
+ /* check underlying offsets */
+ final ArrowBuf offsetBuffer = mapVector.getOffsetBuffer();
+
+ /* mapVector has 2 entries at index 0 and 3 entries at index 1 */
+ assertEquals(0, offsetBuffer.getInt(0 * MapVector.OFFSET_WIDTH));
+ assertEquals(2, offsetBuffer.getInt(1 * MapVector.OFFSET_WIDTH));
+ assertEquals(5, offsetBuffer.getInt(2 * MapVector.OFFSET_WIDTH));
+ }
+ }
+
+ @Test
+ public void testMapWithMapValue() throws Exception {
+ try (MapVector mapVector = MapVector.empty("sourceVector", allocator, false)) {
+
+ UnionMapWriter mapWriter = mapVector.getWriter();
+ MapWriter valueWriter;
+
+ // we are essentially writing Map<Long, Map<Long, Long>>
+ // populate map vector with the following three records
+ // [
+ // null,
+ // [1:[50: 100, 200:400], 2:[75: 175, 150: 250]],
+ // [3:[10: 20], 4:[15: 20], 5:[25: 30, 35: null]]
+ // ]
+
+ /* write null at index 0 */
+ mapWriter.setPosition(0);
+ mapWriter.writeNull();
+
+ /* write one or more maps at index 1 */
+ mapWriter.setPosition(1);
+ mapWriter.startMap();
+
+ mapWriter.startEntry();
+ mapWriter.key().bigInt().writeBigInt(1);
+ valueWriter = mapWriter.value().map(false);
+ valueWriter.startMap();
+ writeEntry(valueWriter, 50, 100L);
+ writeEntry(valueWriter, 200, 400L);
+ valueWriter.endMap();
+ mapWriter.endEntry();
+
+ mapWriter.startEntry();
+ mapWriter.key().bigInt().writeBigInt(2);
+ valueWriter = mapWriter.value().map(false);
+ valueWriter.startMap();
+ writeEntry(valueWriter, 75, 175L);
+ writeEntry(valueWriter, 150, 250L);
+ valueWriter.endMap();
+ mapWriter.endEntry();
+
+ mapWriter.endMap();
+
+ /* write one or more maps at index 2 */
+ mapWriter.setPosition(2);
+ mapWriter.startMap();
+
+ mapWriter.startEntry();
+ mapWriter.key().bigInt().writeBigInt(3);
+ valueWriter = mapWriter.value().map(true);
+ valueWriter.startMap();
+ writeEntry(valueWriter, 10, 20L);
+ valueWriter.endMap();
+ mapWriter.endEntry();
+
+ mapWriter.startEntry();
+ mapWriter.key().bigInt().writeBigInt(4);
+ valueWriter = mapWriter.value().map(false);
+ valueWriter.startMap();
+ writeEntry(valueWriter, 15, 20L);
+ valueWriter.endMap();
+ mapWriter.endEntry();
+
+ mapWriter.startEntry();
+ mapWriter.key().bigInt().writeBigInt(5);
+ valueWriter = mapWriter.value().map(false);
+ valueWriter.startMap();
+ writeEntry(valueWriter, 25, 30L);
+ writeEntry(valueWriter, 35, (Long) null);
+ valueWriter.endMap();
+ mapWriter.endEntry();
+
+ mapWriter.endMap();
+
+ assertEquals(2, mapVector.getLastSet());
+
+ mapWriter.setValueCount(3);
+
+ assertEquals(3, mapVector.getValueCount());
+
+ // Get mapVector element at index 0
+ Object result = mapVector.getObject(0);
+ assertNull(result);
+
+ // Get mapVector element at index 1
+ result = mapVector.getObject(1);
+ ArrayList<?> resultSet = (ArrayList<?>) result;
+
+ // 2 map entries at index 0
+ assertEquals(2, resultSet.size());
+
+ // First Map entry
+ Map<?, ?> resultStruct = (Map<?, ?>) resultSet.get(0);
+ assertEquals(1L, getResultKey(resultStruct));
+ ArrayList<Map<?, ?>> list = (ArrayList<Map<?, ?>>) getResultValue(resultStruct);
+ assertEquals(2, list.size()); // value is a list of 2 two maps
+ Map<?, ?> innerMap = list.get(0);
+ assertEquals(50L, getResultKey(innerMap));
+ assertEquals(100L, getResultValue(innerMap));
+ innerMap = list.get(1);
+ assertEquals(200L, getResultKey(innerMap));
+ assertEquals(400L, getResultValue(innerMap));
+
+ // Second Map entry
+ resultStruct = (Map<?, ?>) resultSet.get(1);
+ assertEquals(2L, getResultKey(resultStruct));
+ list = (ArrayList<Map<?, ?>>) getResultValue(resultStruct);
+ assertEquals(2, list.size()); // value is a list of two maps
+ innerMap = list.get(0);
+ assertEquals(75L, getResultKey(innerMap));
+ assertEquals(175L, getResultValue(innerMap));
+ innerMap = list.get(1);
+ assertEquals(150L, getResultKey(innerMap));
+ assertEquals(250L, getResultValue(innerMap));
+
+ // Get mapVector element at index 2
+ result = mapVector.getObject(2);
+ resultSet = (ArrayList<?>) result;
+
+ // 3 map entries at index 1
+ assertEquals(3, resultSet.size());
+
+ // First Map entry
+ resultStruct = (Map<?, ?>) resultSet.get(0);
+ assertEquals(3L, getResultKey(resultStruct));
+ list = (ArrayList<Map<?, ?>>) getResultValue(resultStruct);
+ assertEquals(1, list.size()); // value is a list of maps with 1 element
+ innerMap = list.get(0);
+ assertEquals(10L, getResultKey(innerMap));
+ assertEquals(20L, getResultValue(innerMap));
+
+ // Second Map entry
+ resultStruct = (Map<?, ?>) resultSet.get(1);
+ assertEquals(4L, getResultKey(resultStruct));
+ list = (ArrayList<Map<?, ?>>) getResultValue(resultStruct);
+ assertEquals(1, list.size()); // value is a list of maps with 1 element
+ innerMap = list.get(0);
+ assertEquals(15L, getResultKey(innerMap));
+ assertEquals(20L, getResultValue(innerMap));
+
+ // Third Map entry
+ resultStruct = (Map<?, ?>) resultSet.get(2);
+ assertEquals(5L, getResultKey(resultStruct));
+ list = (ArrayList<Map<?, ?>>) getResultValue(resultStruct);
+ assertEquals(2, list.size()); // value is a list of maps with 2 elements
+ innerMap = list.get(0);
+ assertEquals(25L, getResultKey(innerMap));
+ assertEquals(30L, getResultValue(innerMap));
+ innerMap = list.get(1);
+ assertEquals(35L, getResultKey(innerMap));
+ assertNull(innerMap.get(MapVector.VALUE_NAME));
+
+ /* check underlying bitVector */
+ assertTrue(mapVector.isNull(0));
+ assertFalse(mapVector.isNull(1));
+ assertFalse(mapVector.isNull(2));
+
+ /* check underlying offsets */
+ final ArrowBuf offsetBuffer = mapVector.getOffsetBuffer();
+
+ /* mapVector has 0 entries at index 0, 2 entries at index 1, and 3 entries at index 2 */
+ assertEquals(0, offsetBuffer.getInt(0 * MapVector.OFFSET_WIDTH));
+ assertEquals(0, offsetBuffer.getInt(1 * MapVector.OFFSET_WIDTH));
+ assertEquals(2, offsetBuffer.getInt(2 * MapVector.OFFSET_WIDTH));
+ assertEquals(5, offsetBuffer.getInt(3 * MapVector.OFFSET_WIDTH));
+ }
+ }
+
+ @Test
+ public void testMapWithMapKeyAndMapValue() throws Exception {
+ try (MapVector mapVector = MapVector.empty("sourceVector", allocator, false)) {
+
+ UnionMapWriter mapWriter = mapVector.getWriter();
+ MapWriter keyWriter;
+ MapWriter valueWriter;
+
+ // we are essentially writing Map<Map<Integer, Integer>, Map<Long, Long>>
+ // populate map vector with the following two records
+ // [
+ // [[5: 10, 20: 40]:[50: 100, 200: 400], [50: 100]:[75: 175, 150: 250]],
+ // [[1: 2]:[10: 20], [30: 40]:[15: 20], [50: 60, 70: null]:[25: 30, 35: null], [5: null]: null]
+ // ]
+
+ mapWriter.setPosition(0);
+ mapWriter.startMap();
+
+ mapWriter.startEntry();
+ keyWriter = mapWriter.key().map(false);
+ keyWriter.startMap();
+ writeEntry(keyWriter, 5, 10);
+ writeEntry(keyWriter, 20, 40);
+ keyWriter.endMap();
+ valueWriter = mapWriter.value().map(false);
+ valueWriter.startMap();
+ writeEntry(valueWriter, 50, 100L);
+ writeEntry(valueWriter, 200, 400L);
+ valueWriter.endMap();
+ mapWriter.endEntry();
+
+ mapWriter.startEntry();
+ keyWriter = mapWriter.key().map(false);
+ keyWriter.startMap();
+ writeEntry(keyWriter, 50, 100);
+ keyWriter.endMap();
+ valueWriter = mapWriter.value().map(false);
+ valueWriter.startMap();
+ writeEntry(valueWriter, 75, 175L);
+ writeEntry(valueWriter, 150, 250L);
+ valueWriter.endMap();
+ mapWriter.endEntry();
+
+ mapWriter.endMap();
+
+ /* write one or more maps at index 1 */
+ mapWriter.setPosition(1);
+ mapWriter.startMap();
+
+ mapWriter.startEntry();
+ keyWriter = mapWriter.key().map(false);
+ keyWriter.startMap();
+ writeEntry(keyWriter, 1, 2);
+ keyWriter.endMap();
+ valueWriter = mapWriter.value().map(true);
+ valueWriter.startMap();
+ writeEntry(valueWriter, 10, 20L);
+ valueWriter.endMap();
+ mapWriter.endEntry();
+
+ mapWriter.startEntry();
+ keyWriter = mapWriter.key().map(false);
+ keyWriter.startMap();
+ writeEntry(keyWriter, 30, 40);
+ keyWriter.endMap();
+ valueWriter = mapWriter.value().map(false);
+ valueWriter.startMap();
+ writeEntry(valueWriter, 15, 20L);
+ valueWriter.endMap();
+ mapWriter.endEntry();
+
+ mapWriter.startEntry();
+ keyWriter = mapWriter.key().map(false);
+ keyWriter.startMap();
+ writeEntry(keyWriter, 50, 60);
+ writeEntry(keyWriter, 70, (Integer) null);
+ keyWriter.endMap();
+ valueWriter = mapWriter.value().map(false);
+ valueWriter.startMap();
+ writeEntry(valueWriter, 25, 30L);
+ writeEntry(valueWriter, 35, (Long) null);
+ valueWriter.endMap();
+ mapWriter.endEntry();
+
+ mapWriter.startEntry();
+ keyWriter = mapWriter.key().map(false);
+ keyWriter.startMap();
+ writeEntry(keyWriter, 5, (Integer) null);
+ keyWriter.endMap();
+ valueWriter = mapWriter.value().map(false);
+ valueWriter.writeNull();
+ mapWriter.endEntry();
+
+ mapWriter.endMap();
+
+ assertEquals(1, mapVector.getLastSet());
+
+ mapWriter.setValueCount(2);
+
+ assertEquals(2, mapVector.getValueCount());
+
+ // Get mapVector element at index 0
+ Object result = mapVector.getObject(0);
+ ArrayList<?> resultSet = (ArrayList<?>) result;
+
+ // 2 map entries at index 0
+ assertEquals(2, resultSet.size());
+
+ // First Map entry
+ Map<?, ArrayList<Map<?, ?>>> resultStruct = (Map<?, ArrayList<Map<?, ?>>>) resultSet.get(0);
+ ArrayList<Map<?, ?>> list = getResultKey(resultStruct);
+ assertEquals(2, list.size()); // key is a list of 2 two maps
+ Map<?, ?> innerMap = list.get(0);
+ assertEquals(5, getResultKey(innerMap));
+ assertEquals(10, getResultValue(innerMap));
+ innerMap = list.get(1);
+ assertEquals(20, getResultKey(innerMap));
+ assertEquals(40, getResultValue(innerMap));
+
+ list = getResultValue(resultStruct);
+ assertEquals(2, list.size()); // value is a list of 2 two maps
+ innerMap = list.get(0);
+ assertEquals(50L, getResultKey(innerMap));
+ assertEquals(100L, getResultValue(innerMap));
+ innerMap = list.get(1);
+ assertEquals(200L, getResultKey(innerMap));
+ assertEquals(400L, getResultValue(innerMap));
+
+ // Second Map entry
+ resultStruct = (Map<?, ArrayList<Map<?, ?>>>) resultSet.get(1);
+ list = getResultKey(resultStruct);
+ assertEquals(1, list.size()); // key is a list of 1 two map
+ innerMap = list.get(0);
+ assertEquals(50, getResultKey(innerMap));
+ assertEquals(100, getResultValue(innerMap));
+
+ list = getResultValue(resultStruct);
+ assertEquals(2, list.size()); // value is a list of two maps
+ innerMap = list.get(0);
+ assertEquals(75L, getResultKey(innerMap));
+ assertEquals(175L, getResultValue(innerMap));
+ innerMap = list.get(1);
+ assertEquals(150L, getResultKey(innerMap));
+ assertEquals(250L, getResultValue(innerMap));
+
+ // Get mapVector element at index 1
+ result = mapVector.getObject(1);
+ resultSet = (ArrayList<?>) result;
+
+ // 4 map entries at index 1
+ assertEquals(4, resultSet.size());
+
+ // First Map entry
+ resultStruct = (Map<?, ArrayList<Map<?, ?>>>) resultSet.get(0);
+ list = getResultKey(resultStruct);
+ assertEquals(1, list.size()); // key is a list of 1 map
+ innerMap = list.get(0);
+ assertEquals(1, getResultKey(innerMap));
+ assertEquals(2, getResultValue(innerMap));
+
+ list = getResultValue(resultStruct);
+ assertEquals(1, list.size()); // value is a list of maps with 1 element
+ innerMap = list.get(0);
+ assertEquals(10L, getResultKey(innerMap));
+ assertEquals(20L, getResultValue(innerMap));
+
+ // Second Map entry
+ resultStruct = (Map<?, ArrayList<Map<?, ?>>>) resultSet.get(1);
+ list = getResultKey(resultStruct);
+ assertEquals(1, list.size()); // key is a list of 1 map
+ innerMap = list.get(0);
+ assertEquals(30, getResultKey(innerMap));
+ assertEquals(40, getResultValue(innerMap));
+
+ list = getResultValue(resultStruct);
+ assertEquals(1, list.size()); // value is a list of maps with 1 element
+ innerMap = list.get(0);
+ assertEquals(15L, getResultKey(innerMap));
+ assertEquals(20L, getResultValue(innerMap));
+
+ // Third Map entry
+ resultStruct = (Map<?, ArrayList<Map<?, ?>>>) resultSet.get(2);
+ list = getResultKey(resultStruct);
+ assertEquals(2, list.size()); // key is a list of two maps
+ innerMap = list.get(0);
+ assertEquals(50, getResultKey(innerMap));
+ assertEquals(60, getResultValue(innerMap));
+ innerMap = list.get(1);
+ assertEquals(70, getResultKey(innerMap));
+ assertNull(innerMap.get(MapVector.VALUE_NAME));
+
+ list = getResultValue(resultStruct);
+ assertEquals(2, list.size()); // value is a list of maps with 2 elements
+ innerMap = list.get(0);
+ assertEquals(25L, getResultKey(innerMap));
+ assertEquals(30L, getResultValue(innerMap));
+ innerMap = list.get(1);
+ assertEquals(35L, getResultKey(innerMap));
+ assertNull(innerMap.get(MapVector.VALUE_NAME));
+
+ // Fourth Map entry
+ resultStruct = (Map<?, ArrayList<Map<?, ?>>>) resultSet.get(3);
+ list = getResultKey(resultStruct);
+ assertEquals(1, list.size()); // key is a list of two maps
+ innerMap = list.get(0);
+ assertEquals(5, getResultKey(innerMap));
+ assertNull(innerMap.get(MapVector.VALUE_NAME));
+
+ assertNull(resultStruct.get(MapVector.VALUE_NAME));
+
+ /* check underlying bitVector */
+ assertFalse(mapVector.isNull(0));
+ assertFalse(mapVector.isNull(1));
+
+ /* check underlying offsets */
+ final ArrowBuf offsetBuffer = mapVector.getOffsetBuffer();
+
+ /* mapVector has 2 entries at index 0 and 4 entries at index 1 */
+ assertEquals(0, offsetBuffer.getInt(0 * MapVector.OFFSET_WIDTH));
+ assertEquals(2, offsetBuffer.getInt(1 * MapVector.OFFSET_WIDTH));
+ assertEquals(6, offsetBuffer.getInt(2 * MapVector.OFFSET_WIDTH));
+ }
+ }
+
+ private void writeEntry(MapWriter writer, long key, Long value) {
+ writer.startEntry();
+ writer.key().bigInt().writeBigInt(key);
+ if (value != null) {
+ writer.value().bigInt().writeBigInt(value);
+ }
+ writer.endEntry();
+ }
+
+ private void writeEntry(MapWriter writer, int key, Integer value) {
+ writer.startEntry();
+ writer.key().integer().writeInt(key);
+ if (value != null) {
+ writer.value().integer().writeInt(value);
+ }
+ writer.endEntry();
+ }
+
+ @Test
+ public void testClearAndReuse() {
+ try (final MapVector vector = MapVector.empty("map", allocator, false)) {
+ vector.allocateNew();
+ UnionMapWriter mapWriter = vector.getWriter();
+
+ mapWriter.startMap();
+ mapWriter.startEntry();
+ mapWriter.key().bigInt().writeBigInt(1);
+ mapWriter.value().integer().writeInt(11);
+ mapWriter.endEntry();
+ mapWriter.endMap();
+
+ mapWriter.startMap();
+ mapWriter.startEntry();
+ mapWriter.key().bigInt().writeBigInt(2);
+ mapWriter.value().integer().writeInt(22);
+ mapWriter.endEntry();
+ mapWriter.endMap();
+
+ mapWriter.setValueCount(2);
+
+ Object result = vector.getObject(0);
+ ArrayList<?> resultSet = (ArrayList<?>) result;
+ Map<?, ?> resultStruct = (Map<?, ?>) resultSet.get(0);
+ assertEquals(1L, getResultKey(resultStruct));
+ assertEquals(11, getResultValue(resultStruct));
+
+ result = vector.getObject(1);
+ resultSet = (ArrayList<?>) result;
+ resultStruct = (Map<?, ?>) resultSet.get(0);
+ assertEquals(2L, getResultKey(resultStruct));
+ assertEquals(22, getResultValue(resultStruct));
+
+ // Clear and release the buffers to trigger a realloc when adding next value
+ vector.clear();
+ mapWriter = new UnionMapWriter(vector);
+
+ // The map vector should reuse a buffer when reallocating the offset buffer
+ mapWriter.startMap();
+ mapWriter.startEntry();
+ mapWriter.key().bigInt().writeBigInt(3);
+ mapWriter.value().integer().writeInt(33);
+ mapWriter.endEntry();
+ mapWriter.startEntry();
+ mapWriter.key().bigInt().writeBigInt(4);
+ mapWriter.value().integer().writeInt(44);
+ mapWriter.endEntry();
+ mapWriter.endMap();
+
+ mapWriter.startMap();
+ mapWriter.startEntry();
+ mapWriter.key().bigInt().writeBigInt(5);
+ mapWriter.value().integer().writeInt(55);
+ mapWriter.endEntry();
+ mapWriter.endMap();
+
+ mapWriter.setValueCount(2);
+
+ result = vector.getObject(0);
+ resultSet = (ArrayList<?>) result;
+ resultStruct = (Map<?, ?>) resultSet.get(0);
+ assertEquals(3L, getResultKey(resultStruct));
+ assertEquals(33, getResultValue(resultStruct));
+ resultStruct = (Map<?, ?>) resultSet.get(1);
+ assertEquals(4L, getResultKey(resultStruct));
+ assertEquals(44, getResultValue(resultStruct));
+
+ result = vector.getObject(1);
+ resultSet = (ArrayList<?>) result;
+ resultStruct = (Map<?, ?>) resultSet.get(0);
+ assertEquals(5L, getResultKey(resultStruct));
+ assertEquals(55, getResultValue(resultStruct));
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestNullCheckingForGet.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestNullCheckingForGet.java
new file mode 100644
index 000000000..f1345e88a
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestNullCheckingForGet.java
@@ -0,0 +1,92 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import java.lang.reflect.Field;
+import java.net.URLClassLoader;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ * Test cases for {@link NullCheckingForGet}.
+ */
+public class TestNullCheckingForGet {
+
+ /**
+ * Get a copy of the current class loader.
+ * @return the newly created class loader.
+ */
+ private ClassLoader copyClassLoader() {
+ ClassLoader curClassLoader = this.getClass().getClassLoader();
+ if (curClassLoader instanceof URLClassLoader) {
+ // for Java 1.8
+ return new URLClassLoader(((URLClassLoader) curClassLoader).getURLs(), null);
+ }
+
+ // for Java 1.9 and Java 11.
+ return null;
+ }
+
+ /**
+ * Get the value of flag {@link NullCheckingForGet#NULL_CHECKING_ENABLED}.
+ * @param classLoader the class loader from which to get the flag value.
+ * @return value of the flag.
+ */
+ private boolean getFlagValue(ClassLoader classLoader) throws Exception {
+ Class<?> clazz = classLoader.loadClass("org.apache.arrow.vector.NullCheckingForGet");
+ Field field = clazz.getField("NULL_CHECKING_ENABLED");
+ return (Boolean) field.get(null);
+ }
+
+ /**
+ * Ensure the flag for null checking is enabled by default.
+ * This will protect users from JVM crashes.
+ */
+ @Test
+ public void testDefaultValue() throws Exception {
+ ClassLoader classLoader = copyClassLoader();
+ if (classLoader != null) {
+ boolean nullCheckingEnabled = getFlagValue(classLoader);
+ Assert.assertTrue(nullCheckingEnabled);
+ }
+ }
+
+ /**
+ * Test setting the null checking flag by the system property.
+ * @throws Exception if loading class {@link NullCheckingForGet#NULL_CHECKING_ENABLED} fails.
+ */
+ @Test
+ public void testEnableSysProperty() throws Exception {
+ String sysProperty = System.getProperty("arrow.enable_null_check_for_get");
+ System.setProperty("arrow.enable_null_check_for_get", "false");
+
+ ClassLoader classLoader = copyClassLoader();
+ if (classLoader != null) {
+ boolean nullCheckingEnabled = getFlagValue(classLoader);
+ Assert.assertFalse(nullCheckingEnabled);
+ }
+
+ // restore system property
+ if (sysProperty != null) {
+ System.setProperty("arrow.enable_null_check_for_get", sysProperty);
+ } else {
+ System.clearProperty("arrow.enable_null_check_for_get");
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestOutOfMemoryForValueVector.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestOutOfMemoryForValueVector.java
new file mode 100644
index 000000000..7f26b5c1b
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestOutOfMemoryForValueVector.java
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.OutOfMemoryException;
+import org.apache.arrow.memory.RootAllocator;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * This class tests cases where we expect to receive {@link OutOfMemoryException}.
+ */
+public class TestOutOfMemoryForValueVector {
+
+ private static final String EMPTY_SCHEMA_PATH = "";
+
+ private BufferAllocator allocator;
+
+ @Before
+ public void init() {
+ allocator = new RootAllocator(200); // Start with low memory limit
+ }
+
+ @Test(expected = OutOfMemoryException.class)
+ public void variableWidthVectorAllocateNew() {
+ try (VarCharVector vector = new VarCharVector(EMPTY_SCHEMA_PATH, allocator)) {
+ vector.allocateNew();
+ }
+ }
+
+ @Test(expected = OutOfMemoryException.class)
+ public void variableWidthVectorAllocateNewCustom() {
+ try (VarCharVector vector = new VarCharVector(EMPTY_SCHEMA_PATH, allocator)) {
+ vector.allocateNew(2342, 234);
+ }
+ }
+
+ @Test(expected = OutOfMemoryException.class)
+ public void fixedWidthVectorAllocateNew() {
+ try (IntVector vector = new IntVector(EMPTY_SCHEMA_PATH, allocator)) {
+ vector.allocateNew();
+ }
+ }
+
+ @Test(expected = OutOfMemoryException.class)
+ public void fixedWidthVectorAllocateNewCustom() {
+ try (IntVector vector = new IntVector(EMPTY_SCHEMA_PATH, allocator)) {
+ vector.allocateNew(2342);
+ }
+ }
+
+ @After
+ public void terminate() {
+ allocator.close();
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestOversizedAllocationForValueVector.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestOversizedAllocationForValueVector.java
new file mode 100644
index 000000000..23414e9f5
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestOversizedAllocationForValueVector.java
@@ -0,0 +1,132 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.util.OversizedAllocationException;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * This class tests that OversizedAllocationException occurs when a large memory is allocated for a vector.
+ * Typically, arrow allows the allocation of the size of at most Integer.MAX_VALUE, but this might cause OOM in tests.
+ * Thus, the max allocation size is limited to 1 KB in this class. Please see the surefire option in pom.xml.
+ */
+public class TestOversizedAllocationForValueVector {
+
+ private static final String EMPTY_SCHEMA_PATH = "";
+
+ private BufferAllocator allocator;
+
+ @Before
+ public void init() {
+ allocator = new RootAllocator(Long.MAX_VALUE);
+ }
+
+ @After
+ public void terminate() throws Exception {
+ allocator.close();
+ }
+
+ @Test(expected = OversizedAllocationException.class)
+ public void testFixedVectorReallocation() {
+ final UInt4Vector vector = new UInt4Vector(EMPTY_SCHEMA_PATH, allocator);
+ // edge case 1: buffer size = max value capacity
+ final int expectedValueCapacity = checkedCastToInt(BaseValueVector.MAX_ALLOCATION_SIZE / 4);
+ try {
+ vector.allocateNew(expectedValueCapacity);
+ assertEquals(expectedValueCapacity, vector.getValueCapacity());
+ vector.reAlloc();
+ assertEquals(expectedValueCapacity * 2, vector.getValueCapacity());
+ } finally {
+ vector.close();
+ }
+
+ // common case: value count < max value capacity
+ try {
+ vector.allocateNew(checkedCastToInt(BaseValueVector.MAX_ALLOCATION_SIZE / 8));
+ vector.reAlloc(); // value allocation reaches to MAX_VALUE_ALLOCATION
+ vector.reAlloc(); // this should throw an IOOB
+ } finally {
+ vector.close();
+ }
+ }
+
+ @Test(expected = OversizedAllocationException.class)
+ public void testBitVectorReallocation() {
+ final BitVector vector = new BitVector(EMPTY_SCHEMA_PATH, allocator);
+ // edge case 1: buffer size ~ max value capacity
+ final int expectedValueCapacity = 1 << 29;
+ try {
+ vector.allocateNew(expectedValueCapacity);
+ assertEquals(expectedValueCapacity, vector.getValueCapacity());
+ vector.reAlloc();
+ assertEquals(expectedValueCapacity * 2, vector.getValueCapacity());
+ } finally {
+ vector.close();
+ }
+
+ // common: value count < MAX_VALUE_ALLOCATION
+ try {
+ vector.allocateNew(expectedValueCapacity);
+ for (int i = 0; i < 3; i++) {
+ vector.reAlloc(); // expand buffer size
+ }
+ assertEquals(Integer.MAX_VALUE, vector.getValueCapacity());
+ vector.reAlloc(); // buffer size ~ max allocation
+ assertEquals(Integer.MAX_VALUE, vector.getValueCapacity());
+ vector.reAlloc(); // overflow
+ } finally {
+ vector.close();
+ }
+ }
+
+
+ @Test(expected = OversizedAllocationException.class)
+ public void testVariableVectorReallocation() {
+ final VarCharVector vector = new VarCharVector(EMPTY_SCHEMA_PATH, allocator);
+ // edge case 1: value count = MAX_VALUE_ALLOCATION
+ final long expectedAllocationInBytes = BaseValueVector.MAX_ALLOCATION_SIZE;
+ final int expectedOffsetSize = 10;
+ try {
+ vector.allocateNew(expectedAllocationInBytes, 10);
+ assertTrue(expectedOffsetSize <= vector.getValueCapacity());
+ assertTrue(expectedAllocationInBytes <= vector.getDataBuffer().capacity());
+ vector.reAlloc();
+ assertTrue(expectedOffsetSize * 2 <= vector.getValueCapacity());
+ assertTrue(expectedAllocationInBytes * 2 <= vector.getDataBuffer().capacity());
+ } finally {
+ vector.close();
+ }
+
+ // common: value count < MAX_VALUE_ALLOCATION
+ try {
+ vector.allocateNew(BaseValueVector.MAX_ALLOCATION_SIZE / 2, 0);
+ vector.reAlloc(); // value allocation reaches to MAX_VALUE_ALLOCATION
+ vector.reAlloc(); // this tests if it overflows
+ } finally {
+ vector.close();
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestPeriodDuration.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestPeriodDuration.java
new file mode 100644
index 000000000..c8965dec3
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestPeriodDuration.java
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotEquals;
+
+import java.time.Duration;
+import java.time.Period;
+
+import org.junit.Test;
+
+public class TestPeriodDuration {
+
+ @Test
+ public void testBasics() {
+ PeriodDuration pd1 = new PeriodDuration(Period.of(1, 2, 3), Duration.ofNanos(123));
+ PeriodDuration pdEq1 = new PeriodDuration(Period.of(1, 2, 3), Duration.ofNanos(123));
+ PeriodDuration pd2 = new PeriodDuration(Period.of(1, 2, 3), Duration.ofNanos(12));
+ PeriodDuration pd3 = new PeriodDuration(Period.of(-1, -2, -3), Duration.ofNanos(-123));
+
+ assertEquals(pd1, pdEq1);
+ assertEquals(pd1.hashCode(), pdEq1.hashCode());
+
+ assertNotEquals(pd1, pd2);
+ assertNotEquals(pd1.hashCode(), pd2.hashCode());
+ assertNotEquals(pd1, pd3);
+ assertNotEquals(pd1.hashCode(), pd3.hashCode());
+ }
+
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestSplitAndTransfer.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestSplitAndTransfer.java
new file mode 100644
index 000000000..e60b87e60
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestSplitAndTransfer.java
@@ -0,0 +1,410 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.complex.FixedSizeListVector;
+import org.apache.arrow.vector.complex.ListVector;
+import org.apache.arrow.vector.complex.StructVector;
+import org.apache.arrow.vector.complex.UnionVector;
+import org.apache.arrow.vector.types.pojo.ArrowType.Struct;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.TransferPair;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.jupiter.api.Assertions;
+
+public class TestSplitAndTransfer {
+ private BufferAllocator allocator;
+
+ @Before
+ public void init() {
+ allocator = new RootAllocator(Long.MAX_VALUE);
+ }
+
+ @After
+ public void terminate() throws Exception {
+ allocator.close();
+ }
+
+ private void populateVarcharVector(final VarCharVector vector, int valueCount, String[] compareArray) {
+ for (int i = 0; i < valueCount; i += 3) {
+ final String s = String.format("%010d", i);
+ vector.set(i, s.getBytes());
+ if (compareArray != null) {
+ compareArray[i] = s;
+ }
+ }
+ vector.setValueCount(valueCount);
+ }
+
+ @Test /* VarCharVector */
+ public void test() throws Exception {
+ try (final VarCharVector varCharVector = new VarCharVector("myvector", allocator)) {
+ varCharVector.allocateNew(10000, 1000);
+
+ final int valueCount = 500;
+ final String[] compareArray = new String[valueCount];
+
+ populateVarcharVector(varCharVector, valueCount, compareArray);
+
+ final TransferPair tp = varCharVector.getTransferPair(allocator);
+ final VarCharVector newVarCharVector = (VarCharVector) tp.getTo();
+ final int[][] startLengths = {{0, 201}, {201, 0}, {201, 200}, {401, 99}};
+
+ for (final int[] startLength : startLengths) {
+ final int start = startLength[0];
+ final int length = startLength[1];
+ tp.splitAndTransfer(start, length);
+ for (int i = 0; i < length; i++) {
+ final boolean expectedSet = ((start + i) % 3) == 0;
+ if (expectedSet) {
+ final byte[] expectedValue = compareArray[start + i].getBytes();
+ assertFalse(newVarCharVector.isNull(i));
+ assertArrayEquals(expectedValue, newVarCharVector.get(i));
+ } else {
+ assertTrue(newVarCharVector.isNull(i));
+ }
+ }
+ newVarCharVector.clear();
+ }
+ }
+ }
+
+ @Test
+ public void testMemoryConstrainedTransfer() {
+ try (final VarCharVector varCharVector = new VarCharVector("myvector", allocator)) {
+ allocator.setLimit(32768); /* set limit of 32KB */
+
+ varCharVector.allocateNew(10000, 1000);
+
+ final int valueCount = 1000;
+
+ populateVarcharVector(varCharVector, valueCount, null);
+
+ final TransferPair tp = varCharVector.getTransferPair(allocator);
+ final VarCharVector newVarCharVector = (VarCharVector) tp.getTo();
+ final int[][] startLengths = {{0, 700}, {700, 299}};
+
+ for (final int[] startLength : startLengths) {
+ final int start = startLength[0];
+ final int length = startLength[1];
+ tp.splitAndTransfer(start, length);
+ newVarCharVector.clear();
+ }
+ }
+ }
+
+ @Test
+ public void testTransfer() {
+ try (final VarCharVector varCharVector = new VarCharVector("myvector", allocator)) {
+ varCharVector.allocateNew(10000, 1000);
+
+ final int valueCount = 500;
+ final String[] compareArray = new String[valueCount];
+ populateVarcharVector(varCharVector, valueCount, compareArray);
+
+ final TransferPair tp = varCharVector.getTransferPair(allocator);
+ final VarCharVector newVarCharVector = (VarCharVector) tp.getTo();
+ tp.transfer();
+
+ assertEquals(0, varCharVector.valueCount);
+ assertEquals(valueCount, newVarCharVector.valueCount);
+
+ for (int i = 0; i < valueCount; i++) {
+ final boolean expectedSet = (i % 3) == 0;
+ if (expectedSet) {
+ final byte[] expectedValue = compareArray[i].getBytes();
+ assertFalse(newVarCharVector.isNull(i));
+ assertArrayEquals(expectedValue, newVarCharVector.get(i));
+ } else {
+ assertTrue(newVarCharVector.isNull(i));
+ }
+ }
+
+ newVarCharVector.clear();
+ }
+ }
+
+ @Test
+ public void testCopyValueSafe() {
+ try (final VarCharVector varCharVector = new VarCharVector("myvector", allocator);
+ final VarCharVector newVarCharVector = new VarCharVector("newvector", allocator)) {
+ varCharVector.allocateNew(10000, 1000);
+
+ final int valueCount = 500;
+ populateVarcharVector(varCharVector, valueCount, null);
+
+ final TransferPair tp = varCharVector.makeTransferPair(newVarCharVector);
+
+ // new vector memory is not pre-allocated, we expect copyValueSafe work fine.
+ for (int i = 0; i < valueCount; i++) {
+ tp.copyValueSafe(i, i);
+ }
+ newVarCharVector.setValueCount(valueCount);
+
+ for (int i = 0; i < valueCount; i++) {
+ final boolean expectedSet = (i % 3) == 0;
+ if (expectedSet) {
+ assertFalse(varCharVector.isNull(i));
+ assertFalse(newVarCharVector.isNull(i));
+ assertArrayEquals(varCharVector.get(i), newVarCharVector.get(i));
+ } else {
+ assertTrue(newVarCharVector.isNull(i));
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testSplitAndTransferNon() {
+ try (final VarCharVector varCharVector = new VarCharVector("myvector", allocator)) {
+
+ varCharVector.allocateNew(10000, 1000);
+ final int valueCount = 500;
+ populateVarcharVector(varCharVector, valueCount, null);
+
+ final TransferPair tp = varCharVector.getTransferPair(allocator);
+ VarCharVector newVarCharVector = (VarCharVector) tp.getTo();
+
+ tp.splitAndTransfer(0, 0);
+ assertEquals(0, newVarCharVector.getValueCount());
+
+ newVarCharVector.clear();
+ }
+ }
+
+ @Test
+ public void testSplitAndTransferAll() {
+ try (final VarCharVector varCharVector = new VarCharVector("myvector", allocator)) {
+
+ varCharVector.allocateNew(10000, 1000);
+ final int valueCount = 500;
+ populateVarcharVector(varCharVector, valueCount, null);
+
+ final TransferPair tp = varCharVector.getTransferPair(allocator);
+ VarCharVector newVarCharVector = (VarCharVector) tp.getTo();
+
+ tp.splitAndTransfer(0, valueCount);
+ assertEquals(valueCount, newVarCharVector.getValueCount());
+
+ newVarCharVector.clear();
+ }
+ }
+
+ @Test
+ public void testInvalidStartIndex() {
+ try (final VarCharVector varCharVector = new VarCharVector("myvector", allocator);
+ final VarCharVector newVarCharVector = new VarCharVector("newvector", allocator)) {
+
+ varCharVector.allocateNew(10000, 1000);
+ final int valueCount = 500;
+ populateVarcharVector(varCharVector, valueCount, null);
+
+ final TransferPair tp = varCharVector.makeTransferPair(newVarCharVector);
+
+ IllegalArgumentException e = Assertions.assertThrows(
+ IllegalArgumentException.class,
+ () -> tp.splitAndTransfer(valueCount, 10));
+
+ assertEquals("Invalid parameters startIndex: 500, length: 10 for valueCount: 500", e.getMessage());
+
+ newVarCharVector.clear();
+ }
+ }
+
+ @Test
+ public void testInvalidLength() {
+ try (final VarCharVector varCharVector = new VarCharVector("myvector", allocator);
+ final VarCharVector newVarCharVector = new VarCharVector("newvector", allocator)) {
+
+ varCharVector.allocateNew(10000, 1000);
+ final int valueCount = 500;
+ populateVarcharVector(varCharVector, valueCount, null);
+
+ final TransferPair tp = varCharVector.makeTransferPair(newVarCharVector);
+
+ IllegalArgumentException e = Assertions.assertThrows(
+ IllegalArgumentException.class,
+ () -> tp.splitAndTransfer(0, valueCount * 2));
+
+ assertEquals("Invalid parameters startIndex: 0, length: 1000 for valueCount: 500", e.getMessage());
+
+ newVarCharVector.clear();
+ }
+ }
+
+ @Test
+ public void testZeroStartIndexAndLength() {
+ try (final VarCharVector varCharVector = new VarCharVector("myvector", allocator);
+ final VarCharVector newVarCharVector = new VarCharVector("newvector", allocator)) {
+
+ varCharVector.allocateNew(0, 0);
+ final int valueCount = 0;
+ populateVarcharVector(varCharVector, valueCount, null);
+
+ final TransferPair tp = varCharVector.makeTransferPair(newVarCharVector);
+
+ tp.splitAndTransfer(0, 0);
+ assertEquals(valueCount, newVarCharVector.getValueCount());
+
+ newVarCharVector.clear();
+ }
+ }
+
+ @Test
+ public void testZeroLength() {
+ try (final VarCharVector varCharVector = new VarCharVector("myvector", allocator);
+ final VarCharVector newVarCharVector = new VarCharVector("newvector", allocator)) {
+
+ varCharVector.allocateNew(10000, 1000);
+ final int valueCount = 500;
+ populateVarcharVector(varCharVector, valueCount, null);
+
+ final TransferPair tp = varCharVector.makeTransferPair(newVarCharVector);
+
+ tp.splitAndTransfer(500, 0);
+ assertEquals(0, newVarCharVector.getValueCount());
+
+ newVarCharVector.clear();
+ }
+ }
+
+ @Test
+ public void testUnionVectorZeroStartIndexAndLength() {
+ try (final UnionVector unionVector = UnionVector.empty("myvector", allocator);
+ final UnionVector newUnionVector = UnionVector.empty("newvector", allocator)) {
+
+ unionVector.allocateNew();
+ final int valueCount = 0;
+ unionVector.setValueCount(valueCount);
+
+ final TransferPair tp = unionVector.makeTransferPair(newUnionVector);
+
+ tp.splitAndTransfer(0, 0);
+ assertEquals(valueCount, newUnionVector.getValueCount());
+
+ newUnionVector.clear();
+ }
+ }
+
+ @Test
+ public void testFixedWidthVectorZeroStartIndexAndLength() {
+ try (final IntVector intVector = new IntVector("myvector", allocator);
+ final IntVector newIntVector = new IntVector("newvector", allocator)) {
+
+ intVector.allocateNew(0);
+ final int valueCount = 0;
+ intVector.setValueCount(valueCount);
+
+ final TransferPair tp = intVector.makeTransferPair(newIntVector);
+
+ tp.splitAndTransfer(0, 0);
+ assertEquals(valueCount, newIntVector.getValueCount());
+
+ newIntVector.clear();
+ }
+ }
+
+ @Test
+ public void testBitVectorZeroStartIndexAndLength() {
+ try (final BitVector bitVector = new BitVector("myvector", allocator);
+ final BitVector newBitVector = new BitVector("newvector", allocator)) {
+
+ bitVector.allocateNew(0);
+ final int valueCount = 0;
+ bitVector.setValueCount(valueCount);
+
+ final TransferPair tp = bitVector.makeTransferPair(newBitVector);
+
+ tp.splitAndTransfer(0, 0);
+ assertEquals(valueCount, newBitVector.getValueCount());
+
+ newBitVector.clear();
+ }
+ }
+
+ @Test
+ public void testFixedSizeListVectorZeroStartIndexAndLength() {
+ try (final FixedSizeListVector listVector = FixedSizeListVector.empty("list", 4, allocator);
+ final FixedSizeListVector newListVector = FixedSizeListVector.empty("newList", 4, allocator)) {
+
+ listVector.allocateNew();
+ final int valueCount = 0;
+ listVector.setValueCount(valueCount);
+
+ final TransferPair tp = listVector.makeTransferPair(newListVector);
+
+ tp.splitAndTransfer(0, 0);
+ assertEquals(valueCount, newListVector.getValueCount());
+
+ newListVector.clear();
+ }
+ }
+
+ @Test
+ public void testListVectorZeroStartIndexAndLength() {
+ try (final ListVector listVector = ListVector.empty("list", allocator);
+ final ListVector newListVector = ListVector.empty("newList", allocator)) {
+
+ listVector.allocateNew();
+ final int valueCount = 0;
+ listVector.setValueCount(valueCount);
+
+ final TransferPair tp = listVector.makeTransferPair(newListVector);
+
+ tp.splitAndTransfer(0, 0);
+ assertEquals(valueCount, newListVector.getValueCount());
+
+ newListVector.clear();
+ }
+ }
+
+ @Test
+ public void testStructVectorZeroStartIndexAndLength() {
+ Map<String, String> metadata = new HashMap<>();
+ metadata.put("k1", "v1");
+ FieldType type = new FieldType(true, Struct.INSTANCE, null, metadata);
+ try (final StructVector structVector = new StructVector("structvec", allocator, type, null);
+ final StructVector newStructVector = new StructVector("newStructvec", allocator, type, null)) {
+
+ structVector.allocateNew();
+ final int valueCount = 0;
+ structVector.setValueCount(valueCount);
+
+ final TransferPair tp = structVector.makeTransferPair(newStructVector);
+
+ tp.splitAndTransfer(0, 0);
+ assertEquals(valueCount, newStructVector.getValueCount());
+
+ newStructVector.clear();
+ }
+ }
+
+
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestStructVector.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestStructVector.java
new file mode 100644
index 000000000..734ff4631
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestStructVector.java
@@ -0,0 +1,183 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.junit.Assert.*;
+
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.complex.ListVector;
+import org.apache.arrow.vector.complex.StructVector;
+import org.apache.arrow.vector.complex.UnionVector;
+import org.apache.arrow.vector.holders.ComplexHolder;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.ArrowType.Struct;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestStructVector {
+
+ private BufferAllocator allocator;
+
+ @Before
+ public void init() {
+ allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100);
+ }
+
+ @After
+ public void terminate() throws Exception {
+ allocator.close();
+ }
+
+ @Test
+ public void testFieldMetadata() throws Exception {
+ Map<String, String> metadata = new HashMap<>();
+ metadata.put("k1", "v1");
+ FieldType type = new FieldType(true, Struct.INSTANCE, null, metadata);
+ try (StructVector vector = new StructVector("struct", allocator, type, null)) {
+ Assert.assertEquals(vector.getField().getMetadata(), type.getMetadata());
+ }
+ }
+
+ @Test
+ public void testMakeTransferPair() {
+ try (final StructVector s1 = StructVector.empty("s1", allocator);
+ final StructVector s2 = StructVector.empty("s2", allocator)) {
+ s1.addOrGet("struct_child", FieldType.nullable(MinorType.INT.getType()), IntVector.class);
+ s1.makeTransferPair(s2);
+ final FieldVector child = s1.getChild("struct_child");
+ final FieldVector toChild = s2.addOrGet("struct_child", child.getField().getFieldType(), child.getClass());
+ assertEquals(0, toChild.getValueCapacity());
+ assertEquals(0, toChild.getDataBuffer().capacity());
+ assertEquals(0, toChild.getValidityBuffer().capacity());
+ }
+ }
+
+ @Test
+ public void testAllocateAfterReAlloc() throws Exception {
+ Map<String, String> metadata = new HashMap<>();
+ metadata.put("k1", "v1");
+ FieldType type = new FieldType(true, Struct.INSTANCE, null, metadata);
+ try (StructVector vector = new StructVector("struct", allocator, type, null)) {
+ MinorType childtype = MinorType.INT;
+ vector.addOrGet("intchild", FieldType.nullable(childtype.getType()), IntVector.class);
+
+ /*
+ * Allocate the default size, and then, reAlloc. This should double the allocation.
+ */
+ vector.allocateNewSafe(); // Initial allocation
+ vector.reAlloc(); // Double the allocation size of self, and all children.
+ long savedValidityBufferCapacity = vector.getValidityBuffer().capacity();
+ int savedValueCapacity = vector.getValueCapacity();
+
+ /*
+ * Clear and allocate again.
+ */
+ vector.clear();
+ vector.allocateNewSafe();
+
+ /*
+ * Verify that the buffer sizes haven't changed.
+ */
+ Assert.assertEquals(vector.getValidityBuffer().capacity(), savedValidityBufferCapacity);
+ Assert.assertEquals(vector.getValueCapacity(), savedValueCapacity);
+ }
+ }
+
+ @Test
+ public void testReadNullValue() {
+ Map<String, String> metadata = new HashMap<>();
+ metadata.put("k1", "v1");
+ FieldType type = new FieldType(true, Struct.INSTANCE, null, metadata);
+ try (StructVector vector = new StructVector("struct", allocator, type, null)) {
+ MinorType childtype = MinorType.INT;
+ vector.addOrGet("intchild", FieldType.nullable(childtype.getType()), IntVector.class);
+ vector.setValueCount(2);
+
+ IntVector intVector = (IntVector) vector.getChild("intchild");
+ intVector.setSafe(0, 100);
+ vector.setIndexDefined(0);
+ intVector.setNull(1);
+ vector.setNull(1);
+
+ ComplexHolder holder = new ComplexHolder();
+ vector.get(0, holder);
+ assertNotEquals(0, holder.isSet);
+ assertNotNull(holder.reader);
+
+ vector.get(1, holder);
+ assertEquals(0, holder.isSet);
+ assertNull(holder.reader);
+ }
+ }
+
+ @Test
+ public void testGetPrimitiveVectors() {
+ FieldType type = new FieldType(true, Struct.INSTANCE, null, null);
+ try (StructVector vector = new StructVector("struct", allocator, type, null)) {
+
+ // add list vector
+ vector.addOrGet("list", FieldType.nullable(MinorType.LIST.getType()), ListVector.class);
+ ListVector listVector = vector.addOrGetList("list");
+ listVector.addOrGetVector(FieldType.nullable(MinorType.INT.getType()));
+
+ // add union vector
+ vector.addOrGet("union", FieldType.nullable(MinorType.UNION.getType()), UnionVector.class);
+ UnionVector unionVector = vector.addOrGetUnion("union");
+ unionVector.addVector(new BigIntVector("bigInt", allocator));
+ unionVector.addVector(new SmallIntVector("smallInt", allocator));
+
+ // add varchar vector
+ vector.addOrGet("varchar", FieldType.nullable(MinorType.VARCHAR.getType()), VarCharVector.class);
+
+ List<ValueVector> primitiveVectors = vector.getPrimitiveVectors();
+ assertEquals(4, primitiveVectors.size());
+ assertEquals(MinorType.INT, primitiveVectors.get(0).getMinorType());
+ assertEquals(MinorType.BIGINT, primitiveVectors.get(1).getMinorType());
+ assertEquals(MinorType.SMALLINT, primitiveVectors.get(2).getMinorType());
+ assertEquals(MinorType.VARCHAR, primitiveVectors.get(3).getMinorType());
+ }
+ }
+
+ @Test
+ public void testAddOrGetComplexChildVectors() {
+ FieldType type = new FieldType(true, Struct.INSTANCE, null, null);
+ try (StructVector vector = new StructVector("struct", allocator, type, null)) {
+
+ vector.addOrGetList("list");
+ vector.addOrGetFixedSizeList("fixedList", 2);
+ vector.addOrGetUnion("union");
+ vector.addOrGetStruct("struct");
+ vector.addOrGetMap("map", true);
+
+ List<FieldVector> childrens = vector.getChildrenFromFields();
+ assertEquals(5, childrens.size());
+ assertEquals(MinorType.LIST, childrens.get(0).getMinorType());
+ assertEquals(MinorType.FIXED_SIZE_LIST, childrens.get(1).getMinorType());
+ assertEquals(MinorType.UNION, childrens.get(2).getMinorType());
+ assertEquals(MinorType.STRUCT, childrens.get(3).getMinorType());
+ assertEquals(MinorType.MAP, childrens.get(4).getMinorType());
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestTypeLayout.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestTypeLayout.java
new file mode 100644
index 000000000..97930f433
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestTypeLayout.java
@@ -0,0 +1,98 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.junit.Assert.assertEquals;
+
+import org.apache.arrow.vector.types.DateUnit;
+import org.apache.arrow.vector.types.FloatingPointPrecision;
+import org.apache.arrow.vector.types.IntervalUnit;
+import org.apache.arrow.vector.types.TimeUnit;
+import org.apache.arrow.vector.types.UnionMode;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.junit.Test;
+
+public class TestTypeLayout {
+
+ @Test
+ public void testTypeBufferCount() {
+ ArrowType type = new ArrowType.Int(8, true);
+ assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size());
+
+ type = new ArrowType.Union(UnionMode.Sparse, new int[2]);
+ assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size());
+
+ type = new ArrowType.Union(UnionMode.Dense, new int[1]);
+ assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size());
+
+ type = new ArrowType.Struct();
+ assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size());
+
+ type = new ArrowType.Timestamp(TimeUnit.MILLISECOND, null);
+ assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size());
+
+ type = new ArrowType.List();
+ assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size());
+
+ type = new ArrowType.FixedSizeList(5);
+ assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size());
+
+ type = new ArrowType.Map(false);
+ assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size());
+
+ type = new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE);
+ assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size());
+
+ type = new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE);
+ assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size());
+
+ type = new ArrowType.Decimal(10, 10, 128);
+ assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size());
+
+ type = new ArrowType.Decimal(10, 10, 256);
+ assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size());
+
+
+ type = new ArrowType.FixedSizeBinary(5);
+ assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size());
+
+ type = new ArrowType.Bool();
+ assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size());
+
+ type = new ArrowType.Binary();
+ assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size());
+
+ type = new ArrowType.Utf8();
+ assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size());
+
+ type = new ArrowType.Null();
+ assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size());
+
+ type = new ArrowType.Date(DateUnit.DAY);
+ assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size());
+
+ type = new ArrowType.Time(TimeUnit.MILLISECOND, 32);
+ assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size());
+
+ type = new ArrowType.Interval(IntervalUnit.DAY_TIME);
+ assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size());
+
+ type = new ArrowType.Duration(TimeUnit.MILLISECOND);
+ assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size());
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestUnionVector.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestUnionVector.java
new file mode 100644
index 000000000..f04998915
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestUnionVector.java
@@ -0,0 +1,520 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.complex.MapVector;
+import org.apache.arrow.vector.complex.UnionVector;
+import org.apache.arrow.vector.complex.VectorWithOrdinal;
+import org.apache.arrow.vector.complex.impl.UnionWriter;
+import org.apache.arrow.vector.holders.NullableBitHolder;
+import org.apache.arrow.vector.holders.NullableFloat4Holder;
+import org.apache.arrow.vector.holders.NullableIntHolder;
+import org.apache.arrow.vector.holders.NullableUInt4Holder;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.UnionMode;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.TransferPair;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestUnionVector {
+ private static final String EMPTY_SCHEMA_PATH = "";
+
+ private BufferAllocator allocator;
+
+ @Before
+ public void init() {
+ allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100);
+ }
+
+ @After
+ public void terminate() throws Exception {
+ allocator.close();
+ }
+
+ @Test
+ public void testUnionVector() throws Exception {
+
+ final NullableUInt4Holder uInt4Holder = new NullableUInt4Holder();
+ uInt4Holder.value = 100;
+ uInt4Holder.isSet = 1;
+
+ try (UnionVector unionVector =
+ new UnionVector(EMPTY_SCHEMA_PATH, allocator, /* field type */ null, /* call-back */ null)) {
+ unionVector.allocateNew();
+
+ // write some data
+ unionVector.setType(0, MinorType.UINT4);
+ unionVector.setSafe(0, uInt4Holder);
+ unionVector.setType(2, MinorType.UINT4);
+ unionVector.setSafe(2, uInt4Holder);
+ unionVector.setValueCount(4);
+
+ // check that what we wrote is correct
+ assertEquals(4, unionVector.getValueCount());
+
+ assertEquals(false, unionVector.isNull(0));
+ assertEquals(100, unionVector.getObject(0));
+
+ assertNull(unionVector.getObject(1));
+
+ assertEquals(false, unionVector.isNull(2));
+ assertEquals(100, unionVector.getObject(2));
+
+ assertNull(unionVector.getObject(3));
+ }
+ }
+
+ @Test
+ public void testUnionVectorMapValue() throws Exception {
+ try (UnionVector unionVector =
+ new UnionVector(EMPTY_SCHEMA_PATH, allocator, /* field type */ null, /* call-back */ null)) {
+ unionVector.allocateNew();
+
+ UnionWriter writer = (UnionWriter) unionVector.getWriter();
+
+ // populate map vector with the following two records
+ // [
+ // null,
+ // [[1: 2], [3: 4], [5: null]]
+ // ]
+
+ writer.setPosition(0);
+ writer.writeNull();
+
+ writer.setPosition(1);
+ writer.startMap();
+
+ writer.startEntry();
+ writer.key().integer().writeInt(1);
+ writer.value().integer().writeInt(2);
+ writer.endEntry();
+
+ writer.startEntry();
+ writer.key().integer().writeInt(3);
+ writer.value().integer().writeInt(4);
+ writer.endEntry();
+
+ writer.startEntry();
+ writer.key().integer().writeInt(5);
+ writer.endEntry();
+
+ writer.endMap();
+
+ unionVector.setValueCount(2);
+
+ // check that what we wrote is correct
+ assertEquals(2, unionVector.getValueCount());
+
+ // first entry
+ assertNull(unionVector.getObject(0));
+
+ // second entry
+ List<Map<String, Integer>> resultList = (List<Map<String, Integer>>) unionVector.getObject(1);
+ assertEquals(3, resultList.size());
+
+ Map<String, Integer> resultMap = resultList.get(0);
+ assertEquals(1, (int) resultMap.get(MapVector.KEY_NAME));
+ assertEquals(2, (int) resultMap.get(MapVector.VALUE_NAME));
+
+ resultMap = resultList.get(1);
+ assertEquals(3, (int) resultMap.get(MapVector.KEY_NAME));
+ assertEquals(4, (int) resultMap.get(MapVector.VALUE_NAME));
+
+ resultMap = resultList.get(2);
+ assertEquals(5, (int) resultMap.get(MapVector.KEY_NAME));
+ assertNull(resultMap.get(MapVector.VALUE_NAME));
+ }
+ }
+
+ @Test
+ public void testTransfer() throws Exception {
+ try (UnionVector srcVector =
+ new UnionVector(EMPTY_SCHEMA_PATH, allocator, /* field type */ null, /* call-back */ null)) {
+ srcVector.allocateNew();
+
+ // write some data
+ srcVector.setType(0, MinorType.INT);
+ srcVector.setSafe(0, newIntHolder(5));
+ srcVector.setType(1, MinorType.BIT);
+ srcVector.setSafe(1, newBitHolder(false));
+ srcVector.setType(3, MinorType.INT);
+ srcVector.setSafe(3, newIntHolder(10));
+ srcVector.setType(5, MinorType.BIT);
+ srcVector.setSafe(5, newBitHolder(false));
+ srcVector.setValueCount(6);
+
+ try (UnionVector destVector =
+ new UnionVector(EMPTY_SCHEMA_PATH, allocator, /* field type */ null, /* call-back */ null)) {
+ TransferPair pair = srcVector.makeTransferPair(destVector);
+
+ // Creating the transfer should transfer the type of the field at least.
+ assertEquals(srcVector.getField(), destVector.getField());
+
+ // transfer
+ pair.transfer();
+
+ assertEquals(srcVector.getField(), destVector.getField());
+
+ // now check the values are transferred
+ assertEquals(6, destVector.getValueCount());
+
+ assertFalse(destVector.isNull(0));
+ assertEquals(5, destVector.getObject(0));
+
+ assertFalse(destVector.isNull(1));
+ assertEquals(false, destVector.getObject(1));
+
+ assertNull(destVector.getObject(2));
+
+ assertFalse(destVector.isNull(3));
+ assertEquals(10, destVector.getObject(3));
+
+ assertNull(destVector.getObject(4));
+
+ assertFalse(destVector.isNull(5));
+ assertEquals(false, destVector.getObject(5));
+ }
+ }
+ }
+
+ @Test
+ public void testSplitAndTransfer() throws Exception {
+ try (UnionVector sourceVector =
+ new UnionVector(EMPTY_SCHEMA_PATH, allocator, /* field type */ null, /* call-back */ null)) {
+
+ sourceVector.allocateNew();
+
+ /* populate the UnionVector */
+ sourceVector.setType(0, MinorType.INT);
+ sourceVector.setSafe(0, newIntHolder(5));
+ sourceVector.setType(1, MinorType.INT);
+ sourceVector.setSafe(1, newIntHolder(10));
+ sourceVector.setType(2, MinorType.INT);
+ sourceVector.setSafe(2, newIntHolder(15));
+ sourceVector.setType(3, MinorType.INT);
+ sourceVector.setSafe(3, newIntHolder(20));
+ sourceVector.setType(4, MinorType.INT);
+ sourceVector.setSafe(4, newIntHolder(25));
+ sourceVector.setType(5, MinorType.INT);
+ sourceVector.setSafe(5, newIntHolder(30));
+ sourceVector.setType(6, MinorType.INT);
+ sourceVector.setSafe(6, newIntHolder(35));
+ sourceVector.setType(7, MinorType.INT);
+ sourceVector.setSafe(7, newIntHolder(40));
+ sourceVector.setType(8, MinorType.INT);
+ sourceVector.setSafe(8, newIntHolder(45));
+ sourceVector.setType(9, MinorType.INT);
+ sourceVector.setSafe(9, newIntHolder(50));
+ sourceVector.setValueCount(10);
+
+ /* check the vector output */
+ assertEquals(10, sourceVector.getValueCount());
+ assertEquals(false, sourceVector.isNull(0));
+ assertEquals(5, sourceVector.getObject(0));
+ assertEquals(false, sourceVector.isNull(1));
+ assertEquals(10, sourceVector.getObject(1));
+ assertEquals(false, sourceVector.isNull(2));
+ assertEquals(15, sourceVector.getObject(2));
+ assertEquals(false, sourceVector.isNull(3));
+ assertEquals(20, sourceVector.getObject(3));
+ assertEquals(false, sourceVector.isNull(4));
+ assertEquals(25, sourceVector.getObject(4));
+ assertEquals(false, sourceVector.isNull(5));
+ assertEquals(30, sourceVector.getObject(5));
+ assertEquals(false, sourceVector.isNull(6));
+ assertEquals(35, sourceVector.getObject(6));
+ assertEquals(false, sourceVector.isNull(7));
+ assertEquals(40, sourceVector.getObject(7));
+ assertEquals(false, sourceVector.isNull(8));
+ assertEquals(45, sourceVector.getObject(8));
+ assertEquals(false, sourceVector.isNull(9));
+ assertEquals(50, sourceVector.getObject(9));
+
+ try (UnionVector toVector =
+ new UnionVector(EMPTY_SCHEMA_PATH, allocator, /* field type */ null, /* call-back */ null)) {
+
+ final TransferPair transferPair = sourceVector.makeTransferPair(toVector);
+
+ final int[][] transferLengths = {{0, 3},
+ {3, 1},
+ {4, 2},
+ {6, 1},
+ {7, 1},
+ {8, 2}
+ };
+
+ for (final int[] transferLength : transferLengths) {
+ final int start = transferLength[0];
+ final int length = transferLength[1];
+
+ transferPair.splitAndTransfer(start, length);
+
+ /* check the toVector output after doing the splitAndTransfer */
+ for (int i = 0; i < length; i++) {
+ assertEquals("Different data at indexes: " + (start + i) + "and " + i, sourceVector.getObject(start + i),
+ toVector.getObject(i));
+ }
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testSplitAndTransferWithMixedVectors() throws Exception {
+ try (UnionVector sourceVector =
+ new UnionVector(EMPTY_SCHEMA_PATH, allocator, /* field type */ null, /* call-back */ null)) {
+
+ sourceVector.allocateNew();
+
+ /* populate the UnionVector */
+ sourceVector.setType(0, MinorType.INT);
+ sourceVector.setSafe(0, newIntHolder(5));
+
+ sourceVector.setType(1, MinorType.FLOAT4);
+ sourceVector.setSafe(1, newFloat4Holder(5.5f));
+
+ sourceVector.setType(2, MinorType.INT);
+ sourceVector.setSafe(2, newIntHolder(10));
+
+ sourceVector.setType(3, MinorType.FLOAT4);
+ sourceVector.setSafe(3, newFloat4Holder(10.5f));
+
+ sourceVector.setType(4, MinorType.INT);
+ sourceVector.setSafe(4, newIntHolder(15));
+
+ sourceVector.setType(5, MinorType.FLOAT4);
+ sourceVector.setSafe(5, newFloat4Holder(15.5f));
+
+ sourceVector.setType(6, MinorType.INT);
+ sourceVector.setSafe(6, newIntHolder(20));
+
+ sourceVector.setType(7, MinorType.FLOAT4);
+ sourceVector.setSafe(7, newFloat4Holder(20.5f));
+
+ sourceVector.setType(8, MinorType.INT);
+ sourceVector.setSafe(8, newIntHolder(30));
+
+ sourceVector.setType(9, MinorType.FLOAT4);
+ sourceVector.setSafe(9, newFloat4Holder(30.5f));
+ sourceVector.setValueCount(10);
+
+ /* check the vector output */
+ assertEquals(10, sourceVector.getValueCount());
+ assertEquals(false, sourceVector.isNull(0));
+ assertEquals(5, sourceVector.getObject(0));
+ assertEquals(false, sourceVector.isNull(1));
+ assertEquals(5.5f, sourceVector.getObject(1));
+ assertEquals(false, sourceVector.isNull(2));
+ assertEquals(10, sourceVector.getObject(2));
+ assertEquals(false, sourceVector.isNull(3));
+ assertEquals(10.5f, sourceVector.getObject(3));
+ assertEquals(false, sourceVector.isNull(4));
+ assertEquals(15, sourceVector.getObject(4));
+ assertEquals(false, sourceVector.isNull(5));
+ assertEquals(15.5f, sourceVector.getObject(5));
+ assertEquals(false, sourceVector.isNull(6));
+ assertEquals(20, sourceVector.getObject(6));
+ assertEquals(false, sourceVector.isNull(7));
+ assertEquals(20.5f, sourceVector.getObject(7));
+ assertEquals(false, sourceVector.isNull(8));
+ assertEquals(30, sourceVector.getObject(8));
+ assertEquals(false, sourceVector.isNull(9));
+ assertEquals(30.5f, sourceVector.getObject(9));
+
+ try (UnionVector toVector =
+ new UnionVector(EMPTY_SCHEMA_PATH, allocator, /* field type */ null, /* call-back */ null)) {
+
+ final TransferPair transferPair = sourceVector.makeTransferPair(toVector);
+
+ final int[][] transferLengths = {{0, 2},
+ {2, 1},
+ {3, 2},
+ {5, 3},
+ {8, 2}
+ };
+
+ for (final int[] transferLength : transferLengths) {
+ final int start = transferLength[0];
+ final int length = transferLength[1];
+
+ transferPair.splitAndTransfer(start, length);
+
+ /* check the toVector output after doing the splitAndTransfer */
+ for (int i = 0; i < length; i++) {
+ assertEquals("Different values at index: " + i, sourceVector.getObject(start + i), toVector.getObject(i));
+ }
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testGetFieldTypeInfo() throws Exception {
+ Map<String, String> metadata = new HashMap<>();
+ metadata.put("key1", "value1");
+
+ int[] typeIds = new int[2];
+ typeIds[0] = MinorType.INT.ordinal();
+ typeIds[1] = MinorType.VARCHAR.ordinal();
+
+ List<Field> children = new ArrayList<>();
+ children.add(new Field("int", FieldType.nullable(MinorType.INT.getType()), null));
+ children.add(new Field("varchar", FieldType.nullable(MinorType.VARCHAR.getType()), null));
+
+ final FieldType fieldType = new FieldType(false, new ArrowType.Union(UnionMode.Sparse, typeIds),
+ /*dictionary=*/null, metadata);
+ final Field field = new Field("union", fieldType, children);
+
+ MinorType minorType = MinorType.UNION;
+ UnionVector vector = (UnionVector) minorType.getNewVector(field, allocator, null);
+ vector.initializeChildrenFromFields(children);
+
+ assertTrue(vector.getField().equals(field));
+
+ // Union has 2 child vectors
+ assertEquals(vector.size(), 2);
+
+ // Check child field 0
+ VectorWithOrdinal intChild = vector.getChildVectorWithOrdinal("int");
+ assertEquals(intChild.ordinal, 0);
+ assertEquals(intChild.vector.getField(), children.get(0));
+
+ // Check child field 1
+ VectorWithOrdinal varcharChild = vector.getChildVectorWithOrdinal("varchar");
+ assertEquals(varcharChild.ordinal, 1);
+ assertEquals(varcharChild.vector.getField(), children.get(1));
+ }
+
+ @Test
+ public void testGetBufferAddress() throws Exception {
+ try (UnionVector vector =
+ new UnionVector(EMPTY_SCHEMA_PATH, allocator, /* field type */ null, /* call-back */ null)) {
+ boolean error = false;
+
+ vector.allocateNew();
+
+ /* populate the UnionVector */
+ vector.setType(0, MinorType.INT);
+ vector.setSafe(0, newIntHolder(5));
+
+ vector.setType(1, MinorType.FLOAT4);
+ vector.setSafe(1, newFloat4Holder(5.5f));
+
+ vector.setType(2, MinorType.INT);
+ vector.setSafe(2, newIntHolder(10));
+
+ vector.setType(3, MinorType.FLOAT4);
+ vector.setSafe(3, newFloat4Holder(10.5f));
+
+ vector.setValueCount(10);
+
+ /* check the vector output */
+ assertEquals(10, vector.getValueCount());
+ assertEquals(false, vector.isNull(0));
+ assertEquals(5, vector.getObject(0));
+ assertEquals(false, vector.isNull(1));
+ assertEquals(5.5f, vector.getObject(1));
+ assertEquals(false, vector.isNull(2));
+ assertEquals(10, vector.getObject(2));
+ assertEquals(false, vector.isNull(3));
+ assertEquals(10.5f, vector.getObject(3));
+
+ List<ArrowBuf> buffers = vector.getFieldBuffers();
+
+
+ try {
+ long offsetAddress = vector.getOffsetBufferAddress();
+ } catch (UnsupportedOperationException ue) {
+ error = true;
+ } finally {
+ assertTrue(error);
+ error = false;
+ }
+
+ try {
+ long dataAddress = vector.getDataBufferAddress();
+ } catch (UnsupportedOperationException ue) {
+ error = true;
+ } finally {
+ assertTrue(error);
+ }
+
+ assertEquals(1, buffers.size());
+ }
+ }
+
+ @Test
+ public void testSetGetNull() {
+ try (UnionVector srcVector =
+ new UnionVector(EMPTY_SCHEMA_PATH, allocator, /* field type */ null, /* call-back */ null)) {
+ srcVector.allocateNew();
+
+ final NullableIntHolder holder = new NullableIntHolder();
+ holder.isSet = 1;
+ holder.value = 5;
+
+ // write some data
+ srcVector.setType(0, MinorType.INT);
+ srcVector.setSafe(0, holder);
+
+ assertFalse(srcVector.isNull(0));
+
+ holder.isSet = 0;
+ srcVector.setSafe(0, holder);
+
+ assertNull(srcVector.getObject(0));
+ }
+ }
+
+ private static NullableIntHolder newIntHolder(int value) {
+ final NullableIntHolder holder = new NullableIntHolder();
+ holder.isSet = 1;
+ holder.value = value;
+ return holder;
+ }
+
+ private static NullableBitHolder newBitHolder(boolean value) {
+ final NullableBitHolder holder = new NullableBitHolder();
+ holder.isSet = 1;
+ holder.value = value ? 1 : 0;
+ return holder;
+ }
+
+ private static NullableFloat4Holder newFloat4Holder(float value) {
+ final NullableFloat4Holder holder = new NullableFloat4Holder();
+ holder.isSet = 1;
+ holder.value = value;
+ return holder;
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestUtils.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestUtils.java
new file mode 100644
index 000000000..7e64dd386
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestUtils.java
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.FieldType;
+
+public class TestUtils {
+
+ public static VarCharVector newVarCharVector(String name, BufferAllocator allocator) {
+ return (VarCharVector)
+ FieldType.nullable(new ArrowType.Utf8()).createNewSingleVector(name, allocator, null);
+ }
+
+ public static VarBinaryVector newVarBinaryVector(String name, BufferAllocator allocator) {
+ return (VarBinaryVector)
+ FieldType.nullable(new ArrowType.Binary()).createNewSingleVector(name, allocator, null);
+ }
+
+ public static <T> T newVector(Class<T> c, String name, ArrowType type, BufferAllocator allocator) {
+ return c.cast(FieldType.nullable(type).createNewSingleVector(name, allocator, null));
+ }
+
+ public static <T> T newVector(Class<T> c, String name, MinorType type, BufferAllocator allocator) {
+ return c.cast(FieldType.nullable(type.getType()).createNewSingleVector(name, allocator, null));
+ }
+
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java
new file mode 100644
index 000000000..572c3d594
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java
@@ -0,0 +1,3061 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.apache.arrow.vector.TestUtils.newVarBinaryVector;
+import static org.apache.arrow.vector.TestUtils.newVarCharVector;
+import static org.apache.arrow.vector.TestUtils.newVector;
+import static org.apache.arrow.vector.testing.ValueVectorDataPopulator.setVector;
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+
+import java.nio.ByteBuffer;
+import java.nio.charset.Charset;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.memory.rounding.DefaultRoundingPolicy;
+import org.apache.arrow.memory.util.ArrowBufPointer;
+import org.apache.arrow.memory.util.CommonUtil;
+import org.apache.arrow.vector.compare.Range;
+import org.apache.arrow.vector.compare.RangeEqualsVisitor;
+import org.apache.arrow.vector.compare.VectorEqualsVisitor;
+import org.apache.arrow.vector.complex.DenseUnionVector;
+import org.apache.arrow.vector.complex.FixedSizeListVector;
+import org.apache.arrow.vector.complex.ListVector;
+import org.apache.arrow.vector.complex.StructVector;
+import org.apache.arrow.vector.complex.UnionVector;
+import org.apache.arrow.vector.complex.impl.NullableStructWriter;
+import org.apache.arrow.vector.complex.impl.UnionListWriter;
+import org.apache.arrow.vector.holders.NullableIntHolder;
+import org.apache.arrow.vector.holders.NullableUInt4Holder;
+import org.apache.arrow.vector.holders.NullableVarBinaryHolder;
+import org.apache.arrow.vector.holders.NullableVarCharHolder;
+import org.apache.arrow.vector.ipc.message.ArrowRecordBatch;
+import org.apache.arrow.vector.types.Types;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.apache.arrow.vector.util.OversizedAllocationException;
+import org.apache.arrow.vector.util.Text;
+import org.apache.arrow.vector.util.TransferPair;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestValueVector {
+
+ private static final String EMPTY_SCHEMA_PATH = "";
+
+ private BufferAllocator allocator;
+
+ @Before
+ public void init() {
+ allocator = new RootAllocator(Long.MAX_VALUE);
+ }
+
+ private static final Charset utf8Charset = Charset.forName("UTF-8");
+ private static final byte[] STR1 = "AAAAA1".getBytes(utf8Charset);
+ private static final byte[] STR2 = "BBBBBBBBB2".getBytes(utf8Charset);
+ private static final byte[] STR3 = "CCCC3".getBytes(utf8Charset);
+ private static final byte[] STR4 = "DDDDDDDD4".getBytes(utf8Charset);
+ private static final byte[] STR5 = "EEE5".getBytes(utf8Charset);
+ private static final byte[] STR6 = "FFFFF6".getBytes(utf8Charset);
+ private static final int MAX_VALUE_COUNT =
+ (int) (Integer.getInteger("arrow.vector.max_allocation_bytes", Integer.MAX_VALUE) / 7);
+ private static final int MAX_VALUE_COUNT_8BYTE = (int) (MAX_VALUE_COUNT / 2);
+
+ @After
+ public void terminate() throws Exception {
+ allocator.close();
+ }
+
+ /*
+ * Tests for Fixed-Width vectors
+ *
+ * Covered types as of now
+ *
+ * -- UInt4Vector
+ * -- IntVector
+ * -- Float4Vector
+ * -- Float8Vector
+ *
+ * -- UInt4Vector
+ * -- IntVector
+ * -- Float4Vector
+ *
+ * TODO:
+ *
+ * -- SmallIntVector
+ * -- BigIntVector
+ * -- TinyIntVector
+ */
+
+ @Test /* UInt4Vector */
+ public void testFixedType1() {
+
+ // Create a new value vector for 1024 integers.
+ try (final UInt4Vector vector = new UInt4Vector(EMPTY_SCHEMA_PATH, allocator)) {
+
+ boolean error = false;
+ int initialCapacity = 0;
+
+ vector.allocateNew(1024);
+ initialCapacity = vector.getValueCapacity();
+ assertTrue(initialCapacity >= 1024);
+
+ // Put and set a few values
+ vector.setSafe(0, 100);
+ vector.setSafe(1, 101);
+ vector.setSafe(100, 102);
+ vector.setSafe(1022, 103);
+ vector.setSafe(1023, 104);
+
+ assertEquals(100, vector.get(0));
+ assertEquals(101, vector.get(1));
+ assertEquals(102, vector.get(100));
+ assertEquals(103, vector.get(1022));
+ assertEquals(104, vector.get(1023));
+
+ try {
+ vector.set(initialCapacity, 10000);
+ } catch (IndexOutOfBoundsException ie) {
+ error = true;
+ } finally {
+ assertTrue(error);
+ error = false;
+ }
+
+ try {
+ vector.get(initialCapacity);
+ } catch (IndexOutOfBoundsException ie) {
+ error = true;
+ } finally {
+ assertTrue(error);
+ error = false;
+ }
+
+ /* this should trigger a realloc() */
+ vector.setSafe(initialCapacity, 10000);
+
+ /* underlying buffer should now be able to store double the number of values */
+ assertTrue(vector.getValueCapacity() >= 2 * initialCapacity);
+
+ /* check vector data after realloc */
+ assertEquals(100, vector.get(0));
+ assertEquals(101, vector.get(1));
+ assertEquals(102, vector.get(100));
+ assertEquals(103, vector.get(1022));
+ assertEquals(104, vector.get(1023));
+ assertEquals(10000, vector.get(initialCapacity));
+
+ /* reset the vector */
+ int capacityBeforeReset = vector.getValueCapacity();
+ vector.reset();
+
+ /* capacity shouldn't change after reset */
+ assertEquals(capacityBeforeReset, vector.getValueCapacity());
+
+ /* vector data should have been zeroed out */
+ for (int i = 0; i < capacityBeforeReset; i++) {
+ // TODO: test vector.get(i) is 0 after unsafe get added
+ assertEquals("non-zero data not expected at index: " + i, true, vector.isNull(i));
+ }
+ }
+ }
+
+ @Test /* IntVector */
+ public void testFixedType2() {
+ try (final IntVector intVector = new IntVector(EMPTY_SCHEMA_PATH, allocator)) {
+ boolean error = false;
+ int initialCapacity = 16;
+
+ /* we should not throw exception for these values of capacity */
+ intVector.setInitialCapacity(MAX_VALUE_COUNT - 1);
+ intVector.setInitialCapacity(MAX_VALUE_COUNT);
+
+ try {
+ intVector.setInitialCapacity(MAX_VALUE_COUNT * 2);
+ } catch (OversizedAllocationException oe) {
+ error = true;
+ } finally {
+ assertTrue(error);
+ error = false;
+ }
+
+ intVector.setInitialCapacity(initialCapacity);
+ /* no memory allocation has happened yet so capacity of underlying buffer should be 0 */
+ assertEquals(0, intVector.getValueCapacity());
+
+ /* allocate 64 bytes (16 * 4) */
+ intVector.allocateNew();
+ /* underlying buffer should be able to store 16 values */
+ assertTrue(intVector.getValueCapacity() >= initialCapacity);
+ initialCapacity = intVector.getValueCapacity();
+
+ /* populate the vector */
+ int j = 1;
+ for (int i = 0; i < initialCapacity; i += 2) {
+ intVector.set(i, j);
+ j++;
+ }
+
+ try {
+ intVector.set(initialCapacity, j);
+ } catch (IndexOutOfBoundsException ie) {
+ error = true;
+ } finally {
+ assertTrue(error);
+ error = false;
+ }
+
+ /* check vector contents */
+ j = 1;
+ for (int i = 0; i < initialCapacity; i += 2) {
+ assertEquals("unexpected value at index: " + i, j, intVector.get(i));
+ j++;
+ }
+
+ try {
+ intVector.get(initialCapacity);
+ } catch (IndexOutOfBoundsException ie) {
+ error = true;
+ } finally {
+ assertTrue(error);
+ error = false;
+ }
+
+ /* this should trigger a realloc() */
+ intVector.setSafe(initialCapacity, j);
+
+ /* underlying buffer should now be able to store double the number of values */
+ assertTrue(intVector.getValueCapacity() >= initialCapacity * 2);
+
+ /* vector data should still be intact after realloc */
+ j = 1;
+ for (int i = 0; i <= initialCapacity; i += 2) {
+ assertEquals("unexpected value at index: " + i, j, intVector.get(i));
+ j++;
+ }
+
+ /* reset the vector */
+ int capacityBeforeRealloc = intVector.getValueCapacity();
+ intVector.reset();
+
+ /* capacity shouldn't change after reset */
+ assertEquals(capacityBeforeRealloc, intVector.getValueCapacity());
+
+ /* vector data should have been zeroed out */
+ for (int i = 0; i < capacityBeforeRealloc; i++) {
+ assertEquals("non-zero data not expected at index: " + i, true, intVector.isNull(i));
+ }
+ }
+ }
+
+ @Test /* VarCharVector */
+ public void testSizeOfValueBuffer() {
+ try (final VarCharVector vector = new VarCharVector(EMPTY_SCHEMA_PATH, allocator)) {
+ int valueCount = 100;
+ int currentSize = 0;
+ vector.setInitialCapacity(valueCount);
+ vector.allocateNew();
+ vector.setValueCount(valueCount);
+ for (int i = 0; i < valueCount; i++) {
+ currentSize += i;
+ vector.setSafe(i, new byte[i]);
+ }
+
+ assertEquals(currentSize, vector.sizeOfValueBuffer());
+ }
+ }
+
+ @Test /* Float4Vector */
+ public void testFixedType3() {
+ try (final Float4Vector floatVector = new Float4Vector(EMPTY_SCHEMA_PATH, allocator)) {
+ boolean error = false;
+ int initialCapacity = 16;
+
+ /* we should not throw exception for these values of capacity */
+ floatVector.setInitialCapacity(MAX_VALUE_COUNT - 1);
+ floatVector.setInitialCapacity(MAX_VALUE_COUNT);
+
+ try {
+ floatVector.setInitialCapacity(MAX_VALUE_COUNT * 2);
+ } catch (OversizedAllocationException oe) {
+ error = true;
+ } finally {
+ assertTrue(error);
+ error = false;
+ }
+
+ floatVector.setInitialCapacity(initialCapacity);
+ /* no memory allocation has happened yet so capacity of underlying buffer should be 0 */
+ assertEquals(0, floatVector.getValueCapacity());
+
+ /* allocate 64 bytes (16 * 4) */
+ floatVector.allocateNew();
+ /* underlying buffer should be able to store 16 values */
+ assertTrue(floatVector.getValueCapacity() >= initialCapacity);
+ initialCapacity = floatVector.getValueCapacity();
+
+ floatVector.zeroVector();
+
+ /* populate the floatVector */
+ floatVector.set(0, 1.5f);
+ floatVector.set(2, 2.5f);
+ floatVector.set(4, 3.3f);
+ floatVector.set(6, 4.8f);
+ floatVector.set(8, 5.6f);
+ floatVector.set(10, 6.6f);
+ floatVector.set(12, 7.8f);
+ floatVector.set(14, 8.5f);
+
+ try {
+ floatVector.set(initialCapacity, 9.5f);
+ } catch (IndexOutOfBoundsException ie) {
+ error = true;
+ } finally {
+ assertTrue(error);
+ error = false;
+ }
+
+ /* check vector contents */
+ assertEquals(1.5f, floatVector.get(0), 0);
+ assertEquals(2.5f, floatVector.get(2), 0);
+ assertEquals(3.3f, floatVector.get(4), 0);
+ assertEquals(4.8f, floatVector.get(6), 0);
+ assertEquals(5.6f, floatVector.get(8), 0);
+ assertEquals(6.6f, floatVector.get(10), 0);
+ assertEquals(7.8f, floatVector.get(12), 0);
+ assertEquals(8.5f, floatVector.get(14), 0);
+
+ try {
+ floatVector.get(initialCapacity);
+ } catch (IndexOutOfBoundsException ie) {
+ error = true;
+ } finally {
+ assertTrue(error);
+ error = false;
+ }
+
+ /* this should trigger a realloc() */
+ floatVector.setSafe(initialCapacity, 9.5f);
+
+ /* underlying buffer should now be able to store double the number of values */
+ assertTrue(floatVector.getValueCapacity() >= initialCapacity * 2);
+
+ /* vector data should still be intact after realloc */
+ assertEquals(1.5f, floatVector.get(0), 0);
+ assertEquals(2.5f, floatVector.get(2), 0);
+ assertEquals(3.3f, floatVector.get(4), 0);
+ assertEquals(4.8f, floatVector.get(6), 0);
+ assertEquals(5.6f, floatVector.get(8), 0);
+ assertEquals(6.6f, floatVector.get(10), 0);
+ assertEquals(7.8f, floatVector.get(12), 0);
+ assertEquals(8.5f, floatVector.get(14), 0);
+ assertEquals(9.5f, floatVector.get(initialCapacity), 0);
+
+ /* reset the vector */
+ int capacityBeforeReset = floatVector.getValueCapacity();
+ floatVector.reset();
+
+ /* capacity shouldn't change after reset */
+ assertEquals(capacityBeforeReset, floatVector.getValueCapacity());
+
+ /* vector data should be zeroed out */
+ for (int i = 0; i < capacityBeforeReset; i++) {
+ assertEquals("non-zero data not expected at index: " + i, true, floatVector.isNull(i));
+ }
+ }
+ }
+
+ @Test /* Float8Vector */
+ public void testFixedType4() {
+ try (final Float8Vector floatVector = new Float8Vector(EMPTY_SCHEMA_PATH, allocator)) {
+ boolean error = false;
+ int initialCapacity = 16;
+
+ /* we should not throw exception for these values of capacity */
+ floatVector.setInitialCapacity(MAX_VALUE_COUNT_8BYTE - 1);
+ floatVector.setInitialCapacity(MAX_VALUE_COUNT_8BYTE);
+
+ try {
+ floatVector.setInitialCapacity(MAX_VALUE_COUNT_8BYTE * 2);
+ } catch (OversizedAllocationException oe) {
+ error = true;
+ } finally {
+ assertTrue(error);
+ error = false;
+ }
+
+ floatVector.setInitialCapacity(initialCapacity);
+ /* no memory allocation has happened yet so capacity of underlying buffer should be 0 */
+ assertEquals(0, floatVector.getValueCapacity());
+
+ /* allocate 128 bytes (16 * 8) */
+ floatVector.allocateNew();
+ /* underlying buffer should be able to store 16 values */
+ assertTrue(floatVector.getValueCapacity() >= initialCapacity);
+ initialCapacity = floatVector.getValueCapacity();
+
+ /* populate the vector */
+ floatVector.set(0, 1.55);
+ floatVector.set(2, 2.53);
+ floatVector.set(4, 3.36);
+ floatVector.set(6, 4.82);
+ floatVector.set(8, 5.67);
+ floatVector.set(10, 6.67);
+ floatVector.set(12, 7.87);
+ floatVector.set(14, 8.56);
+
+ try {
+ floatVector.set(initialCapacity, 9.53);
+ } catch (IndexOutOfBoundsException ie) {
+ error = true;
+ } finally {
+ assertTrue(error);
+ error = false;
+ }
+
+ /* check floatVector contents */
+ assertEquals(1.55, floatVector.get(0), 0);
+ assertEquals(2.53, floatVector.get(2), 0);
+ assertEquals(3.36, floatVector.get(4), 0);
+ assertEquals(4.82, floatVector.get(6), 0);
+ assertEquals(5.67, floatVector.get(8), 0);
+ assertEquals(6.67, floatVector.get(10), 0);
+ assertEquals(7.87, floatVector.get(12), 0);
+ assertEquals(8.56, floatVector.get(14), 0);
+
+ try {
+ floatVector.get(initialCapacity);
+ } catch (IndexOutOfBoundsException ie) {
+ error = true;
+ } finally {
+ assertTrue(error);
+ error = false;
+ }
+
+ /* this should trigger a realloc() */
+ floatVector.setSafe(initialCapacity, 9.53);
+
+ /* underlying buffer should now be able to store double the number of values */
+ assertTrue(floatVector.getValueCapacity() >= initialCapacity * 2);
+
+ /* vector data should still be intact after realloc */
+ assertEquals(1.55, floatVector.get(0), 0);
+ assertEquals(2.53, floatVector.get(2), 0);
+ assertEquals(3.36, floatVector.get(4), 0);
+ assertEquals(4.82, floatVector.get(6), 0);
+ assertEquals(5.67, floatVector.get(8), 0);
+ assertEquals(6.67, floatVector.get(10), 0);
+ assertEquals(7.87, floatVector.get(12), 0);
+ assertEquals(8.56, floatVector.get(14), 0);
+ assertEquals(9.53, floatVector.get(initialCapacity), 0);
+
+ /* reset the vector */
+ int capacityBeforeReset = floatVector.getValueCapacity();
+ floatVector.reset();
+
+ /* capacity shouldn't change after reset */
+ assertEquals(capacityBeforeReset, floatVector.getValueCapacity());
+
+ /* vector data should be zeroed out */
+ for (int i = 0; i < capacityBeforeReset; i++) {
+ assertEquals("non-zero data not expected at index: " + i, true, floatVector.isNull(i));
+ }
+ }
+ }
+
+ @Test /* UInt4Vector */
+ public void testNullableFixedType1() {
+
+ // Create a new value vector for 1024 integers.
+ try (final UInt4Vector vector = newVector(UInt4Vector.class, EMPTY_SCHEMA_PATH, new ArrowType.Int(32, false),
+ allocator);) {
+ boolean error = false;
+ int initialCapacity = 1024;
+
+ vector.setInitialCapacity(initialCapacity);
+ /* no memory allocation has happened yet */
+ assertEquals(0, vector.getValueCapacity());
+
+ vector.allocateNew();
+ assertTrue(vector.getValueCapacity() >= initialCapacity);
+ initialCapacity = vector.getValueCapacity();
+
+ // Put and set a few values
+ vector.set(0, 100);
+ vector.set(1, 101);
+ vector.set(100, 102);
+ vector.set(initialCapacity - 2, 103);
+ vector.set(initialCapacity - 1, 104);
+
+ /* check vector contents */
+ assertEquals(100, vector.get(0));
+ assertEquals(101, vector.get(1));
+ assertEquals(102, vector.get(100));
+ assertEquals(103, vector.get(initialCapacity - 2));
+ assertEquals(104, vector.get(initialCapacity - 1));
+
+ int val = 0;
+
+ /* check unset bits/null values */
+ for (int i = 2, j = 101; i <= 99 || j <= initialCapacity - 3; i++, j++) {
+ if (i <= 99) {
+ assertTrue(vector.isNull(i));
+ }
+ if (j <= initialCapacity - 3) {
+ assertTrue(vector.isNull(j));
+ }
+ }
+
+ try {
+ vector.set(initialCapacity, 10000);
+ } catch (IndexOutOfBoundsException ie) {
+ error = true;
+ } finally {
+ assertTrue(error);
+ error = false;
+ }
+
+ try {
+ vector.get(initialCapacity);
+ } catch (IndexOutOfBoundsException ie) {
+ error = true;
+ } finally {
+ assertTrue(error);
+ error = false;
+ }
+
+ /* should trigger a realloc of the underlying bitvector and valuevector */
+ vector.setSafe(initialCapacity, 10000);
+
+ /* check new capacity */
+ assertTrue(vector.getValueCapacity() >= initialCapacity * 2);
+
+ /* vector contents should still be intact after realloc */
+ assertEquals(100, vector.get(0));
+ assertEquals(101, vector.get(1));
+ assertEquals(102, vector.get(100));
+ assertEquals(103, vector.get(initialCapacity - 2));
+ assertEquals(104, vector.get(initialCapacity - 1));
+ assertEquals(10000, vector.get(initialCapacity));
+
+ val = 0;
+
+ /* check unset bits/null values */
+ for (int i = 2, j = 101; i < 99 || j < initialCapacity - 3; i++, j++) {
+ if (i <= 99) {
+ assertTrue(vector.isNull(i));
+ }
+ if (j <= initialCapacity - 3) {
+ assertTrue(vector.isNull(j));
+ }
+ }
+
+ /* reset the vector */
+ int capacityBeforeReset = vector.getValueCapacity();
+ vector.reset();
+
+ /* capacity shouldn't change after reset */
+ assertEquals(capacityBeforeReset, vector.getValueCapacity());
+
+ /* vector data should be zeroed out */
+ for (int i = 0; i < capacityBeforeReset; i++) {
+ assertTrue("non-null data not expected at index: " + i, vector.isNull(i));
+ }
+ }
+ }
+
+ @Test /* Float4Vector */
+ public void testNullableFixedType2() {
+ // Create a new value vector for 1024 integers
+ try (final Float4Vector vector = newVector(Float4Vector.class, EMPTY_SCHEMA_PATH, MinorType.FLOAT4, allocator);) {
+ boolean error = false;
+ int initialCapacity = 16;
+
+ vector.setInitialCapacity(initialCapacity);
+ /* no memory allocation has happened yet */
+ assertEquals(0, vector.getValueCapacity());
+
+ vector.allocateNew();
+ assertTrue(vector.getValueCapacity() >= initialCapacity);
+ initialCapacity = vector.getValueCapacity();
+
+ /* populate the vector */
+ vector.set(0, 100.5f);
+ vector.set(2, 201.5f);
+ vector.set(4, 300.3f);
+ vector.set(6, 423.8f);
+ vector.set(8, 555.6f);
+ vector.set(10, 66.6f);
+ vector.set(12, 78.8f);
+ vector.set(14, 89.5f);
+
+ try {
+ vector.set(initialCapacity, 90.5f);
+ } catch (IndexOutOfBoundsException ie) {
+ error = true;
+ } finally {
+ assertTrue(error);
+ error = false;
+ }
+
+ /* check vector contents */
+ assertEquals(100.5f, vector.get(0), 0);
+ assertTrue(vector.isNull(1));
+ assertEquals(201.5f, vector.get(2), 0);
+ assertTrue(vector.isNull(3));
+ assertEquals(300.3f, vector.get(4), 0);
+ assertTrue(vector.isNull(5));
+ assertEquals(423.8f, vector.get(6), 0);
+ assertTrue(vector.isNull(7));
+ assertEquals(555.6f, vector.get(8), 0);
+ assertTrue(vector.isNull(9));
+ assertEquals(66.6f, vector.get(10), 0);
+ assertTrue(vector.isNull(11));
+ assertEquals(78.8f, vector.get(12), 0);
+ assertTrue(vector.isNull(13));
+ assertEquals(89.5f, vector.get(14), 0);
+ assertTrue(vector.isNull(15));
+
+ try {
+ vector.get(initialCapacity);
+ } catch (IndexOutOfBoundsException ie) {
+ error = true;
+ } finally {
+ assertTrue(error);
+ error = false;
+ }
+
+ /* this should trigger a realloc() */
+ vector.setSafe(initialCapacity, 90.5f);
+
+ /* underlying buffer should now be able to store double the number of values */
+ assertTrue(vector.getValueCapacity() >= 2 * initialCapacity);
+
+ /* vector data should still be intact after realloc */
+ assertEquals(100.5f, vector.get(0), 0);
+ assertTrue(vector.isNull(1));
+ assertEquals(201.5f, vector.get(2), 0);
+ assertTrue(vector.isNull(3));
+ assertEquals(300.3f, vector.get(4), 0);
+ assertTrue(vector.isNull(5));
+ assertEquals(423.8f, vector.get(6), 0);
+ assertTrue(vector.isNull(7));
+ assertEquals(555.6f, vector.get(8), 0);
+ assertTrue(vector.isNull(9));
+ assertEquals(66.6f, vector.get(10), 0);
+ assertTrue(vector.isNull(11));
+ assertEquals(78.8f, vector.get(12), 0);
+ assertTrue(vector.isNull(13));
+ assertEquals(89.5f, vector.get(14), 0);
+ assertTrue(vector.isNull(15));
+
+ /* reset the vector */
+ int capacityBeforeReset = vector.getValueCapacity();
+ vector.reset();
+
+ /* capacity shouldn't change after reset */
+ assertEquals(capacityBeforeReset, vector.getValueCapacity());
+
+ /* vector data should be zeroed out */
+ for (int i = 0; i < capacityBeforeReset; i++) {
+ assertTrue("non-null data not expected at index: " + i, vector.isNull(i));
+ }
+ }
+ }
+
+ @Test /* IntVector */
+ public void testNullableFixedType3() {
+ // Create a new value vector for 1024 integers
+ try (final IntVector vector = newVector(IntVector.class, EMPTY_SCHEMA_PATH, MinorType.INT, allocator)) {
+ boolean error = false;
+ int initialCapacity = 1024;
+
+ /* no memory allocation has happened yet so capacity of underlying buffer should be 0 */
+ assertEquals(0, vector.getValueCapacity());
+ /* allocate space for 4KB data (1024 * 4) */
+ vector.allocateNew(initialCapacity);
+ /* underlying buffer should be able to store 1024 values */
+ assertTrue(vector.getValueCapacity() >= initialCapacity);
+ initialCapacity = vector.getValueCapacity();
+
+ vector.set(0, 1);
+ vector.set(1, 2);
+ vector.set(100, 3);
+ vector.set(1022, 4);
+ vector.set(1023, 5);
+
+ /* check vector contents */
+ int j = 1;
+ for (int i = 0; i <= 1023; i++) {
+ if ((i >= 2 && i <= 99) || (i >= 101 && i <= 1021)) {
+ assertTrue("non-null data not expected at index: " + i, vector.isNull(i));
+ } else {
+ assertFalse("null data not expected at index: " + i, vector.isNull(i));
+ assertEquals("unexpected value at index: " + i, j, vector.get(i));
+ j++;
+ }
+ }
+
+ vector.setValueCount(1024);
+ Field field = vector.getField();
+
+ List<ArrowBuf> buffers = vector.getFieldBuffers();
+
+ assertEquals(2, buffers.size());
+
+ ArrowBuf validityVectorBuf = buffers.get(0);
+
+ /* bitvector tracks 1024 integers --> 1024 bits --> 128 bytes */
+ assertTrue(validityVectorBuf.readableBytes() >= 128);
+ assertEquals(3, validityVectorBuf.getByte(0)); // 1st and second bit defined
+ for (int i = 1; i < 12; i++) {
+ assertEquals(0, validityVectorBuf.getByte(i)); // nothing defined until 100
+ }
+ assertEquals(16, validityVectorBuf.getByte(12)); // 100th bit is defined (12 * 8 + 4)
+ for (int i = 13; i < 127; i++) {
+ assertEquals(0, validityVectorBuf.getByte(i)); // nothing defined between 100th and 1022nd
+ }
+ assertEquals(-64, validityVectorBuf.getByte(127)); // 1022nd and 1023rd bit defined
+
+ /* this should trigger a realloc() */
+ vector.setSafe(initialCapacity, 6);
+
+ /* underlying buffer should now be able to store double the number of values */
+ assertTrue(vector.getValueCapacity() >= 2 * initialCapacity);
+
+ /* vector data should still be intact after realloc */
+ j = 1;
+ for (int i = 0; i < (initialCapacity * 2); i++) {
+ if ((i > 1023 && i != initialCapacity) || (i >= 2 && i <= 99) || (i >= 101 && i <= 1021)) {
+ assertTrue("non-null data not expected at index: " + i, vector.isNull(i));
+ } else {
+ assertFalse("null data not expected at index: " + i, vector.isNull(i));
+ assertEquals("unexpected value at index: " + i, j, vector.get(i));
+ j++;
+ }
+ }
+
+ /* reset the vector */
+ int capacityBeforeReset = vector.getValueCapacity();
+ vector.reset();
+
+ /* capacity shouldn't change after reset */
+ assertEquals(capacityBeforeReset, vector.getValueCapacity());
+
+ /* vector data should have been zeroed out */
+ for (int i = 0; i < capacityBeforeReset; i++) {
+ assertTrue("non-null data not expected at index: " + i, vector.isNull(i));
+ }
+
+ vector.allocateNew(initialCapacity * 4);
+ // vector has been erased
+ for (int i = 0; i < initialCapacity * 4; i++) {
+ assertTrue("non-null data not expected at index: " + i, vector.isNull(i));
+ }
+ }
+ }
+
+ @Test /* IntVector */
+ public void testNullableFixedType4() {
+ try (final IntVector vector = newVector(IntVector.class, EMPTY_SCHEMA_PATH, MinorType.INT, allocator)) {
+
+ /* no memory allocation has happened yet */
+ assertEquals(0, vector.getValueCapacity());
+
+ vector.allocateNew();
+ int valueCapacity = vector.getValueCapacity();
+ assertEquals(vector.INITIAL_VALUE_ALLOCATION, valueCapacity);
+
+ int baseValue = 20000;
+
+ for (int i = 0; i < valueCapacity; i++) {
+ if ((i & 1) == 1) {
+ vector.set(i, baseValue + i);
+ }
+ }
+
+ for (int i = 0; i < valueCapacity; i++) {
+ if ((i & 1) == 1) {
+ assertFalse("unexpected null value at index: " + i, vector.isNull(i));
+ assertEquals("unexpected value at index: " + i, (baseValue + i), vector.get(i));
+ } else {
+ assertTrue("unexpected non-null value at index: " + i, vector.isNull(i));
+ }
+ }
+
+ vector.setSafe(valueCapacity, 20000000);
+ assertTrue(vector.getValueCapacity() >= valueCapacity * 2);
+
+ for (int i = 0; i < vector.getValueCapacity(); i++) {
+ if (i == valueCapacity) {
+ assertFalse("unexpected null value at index: " + i, vector.isNull(i));
+ assertEquals("unexpected value at index: " + i, 20000000, vector.get(i));
+ } else if (i < valueCapacity) {
+ if ((i & 1) == 1) {
+ assertFalse("unexpected null value at index: " + i, vector.isNull(i));
+ assertEquals("unexpected value at index: " + i, (baseValue + i), vector.get(i));
+ }
+ } else {
+ assertTrue("unexpected non-null value at index: " + i, vector.isNull(i));
+ }
+ }
+
+ vector.zeroVector();
+
+ for (int i = 0; i < vector.getValueCapacity(); i += 2) {
+ vector.set(i, baseValue + i);
+ }
+
+ for (int i = 0; i < vector.getValueCapacity(); i++) {
+ if (i % 2 == 0) {
+ assertFalse("unexpected null value at index: " + i, vector.isNull(i));
+ assertEquals("unexpected value at index: " + i, (baseValue + i), vector.get(i));
+ } else {
+ assertTrue("unexpected non-null value at index: " + i, vector.isNull(i));
+ }
+ }
+
+ int valueCapacityBeforeRealloc = vector.getValueCapacity();
+ vector.setSafe(valueCapacityBeforeRealloc + 1000, 400000000);
+ assertTrue(vector.getValueCapacity() >= valueCapacity * 4);
+
+ for (int i = 0; i < vector.getValueCapacity(); i++) {
+ if (i == (valueCapacityBeforeRealloc + 1000)) {
+ assertFalse("unexpected null value at index: " + i, vector.isNull(i));
+ assertEquals("unexpected value at index: " + i, 400000000, vector.get(i));
+ } else if (i < valueCapacityBeforeRealloc && (i % 2) == 0) {
+ assertFalse("unexpected null value at index: " + i, vector.isNull(i));
+ assertEquals("unexpected value at index: " + i, baseValue + i, vector.get(i));
+ } else {
+ assertTrue("unexpected non-null value at index: " + i, vector.isNull(i));
+ }
+ }
+
+ /* reset the vector */
+ int valueCapacityBeforeReset = vector.getValueCapacity();
+ vector.reset();
+
+ /* capacity shouldn't change after reset */
+ assertEquals(valueCapacityBeforeReset, vector.getValueCapacity());
+
+ /* vector data should be zeroed out */
+ for (int i = 0; i < valueCapacityBeforeReset; i++) {
+ assertTrue("non-null data not expected at index: " + i, vector.isNull(i));
+ }
+ }
+ }
+
+ /*
+ * Tests for Variable Width Vectors
+ *
+ * Covered types as of now
+ *
+ * -- VarCharVector
+ * -- VarBinaryVector
+ *
+ * TODO:
+ *
+ * -- VarCharVector
+ * -- VarBinaryVector
+ */
+
+ /**
+ * ARROW-7831: this checks that a slice taken off a buffer is still readable after that buffer's allocator is closed.
+ */
+ @Test /* VarCharVector */
+ public void testSplitAndTransfer1() {
+ try (final VarCharVector targetVector = newVarCharVector("split-target", allocator)) {
+ try (final VarCharVector sourceVector = newVarCharVector(EMPTY_SCHEMA_PATH, allocator)) {
+ sourceVector.allocateNew(1024 * 10, 1024);
+
+ sourceVector.set(0, STR1);
+ sourceVector.set(1, STR2);
+ sourceVector.set(2, STR3);
+ sourceVector.setValueCount(3);
+
+ final long allocatedMem = allocator.getAllocatedMemory();
+ final int validityRefCnt = sourceVector.getValidityBuffer().refCnt();
+ final int offsetRefCnt = sourceVector.getOffsetBuffer().refCnt();
+ final int dataRefCnt = sourceVector.getDataBuffer().refCnt();
+
+ // split and transfer with slice starting at the beginning: this should not allocate anything new
+ sourceVector.splitAndTransferTo(0, 2, targetVector);
+ assertEquals(allocatedMem, allocator.getAllocatedMemory());
+ // The validity and offset buffers are sliced from a same buffer.See BaseFixedWidthVector#allocateBytes.
+ // Therefore, the refcnt of the validity buffer is increased once since the startIndex is 0. The refcnt of the
+ // offset buffer is increased as well for the same reason. This amounts to a total of 2.
+ assertEquals(validityRefCnt + 2, sourceVector.getValidityBuffer().refCnt());
+ assertEquals(offsetRefCnt + 2, sourceVector.getOffsetBuffer().refCnt());
+ assertEquals(dataRefCnt + 1, sourceVector.getDataBuffer().refCnt());
+ }
+ assertArrayEquals(STR1, targetVector.get(0));
+ assertArrayEquals(STR2, targetVector.get(1));
+ }
+ }
+
+ /**
+ * ARROW-7831: this checks that a vector that got sliced is still readable after the slice's allocator got closed.
+ */
+ @Test /* VarCharVector */
+ public void testSplitAndTransfer2() {
+ try (final VarCharVector sourceVector = newVarCharVector(EMPTY_SCHEMA_PATH, allocator)) {
+ try (final VarCharVector targetVector = newVarCharVector("split-target", allocator)) {
+ sourceVector.allocateNew(1024 * 10, 1024);
+
+ sourceVector.set(0, STR1);
+ sourceVector.set(1, STR2);
+ sourceVector.set(2, STR3);
+ sourceVector.setValueCount(3);
+
+ final long allocatedMem = allocator.getAllocatedMemory();
+ final int validityRefCnt = sourceVector.getValidityBuffer().refCnt();
+ final int offsetRefCnt = sourceVector.getOffsetBuffer().refCnt();
+ final int dataRefCnt = sourceVector.getDataBuffer().refCnt();
+
+ // split and transfer with slice starting at the beginning: this should not allocate anything new
+ sourceVector.splitAndTransferTo(0, 2, targetVector);
+ assertEquals(allocatedMem, allocator.getAllocatedMemory());
+ // The validity and offset buffers are sliced from a same buffer.See BaseFixedWidthVector#allocateBytes.
+ // Therefore, the refcnt of the validity buffer is increased once since the startIndex is 0. The refcnt of the
+ // offset buffer is increased as well for the same reason. This amounts to a total of 2.
+ assertEquals(validityRefCnt + 2, sourceVector.getValidityBuffer().refCnt());
+ assertEquals(offsetRefCnt + 2, sourceVector.getOffsetBuffer().refCnt());
+ assertEquals(dataRefCnt + 1, sourceVector.getDataBuffer().refCnt());
+ }
+ assertArrayEquals(STR1, sourceVector.get(0));
+ assertArrayEquals(STR2, sourceVector.get(1));
+ assertArrayEquals(STR3, sourceVector.get(2));
+ }
+ }
+
+ /**
+ * ARROW-7831: this checks an offset splitting optimization, in the case where all the values up to the start of the
+ * slice are null/empty, which avoids allocation for the offset buffer.
+ */
+ @Test /* VarCharVector */
+ public void testSplitAndTransfer3() {
+ try (final VarCharVector targetVector = newVarCharVector("split-target", allocator);
+ final VarCharVector sourceVector = newVarCharVector(EMPTY_SCHEMA_PATH, allocator)) {
+ sourceVector.allocateNew(1024 * 10, 1024);
+
+ sourceVector.set(0, new byte[0]);
+ sourceVector.setNull(1);
+ sourceVector.set(2, STR1);
+ sourceVector.set(3, STR2);
+ sourceVector.set(4, STR3);
+ sourceVector.setValueCount(5);
+
+ final long allocatedMem = allocator.getAllocatedMemory();
+ final int validityRefCnt = sourceVector.getValidityBuffer().refCnt();
+ final int offsetRefCnt = sourceVector.getOffsetBuffer().refCnt();
+ final int dataRefCnt = sourceVector.getDataBuffer().refCnt();
+
+ sourceVector.splitAndTransferTo(2, 2, targetVector);
+ // because the offset starts at 0 since the first 2 values are empty/null, the allocation only consists in
+ // the size needed for the validity buffer
+ final long validitySize =
+ DefaultRoundingPolicy.DEFAULT_ROUNDING_POLICY.getRoundedSize(
+ BaseValueVector.getValidityBufferSizeFromCount(2));
+ assertEquals(allocatedMem + validitySize, allocator.getAllocatedMemory());
+ // The validity and offset buffers are sliced from a same buffer.See BaseFixedWidthVector#allocateBytes.
+ // Since values up to the startIndex are empty/null, the offset buffer doesn't need to be reallocated and
+ // therefore its refcnt is increased by 1.
+ assertEquals(validityRefCnt + 1, sourceVector.getValidityBuffer().refCnt());
+ assertEquals(offsetRefCnt + 1, sourceVector.getOffsetBuffer().refCnt());
+ assertEquals(dataRefCnt + 1, sourceVector.getDataBuffer().refCnt());
+
+ assertArrayEquals(STR1, targetVector.get(0));
+ assertArrayEquals(STR2, targetVector.get(1));
+ }
+ }
+
+ /**
+ * ARROW-7831: ensures that data is transferred from one allocator to another in case of 0-index start special cases.
+ */
+ @Test /* VarCharVector */
+ public void testSplitAndTransfer4() {
+ try (final BufferAllocator targetAllocator = allocator.newChildAllocator("target-alloc", 256, 256);
+ final VarCharVector targetVector = newVarCharVector("split-target", targetAllocator)) {
+ try (final BufferAllocator sourceAllocator = allocator.newChildAllocator("source-alloc", 256, 256);
+ final VarCharVector sourceVector = newVarCharVector(EMPTY_SCHEMA_PATH, sourceAllocator)) {
+ sourceVector.allocateNew(50, 3);
+
+ sourceVector.set(0, STR1);
+ sourceVector.set(1, STR2);
+ sourceVector.set(2, STR3);
+ sourceVector.setValueCount(3);
+
+ final long allocatedMem = allocator.getAllocatedMemory();
+ final int validityRefCnt = sourceVector.getValidityBuffer().refCnt();
+ final int offsetRefCnt = sourceVector.getOffsetBuffer().refCnt();
+ final int dataRefCnt = sourceVector.getDataBuffer().refCnt();
+
+ // split and transfer with slice starting at the beginning: this should not allocate anything new
+ sourceVector.splitAndTransferTo(0, 2, targetVector);
+ assertEquals(allocatedMem, allocator.getAllocatedMemory());
+ // Unlike testSplitAndTransfer1 where the buffers originated from the same allocator, the refcnts of each
+ // buffers for this test should be the same as what the source allocator ended up with.
+ assertEquals(validityRefCnt, sourceVector.getValidityBuffer().refCnt());
+ assertEquals(offsetRefCnt, sourceVector.getOffsetBuffer().refCnt());
+ assertEquals(dataRefCnt, sourceVector.getDataBuffer().refCnt());
+ }
+ assertArrayEquals(STR1, targetVector.get(0));
+ assertArrayEquals(STR2, targetVector.get(1));
+ }
+ }
+
+ @Test /* VarCharVector */
+ public void testNullableVarType1() {
+
+ // Create a new value vector for 1024 integers.
+ try (final VarCharVector vector = newVarCharVector(EMPTY_SCHEMA_PATH, allocator)) {
+ vector.allocateNew(1024 * 10, 1024);
+
+ vector.set(0, STR1);
+ vector.set(1, STR2);
+ vector.set(2, STR3);
+ vector.setSafe(3, STR3, 1, STR3.length - 1);
+ vector.setSafe(4, STR3, 2, STR3.length - 2);
+ ByteBuffer str3ByteBuffer = ByteBuffer.wrap(STR3);
+ vector.setSafe(5, str3ByteBuffer, 1, STR3.length - 1);
+ vector.setSafe(6, str3ByteBuffer, 2, STR3.length - 2);
+
+ // Set with convenience function
+ Text txt = new Text("foo");
+ vector.setSafe(7, txt);
+
+ // Check the sample strings.
+ assertArrayEquals(STR1, vector.get(0));
+ assertArrayEquals(STR2, vector.get(1));
+ assertArrayEquals(STR3, vector.get(2));
+ assertArrayEquals(Arrays.copyOfRange(STR3, 1, STR3.length), vector.get(3));
+ assertArrayEquals(Arrays.copyOfRange(STR3, 2, STR3.length), vector.get(4));
+ assertArrayEquals(Arrays.copyOfRange(STR3, 1, STR3.length), vector.get(5));
+ assertArrayEquals(Arrays.copyOfRange(STR3, 2, STR3.length), vector.get(6));
+
+ // Check returning a Text object
+ assertEquals(txt, vector.getObject(7));
+
+ // Ensure null value throws.
+ boolean b = false;
+ assertNull(vector.get(8));
+ }
+ }
+
+ @Test /* VarBinaryVector */
+ public void testNullableVarType2() {
+
+ // Create a new value vector for 1024 integers.
+ try (final VarBinaryVector vector = newVarBinaryVector(EMPTY_SCHEMA_PATH, allocator)) {
+ vector.allocateNew(1024 * 10, 1024);
+
+ vector.set(0, STR1);
+ vector.set(1, STR2);
+ vector.set(2, STR3);
+ vector.setSafe(3, STR3, 1, STR3.length - 1);
+ vector.setSafe(4, STR3, 2, STR3.length - 2);
+ ByteBuffer str3ByteBuffer = ByteBuffer.wrap(STR3);
+ vector.setSafe(5, str3ByteBuffer, 1, STR3.length - 1);
+ vector.setSafe(6, str3ByteBuffer, 2, STR3.length - 2);
+
+ // Check the sample strings.
+ assertArrayEquals(STR1, vector.get(0));
+ assertArrayEquals(STR2, vector.get(1));
+ assertArrayEquals(STR3, vector.get(2));
+ assertArrayEquals(Arrays.copyOfRange(STR3, 1, STR3.length), vector.get(3));
+ assertArrayEquals(Arrays.copyOfRange(STR3, 2, STR3.length), vector.get(4));
+ assertArrayEquals(Arrays.copyOfRange(STR3, 1, STR3.length), vector.get(5));
+ assertArrayEquals(Arrays.copyOfRange(STR3, 2, STR3.length), vector.get(6));
+
+ // Ensure null value throws.
+ assertNull(vector.get(7));
+ }
+ }
+
+
+ /*
+ * generic tests
+ *
+ * -- lastSet() and setValueCount()
+ * -- fillEmpties()
+ * -- VectorLoader and VectorUnloader
+ * -- some realloc tests
+ *
+ * TODO:
+ *
+ * The realloc() related tests below should be moved up and we need to
+ * add realloc related tests (edge cases) for more vector types.
+ */
+
+ @Test /* Float8Vector */
+ public void testReallocAfterVectorTransfer1() {
+ try (final Float8Vector vector = new Float8Vector(EMPTY_SCHEMA_PATH, allocator)) {
+ int initialCapacity = 4096;
+ boolean error = false;
+
+ /* use the default capacity; 4096*8 => 32KB */
+ vector.setInitialCapacity(initialCapacity);
+ vector.allocateNew();
+
+ assertTrue(vector.getValueCapacity() >= initialCapacity);
+ initialCapacity = vector.getValueCapacity();
+
+ double baseValue = 100.375;
+
+ for (int i = 0; i < initialCapacity; i++) {
+ vector.setSafe(i, baseValue + (double) i);
+ }
+
+ /* the above setSafe calls should not have triggered a realloc as
+ * we are within the capacity. check the vector contents
+ */
+ assertEquals(initialCapacity, vector.getValueCapacity());
+
+ for (int i = 0; i < initialCapacity; i++) {
+ double value = vector.get(i);
+ assertEquals(baseValue + (double) i, value, 0);
+ }
+
+ /* this should trigger a realloc */
+ vector.setSafe(initialCapacity, baseValue + (double) initialCapacity);
+ assertTrue(vector.getValueCapacity() >= initialCapacity * 2);
+ int capacityAfterRealloc1 = vector.getValueCapacity();
+
+ for (int i = initialCapacity + 1; i < capacityAfterRealloc1; i++) {
+ vector.setSafe(i, baseValue + (double) i);
+ }
+
+ for (int i = 0; i < capacityAfterRealloc1; i++) {
+ double value = vector.get(i);
+ assertEquals(baseValue + (double) i, value, 0);
+ }
+
+ /* this should trigger a realloc */
+ vector.setSafe(capacityAfterRealloc1, baseValue + (double) (capacityAfterRealloc1));
+ assertTrue(vector.getValueCapacity() >= initialCapacity * 4);
+ int capacityAfterRealloc2 = vector.getValueCapacity();
+
+ for (int i = capacityAfterRealloc1 + 1; i < capacityAfterRealloc2; i++) {
+ vector.setSafe(i, baseValue + (double) i);
+ }
+
+ for (int i = 0; i < capacityAfterRealloc2; i++) {
+ double value = vector.get(i);
+ assertEquals(baseValue + (double) i, value, 0);
+ }
+
+ /* at this point we are working with a 128KB buffer data for this
+ * vector. now let's transfer this vector
+ */
+
+ TransferPair transferPair = vector.getTransferPair(allocator);
+ transferPair.transfer();
+
+ Float8Vector toVector = (Float8Vector) transferPair.getTo();
+
+ /* now let's realloc the toVector */
+ toVector.reAlloc();
+ assertTrue(toVector.getValueCapacity() >= initialCapacity * 8);
+
+ for (int i = 0; i < toVector.getValueCapacity(); i++) {
+ if (i < capacityAfterRealloc2) {
+ assertEquals(baseValue + (double) i, toVector.get(i), 0);
+ } else {
+ assertTrue(toVector.isNull(i));
+ }
+ }
+
+ toVector.close();
+ }
+ }
+
+ @Test /* Float8Vector */
+ public void testReallocAfterVectorTransfer2() {
+ try (final Float8Vector vector = new Float8Vector(EMPTY_SCHEMA_PATH, allocator)) {
+ int initialCapacity = 4096;
+ boolean error = false;
+
+ vector.allocateNew(initialCapacity);
+ assertTrue(vector.getValueCapacity() >= initialCapacity);
+ initialCapacity = vector.getValueCapacity();
+
+ double baseValue = 100.375;
+
+ for (int i = 0; i < initialCapacity; i++) {
+ vector.setSafe(i, baseValue + (double) i);
+ }
+
+ /* the above setSafe calls should not have triggered a realloc as
+ * we are within the capacity. check the vector contents
+ */
+ assertEquals(initialCapacity, vector.getValueCapacity());
+
+ for (int i = 0; i < initialCapacity; i++) {
+ double value = vector.get(i);
+ assertEquals(baseValue + (double) i, value, 0);
+ }
+
+ /* this should trigger a realloc */
+ vector.setSafe(initialCapacity, baseValue + (double) initialCapacity);
+ assertTrue(vector.getValueCapacity() >= initialCapacity * 2);
+ int capacityAfterRealloc1 = vector.getValueCapacity();
+
+ for (int i = initialCapacity + 1; i < capacityAfterRealloc1; i++) {
+ vector.setSafe(i, baseValue + (double) i);
+ }
+
+ for (int i = 0; i < capacityAfterRealloc1; i++) {
+ double value = vector.get(i);
+ assertEquals(baseValue + (double) i, value, 0);
+ }
+
+ /* this should trigger a realloc */
+ vector.setSafe(capacityAfterRealloc1, baseValue + (double) (capacityAfterRealloc1));
+ assertTrue(vector.getValueCapacity() >= initialCapacity * 4);
+ int capacityAfterRealloc2 = vector.getValueCapacity();
+
+ for (int i = capacityAfterRealloc1 + 1; i < capacityAfterRealloc2; i++) {
+ vector.setSafe(i, baseValue + (double) i);
+ }
+
+ for (int i = 0; i < capacityAfterRealloc2; i++) {
+ double value = vector.get(i);
+ assertEquals(baseValue + (double) i, value, 0);
+ }
+
+ /* at this point we are working with a 128KB buffer data for this
+ * vector. now let's transfer this vector
+ */
+
+ TransferPair transferPair = vector.getTransferPair(allocator);
+ transferPair.transfer();
+
+ Float8Vector toVector = (Float8Vector) transferPair.getTo();
+
+ /* check toVector contents before realloc */
+ for (int i = 0; i < toVector.getValueCapacity(); i++) {
+ assertFalse("unexpected null value at index: " + i, toVector.isNull(i));
+ double value = toVector.get(i);
+ assertEquals("unexpected value at index: " + i, baseValue + (double) i, value, 0);
+ }
+
+ /* now let's realloc the toVector and check contents again */
+ toVector.reAlloc();
+ assertTrue(toVector.getValueCapacity() >= initialCapacity * 8);
+
+ for (int i = 0; i < toVector.getValueCapacity(); i++) {
+ if (i < capacityAfterRealloc2) {
+ assertFalse("unexpected null value at index: " + i, toVector.isNull(i));
+ double value = toVector.get(i);
+ assertEquals("unexpected value at index: " + i, baseValue + (double) i, value, 0);
+ } else {
+ assertTrue("unexpected non-null value at index: " + i, toVector.isNull(i));
+ }
+ }
+
+ toVector.close();
+ }
+ }
+
+ @Test /* VarCharVector */
+ public void testReallocAfterVectorTransfer3() {
+ try (final VarCharVector vector = new VarCharVector(EMPTY_SCHEMA_PATH, allocator)) {
+ /* 4096 values with 10 byte per record */
+ vector.allocateNew(4096 * 10, 4096);
+ int valueCapacity = vector.getValueCapacity();
+ assertTrue(valueCapacity >= 4096);
+
+ /* populate the vector */
+ for (int i = 0; i < valueCapacity; i++) {
+ if ((i & 1) == 1) {
+ vector.set(i, STR1);
+ } else {
+ vector.set(i, STR2);
+ }
+ }
+
+ /* Check the vector output */
+ for (int i = 0; i < valueCapacity; i++) {
+ if ((i & 1) == 1) {
+ assertArrayEquals(STR1, vector.get(i));
+ } else {
+ assertArrayEquals(STR2, vector.get(i));
+ }
+ }
+
+ /* trigger first realloc */
+ vector.setSafe(valueCapacity, STR2, 0, STR2.length);
+ assertTrue(vector.getValueCapacity() >= 2 * valueCapacity);
+ while (vector.getByteCapacity() < 10 * vector.getValueCapacity()) {
+ vector.reallocDataBuffer();
+ }
+
+ /* populate the remaining vector */
+ for (int i = valueCapacity; i < vector.getValueCapacity(); i++) {
+ if ((i & 1) == 1) {
+ vector.set(i, STR1);
+ } else {
+ vector.set(i, STR2);
+ }
+ }
+
+ /* Check the vector output */
+ valueCapacity = vector.getValueCapacity();
+ for (int i = 0; i < valueCapacity; i++) {
+ if ((i & 1) == 1) {
+ assertArrayEquals(STR1, vector.get(i));
+ } else {
+ assertArrayEquals(STR2, vector.get(i));
+ }
+ }
+
+ /* trigger second realloc */
+ vector.setSafe(valueCapacity + 10, STR2, 0, STR2.length);
+ assertTrue(vector.getValueCapacity() >= 2 * valueCapacity);
+ while (vector.getByteCapacity() < 10 * vector.getValueCapacity()) {
+ vector.reallocDataBuffer();
+ }
+
+ /* populate the remaining vector */
+ for (int i = valueCapacity; i < vector.getValueCapacity(); i++) {
+ if ((i & 1) == 1) {
+ vector.set(i, STR1);
+ } else {
+ vector.set(i, STR2);
+ }
+ }
+
+ /* Check the vector output */
+ valueCapacity = vector.getValueCapacity();
+ for (int i = 0; i < valueCapacity; i++) {
+ if ((i & 1) == 1) {
+ assertArrayEquals(STR1, vector.get(i));
+ } else {
+ assertArrayEquals(STR2, vector.get(i));
+ }
+ }
+
+ /* we are potentially working with 4x the size of vector buffer
+ * that we initially started with. Now let's transfer the vector.
+ */
+
+ TransferPair transferPair = vector.getTransferPair(allocator);
+ transferPair.transfer();
+ VarCharVector toVector = (VarCharVector) transferPair.getTo();
+ valueCapacity = toVector.getValueCapacity();
+
+ for (int i = 0; i < valueCapacity; i++) {
+ if ((i & 1) == 1) {
+ assertArrayEquals(STR1, toVector.get(i));
+ } else {
+ assertArrayEquals(STR2, toVector.get(i));
+ }
+ }
+
+ toVector.close();
+ }
+ }
+
+ @Test /* IntVector */
+ public void testReallocAfterVectorTransfer4() {
+ try (final IntVector vector = new IntVector(EMPTY_SCHEMA_PATH, allocator)) {
+
+ /* 4096 values */
+ vector.allocateNew(4096);
+ int valueCapacity = vector.getValueCapacity();
+ assertTrue(valueCapacity >= 4096);
+
+ /* populate the vector */
+ int baseValue = 1000;
+ for (int i = 0; i < valueCapacity; i++) {
+ if ((i & 1) == 0) {
+ vector.set(i, 1000 + i);
+ }
+ }
+
+ /* Check the vector output */
+ for (int i = 0; i < valueCapacity; i++) {
+ if ((i & 1) == 0) {
+ assertEquals(1000 + i, vector.get(i));
+ } else {
+ assertTrue(vector.isNull(i));
+ }
+ }
+
+ /* trigger first realloc */
+ vector.setSafe(valueCapacity, 10000000);
+ assertTrue(vector.getValueCapacity() >= valueCapacity * 2);
+
+ /* populate the remaining vector */
+ for (int i = valueCapacity; i < vector.getValueCapacity(); i++) {
+ if ((i & 1) == 0) {
+ vector.set(i, 1000 + i);
+ }
+ }
+
+ /* Check the vector output */
+ valueCapacity = vector.getValueCapacity();
+ for (int i = 0; i < valueCapacity; i++) {
+ if ((i & 1) == 0) {
+ assertEquals(1000 + i, vector.get(i));
+ } else {
+ assertTrue(vector.isNull(i));
+ }
+ }
+
+ /* trigger second realloc */
+ vector.setSafe(valueCapacity, 10000000);
+ assertTrue(vector.getValueCapacity() >= valueCapacity * 2);
+
+ /* populate the remaining vector */
+ for (int i = valueCapacity; i < vector.getValueCapacity(); i++) {
+ if ((i & 1) == 0) {
+ vector.set(i, 1000 + i);
+ }
+ }
+
+ /* Check the vector output */
+ valueCapacity = vector.getValueCapacity();
+ for (int i = 0; i < valueCapacity; i++) {
+ if ((i & 1) == 0) {
+ assertEquals(1000 + i, vector.get(i));
+ } else {
+ assertTrue(vector.isNull(i));
+ }
+ }
+
+ /* we are potentially working with 4x the size of vector buffer
+ * that we initially started with. Now let's transfer the vector.
+ */
+
+ TransferPair transferPair = vector.getTransferPair(allocator);
+ transferPair.transfer();
+ IntVector toVector = (IntVector) transferPair.getTo();
+ /* value capacity of source and target vectors should be same after
+ * the transfer.
+ */
+ assertEquals(valueCapacity, toVector.getValueCapacity());
+
+ for (int i = 0; i < valueCapacity; i++) {
+ if ((i & 1) == 0) {
+ assertEquals(1000 + i, toVector.get(i));
+ } else {
+ assertTrue(toVector.isNull(i));
+ }
+ }
+
+ toVector.close();
+ }
+ }
+
+ @Test
+ public void testReAllocFixedWidthVector() {
+ // Create a new value vector for 1024 integers
+ try (final Float4Vector vector = newVector(Float4Vector.class, EMPTY_SCHEMA_PATH, MinorType.FLOAT4, allocator)) {
+ vector.allocateNew(1024);
+
+ assertTrue(vector.getValueCapacity() >= 1024);
+ int initialCapacity = vector.getValueCapacity();
+
+ // Put values in indexes that fall within the initial allocation
+ vector.setSafe(0, 100.1f);
+ vector.setSafe(100, 102.3f);
+ vector.setSafe(1023, 104.5f);
+
+ // Now try to put values in space that falls beyond the initial allocation
+ vector.setSafe(2000, 105.5f);
+
+ // Check valueCapacity is more than initial allocation
+ assertTrue(vector.getValueCapacity() >= 2 * initialCapacity);
+
+ assertEquals(100.1f, vector.get(0), 0);
+ assertEquals(102.3f, vector.get(100), 0);
+ assertEquals(104.5f, vector.get(1023), 0);
+ assertEquals(105.5f, vector.get(2000), 0);
+
+ // Set the valueCount to be more than valueCapacity of current allocation. This is possible for ValueVectors
+ // as we don't call setSafe for null values, but we do call setValueCount when all values are inserted into the
+ // vector
+ vector.setValueCount(vector.getValueCapacity() + 200);
+ }
+ }
+
+ @Test
+ public void testReAllocVariableWidthVector() {
+ try (final VarCharVector vector = newVector(VarCharVector.class, EMPTY_SCHEMA_PATH, MinorType.VARCHAR, allocator)) {
+ vector.setInitialCapacity(4095);
+ vector.allocateNew();
+
+ int initialCapacity = vector.getValueCapacity();
+ assertTrue(initialCapacity >= 4095);
+
+ /* Put values in indexes that fall within the initial allocation */
+ vector.setSafe(0, STR1, 0, STR1.length);
+ vector.setSafe(initialCapacity - 1, STR2, 0, STR2.length);
+
+ /* the above set calls should NOT have triggered a realloc */
+ assertEquals(initialCapacity, vector.getValueCapacity());
+
+ /* Now try to put values in space that falls beyond the initial allocation */
+ vector.setSafe(initialCapacity + 200, STR3, 0, STR3.length);
+
+ /* Check valueCapacity is more than initial allocation */
+ assertTrue(initialCapacity * 2 <= vector.getValueCapacity());
+
+ assertArrayEquals(STR1, vector.get(0));
+ assertArrayEquals(STR2, vector.get(initialCapacity - 1));
+ assertArrayEquals(STR3, vector.get(initialCapacity + 200));
+
+ // Set the valueCount to be more than valueCapacity of current allocation. This is possible for ValueVectors
+ // as we don't call setSafe for null values, but we do call setValueCount when the current batch is processed.
+ vector.setValueCount(vector.getValueCapacity() + 200);
+ }
+ }
+
+ @Test
+ public void testFillEmptiesNotOverfill() {
+ try (final VarCharVector vector = newVector(VarCharVector.class, EMPTY_SCHEMA_PATH, MinorType.VARCHAR, allocator)) {
+ vector.setInitialCapacity(4095);
+ vector.allocateNew();
+
+ int initialCapacity = vector.getValueCapacity();
+ assertTrue(initialCapacity >= 4095);
+
+ vector.setSafe(4094, "hello".getBytes(), 0, 5);
+ /* the above set method should NOT have triggered a realloc */
+ assertEquals(initialCapacity, vector.getValueCapacity());
+
+ long bufSizeBefore = vector.getFieldBuffers().get(1).capacity();
+ vector.setValueCount(initialCapacity);
+ assertEquals(bufSizeBefore, vector.getFieldBuffers().get(1).capacity());
+ assertEquals(initialCapacity, vector.getValueCapacity());
+ }
+ }
+
+ @Test
+ public void testSetSafeWithArrowBufNoExcessAllocs() {
+ final int numValues = BaseFixedWidthVector.INITIAL_VALUE_ALLOCATION * 2;
+ final byte[] valueBytes = "hello world".getBytes();
+ final int valueBytesLength = valueBytes.length;
+ final int isSet = 1;
+
+ try (
+ final VarCharVector fromVector = newVector(VarCharVector.class, EMPTY_SCHEMA_PATH,
+ MinorType.VARCHAR, allocator);
+ final VarCharVector toVector = newVector(VarCharVector.class, EMPTY_SCHEMA_PATH,
+ MinorType.VARCHAR, allocator)) {
+ /*
+ * Populate the from vector with 'numValues' with byte-arrays, each of size 'valueBytesLength'.
+ */
+ fromVector.setInitialCapacity(numValues);
+ fromVector.allocateNew();
+ for (int i = 0; i < numValues; ++i) {
+ fromVector.setSafe(i, valueBytes, 0 /*start*/, valueBytesLength);
+ }
+ fromVector.setValueCount(numValues);
+ ArrowBuf fromDataBuffer = fromVector.getDataBuffer();
+ assertTrue(numValues * valueBytesLength <= fromDataBuffer.capacity());
+
+ /*
+ * Copy the entries one-by-one from 'fromVector' to 'toVector', but use the setSafe with
+ * ArrowBuf API (instead of setSafe with byte-array).
+ */
+ toVector.setInitialCapacity(numValues);
+ toVector.allocateNew();
+ for (int i = 0; i < numValues; i++) {
+ int start = fromVector.getStartOffset(i);
+ int end = fromVector.getStartOffset(i + 1);
+ toVector.setSafe(i, isSet, start, end, fromDataBuffer);
+ }
+
+ /*
+ * Since the 'fromVector' and 'toVector' have the same initial capacity, and were populated
+ * with the same varchar elements, the allocations and hence, the final capacity should be
+ * the same.
+ */
+ assertEquals(fromDataBuffer.capacity(), toVector.getDataBuffer().capacity());
+ }
+ }
+
+ @Test
+ public void testCopyFromWithNulls() {
+ try (final VarCharVector vector = newVector(VarCharVector.class, EMPTY_SCHEMA_PATH, MinorType.VARCHAR, allocator);
+ final VarCharVector vector2 =
+ newVector(VarCharVector.class, EMPTY_SCHEMA_PATH, MinorType.VARCHAR, allocator)) {
+
+ vector.setInitialCapacity(4095);
+ vector.allocateNew();
+ int capacity = vector.getValueCapacity();
+ assertTrue(capacity >= 4095);
+
+ for (int i = 0; i < capacity; i++) {
+ if (i % 3 == 0) {
+ continue;
+ }
+ byte[] b = Integer.toString(i).getBytes();
+ vector.setSafe(i, b, 0, b.length);
+ }
+
+ /* NO reAlloc() should have happened in setSafe() */
+ assertEquals(capacity, vector.getValueCapacity());
+
+ vector.setValueCount(capacity);
+
+ for (int i = 0; i < capacity; i++) {
+ if (i % 3 == 0) {
+ assertNull(vector.getObject(i));
+ } else {
+ assertEquals("unexpected value at index: " + i, Integer.toString(i), vector.getObject(i).toString());
+ }
+ }
+
+ vector2.setInitialCapacity(4095);
+ vector2.allocateNew();
+ int capacity2 = vector2.getValueCapacity();
+ assertEquals(capacity2, capacity);
+
+ for (int i = 0; i < capacity; i++) {
+ vector2.copyFromSafe(i, i, vector);
+ if (i % 3 == 0) {
+ assertNull(vector2.getObject(i));
+ } else {
+ assertEquals("unexpected value at index: " + i, Integer.toString(i), vector2.getObject(i).toString());
+ }
+ }
+
+ /* NO reAlloc() should have happened in copyFrom */
+ assertEquals(capacity, vector2.getValueCapacity());
+
+ vector2.setValueCount(capacity);
+
+ for (int i = 0; i < capacity; i++) {
+ if (i % 3 == 0) {
+ assertNull(vector2.getObject(i));
+ } else {
+ assertEquals("unexpected value at index: " + i, Integer.toString(i), vector2.getObject(i).toString());
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testCopyFromWithNulls1() {
+ try (final VarCharVector vector = newVector(VarCharVector.class, EMPTY_SCHEMA_PATH, MinorType.VARCHAR, allocator);
+ final VarCharVector vector2 =
+ newVector(VarCharVector.class, EMPTY_SCHEMA_PATH, MinorType.VARCHAR, allocator)) {
+
+ vector.setInitialCapacity(4095);
+ vector.allocateNew();
+ int capacity = vector.getValueCapacity();
+ assertTrue(capacity >= 4095);
+
+ for (int i = 0; i < capacity; i++) {
+ if (i % 3 == 0) {
+ continue;
+ }
+ byte[] b = Integer.toString(i).getBytes();
+ vector.setSafe(i, b, 0, b.length);
+ }
+
+ /* NO reAlloc() should have happened in setSafe() */
+ assertEquals(capacity, vector.getValueCapacity());
+
+ vector.setValueCount(capacity);
+
+ for (int i = 0; i < capacity; i++) {
+ if (i % 3 == 0) {
+ assertNull(vector.getObject(i));
+ } else {
+ assertEquals("unexpected value at index: " + i, Integer.toString(i), vector.getObject(i).toString());
+ }
+ }
+
+ /* set lesser initial capacity than actually needed
+ * to trigger reallocs in copyFromSafe()
+ */
+ vector2.allocateNew(1024 * 10, 1024);
+
+ int capacity2 = vector2.getValueCapacity();
+ assertTrue(capacity2 >= 1024);
+ assertTrue(capacity2 <= capacity);
+
+ for (int i = 0; i < capacity; i++) {
+ vector2.copyFromSafe(i, i, vector);
+ if (i % 3 == 0) {
+ assertNull(vector2.getObject(i));
+ } else {
+ assertEquals("unexpected value at index: " + i, Integer.toString(i), vector2.getObject(i).toString());
+ }
+ }
+
+ /* 2 reAllocs should have happened in copyFromSafe() */
+ assertEquals(capacity, vector2.getValueCapacity());
+
+ vector2.setValueCount(capacity);
+
+ for (int i = 0; i < capacity; i++) {
+ if (i % 3 == 0) {
+ assertNull(vector2.getObject(i));
+ } else {
+ assertEquals("unexpected value at index: " + i, Integer.toString(i), vector2.getObject(i).toString());
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testSetLastSetUsage() {
+ try (final VarCharVector vector = new VarCharVector("myvector", allocator)) {
+ vector.allocateNew(1024 * 10, 1024);
+
+ setBytes(0, STR1, vector);
+ setBytes(1, STR2, vector);
+ setBytes(2, STR3, vector);
+ setBytes(3, STR4, vector);
+ setBytes(4, STR5, vector);
+ setBytes(5, STR6, vector);
+
+ /* Check current lastSet */
+ assertEquals(-1, vector.getLastSet());
+
+ /* Check the vector output */
+ assertArrayEquals(STR1, vector.get(0));
+ assertArrayEquals(STR2, vector.get(1));
+ assertArrayEquals(STR3, vector.get(2));
+ assertArrayEquals(STR4, vector.get(3));
+ assertArrayEquals(STR5, vector.get(4));
+ assertArrayEquals(STR6, vector.get(5));
+
+ /*
+ * If we don't do setLastSe(5) before setValueCount(), then the latter will corrupt
+ * the value vector by filling in all positions [0,valuecount-1] will empty byte arrays.
+ * Run the test by commenting out next line and we should see incorrect vector output.
+ */
+ vector.setLastSet(5);
+ vector.setValueCount(20);
+
+ /* Check current lastSet */
+ assertEquals(19, vector.getLastSet());
+
+ /* Check the vector output again */
+ assertArrayEquals(STR1, vector.get(0));
+ assertArrayEquals(STR2, vector.get(1));
+ assertArrayEquals(STR3, vector.get(2));
+ assertArrayEquals(STR4, vector.get(3));
+ assertArrayEquals(STR5, vector.get(4));
+ assertArrayEquals(STR6, vector.get(5));
+
+ assertEquals(0, vector.getValueLength(6));
+ assertEquals(0, vector.getValueLength(7));
+ assertEquals(0, vector.getValueLength(8));
+ assertEquals(0, vector.getValueLength(9));
+ assertEquals(0, vector.getValueLength(10));
+ assertEquals(0, vector.getValueLength(11));
+ assertEquals(0, vector.getValueLength(12));
+ assertEquals(0, vector.getValueLength(13));
+ assertEquals(0, vector.getValueLength(14));
+ assertEquals(0, vector.getValueLength(15));
+ assertEquals(0, vector.getValueLength(16));
+ assertEquals(0, vector.getValueLength(17));
+ assertEquals(0, vector.getValueLength(18));
+ assertEquals(0, vector.getValueLength(19));
+
+ /* Check offsets */
+ assertEquals(0, vector.offsetBuffer.getInt(0 * BaseVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(6, vector.offsetBuffer.getInt(1 * BaseVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(16, vector.offsetBuffer.getInt(2 * BaseVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(21, vector.offsetBuffer.getInt(3 * BaseVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(30, vector.offsetBuffer.getInt(4 * BaseVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(34, vector.offsetBuffer.getInt(5 * BaseVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(40, vector.offsetBuffer.getInt(6 * BaseVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(40, vector.offsetBuffer.getInt(7 * BaseVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(40, vector.offsetBuffer.getInt(8 * BaseVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(40, vector.offsetBuffer.getInt(9 * BaseVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(40, vector.offsetBuffer.getInt(10 * BaseVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(40, vector.offsetBuffer.getInt(11 * BaseVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(40, vector.offsetBuffer.getInt(12 * BaseVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(40, vector.offsetBuffer.getInt(13 * BaseVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(40, vector.offsetBuffer.getInt(14 * BaseVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(40, vector.offsetBuffer.getInt(15 * BaseVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(40, vector.offsetBuffer.getInt(16 * BaseVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(40, vector.offsetBuffer.getInt(17 * BaseVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(40, vector.offsetBuffer.getInt(18 * BaseVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(40, vector.offsetBuffer.getInt(19 * BaseVariableWidthVector.OFFSET_WIDTH));
+
+ vector.set(19, STR6);
+ assertArrayEquals(STR6, vector.get(19));
+ assertEquals(40, vector.offsetBuffer.getInt(19 * BaseVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(46, vector.offsetBuffer.getInt(20 * BaseVariableWidthVector.OFFSET_WIDTH));
+ }
+ }
+
+ @Test
+ public void testVectorLoadUnload() {
+
+ try (final VarCharVector vector1 = new VarCharVector("myvector", allocator)) {
+
+ setVector(vector1, STR1, STR2, STR3, STR4, STR5, STR6);
+
+ assertEquals(5, vector1.getLastSet());
+ vector1.setValueCount(15);
+ assertEquals(14, vector1.getLastSet());
+
+ /* Check the vector output */
+ assertArrayEquals(STR1, vector1.get(0));
+ assertArrayEquals(STR2, vector1.get(1));
+ assertArrayEquals(STR3, vector1.get(2));
+ assertArrayEquals(STR4, vector1.get(3));
+ assertArrayEquals(STR5, vector1.get(4));
+ assertArrayEquals(STR6, vector1.get(5));
+
+ Field field = vector1.getField();
+ String fieldName = field.getName();
+
+ List<Field> fields = new ArrayList<>();
+ List<FieldVector> fieldVectors = new ArrayList<>();
+
+ fields.add(field);
+ fieldVectors.add(vector1);
+
+ Schema schema = new Schema(fields);
+
+ VectorSchemaRoot schemaRoot1 = new VectorSchemaRoot(schema, fieldVectors, vector1.getValueCount());
+ VectorUnloader vectorUnloader = new VectorUnloader(schemaRoot1);
+
+ try (
+ ArrowRecordBatch recordBatch = vectorUnloader.getRecordBatch();
+ BufferAllocator finalVectorsAllocator = allocator.newChildAllocator("new vector", 0, Long.MAX_VALUE);
+ VectorSchemaRoot schemaRoot2 = VectorSchemaRoot.create(schema, finalVectorsAllocator);
+ ) {
+
+ VectorLoader vectorLoader = new VectorLoader(schemaRoot2);
+ vectorLoader.load(recordBatch);
+
+ VarCharVector vector2 = (VarCharVector) schemaRoot2.getVector(fieldName);
+ /*
+ * lastSet would have internally been set by VectorLoader.load() when it invokes
+ * loadFieldBuffers.
+ */
+ assertEquals(14, vector2.getLastSet());
+ vector2.setValueCount(25);
+ assertEquals(24, vector2.getLastSet());
+
+ /* Check the vector output */
+ assertArrayEquals(STR1, vector2.get(0));
+ assertArrayEquals(STR2, vector2.get(1));
+ assertArrayEquals(STR3, vector2.get(2));
+ assertArrayEquals(STR4, vector2.get(3));
+ assertArrayEquals(STR5, vector2.get(4));
+ assertArrayEquals(STR6, vector2.get(5));
+ }
+ }
+ }
+
+ @Test
+ public void testFillEmptiesUsage() {
+ try (final VarCharVector vector = new VarCharVector("myvector", allocator)) {
+
+ vector.allocateNew(1024 * 10, 1024);
+
+ setBytes(0, STR1, vector);
+ setBytes(1, STR2, vector);
+ setBytes(2, STR3, vector);
+ setBytes(3, STR4, vector);
+ setBytes(4, STR5, vector);
+ setBytes(5, STR6, vector);
+
+ /* Check current lastSet */
+ assertEquals(-1, vector.getLastSet());
+
+ /* Check the vector output */
+ assertArrayEquals(STR1, vector.get(0));
+ assertArrayEquals(STR2, vector.get(1));
+ assertArrayEquals(STR3, vector.get(2));
+ assertArrayEquals(STR4, vector.get(3));
+ assertArrayEquals(STR5, vector.get(4));
+ assertArrayEquals(STR6, vector.get(5));
+
+ vector.setLastSet(5);
+ /* fill empty byte arrays from index [6, 9] */
+ vector.fillEmpties(10);
+
+ /* Check current lastSet */
+ assertEquals(9, vector.getLastSet());
+
+ /* Check the vector output */
+ assertArrayEquals(STR1, vector.get(0));
+ assertArrayEquals(STR2, vector.get(1));
+ assertArrayEquals(STR3, vector.get(2));
+ assertArrayEquals(STR4, vector.get(3));
+ assertArrayEquals(STR5, vector.get(4));
+ assertArrayEquals(STR6, vector.get(5));
+ assertEquals(0, vector.getValueLength(6));
+ assertEquals(0, vector.getValueLength(7));
+ assertEquals(0, vector.getValueLength(8));
+ assertEquals(0, vector.getValueLength(9));
+
+ setBytes(10, STR1, vector);
+ setBytes(11, STR2, vector);
+
+ vector.setLastSet(11);
+ /* fill empty byte arrays from index [12, 14] */
+ vector.setValueCount(15);
+
+ /* Check current lastSet */
+ assertEquals(14, vector.getLastSet());
+
+ /* Check the vector output */
+ assertArrayEquals(STR1, vector.get(0));
+ assertArrayEquals(STR2, vector.get(1));
+ assertArrayEquals(STR3, vector.get(2));
+ assertArrayEquals(STR4, vector.get(3));
+ assertArrayEquals(STR5, vector.get(4));
+ assertArrayEquals(STR6, vector.get(5));
+ assertEquals(0, vector.getValueLength(6));
+ assertEquals(0, vector.getValueLength(7));
+ assertEquals(0, vector.getValueLength(8));
+ assertEquals(0, vector.getValueLength(9));
+ assertArrayEquals(STR1, vector.get(10));
+ assertArrayEquals(STR2, vector.get(11));
+ assertEquals(0, vector.getValueLength(12));
+ assertEquals(0, vector.getValueLength(13));
+ assertEquals(0, vector.getValueLength(14));
+
+ /* Check offsets */
+ assertEquals(0,
+ vector.offsetBuffer.getInt(0 * BaseVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(6,
+ vector.offsetBuffer.getInt(1 * BaseVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(16,
+ vector.offsetBuffer.getInt(2 * BaseVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(21,
+ vector.offsetBuffer.getInt(3 * BaseVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(30,
+ vector.offsetBuffer.getInt(4 * BaseVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(34,
+ vector.offsetBuffer.getInt(5 * BaseVariableWidthVector.OFFSET_WIDTH));
+
+ assertEquals(40,
+ vector.offsetBuffer.getInt(6 * BaseVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(40,
+ vector.offsetBuffer.getInt(7 * BaseVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(40,
+ vector.offsetBuffer.getInt(8 * BaseVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(40,
+ vector.offsetBuffer.getInt(9 * BaseVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(40,
+ vector.offsetBuffer.getInt(10 * BaseVariableWidthVector.OFFSET_WIDTH));
+
+ assertEquals(46,
+ vector.offsetBuffer.getInt(11 * BaseVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(56,
+ vector.offsetBuffer.getInt(12 * BaseVariableWidthVector.OFFSET_WIDTH));
+
+ assertEquals(56,
+ vector.offsetBuffer.getInt(13 * BaseVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(56,
+ vector.offsetBuffer.getInt(14 * BaseVariableWidthVector.OFFSET_WIDTH));
+ assertEquals(56,
+ vector.offsetBuffer.getInt(15 * BaseVariableWidthVector.OFFSET_WIDTH));
+ }
+ }
+
+ @Test /* VarCharVector */
+ public void testGetBufferAddress1() {
+
+ try (final VarCharVector vector = new VarCharVector("myvector", allocator)) {
+
+ setVector(vector, STR1, STR2, STR3, STR4, STR5, STR6);
+ vector.setValueCount(15);
+
+ /* check the vector output */
+ assertArrayEquals(STR1, vector.get(0));
+ assertArrayEquals(STR2, vector.get(1));
+ assertArrayEquals(STR3, vector.get(2));
+ assertArrayEquals(STR4, vector.get(3));
+ assertArrayEquals(STR5, vector.get(4));
+ assertArrayEquals(STR6, vector.get(5));
+
+ List<ArrowBuf> buffers = vector.getFieldBuffers();
+ long bitAddress = vector.getValidityBufferAddress();
+ long offsetAddress = vector.getOffsetBufferAddress();
+ long dataAddress = vector.getDataBufferAddress();
+
+ assertEquals(3, buffers.size());
+ assertEquals(bitAddress, buffers.get(0).memoryAddress());
+ assertEquals(offsetAddress, buffers.get(1).memoryAddress());
+ assertEquals(dataAddress, buffers.get(2).memoryAddress());
+ }
+ }
+
+ @Test /* IntVector */
+ public void testGetBufferAddress2() {
+ try (final IntVector vector = new IntVector("myvector", allocator)) {
+ boolean error = false;
+ vector.allocateNew(16);
+
+ /* populate the vector */
+ for (int i = 0; i < 16; i += 2) {
+ vector.set(i, i + 10);
+ }
+
+ /* check the vector output */
+ for (int i = 0; i < 16; i += 2) {
+ assertEquals(i + 10, vector.get(i));
+ }
+
+ List<ArrowBuf> buffers = vector.getFieldBuffers();
+ long bitAddress = vector.getValidityBufferAddress();
+ long dataAddress = vector.getDataBufferAddress();
+
+ try {
+ long offsetAddress = vector.getOffsetBufferAddress();
+ } catch (UnsupportedOperationException ue) {
+ error = true;
+ } finally {
+ assertTrue(error);
+ }
+
+ assertEquals(2, buffers.size());
+ assertEquals(bitAddress, buffers.get(0).memoryAddress());
+ assertEquals(dataAddress, buffers.get(1).memoryAddress());
+ }
+ }
+
+ @Test
+ public void testMultipleClose() {
+ BufferAllocator vectorAllocator = allocator.newChildAllocator("vector_allocator", 0, Long.MAX_VALUE);
+ IntVector vector = newVector(IntVector.class, EMPTY_SCHEMA_PATH, MinorType.INT, vectorAllocator);
+ vector.close();
+ vectorAllocator.close();
+ vector.close();
+ vectorAllocator.close();
+ }
+
+ /* this method is used by the tests to bypass the vector set methods that manipulate
+ * lastSet. The method is to test the lastSet property and that's why we load the vector
+ * in a way that lastSet is not set automatically.
+ */
+ public static void setBytes(int index, byte[] bytes, VarCharVector vector) {
+ final int currentOffset = vector.offsetBuffer.getInt(index * BaseVariableWidthVector.OFFSET_WIDTH);
+
+ BitVectorHelper.setBit(vector.validityBuffer, index);
+ vector.offsetBuffer.setInt((index + 1) * BaseVariableWidthVector.OFFSET_WIDTH, currentOffset + bytes.length);
+ vector.valueBuffer.setBytes(currentOffset, bytes, 0, bytes.length);
+ }
+
+ @Test /* VarCharVector */
+ public void testSetInitialCapacity() {
+ try (final VarCharVector vector = new VarCharVector(EMPTY_SCHEMA_PATH, allocator)) {
+
+ /* use the default 8 data bytes on average per element */
+ int defaultCapacity = BaseValueVector.INITIAL_VALUE_ALLOCATION - 1;
+ vector.setInitialCapacity(defaultCapacity);
+ vector.allocateNew();
+ assertEquals(defaultCapacity, vector.getValueCapacity());
+ assertEquals(CommonUtil.nextPowerOfTwo(defaultCapacity * 8), vector.getDataBuffer().capacity());
+
+ vector.setInitialCapacity(defaultCapacity, 1);
+ vector.allocateNew();
+ assertEquals(defaultCapacity, vector.getValueCapacity());
+ assertEquals(CommonUtil.nextPowerOfTwo(defaultCapacity), vector.getDataBuffer().capacity());
+
+ vector.setInitialCapacity(defaultCapacity, 0.1);
+ vector.allocateNew();
+ assertEquals(defaultCapacity, vector.getValueCapacity());
+ assertEquals(CommonUtil.nextPowerOfTwo((int) (defaultCapacity * 0.1)), vector.getDataBuffer().capacity());
+
+ vector.setInitialCapacity(defaultCapacity, 0.01);
+ vector.allocateNew();
+ assertEquals(defaultCapacity, vector.getValueCapacity());
+ assertEquals(CommonUtil.nextPowerOfTwo((int) (defaultCapacity * 0.01)), vector.getDataBuffer().capacity());
+
+ vector.setInitialCapacity(5, 0.01);
+ vector.allocateNew();
+ assertEquals(5, vector.getValueCapacity());
+ assertEquals(2, vector.getDataBuffer().capacity());
+ }
+ }
+
+ @Test
+ public void testDefaultAllocNewAll() {
+ int defaultCapacity = BaseValueVector.INITIAL_VALUE_ALLOCATION;
+ int expectedSize;
+ long beforeSize;
+ try (BufferAllocator childAllocator = allocator.newChildAllocator("defaultAllocs", 0, Long.MAX_VALUE);
+ final IntVector intVector = new IntVector(EMPTY_SCHEMA_PATH, childAllocator);
+ final BigIntVector bigIntVector = new BigIntVector(EMPTY_SCHEMA_PATH, childAllocator);
+ final BitVector bitVector = new BitVector(EMPTY_SCHEMA_PATH, childAllocator);
+ final DecimalVector decimalVector = new DecimalVector(EMPTY_SCHEMA_PATH, childAllocator, 38, 6);
+ final VarCharVector varCharVector = new VarCharVector(EMPTY_SCHEMA_PATH, childAllocator)) {
+
+ // verify that the wastage is within bounds for IntVector.
+ beforeSize = childAllocator.getAllocatedMemory();
+ intVector.allocateNew();
+ assertTrue(intVector.getValueCapacity() >= defaultCapacity);
+ expectedSize = (defaultCapacity * IntVector.TYPE_WIDTH) +
+ BaseFixedWidthVector.getValidityBufferSizeFromCount(defaultCapacity);
+ assertTrue(childAllocator.getAllocatedMemory() - beforeSize <= expectedSize * 1.05);
+
+ // verify that the wastage is within bounds for BigIntVector.
+ beforeSize = childAllocator.getAllocatedMemory();
+ bigIntVector.allocateNew();
+ assertTrue(bigIntVector.getValueCapacity() >= defaultCapacity);
+ expectedSize = (defaultCapacity * bigIntVector.TYPE_WIDTH) +
+ BaseFixedWidthVector.getValidityBufferSizeFromCount(defaultCapacity);
+ assertTrue(childAllocator.getAllocatedMemory() - beforeSize <= expectedSize * 1.05);
+
+ // verify that the wastage is within bounds for DecimalVector.
+ beforeSize = childAllocator.getAllocatedMemory();
+ decimalVector.allocateNew();
+ assertTrue(decimalVector.getValueCapacity() >= defaultCapacity);
+ expectedSize = (defaultCapacity * decimalVector.TYPE_WIDTH) +
+ BaseFixedWidthVector.getValidityBufferSizeFromCount(defaultCapacity);
+ assertTrue(childAllocator.getAllocatedMemory() - beforeSize <= expectedSize * 1.05);
+
+ // verify that the wastage is within bounds for VarCharVector.
+ // var char vector have an offsets array that is 1 less than defaultCapacity
+ beforeSize = childAllocator.getAllocatedMemory();
+ varCharVector.allocateNew();
+ assertTrue(varCharVector.getValueCapacity() >= defaultCapacity - 1);
+ expectedSize = (defaultCapacity * VarCharVector.OFFSET_WIDTH) +
+ BaseFixedWidthVector.getValidityBufferSizeFromCount(defaultCapacity) +
+ defaultCapacity * 8;
+ // wastage should be less than 5%.
+ assertTrue(childAllocator.getAllocatedMemory() - beforeSize <= expectedSize * 1.05);
+
+ // verify that the wastage is within bounds for BitVector.
+ beforeSize = childAllocator.getAllocatedMemory();
+ bitVector.allocateNew();
+ assertTrue(bitVector.getValueCapacity() >= defaultCapacity);
+ expectedSize = BaseFixedWidthVector.getValidityBufferSizeFromCount(defaultCapacity) * 2;
+ assertTrue(childAllocator.getAllocatedMemory() - beforeSize <= expectedSize * 1.05);
+
+ }
+ }
+
+ @Test
+ public void testSetNullableVarCharHolder() {
+ try (VarCharVector vector = new VarCharVector("", allocator)) {
+ vector.allocateNew(100, 10);
+
+ NullableVarCharHolder nullHolder = new NullableVarCharHolder();
+ nullHolder.isSet = 0;
+
+ NullableVarCharHolder stringHolder = new NullableVarCharHolder();
+ stringHolder.isSet = 1;
+
+ String str = "hello";
+ ArrowBuf buf = allocator.buffer(16);
+ buf.setBytes(0, str.getBytes());
+
+ stringHolder.start = 0;
+ stringHolder.end = str.length();
+ stringHolder.buffer = buf;
+
+ vector.set(0, nullHolder);
+ vector.set(1, stringHolder);
+
+ // verify results
+ assertTrue(vector.isNull(0));
+ assertEquals(str, new String(vector.get(1)));
+
+ buf.close();
+ }
+ }
+
+ @Test
+ public void testSetNullableVarCharHolderSafe() {
+ try (VarCharVector vector = new VarCharVector("", allocator)) {
+ vector.allocateNew(5, 1);
+
+ NullableVarCharHolder nullHolder = new NullableVarCharHolder();
+ nullHolder.isSet = 0;
+
+ NullableVarCharHolder stringHolder = new NullableVarCharHolder();
+ stringHolder.isSet = 1;
+
+ String str = "hello world";
+ ArrowBuf buf = allocator.buffer(16);
+ buf.setBytes(0, str.getBytes());
+
+ stringHolder.start = 0;
+ stringHolder.end = str.length();
+ stringHolder.buffer = buf;
+
+ vector.setSafe(0, stringHolder);
+ vector.setSafe(1, nullHolder);
+
+ // verify results
+ assertEquals(str, new String(vector.get(0)));
+ assertTrue(vector.isNull(1));
+
+ buf.close();
+ }
+ }
+
+ @Test
+ public void testSetNullableVarBinaryHolder() {
+ try (VarBinaryVector vector = new VarBinaryVector("", allocator)) {
+ vector.allocateNew(100, 10);
+
+ NullableVarBinaryHolder nullHolder = new NullableVarBinaryHolder();
+ nullHolder.isSet = 0;
+
+ NullableVarBinaryHolder binHolder = new NullableVarBinaryHolder();
+ binHolder.isSet = 1;
+
+ String str = "hello";
+ ArrowBuf buf = allocator.buffer(16);
+ buf.setBytes(0, str.getBytes());
+
+ binHolder.start = 0;
+ binHolder.end = str.length();
+ binHolder.buffer = buf;
+
+ vector.set(0, nullHolder);
+ vector.set(1, binHolder);
+
+ // verify results
+ assertTrue(vector.isNull(0));
+ assertEquals(str, new String(vector.get(1)));
+
+ buf.close();
+ }
+ }
+
+ @Test
+ public void testSetNullableVarBinaryHolderSafe() {
+ try (VarBinaryVector vector = new VarBinaryVector("", allocator)) {
+ vector.allocateNew(5, 1);
+
+ NullableVarBinaryHolder nullHolder = new NullableVarBinaryHolder();
+ nullHolder.isSet = 0;
+
+ NullableVarBinaryHolder binHolder = new NullableVarBinaryHolder();
+ binHolder.isSet = 1;
+
+ String str = "hello world";
+ ArrowBuf buf = allocator.buffer(16);
+ buf.setBytes(0, str.getBytes());
+
+ binHolder.start = 0;
+ binHolder.end = str.length();
+ binHolder.buffer = buf;
+
+ vector.setSafe(0, binHolder);
+ vector.setSafe(1, nullHolder);
+
+ // verify results
+ assertEquals(str, new String(vector.get(0)));
+ assertTrue(vector.isNull(1));
+
+ buf.close();
+ }
+ }
+
+ @Test
+ public void testGetPointerFixedWidth() {
+ final int vectorLength = 100;
+ try (IntVector vec1 = new IntVector("vec1", allocator);
+ IntVector vec2 = new IntVector("vec2", allocator)) {
+ vec1.allocateNew(vectorLength);
+ vec2.allocateNew(vectorLength);
+
+ for (int i = 0; i < vectorLength; i++) {
+ if (i % 10 == 0) {
+ vec1.setNull(i);
+ vec2.setNull(i);
+ } else {
+ vec1.set(i, i * 1234);
+ vec2.set(i, i * 1234);
+ }
+ }
+
+ ArrowBufPointer ptr1 = new ArrowBufPointer();
+ ArrowBufPointer ptr2 = new ArrowBufPointer();
+
+ for (int i = 0; i < vectorLength; i++) {
+ vec1.getDataPointer(i, ptr1);
+ vec2.getDataPointer(i, ptr2);
+
+ if (i % 10 == 0) {
+ assertNull(ptr1.getBuf());
+ assertNull(ptr2.getBuf());
+ }
+
+ assertTrue(ptr1.equals(ptr2));
+ assertTrue(ptr2.equals(ptr2));
+ }
+ }
+ }
+
+ @Test
+ public void testGetPointerVariableWidth() {
+ final String[] sampleData = new String[]{
+ "abc", "123", "def", null, "hello", "aaaaa", "world", "2019", null, "0717"};
+
+ try (VarCharVector vec1 = new VarCharVector("vec1", allocator);
+ VarCharVector vec2 = new VarCharVector("vec2", allocator)) {
+ vec1.allocateNew(sampleData.length * 10, sampleData.length);
+ vec2.allocateNew(sampleData.length * 10, sampleData.length);
+
+ for (int i = 0; i < sampleData.length; i++) {
+ String str = sampleData[i];
+ if (str != null) {
+ vec1.set(i, sampleData[i].getBytes());
+ vec2.set(i, sampleData[i].getBytes());
+ } else {
+ vec1.setNull(i);
+ vec2.setNull(i);
+ }
+ }
+
+ ArrowBufPointer ptr1 = new ArrowBufPointer();
+ ArrowBufPointer ptr2 = new ArrowBufPointer();
+
+ for (int i = 0; i < sampleData.length; i++) {
+ vec1.getDataPointer(i, ptr1);
+ vec2.getDataPointer(i, ptr2);
+
+ assertTrue(ptr1.equals(ptr2));
+ assertTrue(ptr2.equals(ptr2));
+ }
+ }
+ }
+
+ @Test
+ public void testGetNullFromVariableWidthVector() {
+ try (final VarCharVector varCharVector = new VarCharVector("varcharvec", allocator);
+ final VarBinaryVector varBinaryVector = new VarBinaryVector("varbinary", allocator)) {
+ varCharVector.allocateNew(10, 1);
+ varBinaryVector.allocateNew(10, 1);
+
+ varCharVector.setNull(0);
+ varBinaryVector.setNull(0);
+
+ assertNull(varCharVector.get(0));
+ assertNull(varBinaryVector.get(0));
+ }
+ }
+
+ @Test
+ public void testZeroVectorEquals() {
+ try (final ZeroVector vector1 = new ZeroVector("vector");
+ final ZeroVector vector2 = new ZeroVector("vector")) {
+
+ VectorEqualsVisitor visitor = new VectorEqualsVisitor();
+ assertTrue(visitor.vectorEquals(vector1, vector2));
+ }
+ }
+
+ @Test
+ public void testZeroVectorNotEquals() {
+ try (final IntVector intVector = new IntVector("int", allocator);
+ final ZeroVector zeroVector = new ZeroVector("zero");
+ final ZeroVector zeroVector1 = new ZeroVector("zero1")) {
+
+ VectorEqualsVisitor zeroVisitor = new VectorEqualsVisitor();
+ assertFalse(zeroVisitor.vectorEquals(intVector, zeroVector));
+
+ VectorEqualsVisitor intVisitor = new VectorEqualsVisitor();
+ assertFalse(intVisitor.vectorEquals(zeroVector, intVector));
+
+ VectorEqualsVisitor twoZeroVisitor = new VectorEqualsVisitor();
+ // they are not equal because of distinct names
+ assertFalse(twoZeroVisitor.vectorEquals(zeroVector, zeroVector1));
+ }
+ }
+
+ @Test
+ public void testIntVectorEqualsWithNull() {
+ try (final IntVector vector1 = new IntVector("int", allocator);
+ final IntVector vector2 = new IntVector("int", allocator)) {
+
+ setVector(vector1, 1, 2);
+ setVector(vector2, 1, null);
+
+ VectorEqualsVisitor visitor = new VectorEqualsVisitor();
+
+ assertFalse(visitor.vectorEquals(vector1, vector2));
+ }
+ }
+
+ @Test
+ public void testIntVectorEquals() {
+ try (final IntVector vector1 = new IntVector("int", allocator);
+ final IntVector vector2 = new IntVector("int", allocator)) {
+
+ setVector(vector1, 1, 2, 3);
+ setVector(vector2, 1, 2, null);
+
+ VectorEqualsVisitor visitor = new VectorEqualsVisitor();
+
+ assertFalse(visitor.vectorEquals(vector1, vector2));
+
+ vector2.setValueCount(3);
+ vector2.setSafe(2, 2);
+ assertFalse(vector1.equals(vector2));
+
+ vector2.setSafe(2, 3);
+ assertTrue(visitor.vectorEquals(vector1, vector2));
+ }
+ }
+
+ @Test
+ public void testDecimalVectorEquals() {
+ try (final DecimalVector vector1 = new DecimalVector("decimal", allocator, 3, 3);
+ final DecimalVector vector2 = new DecimalVector("decimal", allocator, 3, 3);
+ final DecimalVector vector3 = new DecimalVector("decimal", allocator, 3, 2)) {
+
+ setVector(vector1, 100L, 200L);
+ setVector(vector2, 100L, 200L);
+ setVector(vector3, 100L, 200L);
+
+ VectorEqualsVisitor visitor1 = new VectorEqualsVisitor();
+ VectorEqualsVisitor visitor2 = new VectorEqualsVisitor();
+
+ assertTrue(visitor1.vectorEquals(vector1, vector2));
+ assertFalse(visitor2.vectorEquals(vector1, vector3));
+ }
+ }
+
+ @Test
+ public void testVarcharVectorEqualsWithNull() {
+ try (final VarCharVector vector1 = new VarCharVector("varchar", allocator);
+ final VarCharVector vector2 = new VarCharVector("varchar", allocator)) {
+
+ setVector(vector1, STR1, STR2);
+ setVector(vector2, STR1, null);
+
+ VectorEqualsVisitor visitor = new VectorEqualsVisitor();
+ assertFalse(visitor.vectorEquals(vector1, vector2));
+ }
+ }
+
+ @Test
+ public void testVarcharVectorEquals() {
+ try (final VarCharVector vector1 = new VarCharVector("varchar", allocator);
+ final VarCharVector vector2 = new VarCharVector("varchar", allocator)) {
+
+ setVector(vector1, STR1, STR2, STR3);
+ setVector(vector2, STR1, STR2);
+
+ VectorEqualsVisitor visitor = new VectorEqualsVisitor();
+ assertFalse(visitor.vectorEquals(vector1, vector2));
+
+ vector2.setSafe(2, STR3, 0, STR3.length);
+ vector2.setValueCount(3);
+ assertTrue(visitor.vectorEquals(vector1, vector2));
+ }
+ }
+
+ @Test
+ public void testVarBinaryVectorEquals() {
+ try (final VarBinaryVector vector1 = new VarBinaryVector("binary", allocator);
+ final VarBinaryVector vector2 = new VarBinaryVector("binary", allocator)) {
+
+ setVector(vector1, STR1, STR2, STR3);
+ setVector(vector2, STR1, STR2);
+
+ VectorEqualsVisitor visitor = new VectorEqualsVisitor();
+ assertFalse(visitor.vectorEquals(vector1, vector2));
+
+ vector2.setSafe(2, STR3, 0, STR3.length);
+ vector2.setValueCount(3);
+ assertTrue(visitor.vectorEquals(vector1, vector2));
+ }
+ }
+
+ @Test
+ public void testListVectorEqualsWithNull() {
+ try (final ListVector vector1 = ListVector.empty("list", allocator);
+ final ListVector vector2 = ListVector.empty("list", allocator);) {
+
+ UnionListWriter writer1 = vector1.getWriter();
+ writer1.allocate();
+
+ //set some values
+ writeListVector(writer1, new int[] {1, 2});
+ writeListVector(writer1, new int[] {3, 4});
+ writeListVector(writer1, new int[] {});
+ writer1.setValueCount(3);
+
+ UnionListWriter writer2 = vector2.getWriter();
+ writer2.allocate();
+
+ //set some values
+ writeListVector(writer2, new int[] {1, 2});
+ writeListVector(writer2, new int[] {3, 4});
+ writer2.setValueCount(3);
+
+ VectorEqualsVisitor visitor = new VectorEqualsVisitor();
+
+ assertFalse(visitor.vectorEquals(vector1, vector2));
+ }
+ }
+
+ @Test
+ public void testListVectorEquals() {
+ try (final ListVector vector1 = ListVector.empty("list", allocator);
+ final ListVector vector2 = ListVector.empty("list", allocator);) {
+
+ UnionListWriter writer1 = vector1.getWriter();
+ writer1.allocate();
+
+ //set some values
+ writeListVector(writer1, new int[] {1, 2});
+ writeListVector(writer1, new int[] {3, 4});
+ writeListVector(writer1, new int[] {5, 6});
+ writer1.setValueCount(3);
+
+ UnionListWriter writer2 = vector2.getWriter();
+ writer2.allocate();
+
+ //set some values
+ writeListVector(writer2, new int[] {1, 2});
+ writeListVector(writer2, new int[] {3, 4});
+ writer2.setValueCount(2);
+
+ VectorEqualsVisitor visitor = new VectorEqualsVisitor();
+ assertFalse(visitor.vectorEquals(vector1, vector2));
+
+ writeListVector(writer2, new int[] {5, 6});
+ writer2.setValueCount(3);
+
+ assertTrue(visitor.vectorEquals(vector1, vector2));
+ }
+ }
+
+ @Test
+ public void testStructVectorEqualsWithNull() {
+
+ try (final StructVector vector1 = StructVector.empty("struct", allocator);
+ final StructVector vector2 = StructVector.empty("struct", allocator);) {
+ vector1.addOrGet("f0", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class);
+ vector1.addOrGet("f1", FieldType.nullable(new ArrowType.Int(64, true)), BigIntVector.class);
+ vector2.addOrGet("f0", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class);
+ vector2.addOrGet("f1", FieldType.nullable(new ArrowType.Int(64, true)), BigIntVector.class);
+
+ NullableStructWriter writer1 = vector1.getWriter();
+ writer1.allocate();
+
+ writeStructVector(writer1, 1, 10L);
+ writeStructVector(writer1, 2, 20L);
+ writeStructVector(writer1, 3, 30L);
+ writer1.setValueCount(3);
+
+ NullableStructWriter writer2 = vector2.getWriter();
+ writer2.allocate();
+
+ writeStructVector(writer2, 1, 10L);
+ writeStructVector(writer2, 3, 30L);
+ writer2.setValueCount(3);
+
+ VectorEqualsVisitor visitor = new VectorEqualsVisitor();
+ assertFalse(visitor.vectorEquals(vector1, vector2));
+ }
+ }
+
+ @Test
+ public void testStructVectorEquals() {
+ try (final StructVector vector1 = StructVector.empty("struct", allocator);
+ final StructVector vector2 = StructVector.empty("struct", allocator);) {
+ vector1.addOrGet("f0", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class);
+ vector1.addOrGet("f1", FieldType.nullable(new ArrowType.Int(64, true)), BigIntVector.class);
+ vector2.addOrGet("f0", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class);
+ vector2.addOrGet("f1", FieldType.nullable(new ArrowType.Int(64, true)), BigIntVector.class);
+
+ NullableStructWriter writer1 = vector1.getWriter();
+ writer1.allocate();
+
+ writeStructVector(writer1, 1, 10L);
+ writeStructVector(writer1, 2, 20L);
+ writeStructVector(writer1, 3, 30L);
+ writer1.setValueCount(3);
+
+ NullableStructWriter writer2 = vector2.getWriter();
+ writer2.allocate();
+
+ writeStructVector(writer2, 1, 10L);
+ writeStructVector(writer2, 2, 20L);
+ writer2.setValueCount(2);
+
+ VectorEqualsVisitor visitor = new VectorEqualsVisitor();
+ assertFalse(visitor.vectorEquals(vector1, vector2));
+
+ writeStructVector(writer2, 3, 30L);
+ writer2.setValueCount(3);
+
+ assertTrue(visitor.vectorEquals(vector1, vector2));
+ }
+ }
+
+ @Test
+ public void testStructVectorEqualsWithDiffChild() {
+ try (final StructVector vector1 = StructVector.empty("struct", allocator);
+ final StructVector vector2 = StructVector.empty("struct", allocator);) {
+ vector1.addOrGet("f0", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class);
+ vector1.addOrGet("f1", FieldType.nullable(new ArrowType.Int(64, true)), BigIntVector.class);
+ vector2.addOrGet("f0", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class);
+ vector2.addOrGet("f10", FieldType.nullable(new ArrowType.Int(64, true)), BigIntVector.class);
+
+ NullableStructWriter writer1 = vector1.getWriter();
+ writer1.allocate();
+
+ writeStructVector(writer1, 1, 10L);
+ writeStructVector(writer1, 2, 20L);
+ writer1.setValueCount(2);
+
+ NullableStructWriter writer2 = vector2.getWriter();
+ writer2.allocate();
+
+ writeStructVector(writer2, 1, 10L);
+ writeStructVector(writer2, 2, 20L);
+ writer2.setValueCount(2);
+
+ VectorEqualsVisitor visitor = new VectorEqualsVisitor();
+ assertFalse(visitor.vectorEquals(vector1, vector2));
+ }
+ }
+
+ @Test
+ public void testUnionVectorEquals() {
+ try (final UnionVector vector1 = new UnionVector("union", allocator, /* field type */ null, /* call-back */ null);
+ final UnionVector vector2 = new UnionVector("union", allocator, /* field type */ null, /* call-back */ null);) {
+
+ final NullableUInt4Holder uInt4Holder = new NullableUInt4Holder();
+ uInt4Holder.value = 10;
+ uInt4Holder.isSet = 1;
+
+ final NullableIntHolder intHolder = new NullableIntHolder();
+ uInt4Holder.value = 20;
+ uInt4Holder.isSet = 1;
+
+ vector1.setType(0, Types.MinorType.UINT4);
+ vector1.setSafe(0, uInt4Holder);
+
+ vector1.setType(1, Types.MinorType.INT);
+ vector1.setSafe(1, intHolder);
+ vector1.setValueCount(2);
+
+ vector2.setType(0, Types.MinorType.UINT4);
+ vector2.setSafe(0, uInt4Holder);
+
+ vector2.setType(1, Types.MinorType.INT);
+ vector2.setSafe(1, intHolder);
+ vector2.setValueCount(2);
+
+ VectorEqualsVisitor visitor = new VectorEqualsVisitor();
+ assertTrue(visitor.vectorEquals(vector1, vector2));
+ }
+ }
+
+ @Test(expected = IllegalArgumentException.class)
+ public void testEqualsWithIndexOutOfRange() {
+ try (final IntVector vector1 = new IntVector("int", allocator);
+ final IntVector vector2 = new IntVector("int", allocator)) {
+
+ setVector(vector1, 1, 2);
+ setVector(vector2, 1, 2);
+
+ assertTrue(new RangeEqualsVisitor(vector1, vector2).rangeEquals(new Range(2, 3, 1)));
+ }
+ }
+
+ @Test
+ public void testFixedWidthVectorNullHashCode() {
+ try (IntVector intVec = new IntVector("int vector", allocator)) {
+ intVec.allocateNew(1);
+ intVec.setValueCount(1);
+
+ intVec.set(0, 100);
+ intVec.setNull(0);
+
+ assertEquals(0, intVec.hashCode(0));
+ }
+ }
+
+ @Test
+ public void testVariableWidthVectorNullHashCode() {
+ try (VarCharVector varChVec = new VarCharVector("var char vector", allocator)) {
+ varChVec.allocateNew(100, 1);
+ varChVec.setValueCount(1);
+
+ varChVec.set(0, "abc".getBytes());
+ varChVec.setNull(0);
+
+ assertEquals(0, varChVec.hashCode(0));
+ }
+ }
+
+ @Test
+ public void testUnionNullHashCode() {
+ try (UnionVector srcVector =
+ new UnionVector(EMPTY_SCHEMA_PATH, allocator, /* field type */ null, /* call-back */ null)) {
+ srcVector.allocateNew();
+
+ final NullableIntHolder holder = new NullableIntHolder();
+ holder.isSet = 0;
+
+ // write some data
+ srcVector.setType(0, MinorType.INT);
+ srcVector.setSafe(0, holder);
+
+ assertEquals(0, srcVector.hashCode(0));
+ }
+ }
+
+ @Test
+ public void testToString() {
+ try (final IntVector intVector = new IntVector("intVector", allocator);
+ final ListVector listVector = ListVector.empty("listVector", allocator);
+ final StructVector structVector = StructVector.empty("structVector", allocator)) {
+
+ // validate intVector toString
+ assertEquals("[]", intVector.toString());
+ intVector.setValueCount(3);
+ intVector.setSafe(0, 1);
+ intVector.setSafe(1, 2);
+ intVector.setSafe(2, 3);
+ assertEquals("[1, 2, 3]", intVector.toString());
+
+ // validate intVector with plenty values
+ intVector.setValueCount(100);
+ for (int i = 0; i < 100; i++) {
+ intVector.setSafe(i, i);
+ }
+ assertEquals("[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, ... 90, 91, 92, 93, 94, 95, 96, 97, 98, 99]",
+ intVector.toString());
+
+ // validate listVector toString
+ listVector.allocateNewSafe();
+ listVector.initializeChildrenFromFields(
+ Collections.singletonList(Field.nullable("child", ArrowType.Utf8.INSTANCE)));
+ VarCharVector dataVector = (VarCharVector) listVector.getDataVector();
+
+ listVector.startNewValue(0);
+ dataVector.setSafe(0, "aaa".getBytes(StandardCharsets.UTF_8));
+ dataVector.setSafe(1, "bbb".getBytes(StandardCharsets.UTF_8));
+ listVector.endValue(0, 2);
+
+ listVector.startNewValue(1);
+ dataVector.setSafe(2, "ccc".getBytes(StandardCharsets.UTF_8));
+ dataVector.setSafe(3, "ddd".getBytes(StandardCharsets.UTF_8));
+ listVector.endValue(1, 2);
+ listVector.setValueCount(2);
+
+ assertEquals("[[\"aaa\",\"bbb\"], [\"ccc\",\"ddd\"]]", listVector.toString());
+
+ // validate structVector toString
+ structVector.addOrGet("f0", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class);
+ structVector.addOrGet("f1", FieldType.nullable(new ArrowType.Int(64, true)), BigIntVector.class);
+
+ NullableStructWriter structWriter = structVector.getWriter();
+ structWriter.allocate();
+
+ writeStructVector(structWriter, 1, 10L);
+ writeStructVector(structWriter, 2, 20L);
+ structWriter.setValueCount(2);
+
+ assertEquals("[{\"f0\":1,\"f1\":10}, {\"f0\":2,\"f1\":20}]", structVector.toString());
+ }
+ }
+
+ @Test
+ public void testUInt1VectorToString() {
+ try (final UInt1Vector uInt1Vector = new UInt1Vector("uInt1Vector", allocator)) {
+ setVector(uInt1Vector, (byte) 0xff);
+ assertEquals("[255]", uInt1Vector.toString());
+ }
+ }
+
+ @Test
+ public void testUInt2VectorToString() {
+ try (final UInt2Vector uInt2Vector = new UInt2Vector("uInt2Vector", allocator)) {
+ setVector(uInt2Vector, (char) 0xffff);
+ assertEquals("[65535]", uInt2Vector.toString());
+ }
+ }
+
+ @Test
+ public void testUInt4VectorToString() {
+ try (final UInt4Vector uInt4Vector = new UInt4Vector("uInt4Vector", allocator)) {
+ setVector(uInt4Vector, 0xffffffff);
+ assertEquals("[4294967295]", uInt4Vector.toString());
+ }
+ }
+
+ @Test
+ public void testUInt8VectorToString() {
+ try (final UInt8Vector uInt8Vector = new UInt8Vector("uInt8Vector", allocator)) {
+ setVector(uInt8Vector, 0xffffffffffffffffL);
+ assertEquals("[18446744073709551615]", uInt8Vector.toString());
+ }
+ }
+
+ @Test
+ public void testUnloadVariableWidthVector() {
+ try (final VarCharVector varCharVector = new VarCharVector("var char", allocator)) {
+ varCharVector.allocateNew(5, 2);
+ varCharVector.setValueCount(2);
+
+ varCharVector.set(0, "abcd".getBytes());
+
+ List<ArrowBuf> bufs = varCharVector.getFieldBuffers();
+ assertEquals(3, bufs.size());
+
+ ArrowBuf offsetBuf = bufs.get(1);
+ ArrowBuf dataBuf = bufs.get(2);
+
+ assertEquals(12, offsetBuf.writerIndex());
+ assertEquals(4, offsetBuf.getInt(4));
+ assertEquals(4, offsetBuf.getInt(8));
+
+ assertEquals(4, dataBuf.writerIndex());
+ }
+ }
+
+ private void writeStructVector(NullableStructWriter writer, int value1, long value2) {
+ writer.start();
+ writer.integer("f0").writeInt(value1);
+ writer.bigInt("f1").writeBigInt(value2);
+ writer.end();
+ }
+
+ private void writeListVector(UnionListWriter writer, int[] values) {
+ writer.startList();
+ for (int v: values) {
+ writer.integer().writeInt(v);
+ }
+ writer.endList();
+ }
+
+ @Test
+ public void testVariableVectorGetEndOffset() {
+ try (final VarCharVector vector1 = new VarCharVector("v1", allocator);
+ final VarBinaryVector vector2 = new VarBinaryVector("v2", allocator)) {
+
+ setVector(vector1, STR1, null, STR2);
+ setVector(vector2, STR1, STR2, STR3);
+
+ assertEquals(0, vector1.getStartOffset(0));
+ assertEquals(STR1.length, vector1.getEndOffset(0));
+ assertEquals(STR1.length, vector1.getStartOffset(1));
+ assertEquals(STR1.length, vector1.getEndOffset(1));
+ assertEquals(STR1.length, vector1.getStartOffset(2));
+ assertEquals(STR1.length + STR2.length, vector1.getEndOffset(2));
+
+ assertEquals(0, vector2.getStartOffset(0));
+ assertEquals(STR1.length, vector2.getEndOffset(0));
+ assertEquals(STR1.length, vector2.getStartOffset(1));
+ assertEquals(STR1.length + STR2.length, vector2.getEndOffset(1));
+ assertEquals(STR1.length + STR2.length, vector2.getStartOffset(2));
+ assertEquals(STR1.length + STR2.length + STR3.length, vector2.getEndOffset(2));
+ }
+ }
+
+ @Test
+ public void testEmptyBufBehavior() {
+ final int valueCount = 10;
+
+ try (final IntVector vector = new IntVector("v", allocator)) {
+ assertEquals(1, vector.getDataBuffer().refCnt());
+ assertEquals(1, vector.getValidityBuffer().refCnt());
+ assertEquals(0, vector.getDataBuffer().capacity());
+ assertEquals(0, vector.getValidityBuffer().capacity());
+
+ vector.allocateNew(valueCount);
+ assertEquals(2, vector.getDataBuffer().refCnt());
+ assertEquals(2, vector.getValidityBuffer().refCnt());
+ assertEquals(56, vector.getDataBuffer().capacity());
+ assertEquals(8, vector.getValidityBuffer().capacity());
+
+ vector.close();
+ assertEquals(1, vector.getDataBuffer().refCnt());
+ assertEquals(1, vector.getValidityBuffer().refCnt());
+ assertEquals(0, vector.getDataBuffer().capacity());
+ assertEquals(0, vector.getValidityBuffer().capacity());
+ }
+
+ try (final VarCharVector vector = new VarCharVector("v", allocator)) {
+ assertEquals(1, vector.getDataBuffer().refCnt());
+ assertEquals(1, vector.getValidityBuffer().refCnt());
+ assertEquals(1, vector.getOffsetBuffer().refCnt());
+ assertEquals(0, vector.getDataBuffer().capacity());
+ assertEquals(0, vector.getValidityBuffer().capacity());
+ assertEquals(0, vector.getOffsetBuffer().capacity());
+
+ vector.allocateNew(valueCount);
+ assertEquals(1, vector.getDataBuffer().refCnt());
+ assertEquals(2, vector.getValidityBuffer().refCnt());
+ assertEquals(2, vector.getOffsetBuffer().refCnt());
+ assertEquals(32768, vector.getDataBuffer().capacity());
+ assertEquals(8, vector.getValidityBuffer().capacity());
+ assertEquals(56, vector.getOffsetBuffer().capacity());
+
+ vector.close();
+ assertEquals(1, vector.getDataBuffer().refCnt());
+ assertEquals(1, vector.getValidityBuffer().refCnt());
+ assertEquals(1, vector.getOffsetBuffer().refCnt());
+ assertEquals(0, vector.getDataBuffer().capacity());
+ assertEquals(0, vector.getValidityBuffer().capacity());
+ assertEquals(0, vector.getOffsetBuffer().capacity());
+ }
+
+ try (final ListVector vector = ListVector.empty("v", allocator)) {
+ assertEquals(1, vector.getValidityBuffer().refCnt());
+ assertEquals(1, vector.getOffsetBuffer().refCnt());
+ assertEquals(0, vector.getValidityBuffer().capacity());
+ assertEquals(0, vector.getOffsetBuffer().capacity());
+
+ vector.setValueCount(valueCount);
+ vector.allocateNewSafe();
+ assertEquals(1, vector.getValidityBuffer().refCnt());
+ assertEquals(1, vector.getOffsetBuffer().refCnt());
+ assertEquals(512, vector.getValidityBuffer().capacity());
+ assertEquals(16384, vector.getOffsetBuffer().capacity());
+
+ vector.close();
+ assertEquals(1, vector.getValidityBuffer().refCnt());
+ assertEquals(1, vector.getOffsetBuffer().refCnt());
+ assertEquals(0, vector.getValidityBuffer().capacity());
+ assertEquals(0, vector.getOffsetBuffer().capacity());
+ }
+
+ try (final FixedSizeListVector vector = FixedSizeListVector.empty("v", 2, allocator)) {
+ assertEquals(1, vector.getValidityBuffer().refCnt());
+ assertEquals(0, vector.getValidityBuffer().capacity());
+
+ vector.setValueCount(10);
+ vector.allocateNewSafe();
+ assertEquals(1, vector.getValidityBuffer().refCnt());
+ assertEquals(512, vector.getValidityBuffer().capacity());
+
+ vector.close();
+ assertEquals(1, vector.getValidityBuffer().refCnt());
+ assertEquals(0, vector.getValidityBuffer().capacity());
+ }
+
+ try (final StructVector vector = StructVector.empty("v", allocator)) {
+ assertEquals(1, vector.getValidityBuffer().refCnt());
+ assertEquals(0, vector.getValidityBuffer().capacity());
+
+ vector.setValueCount(valueCount);
+ vector.allocateNewSafe();
+ assertEquals(1, vector.getValidityBuffer().refCnt());
+ assertEquals(512, vector.getValidityBuffer().capacity());
+
+ vector.close();
+ assertEquals(1, vector.getValidityBuffer().refCnt());
+ assertEquals(0, vector.getValidityBuffer().capacity());
+ }
+
+ try (final UnionVector vector = UnionVector.empty("v", allocator)) {
+ assertEquals(1, vector.getTypeBuffer().refCnt());
+ assertEquals(0, vector.getTypeBuffer().capacity());
+
+ vector.setValueCount(10);
+ vector.allocateNewSafe();
+ assertEquals(1, vector.getTypeBuffer().refCnt());
+ assertEquals(4096, vector.getTypeBuffer().capacity());
+
+ vector.close();
+ assertEquals(1, vector.getTypeBuffer().refCnt());
+ assertEquals(0, vector.getTypeBuffer().capacity());
+ }
+
+ try (final DenseUnionVector vector = DenseUnionVector.empty("v", allocator)) {
+ assertEquals(1, vector.getTypeBuffer().refCnt());
+ assertEquals(1, vector.getOffsetBuffer().refCnt());
+ assertEquals(0, vector.getTypeBuffer().capacity());
+ assertEquals(0, vector.getOffsetBuffer().capacity());
+
+ vector.setValueCount(valueCount);
+ vector.allocateNew();
+ assertEquals(1, vector.getTypeBuffer().refCnt());
+ assertEquals(1, vector.getOffsetBuffer().refCnt());
+ assertEquals(4096, vector.getTypeBuffer().capacity());
+ assertEquals(16384, vector.getOffsetBuffer().capacity());
+
+ vector.close();
+ assertEquals(1, vector.getTypeBuffer().refCnt());
+ assertEquals(1, vector.getOffsetBuffer().refCnt());
+ assertEquals(0, vector.getTypeBuffer().capacity());
+ assertEquals(0, vector.getOffsetBuffer().capacity());
+ }
+ }
+
+ @Test
+ public void testSetGetUInt1() {
+ try (UInt1Vector vector = new UInt1Vector("vector", allocator)) {
+ vector.allocateNew(2);
+
+ vector.setWithPossibleTruncate(0, UInt1Vector.MAX_UINT1);
+ vector.setUnsafeWithPossibleTruncate(1, UInt1Vector.MAX_UINT1);
+ vector.setValueCount(2);
+
+ assertEquals(UInt1Vector.MAX_UINT1 & UInt1Vector.PROMOTION_MASK, vector.getValueAsLong(0));
+ assertEquals(UInt1Vector.MAX_UINT1 & UInt1Vector.PROMOTION_MASK, vector.getValueAsLong(1));
+ }
+ }
+
+ @Test
+ public void testSetGetUInt2() {
+ try (UInt2Vector vector = new UInt2Vector("vector", allocator)) {
+ vector.allocateNew(2);
+
+ vector.setWithPossibleTruncate(0, UInt2Vector.MAX_UINT2);
+ vector.setUnsafeWithPossibleTruncate(1, UInt2Vector.MAX_UINT2);
+ vector.setValueCount(2);
+
+ assertEquals(UInt2Vector.MAX_UINT2, vector.getValueAsLong(0));
+ assertEquals(UInt2Vector.MAX_UINT2, vector.getValueAsLong(1));
+ }
+ }
+
+ @Test
+ public void testSetGetUInt4() {
+ try (UInt4Vector vector = new UInt4Vector("vector", allocator)) {
+ vector.allocateNew(2);
+
+ vector.setWithPossibleTruncate(0, UInt4Vector.MAX_UINT4);
+ vector.setUnsafeWithPossibleTruncate(1, UInt4Vector.MAX_UINT4);
+ vector.setValueCount(2);
+
+ long expected = UInt4Vector.MAX_UINT4 & UInt4Vector.PROMOTION_MASK;
+ assertEquals(expected, vector.getValueAsLong(0));
+ assertEquals(expected, vector.getValueAsLong(1));
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestVarCharListVector.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestVarCharListVector.java
new file mode 100644
index 000000000..a9b155499
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestVarCharListVector.java
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.complex.ListVector;
+import org.apache.arrow.vector.complex.impl.UnionListWriter;
+import org.apache.arrow.vector.types.Types;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestVarCharListVector {
+
+ private BufferAllocator allocator;
+
+ @Before
+ public void init() {
+ allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100);
+ }
+
+ @After
+ public void terminate() throws Exception {
+ allocator.close();
+ }
+
+ @Test
+ public void testVarCharListWithNulls() {
+ byte[] bytes = "a".getBytes();
+ try (ListVector vector = new ListVector("VarList", allocator, FieldType.nullable(Types
+ .MinorType.VARCHAR.getType()), null);
+ ArrowBuf tempBuf = allocator.buffer(bytes.length)) {
+ UnionListWriter writer = vector.getWriter();
+ writer.allocate();
+
+ // populate input vector with the following records
+ // ["a"]
+ // null
+ // ["b"]
+ writer.setPosition(0); // optional
+ writer.startList();
+ tempBuf.setBytes(0, bytes);
+ writer.writeVarChar(0, bytes.length, tempBuf);
+ writer.endList();
+
+ writer.setPosition(2);
+ writer.startList();
+ bytes = "b".getBytes();
+ tempBuf.setBytes(0, bytes);
+ writer.writeVarChar(0, bytes.length, tempBuf);
+ writer.endList();
+
+ writer.setValueCount(2);
+
+ Assert.assertTrue(vector.getValueCount() == 2);
+ Assert.assertTrue(vector.getDataVector().getValueCount() == 2);
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestVectorAlloc.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestVectorAlloc.java
new file mode 100644
index 000000000..dfc75ec8e
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestVectorAlloc.java
@@ -0,0 +1,169 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.util.Arrays;
+import java.util.Collections;
+
+import org.apache.arrow.memory.AllocationListener;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.memory.rounding.DefaultRoundingPolicy;
+import org.apache.arrow.memory.rounding.RoundingPolicy;
+import org.apache.arrow.vector.types.TimeUnit;
+import org.apache.arrow.vector.types.Types;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.ArrowType.Decimal;
+import org.apache.arrow.vector.types.pojo.ArrowType.Duration;
+import org.apache.arrow.vector.types.pojo.ArrowType.FixedSizeBinary;
+import org.apache.arrow.vector.types.pojo.ArrowType.Timestamp;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestVectorAlloc {
+ private BufferAllocator rootAllocator;
+
+ private BufferAllocator policyAllocator;
+
+ @Before
+ public void init() {
+ rootAllocator = new RootAllocator(Long.MAX_VALUE);
+ policyAllocator =
+ new RootAllocator(AllocationListener.NOOP, Integer.MAX_VALUE, new CustomPolicy());
+ }
+
+ @After
+ public void terminate() throws Exception {
+ rootAllocator.close();
+ policyAllocator.close();
+ }
+
+ private static Field field(String name, ArrowType type) {
+ return new Field(name, new FieldType(true, type, null), Collections.emptyList());
+ }
+
+ @Test
+ public void testVectorAllocWithField() {
+ Schema schema = new Schema(Arrays.asList(
+ field("TINYINT", MinorType.TINYINT.getType()),
+ field("SMALLINT", MinorType.SMALLINT.getType()),
+ field("INT", MinorType.INT.getType()),
+ field("BIGINT", MinorType.BIGINT.getType()),
+ field("UINT1", MinorType.UINT1.getType()),
+ field("UINT2", MinorType.UINT2.getType()),
+ field("UINT4", MinorType.UINT4.getType()),
+ field("UINT8", MinorType.UINT8.getType()),
+ field("FLOAT4", MinorType.FLOAT4.getType()),
+ field("FLOAT8", MinorType.FLOAT8.getType()),
+ field("UTF8", MinorType.VARCHAR.getType()),
+ field("VARBINARY", MinorType.VARBINARY.getType()),
+ field("BIT", MinorType.BIT.getType()),
+ field("DECIMAL", new Decimal(38, 5, 128)),
+ field("FIXEDSIZEBINARY", new FixedSizeBinary(50)),
+ field("DATEDAY", MinorType.DATEDAY.getType()),
+ field("DATEMILLI", MinorType.DATEMILLI.getType()),
+ field("TIMESEC", MinorType.TIMESEC.getType()),
+ field("TIMEMILLI", MinorType.TIMEMILLI.getType()),
+ field("TIMEMICRO", MinorType.TIMEMICRO.getType()),
+ field("TIMENANO", MinorType.TIMENANO.getType()),
+ field("TIMESTAMPSEC", MinorType.TIMESTAMPSEC.getType()),
+ field("TIMESTAMPMILLI", MinorType.TIMESTAMPMILLI.getType()),
+ field("TIMESTAMPMICRO", MinorType.TIMESTAMPMICRO.getType()),
+ field("TIMESTAMPNANO", MinorType.TIMESTAMPNANO.getType()),
+ field("TIMESTAMPSECTZ", new Timestamp(TimeUnit.SECOND, "PST")),
+ field("TIMESTAMPMILLITZ", new Timestamp(TimeUnit.MILLISECOND, "PST")),
+ field("TIMESTAMPMICROTZ", new Timestamp(TimeUnit.MICROSECOND, "PST")),
+ field("TIMESTAMPNANOTZ", new Timestamp(TimeUnit.NANOSECOND, "PST")),
+ field("INTERVALDAY", MinorType.INTERVALDAY.getType()),
+ field("INTERVALYEAR", MinorType.INTERVALYEAR.getType()),
+ field("DURATION", new Duration(TimeUnit.MILLISECOND))
+ ));
+
+ try (BufferAllocator allocator = rootAllocator.newChildAllocator("child", 0, Long.MAX_VALUE)) {
+ for (Field field : schema.getFields()) {
+ try (FieldVector vector = field.createVector(allocator)) {
+ assertEquals(vector.getMinorType(),
+ Types.getMinorTypeForArrowType(field.getFieldType().getType()));
+ vector.allocateNew();
+ }
+ }
+ }
+ }
+
+ private static final int CUSTOM_SEGMENT_SIZE = 200;
+
+ /**
+ * A custom rounding policy that rounds the size to
+ * the next multiple of 200.
+ */
+ private static class CustomPolicy implements RoundingPolicy {
+
+ @Override
+ public long getRoundedSize(long requestSize) {
+ return (requestSize + CUSTOM_SEGMENT_SIZE - 1) / CUSTOM_SEGMENT_SIZE * CUSTOM_SEGMENT_SIZE;
+ }
+ }
+
+ @Test
+ public void testFixedWidthVectorAllocation() {
+ try (IntVector vec1 = new IntVector("vec", policyAllocator);
+ IntVector vec2 = new IntVector("vec", rootAllocator)) {
+ assertTrue(vec1.getAllocator().getRoundingPolicy() instanceof CustomPolicy);
+ vec1.allocateNew(50);
+ long totalCapacity = vec1.getValidityBuffer().capacity() + vec1.getDataBuffer().capacity();
+
+ // the total capacity must be a multiple of the segment size
+ assertTrue(totalCapacity % CUSTOM_SEGMENT_SIZE == 0);
+
+ assertTrue(vec2.getAllocator().getRoundingPolicy() instanceof DefaultRoundingPolicy);
+ vec2.allocateNew(50);
+ totalCapacity = vec2.getValidityBuffer().capacity() + vec2.getDataBuffer().capacity();
+
+ // the total capacity must be a power of two
+ assertEquals(totalCapacity & (totalCapacity - 1), 0);
+ }
+ }
+
+ @Test
+ public void testVariableWidthVectorAllocation() {
+ try (VarCharVector vec1 = new VarCharVector("vec", policyAllocator);
+ VarCharVector vec2 = new VarCharVector("vec", rootAllocator)) {
+ assertTrue(vec1.getAllocator().getRoundingPolicy() instanceof CustomPolicy);
+ vec1.allocateNew(50);
+ long totalCapacity = vec1.getValidityBuffer().capacity() + vec1.getOffsetBuffer().capacity();
+
+ // the total capacity must be a multiple of the segment size
+ assertTrue(totalCapacity % CUSTOM_SEGMENT_SIZE == 0);
+
+ assertTrue(vec2.getAllocator().getRoundingPolicy() instanceof DefaultRoundingPolicy);
+ vec2.allocateNew(50);
+ totalCapacity = vec2.getValidityBuffer().capacity() + vec2.getOffsetBuffer().capacity();
+
+ // the total capacity must be a power of two
+ assertEquals(totalCapacity & (totalCapacity - 1), 0);
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReAlloc.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReAlloc.java
new file mode 100644
index 000000000..18bb2c957
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReAlloc.java
@@ -0,0 +1,474 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.junit.Assert.*;
+
+import java.nio.charset.StandardCharsets;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.complex.DenseUnionVector;
+import org.apache.arrow.vector.complex.FixedSizeListVector;
+import org.apache.arrow.vector.complex.ListVector;
+import org.apache.arrow.vector.complex.StructVector;
+import org.apache.arrow.vector.complex.UnionVector;
+import org.apache.arrow.vector.complex.impl.NullableStructWriter;
+import org.apache.arrow.vector.complex.impl.UnionFixedSizeListWriter;
+import org.apache.arrow.vector.complex.impl.UnionListWriter;
+import org.apache.arrow.vector.holders.NullableIntHolder;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.DataSizeRoundingUtil;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+
+public class TestVectorReAlloc {
+
+ private BufferAllocator allocator;
+
+ @Before
+ public void init() {
+ allocator = new RootAllocator(Long.MAX_VALUE);
+ }
+
+ @After
+ public void terminate() throws Exception {
+ allocator.close();
+ }
+
+ @Test
+ public void testFixedType() {
+ try (final UInt4Vector vector = new UInt4Vector("", allocator)) {
+ vector.setInitialCapacity(512);
+ vector.allocateNew();
+
+ assertTrue(vector.getValueCapacity() >= 512);
+ int initialCapacity = vector.getValueCapacity();
+
+ try {
+ vector.set(initialCapacity, 0);
+ Assert.fail("Expected out of bounds exception");
+ } catch (Exception e) {
+ // ok
+ }
+
+ vector.reAlloc();
+ assertTrue(vector.getValueCapacity() >= 2 * initialCapacity);
+
+ vector.set(initialCapacity, 100);
+ assertEquals(100, vector.get(initialCapacity));
+ }
+ }
+
+ @Test
+ public void testNullableType() {
+ try (final VarCharVector vector = new VarCharVector("", allocator)) {
+ vector.setInitialCapacity(512);
+ vector.allocateNew();
+
+ assertTrue(vector.getValueCapacity() >= 512);
+ int initialCapacity = vector.getValueCapacity();
+
+ try {
+ vector.set(initialCapacity, "foo".getBytes(StandardCharsets.UTF_8));
+ Assert.fail("Expected out of bounds exception");
+ } catch (Exception e) {
+ // ok
+ }
+
+ vector.reAlloc();
+ assertTrue(vector.getValueCapacity() >= 2 * initialCapacity);
+
+ vector.set(initialCapacity, "foo".getBytes(StandardCharsets.UTF_8));
+ assertEquals("foo", new String(vector.get(initialCapacity), StandardCharsets.UTF_8));
+ }
+ }
+
+ @Test
+ public void testListType() {
+ try (final ListVector vector = ListVector.empty("", allocator)) {
+ vector.addOrGetVector(FieldType.nullable(MinorType.INT.getType()));
+
+ vector.setInitialCapacity(512);
+ vector.allocateNew();
+
+ assertEquals(512, vector.getValueCapacity());
+
+ try {
+ vector.getInnerValueCountAt(2014);
+ Assert.fail("Expected out of bounds exception");
+ } catch (Exception e) {
+ // ok
+ }
+
+ vector.reAlloc();
+ assertEquals(1024, vector.getValueCapacity());
+ assertEquals(0, vector.getOffsetBuffer().getInt(2014 * ListVector.OFFSET_WIDTH));
+ }
+ }
+
+ @Test
+ public void testStructType() {
+ try (final StructVector vector = StructVector.empty("", allocator)) {
+ vector.addOrGet("", FieldType.nullable(MinorType.INT.getType()), IntVector.class);
+
+ vector.setInitialCapacity(512);
+ vector.allocateNew();
+
+ assertEquals(512, vector.getValueCapacity());
+
+ try {
+ vector.getObject(513);
+ Assert.fail("Expected out of bounds exception");
+ } catch (Exception e) {
+ // ok
+ }
+
+ vector.reAlloc();
+ assertEquals(1024, vector.getValueCapacity());
+ assertNull(vector.getObject(513));
+ }
+ }
+
+ @Test
+ public void testVariableWidthTypeSetNullValues() {
+ // Test ARROW-11223 bug is fixed
+ try (final BaseVariableWidthVector v1 = new VarCharVector("var1", allocator)) {
+ v1.setInitialCapacity(512);
+ v1.allocateNew();
+ int numNullValues1 = v1.getValueCapacity() + 1;
+ for (int i = 0; i < numNullValues1; i++) {
+ v1.setNull(i);
+ }
+ Assert.assertTrue(v1.getBufferSizeFor(numNullValues1) > 0);
+ }
+
+ try (final BaseLargeVariableWidthVector v2 = new LargeVarCharVector("var2", allocator)) {
+ v2.setInitialCapacity(512);
+ v2.allocateNew();
+ int numNullValues2 = v2.getValueCapacity() + 1;
+ for (int i = 0; i < numNullValues2; i++) {
+ v2.setNull(i);
+ }
+ Assert.assertTrue(v2.getBufferSizeFor(numNullValues2) > 0);
+ }
+ }
+
+ @Test
+ public void testFixedAllocateAfterReAlloc() throws Exception {
+ try (final IntVector vector = new IntVector("", allocator)) {
+ /*
+ * Allocate the default size, and then, reAlloc. This should double the allocation.
+ */
+ vector.allocateNewSafe(); // Initial allocation
+ vector.reAlloc(); // Double the allocation size.
+ int savedValueCapacity = vector.getValueCapacity();
+
+ /*
+ * Clear and allocate again.
+ */
+ vector.clear();
+ vector.allocateNewSafe();
+
+ /*
+ * Verify that the buffer sizes haven't changed.
+ */
+ Assert.assertEquals(vector.getValueCapacity(), savedValueCapacity);
+ }
+ }
+
+ @Test
+ public void testVariableAllocateAfterReAlloc() throws Exception {
+ try (final VarCharVector vector = new VarCharVector("", allocator)) {
+ /*
+ * Allocate the default size, and then, reAlloc. This should double the allocation.
+ */
+ vector.allocateNewSafe(); // Initial allocation
+ vector.reAlloc(); // Double the allocation size.
+ int savedValueCapacity = vector.getValueCapacity();
+ long savedValueBufferSize = vector.valueBuffer.capacity();
+
+ /*
+ * Clear and allocate again.
+ */
+ vector.clear();
+ vector.allocateNewSafe();
+
+ /*
+ * Verify that the buffer sizes haven't changed.
+ */
+ Assert.assertEquals(vector.getValueCapacity(), savedValueCapacity);
+ Assert.assertEquals(vector.valueBuffer.capacity(), savedValueBufferSize);
+ }
+ }
+
+ @Test
+ public void testLargeVariableAllocateAfterReAlloc() throws Exception {
+ try (final LargeVarCharVector vector = new LargeVarCharVector("", allocator)) {
+ /*
+ * Allocate the default size, and then, reAlloc. This should double the allocation.
+ */
+ vector.allocateNewSafe(); // Initial allocation
+ vector.reAlloc(); // Double the allocation size.
+ int savedValueCapacity = vector.getValueCapacity();
+ long savedValueBufferSize = vector.valueBuffer.capacity();
+
+ /*
+ * Clear and allocate again.
+ */
+ vector.clear();
+ vector.allocateNewSafe();
+
+ /*
+ * Verify that the buffer sizes haven't changed.
+ */
+ Assert.assertEquals(vector.getValueCapacity(), savedValueCapacity);
+ Assert.assertEquals(vector.valueBuffer.capacity(), savedValueBufferSize);
+ }
+ }
+
+ @Test
+ public void testVarCharAllocateNew() throws Exception {
+ final int count = 6000;
+
+ try (final VarCharVector vector = new VarCharVector("", allocator)) {
+ vector.allocateNew(count);
+
+ // verify that the validity buffer and value buffer have capacity for atleast 'count' elements.
+ Assert.assertTrue(vector.getValidityBuffer().capacity() >= DataSizeRoundingUtil.divideBy8Ceil(count));
+ Assert.assertTrue(vector.getOffsetBuffer().capacity() >= (count + 1) * BaseVariableWidthVector.OFFSET_WIDTH);
+ }
+ }
+
+ @Test
+ public void testLargeVarCharAllocateNew() throws Exception {
+ final int count = 6000;
+
+ try (final LargeVarCharVector vector = new LargeVarCharVector("", allocator)) {
+ vector.allocateNew(count);
+
+ // verify that the validity buffer and value buffer have capacity for atleast 'count' elements.
+ Assert.assertTrue(vector.getValidityBuffer().capacity() >= DataSizeRoundingUtil.divideBy8Ceil(count));
+ Assert.assertTrue(vector.getOffsetBuffer().capacity() >= (count + 1) * BaseLargeVariableWidthVector.OFFSET_WIDTH);
+ }
+ }
+
+ @Test
+ public void testVarCharAllocateNewUsingHelper() throws Exception {
+ final int count = 6000;
+
+ try (final VarCharVector vector = new VarCharVector("", allocator)) {
+ AllocationHelper.allocateNew(vector, count);
+
+ // verify that the validity buffer and value buffer have capacity for atleast 'count' elements.
+ Assert.assertTrue(vector.getValidityBuffer().capacity() >= DataSizeRoundingUtil.divideBy8Ceil(count));
+ Assert.assertTrue(vector.getOffsetBuffer().capacity() >= (count + 1) * BaseVariableWidthVector.OFFSET_WIDTH);
+ }
+ }
+
+ @Test
+ public void testLargeVarCharAllocateNewUsingHelper() throws Exception {
+ final int count = 6000;
+
+ try (final LargeVarCharVector vector = new LargeVarCharVector("", allocator)) {
+ AllocationHelper.allocateNew(vector, count);
+
+ // verify that the validity buffer and value buffer have capacity for atleast 'count' elements.
+ Assert.assertTrue(vector.getValidityBuffer().capacity() >= DataSizeRoundingUtil.divideBy8Ceil(count));
+ Assert.assertTrue(vector.getOffsetBuffer().capacity() >= (count + 1) * BaseLargeVariableWidthVector.OFFSET_WIDTH);
+ }
+ }
+
+ @Test
+ public void testFixedRepeatedClearAndSet() throws Exception {
+ try (final IntVector vector = new IntVector("", allocator)) {
+ vector.allocateNewSafe(); // Initial allocation
+ vector.clear(); // clear vector.
+ vector.setSafe(0, 10);
+ int savedValueCapacity = vector.getValueCapacity();
+
+ for (int i = 0; i < 1024; ++i) {
+ vector.clear(); // clear vector.
+ vector.setSafe(0, 10);
+ }
+
+ // should be deterministic, and not cause a run-away increase in capacity.
+ Assert.assertEquals(vector.getValueCapacity(), savedValueCapacity);
+ }
+ }
+
+ @Test
+ public void testVariableRepeatedClearAndSet() throws Exception {
+ try (final VarCharVector vector = new VarCharVector("", allocator)) {
+ vector.allocateNewSafe(); // Initial allocation
+
+ vector.clear(); // clear vector.
+ vector.setSafe(0, "hello world".getBytes());
+ int savedValueCapacity = vector.getValueCapacity();
+
+ for (int i = 0; i < 1024; ++i) {
+ vector.clear(); // clear vector.
+ vector.setSafe(0, "hello world".getBytes());
+ }
+
+ // should be deterministic, and not cause a run-away increase in capacity.
+ Assert.assertEquals(vector.getValueCapacity(), savedValueCapacity);
+ }
+ }
+
+ @Test
+ public void testRepeatedValueVectorClearAndSet() throws Exception {
+ try (final ListVector vector = new ListVector("", allocator, FieldType.nullable(MinorType.INT.getType()), null)) {
+ vector.allocateNewSafe(); // Initial allocation
+ UnionListWriter writer = vector.getWriter();
+
+ vector.clear(); // clear vector.
+ writer.setPosition(0); // optional
+ writer.startList();
+ writer.writeInt(0);
+ writer.endList();
+ int savedValueCapacity = vector.getValueCapacity();
+
+ for (int i = 0; i < 1024; ++i) {
+ vector.clear(); // clear vector.
+ writer.setPosition(0); // optional
+ writer.startList();
+ writer.writeInt(i);
+ writer.endList();
+ }
+
+ // should be deterministic, and not cause a run-away increase in capacity.
+ Assert.assertEquals(vector.getValueCapacity(), savedValueCapacity);
+ }
+ }
+
+ @Test
+ public void testStructVectorClearAndSet() throws Exception {
+ try (final StructVector vector = StructVector.empty("v", allocator)) {
+ vector.allocateNewSafe(); // Initial allocation
+
+ NullableStructWriter writer = vector.getWriter();
+
+ vector.clear(); // clear vector.
+ writer.setPosition(0); // optional
+ writer.start();
+ writer.integer("int").writeInt(0);
+ writer.end();
+ int savedValueCapacity = vector.getValueCapacity();
+
+ for (int i = 0; i < 1024; ++i) {
+ vector.clear(); // clear vector.
+ writer.setPosition(0); // optional
+ writer.start();
+ writer.integer("int").writeInt(i);
+ writer.end();
+ }
+
+ // should be deterministic, and not cause a run-away increase in capacity.
+ Assert.assertEquals(vector.getValueCapacity(), savedValueCapacity);
+ }
+ }
+
+ @Test
+ public void testFixedSizeListVectorClearAndSet() {
+ try (final FixedSizeListVector vector = new FixedSizeListVector("", allocator,
+ FieldType.nullable(new ArrowType.FixedSizeList(2)), null)) {
+ vector.allocateNewSafe(); // Initial allocation
+ UnionFixedSizeListWriter writer = vector.getWriter();
+
+ vector.clear(); // clear vector.
+ writer.setPosition(0); // optional
+ writer.startList();
+ writer.writeInt(0);
+ writer.writeInt(1);
+ writer.endList();
+ int savedValueCapacity = vector.getValueCapacity();
+
+ for (int i = 0; i < 1024; ++i) {
+ vector.clear(); // clear vector.
+ writer.setPosition(0); // optional
+ writer.startList();
+ writer.writeInt(i);
+ writer.writeInt(i + 1);
+ writer.endList();
+ }
+
+ // should be deterministic, and not cause a run-away increase in capacity.
+ Assert.assertEquals(vector.getValueCapacity(), savedValueCapacity);
+ }
+ }
+
+ @Test
+ public void testUnionVectorClearAndSet() {
+ try (final UnionVector vector = new UnionVector("", allocator, /* field type */ null, /* call-back */ null)) {
+ vector.allocateNewSafe(); // Initial allocation
+
+ NullableIntHolder holder = new NullableIntHolder();
+ holder.isSet = 1;
+ holder.value = 1;
+
+ vector.clear(); // clear vector.
+ vector.setType(0, MinorType.INT);
+ vector.setSafe(0, holder);
+ int savedValueCapacity = vector.getValueCapacity();
+
+ for (int i = 0; i < 1024; ++i) {
+ vector.clear(); // clear vector.
+ vector.setType(0, MinorType.INT);
+ vector.setSafe(0, holder);
+ }
+
+ // should be deterministic, and not cause a run-away increase in capacity.
+ Assert.assertEquals(vector.getValueCapacity(), savedValueCapacity);
+ }
+ }
+
+ @Test
+ public void testDenseUnionVectorClearAndSet() {
+ try (final DenseUnionVector vector = new DenseUnionVector("", allocator, null, null)) {
+ vector.allocateNewSafe(); // Initial allocation
+
+ NullableIntHolder holder = new NullableIntHolder();
+ holder.isSet = 1;
+ holder.value = 1;
+
+ byte intTypeId = vector.registerNewTypeId(Field.nullable("", MinorType.INT.getType()));
+
+ vector.clear();
+ vector.setTypeId(0, intTypeId);
+ vector.setSafe(0, holder);
+
+ int savedValueCapacity = vector.getValueCapacity();
+
+ for (int i = 0; i < 1024; ++i) {
+ vector.clear();
+ vector.setTypeId(0, intTypeId);
+ vector.setSafe(0, holder);
+ }
+
+ // should be deterministic, and not cause a run-away increase in capacity.
+ Assert.assertEquals(vector.getValueCapacity(), savedValueCapacity);
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReset.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReset.java
new file mode 100644
index 000000000..71009a333
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReset.java
@@ -0,0 +1,168 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.nio.charset.StandardCharsets;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.complex.FixedSizeListVector;
+import org.apache.arrow.vector.complex.ListVector;
+import org.apache.arrow.vector.complex.NonNullableStructVector;
+import org.apache.arrow.vector.complex.StructVector;
+import org.apache.arrow.vector.complex.UnionVector;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.ArrowType.FixedSizeList;
+import org.apache.arrow.vector.types.pojo.ArrowType.Int;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestVectorReset {
+
+ private BufferAllocator allocator;
+
+ @Before
+ public void init() {
+ allocator = new RootAllocator(Long.MAX_VALUE);
+ }
+
+ @After
+ public void terminate() throws Exception {
+ allocator.close();
+ }
+
+ private void resetVectorAndVerify(ValueVector vector, ArrowBuf[] bufs) {
+ long[] sizeBefore = new long[bufs.length];
+ for (int i = 0; i < bufs.length; i++) {
+ sizeBefore[i] = bufs[i].capacity();
+ }
+ vector.reset();
+ for (int i = 0; i < bufs.length; i++) {
+ assertEquals(sizeBefore[i], bufs[i].capacity());
+ verifyBufferZeroed(bufs[i]);
+ }
+ assertEquals(0, vector.getValueCount());
+ }
+
+ private void verifyBufferZeroed(ArrowBuf buf) {
+ for (int i = 0; i < buf.capacity(); i++) {
+ assertTrue((byte) 0 == buf.getByte(i));
+ }
+ }
+
+ @Test
+ public void testFixedTypeReset() {
+ try (final UInt4Vector vector = new UInt4Vector("UInt4", allocator)) {
+ vector.allocateNewSafe();
+ vector.setNull(0);
+ vector.setValueCount(1);
+ resetVectorAndVerify(vector, vector.getBuffers(false));
+ }
+ }
+
+ @Test
+ public void testVariableTypeReset() {
+ try (final VarCharVector vector = new VarCharVector("VarChar", allocator)) {
+ vector.allocateNewSafe();
+ vector.set(0, "a".getBytes(StandardCharsets.UTF_8));
+ vector.setLastSet(0);
+ vector.setValueCount(1);
+ resetVectorAndVerify(vector, vector.getBuffers(false));
+ assertEquals(-1, vector.getLastSet());
+ }
+ }
+
+ @Test
+ public void testLargeVariableTypeReset() {
+ try (final LargeVarCharVector vector = new LargeVarCharVector("LargeVarChar", allocator)) {
+ vector.allocateNewSafe();
+ vector.set(0, "a".getBytes(StandardCharsets.UTF_8));
+ vector.setLastSet(0);
+ vector.setValueCount(1);
+ resetVectorAndVerify(vector, vector.getBuffers(false));
+ assertEquals(-1, vector.getLastSet());
+ }
+ }
+
+ @Test
+ public void testListTypeReset() {
+ try (final ListVector variableList =
+ new ListVector("VarList", allocator, FieldType.nullable(MinorType.INT.getType()), null);
+ final FixedSizeListVector fixedList =
+ new FixedSizeListVector("FixedList", allocator, FieldType.nullable(new FixedSizeList(2)), null)
+ ) {
+ // ListVector
+ variableList.allocateNewSafe();
+ variableList.startNewValue(0);
+ variableList.endValue(0, 0);
+ variableList.setValueCount(1);
+ resetVectorAndVerify(variableList, variableList.getBuffers(false));
+ assertEquals(-1, variableList.getLastSet());
+
+ // FixedSizeListVector
+ fixedList.allocateNewSafe();
+ fixedList.setNull(0);
+ fixedList.setValueCount(1);
+ resetVectorAndVerify(fixedList, fixedList.getBuffers(false));
+ }
+ }
+
+ @Test
+ public void testStructTypeReset() {
+ try (final NonNullableStructVector nonNullableStructVector =
+ new NonNullableStructVector("Struct", allocator, FieldType.nullable(MinorType.INT.getType()), null);
+ final StructVector structVector =
+ new StructVector("NullableStruct", allocator, FieldType.nullable(MinorType.INT.getType()), null)
+ ) {
+ // NonNullableStructVector
+ nonNullableStructVector.allocateNewSafe();
+ IntVector structChild = nonNullableStructVector
+ .addOrGet("child", FieldType.nullable(new Int(32, true)), IntVector.class);
+ structChild.setNull(0);
+ nonNullableStructVector.setValueCount(1);
+ resetVectorAndVerify(nonNullableStructVector, nonNullableStructVector.getBuffers(false));
+
+ // StructVector
+ structVector.allocateNewSafe();
+ structVector.setNull(0);
+ structVector.setValueCount(1);
+ resetVectorAndVerify(structVector, structVector.getBuffers(false));
+ }
+ }
+
+ @Test
+ public void testUnionTypeReset() {
+ try (final UnionVector vector = new UnionVector("Union", allocator, /* field type */ null, /* call-back */ null);
+ final IntVector dataVector = new IntVector("Int", allocator)
+ ) {
+ vector.getBufferSize();
+ vector.allocateNewSafe();
+ dataVector.allocateNewSafe();
+ vector.addVector(dataVector);
+ dataVector.setNull(0);
+ vector.setValueCount(1);
+ resetVectorAndVerify(vector, vector.getBuffers(false));
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestVectorSchemaRoot.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestVectorSchemaRoot.java
new file mode 100644
index 000000000..4c5b6540f
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestVectorSchemaRoot.java
@@ -0,0 +1,318 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static junit.framework.TestCase.assertTrue;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.stream.Collectors;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.complex.ListVector;
+import org.apache.arrow.vector.complex.impl.UnionListWriter;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestVectorSchemaRoot {
+ private BufferAllocator allocator;
+
+ @Before
+ public void init() {
+ allocator = new RootAllocator(Long.MAX_VALUE);
+ }
+
+ @After
+ public void terminate() {
+ allocator.close();
+ }
+
+ @Test
+ public void testResetRowCount() {
+ final int size = 20;
+ try (final BitVector vec1 = new BitVector("bit", allocator);
+ final IntVector vec2 = new IntVector("int", allocator)) {
+ VectorSchemaRoot vsr = VectorSchemaRoot.of(vec1, vec2);
+
+ vsr.allocateNew();
+ assertEquals(vsr.getRowCount(), 0);
+
+ for (int i = 0; i < size; i++) {
+ vec1.setSafe(i, i % 2);
+ vec2.setSafe(i, i);
+ }
+ vsr.setRowCount(size);
+ checkCount(vec1, vec2, vsr, size);
+
+ vsr.allocateNew();
+ checkCount(vec1, vec2, vsr, 0);
+
+ for (int i = 0; i < size; i++) {
+ vec1.setSafe(i, i % 2);
+ vec2.setSafe(i, i);
+ }
+ vsr.setRowCount(size);
+ checkCount(vec1, vec2, vsr, size);
+
+ vsr.clear();
+ checkCount(vec1, vec2, vsr, 0);
+ }
+ }
+
+ private void checkCount(BitVector vec1, IntVector vec2, VectorSchemaRoot vsr, int count) {
+ assertEquals(vec1.getValueCount(), count);
+ assertEquals(vec2.getValueCount(), count);
+ assertEquals(vsr.getRowCount(), count);
+ }
+
+ private VectorSchemaRoot createBatch() {
+ FieldType varCharType = new FieldType(true, new ArrowType.Utf8(), /*dictionary=*/null);
+ FieldType listType = new FieldType(true, new ArrowType.List(), /*dictionary=*/null);
+
+ // create the schema
+ List<Field> schemaFields = new ArrayList<>();
+ Field childField = new Field("varCharCol", varCharType, null);
+ List<Field> childFields = new ArrayList<>();
+ childFields.add(childField);
+ schemaFields.add(new Field("listCol", listType, childFields));
+ Schema schema = new Schema(schemaFields);
+
+ VectorSchemaRoot schemaRoot = VectorSchemaRoot.create(schema, allocator);
+ // get and allocate the vector
+ ListVector vector = (ListVector) schemaRoot.getVector("listCol");
+ vector.allocateNew();
+
+ // write data to the vector
+ UnionListWriter writer = vector.getWriter();
+
+ writer.setPosition(0);
+
+ // write data vector(0)
+ writer.startList();
+
+ // write data vector(0)(0)
+ writer.list().startList();
+
+ // According to the schema above, the list element should have varchar type.
+ // When we write a big int, the original writer cannot handle this, so the writer will
+ // be promoted, and the vector structure will be different from the schema.
+ writer.list().bigInt().writeBigInt(0);
+ writer.list().bigInt().writeBigInt(1);
+ writer.list().endList();
+
+ // write data vector(0)(1)
+ writer.list().startList();
+ writer.list().float8().writeFloat8(3.0D);
+ writer.list().float8().writeFloat8(7.0D);
+ writer.list().endList();
+
+ // finish data vector(0)
+ writer.endList();
+
+ writer.setPosition(1);
+
+ // write data vector(1)
+ writer.startList();
+
+ // write data vector(1)(0)
+ writer.list().startList();
+ writer.list().integer().writeInt(3);
+ writer.list().integer().writeInt(2);
+ writer.list().endList();
+
+ // finish data vector(1)
+ writer.endList();
+
+ vector.setValueCount(2);
+
+ return schemaRoot;
+ }
+
+ @Test
+ public void testAddVector() {
+ try (final IntVector intVector1 = new IntVector("intVector1", allocator);
+ final IntVector intVector2 = new IntVector("intVector2", allocator);
+ final IntVector intVector3 = new IntVector("intVector3", allocator);) {
+
+ VectorSchemaRoot original = new VectorSchemaRoot(Arrays.asList(intVector1, intVector2));
+ assertEquals(2, original.getFieldVectors().size());
+
+ VectorSchemaRoot newRecordBatch = original.addVector(1, intVector3);
+ assertEquals(3, newRecordBatch.getFieldVectors().size());
+ assertEquals(intVector3, newRecordBatch.getFieldVectors().get(1));
+
+ original.close();
+ newRecordBatch.close();
+ }
+ }
+
+ @Test
+ public void testRemoveVector() {
+ try (final IntVector intVector1 = new IntVector("intVector1", allocator);
+ final IntVector intVector2 = new IntVector("intVector2", allocator);
+ final IntVector intVector3 = new IntVector("intVector3", allocator);) {
+
+ VectorSchemaRoot original =
+ new VectorSchemaRoot(Arrays.asList(intVector1, intVector2, intVector3));
+ assertEquals(3, original.getFieldVectors().size());
+
+ VectorSchemaRoot newRecordBatch = original.removeVector(0);
+ assertEquals(2, newRecordBatch.getFieldVectors().size());
+ assertEquals(intVector2, newRecordBatch.getFieldVectors().get(0));
+ assertEquals(intVector3, newRecordBatch.getFieldVectors().get(1));
+
+ original.close();
+ newRecordBatch.close();
+ }
+ }
+
+ @Test
+ public void testSlice() {
+ try (final IntVector intVector = new IntVector("intVector", allocator);
+ final Float4Vector float4Vector = new Float4Vector("float4Vector", allocator)) {
+ intVector.setValueCount(10);
+ float4Vector.setValueCount(10);
+ for (int i = 0; i < 10; i++) {
+ intVector.setSafe(i, i);
+ float4Vector.setSafe(i, i + 0.1f);
+ }
+ final VectorSchemaRoot original = new VectorSchemaRoot(Arrays.asList(intVector, float4Vector));
+
+ VectorSchemaRoot slice1 = original.slice(0, original.getRowCount());
+ assertEquals(original, slice1);
+
+ VectorSchemaRoot slice2 = original.slice(0, 5);
+ assertEquals(5, slice2.getRowCount());
+ // validate data
+ IntVector childVector1 = (IntVector) slice2.getFieldVectors().get(0);
+ Float4Vector childVector2 = (Float4Vector) slice2.getFieldVectors().get(1);
+ for (int i = 0; i < 5; i++) {
+ assertEquals(i, childVector1.get(i));
+ assertEquals(i + 0.1f, childVector2.get(i), 0);
+ }
+
+ original.close();
+ slice2.close();
+ }
+ }
+
+ @Test(expected = IllegalArgumentException.class)
+ public void testSliceWithInvalidParam() {
+ try (final IntVector intVector = new IntVector("intVector", allocator);
+ final Float4Vector float4Vector = new Float4Vector("float4Vector", allocator)) {
+ intVector.setValueCount(10);
+ float4Vector.setValueCount(10);
+ for (int i = 0; i < 10; i++) {
+ intVector.setSafe(i, i);
+ float4Vector.setSafe(i, i + 0.1f);
+ }
+ final VectorSchemaRoot original = new VectorSchemaRoot(Arrays.asList(intVector, float4Vector));
+
+ original.slice(0, 20);
+ }
+ }
+
+ @Test
+ public void testEquals() {
+ try (final IntVector intVector1 = new IntVector("intVector1", allocator);
+ final IntVector intVector2 = new IntVector("intVector2", allocator);
+ final IntVector intVector3 = new IntVector("intVector3", allocator);) {
+
+ intVector1.setValueCount(5);
+ for (int i = 0; i < 5; i++) {
+ intVector1.set(i, i);
+ }
+
+ VectorSchemaRoot root1 =
+ new VectorSchemaRoot(Arrays.asList(intVector1, intVector2, intVector3));
+
+ VectorSchemaRoot root2 =
+ new VectorSchemaRoot(Arrays.asList(intVector1, intVector2));
+
+ VectorSchemaRoot root3 =
+ new VectorSchemaRoot(Arrays.asList(intVector1, intVector2, intVector3));
+
+ assertFalse(root1.equals(root2));
+ assertTrue(root1.equals(root3));
+
+ root1.close();
+ root2.close();
+ root3.close();
+ }
+ }
+
+ @Test
+ public void testApproxEquals() {
+ try (final Float4Vector float4Vector1 = new Float4Vector("floatVector", allocator);
+ final Float4Vector float4Vector2 = new Float4Vector("floatVector", allocator);
+ final Float4Vector float4Vector3 = new Float4Vector("floatVector", allocator);) {
+
+ float4Vector1.setValueCount(5);
+ float4Vector2.setValueCount(5);
+ float4Vector3.setValueCount(5);
+ final float epsilon = 1.0E-6f;
+ for (int i = 0; i < 5; i++) {
+ float4Vector1.set(i, i);
+ float4Vector2.set(i, i + epsilon * 2);
+ float4Vector3.set(i, i + epsilon / 2);
+ }
+
+ VectorSchemaRoot root1 =
+ new VectorSchemaRoot(Arrays.asList(float4Vector1));
+
+ VectorSchemaRoot root2 =
+ new VectorSchemaRoot(Arrays.asList(float4Vector2));
+
+ VectorSchemaRoot root3 =
+ new VectorSchemaRoot(Arrays.asList(float4Vector3));
+
+ assertFalse(root1.approxEquals(root2));
+ assertTrue(root1.approxEquals(root3));
+
+ root1.close();
+ root2.close();
+ root3.close();
+ }
+ }
+
+ @Test
+ public void testSchemaSync() {
+ //create vector schema root
+ try (VectorSchemaRoot schemaRoot = createBatch()) {
+ Schema newSchema = new Schema(
+ schemaRoot.getFieldVectors().stream().map(vec -> vec.getField()).collect(Collectors.toList()));
+
+ assertNotEquals(newSchema, schemaRoot.getSchema());
+ assertTrue(schemaRoot.syncSchema());
+ assertEquals(newSchema, schemaRoot.getSchema());
+
+ // no schema update this time.
+ assertFalse(schemaRoot.syncSchema());
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestVectorUnloadLoad.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestVectorUnloadLoad.java
new file mode 100644
index 000000000..8e1941a8c
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/TestVectorUnloadLoad.java
@@ -0,0 +1,332 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static java.util.Arrays.asList;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.complex.NonNullableStructVector;
+import org.apache.arrow.vector.complex.impl.ComplexWriterImpl;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.complex.writer.BaseWriter.ComplexWriter;
+import org.apache.arrow.vector.complex.writer.BaseWriter.ListWriter;
+import org.apache.arrow.vector.complex.writer.BaseWriter.StructWriter;
+import org.apache.arrow.vector.complex.writer.BigIntWriter;
+import org.apache.arrow.vector.complex.writer.IntWriter;
+import org.apache.arrow.vector.ipc.message.ArrowFieldNode;
+import org.apache.arrow.vector.ipc.message.ArrowRecordBatch;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestVectorUnloadLoad {
+
+ private BufferAllocator allocator;
+
+ @Before
+ public void init() {
+ allocator = new RootAllocator(Long.MAX_VALUE);
+ }
+
+ @After
+ public void terminate() throws Exception {
+ allocator.close();
+ }
+
+ @Test
+ public void testUnloadLoad() throws IOException {
+ int count = 10000;
+ Schema schema;
+
+ try (
+ BufferAllocator originalVectorsAllocator =
+ allocator.newChildAllocator("original vectors", 0, Integer.MAX_VALUE);
+ NonNullableStructVector parent = NonNullableStructVector.empty("parent", originalVectorsAllocator)) {
+
+ // write some data
+ ComplexWriter writer = new ComplexWriterImpl("root", parent);
+ StructWriter rootWriter = writer.rootAsStruct();
+ IntWriter intWriter = rootWriter.integer("int");
+ BigIntWriter bigIntWriter = rootWriter.bigInt("bigInt");
+ for (int i = 0; i < count; i++) {
+ intWriter.setPosition(i);
+ intWriter.writeInt(i);
+ bigIntWriter.setPosition(i);
+ bigIntWriter.writeBigInt(i);
+ }
+ writer.setValueCount(count);
+
+ // unload it
+ FieldVector root = parent.getChild("root");
+ schema = new Schema(root.getField().getChildren());
+ VectorUnloader vectorUnloader = newVectorUnloader(root);
+ try (
+ ArrowRecordBatch recordBatch = vectorUnloader.getRecordBatch();
+ BufferAllocator finalVectorsAllocator = allocator.newChildAllocator("final vectors", 0, Integer.MAX_VALUE);
+ VectorSchemaRoot newRoot = VectorSchemaRoot.create(schema, finalVectorsAllocator);
+ ) {
+
+ // load it
+ VectorLoader vectorLoader = new VectorLoader(newRoot);
+
+ vectorLoader.load(recordBatch);
+
+ FieldReader intReader = newRoot.getVector("int").getReader();
+ FieldReader bigIntReader = newRoot.getVector("bigInt").getReader();
+ for (int i = 0; i < count; i++) {
+ intReader.setPosition(i);
+ Assert.assertEquals(i, intReader.readInteger().intValue());
+ bigIntReader.setPosition(i);
+ Assert.assertEquals(i, bigIntReader.readLong().longValue());
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testUnloadLoadAddPadding() throws IOException {
+ int count = 10000;
+ Schema schema;
+ try (
+ BufferAllocator originalVectorsAllocator =
+ allocator.newChildAllocator("original vectors", 0, Integer.MAX_VALUE);
+ NonNullableStructVector parent = NonNullableStructVector.empty("parent", originalVectorsAllocator)) {
+
+ // write some data
+ ComplexWriter writer = new ComplexWriterImpl("root", parent);
+ StructWriter rootWriter = writer.rootAsStruct();
+ ListWriter list = rootWriter.list("list");
+ IntWriter intWriter = list.integer();
+ for (int i = 0; i < count; i++) {
+ list.setPosition(i);
+ list.startList();
+ for (int j = 0; j < i % 4 + 1; j++) {
+ intWriter.writeInt(i);
+ }
+ list.endList();
+ }
+ writer.setValueCount(count);
+
+ // unload it
+ FieldVector root = parent.getChild("root");
+ schema = new Schema(root.getField().getChildren());
+ VectorUnloader vectorUnloader = newVectorUnloader(root);
+ try (
+ ArrowRecordBatch recordBatch = vectorUnloader.getRecordBatch();
+ BufferAllocator finalVectorsAllocator = allocator.newChildAllocator("final vectors", 0, Integer.MAX_VALUE);
+ VectorSchemaRoot newRoot = VectorSchemaRoot.create(schema, finalVectorsAllocator);
+ ) {
+ List<ArrowBuf> oldBuffers = recordBatch.getBuffers();
+ List<ArrowBuf> newBuffers = new ArrayList<>();
+ for (ArrowBuf oldBuffer : oldBuffers) {
+ long l = oldBuffer.readableBytes();
+ if (l % 64 != 0) {
+ // pad
+ l = l + 64 - l % 64;
+ }
+ ArrowBuf newBuffer = allocator.buffer(l);
+ for (long i = oldBuffer.readerIndex(); i < oldBuffer.writerIndex(); i++) {
+ newBuffer.setByte(i - oldBuffer.readerIndex(), oldBuffer.getByte(i));
+ }
+ newBuffer.readerIndex(0);
+ newBuffer.writerIndex(l);
+ newBuffers.add(newBuffer);
+ }
+
+ try (ArrowRecordBatch newBatch =
+ new ArrowRecordBatch(recordBatch.getLength(), recordBatch.getNodes(), newBuffers);) {
+ // load it
+ VectorLoader vectorLoader = new VectorLoader(newRoot);
+
+ vectorLoader.load(newBatch);
+
+ FieldReader reader = newRoot.getVector("list").getReader();
+ for (int i = 0; i < count; i++) {
+ reader.setPosition(i);
+ List<Integer> expected = new ArrayList<>();
+ for (int j = 0; j < i % 4 + 1; j++) {
+ expected.add(i);
+ }
+ Assert.assertEquals(expected, reader.readObject());
+ }
+ }
+
+ for (ArrowBuf newBuf : newBuffers) {
+ newBuf.getReferenceManager().release();
+ }
+ }
+ }
+ }
+
+ /**
+ * The validity buffer can be empty if:
+ * - all values are defined.
+ * - all values are null.
+ *
+ * @throws IOException on error
+ */
+ @Test
+ public void testLoadValidityBuffer() throws IOException {
+ Schema schema = new Schema(asList(
+ new Field("intDefined", FieldType.nullable(new ArrowType.Int(32, true)), Collections.<Field>emptyList()),
+ new Field("intNull", FieldType.nullable(new ArrowType.Int(32, true)), Collections.<Field>emptyList())
+ ));
+ int count = 10;
+ ArrowBuf[] values = new ArrowBuf[4];
+ for (int i = 0; i < 4; i += 2) {
+ ArrowBuf buf1 = allocator.buffer(BitVectorHelper.getValidityBufferSize(count));
+ ArrowBuf buf2 = allocator.buffer(count * 4); // integers
+ buf1.setZero(0, buf1.capacity());
+ buf2.setZero(0, buf2.capacity());
+ values[i] = buf1;
+ values[i + 1] = buf2;
+ for (int j = 0; j < count; j++) {
+ if (i == 2) {
+ BitVectorHelper.unsetBit(buf1, j);
+ } else {
+ BitVectorHelper.setBit(buf1, j);
+ }
+
+ buf2.setInt(j * 4, j);
+ }
+ buf1.writerIndex((int) Math.ceil(count / 8));
+ buf2.writerIndex(count * 4);
+ }
+
+ /*
+ * values[0] - validity buffer for first vector
+ * values[1] - data buffer for first vector
+ * values[2] - validity buffer for second vector
+ * values[3] - data buffer for second vector
+ */
+
+ try (
+ ArrowRecordBatch recordBatch = new ArrowRecordBatch(count, asList(new ArrowFieldNode(count, 0),
+ new ArrowFieldNode(count, count)), asList(values[0], values[1], values[2], values[3]));
+ BufferAllocator finalVectorsAllocator = allocator.newChildAllocator("final vectors", 0, Integer.MAX_VALUE);
+ VectorSchemaRoot newRoot = VectorSchemaRoot.create(schema, finalVectorsAllocator);
+ ) {
+
+ // load it
+ VectorLoader vectorLoader = new VectorLoader(newRoot);
+
+ vectorLoader.load(recordBatch);
+
+ IntVector intDefinedVector = (IntVector) newRoot.getVector("intDefined");
+ IntVector intNullVector = (IntVector) newRoot.getVector("intNull");
+ for (int i = 0; i < count; i++) {
+ assertFalse("#" + i, intDefinedVector.isNull(i));
+ assertEquals("#" + i, i, intDefinedVector.get(i));
+ assertTrue("#" + i, intNullVector.isNull(i));
+ }
+ intDefinedVector.setSafe(count + 10, 1234);
+ assertTrue(intDefinedVector.isNull(count + 1));
+ // empty slots should still default to unset
+ intDefinedVector.setSafe(count + 1, 789);
+ assertFalse(intDefinedVector.isNull(count + 1));
+ assertEquals(789, intDefinedVector.get(count + 1));
+ assertTrue(intDefinedVector.isNull(count));
+ assertTrue(intDefinedVector.isNull(count + 2));
+ assertTrue(intDefinedVector.isNull(count + 3));
+ assertTrue(intDefinedVector.isNull(count + 4));
+ assertTrue(intDefinedVector.isNull(count + 5));
+ assertTrue(intDefinedVector.isNull(count + 6));
+ assertTrue(intDefinedVector.isNull(count + 7));
+ assertTrue(intDefinedVector.isNull(count + 8));
+ assertTrue(intDefinedVector.isNull(count + 9));
+ assertFalse(intDefinedVector.isNull(count + 10));
+ assertEquals(1234, intDefinedVector.get(count + 10));
+ } finally {
+ for (ArrowBuf arrowBuf : values) {
+ arrowBuf.getReferenceManager().release();
+ }
+ }
+ }
+
+ @Test
+ public void testUnloadLoadDuplicates() throws IOException {
+ int count = 10;
+ Schema schema = new Schema(asList(
+ new Field("duplicate", FieldType.nullable(new ArrowType.Int(32, true)), Collections.<Field>emptyList()),
+ new Field("duplicate", FieldType.nullable(new ArrowType.Int(32, true)), Collections.<Field>emptyList())
+ ));
+
+ try (
+ BufferAllocator originalVectorsAllocator =
+ allocator.newChildAllocator("original vectors", 0, Integer.MAX_VALUE);
+ ) {
+ List<FieldVector> sources = new ArrayList<>();
+ for (Field field : schema.getFields()) {
+ FieldVector vector = field.createVector(originalVectorsAllocator);
+ vector.allocateNew();
+ sources.add(vector);
+ IntVector intVector = (IntVector) vector;
+ for (int i = 0; i < count; i++) {
+ intVector.set(i, i);
+ }
+ intVector.setValueCount(count);
+ }
+
+ try (VectorSchemaRoot root = new VectorSchemaRoot(schema.getFields(), sources, count)) {
+ VectorUnloader vectorUnloader = new VectorUnloader(root);
+ try (ArrowRecordBatch recordBatch = vectorUnloader.getRecordBatch();
+ BufferAllocator finalVectorsAllocator =
+ allocator.newChildAllocator("final vectors", 0, Integer.MAX_VALUE);
+ VectorSchemaRoot newRoot = VectorSchemaRoot.create(schema, finalVectorsAllocator);) {
+ // load it
+ VectorLoader vectorLoader = new VectorLoader(newRoot);
+ vectorLoader.load(recordBatch);
+
+ List<FieldVector> targets = newRoot.getFieldVectors();
+ Assert.assertEquals(sources.size(), targets.size());
+ for (int k = 0; k < sources.size(); k++) {
+ IntVector src = (IntVector) sources.get(k);
+ IntVector tgt = (IntVector) targets.get(k);
+ Assert.assertEquals(src.getValueCount(), tgt.getValueCount());
+ for (int i = 0; i < count; i++) {
+ Assert.assertEquals(src.get(i), tgt.get(i));
+ }
+ }
+ }
+ }
+ }
+ }
+
+ public static VectorUnloader newVectorUnloader(FieldVector root) {
+ Schema schema = new Schema(root.getField().getChildren());
+ int valueCount = root.getValueCount();
+ List<FieldVector> fields = root.getChildrenFromFields();
+ VectorSchemaRoot vsr = new VectorSchemaRoot(schema.getFields(), fields, valueCount);
+ return new VectorUnloader(vsr);
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/compare/TestRangeEqualsVisitor.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/compare/TestRangeEqualsVisitor.java
new file mode 100644
index 000000000..4495881ad
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/compare/TestRangeEqualsVisitor.java
@@ -0,0 +1,740 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.compare;
+
+import static org.apache.arrow.vector.testing.ValueVectorDataPopulator.setVector;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+import java.nio.charset.Charset;
+import java.util.Arrays;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.BigIntVector;
+import org.apache.arrow.vector.Float4Vector;
+import org.apache.arrow.vector.Float8Vector;
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.LargeVarCharVector;
+import org.apache.arrow.vector.VarCharVector;
+import org.apache.arrow.vector.ZeroVector;
+import org.apache.arrow.vector.compare.util.ValueEpsilonEqualizers;
+import org.apache.arrow.vector.complex.DenseUnionVector;
+import org.apache.arrow.vector.complex.FixedSizeListVector;
+import org.apache.arrow.vector.complex.ListVector;
+import org.apache.arrow.vector.complex.StructVector;
+import org.apache.arrow.vector.complex.UnionVector;
+import org.apache.arrow.vector.complex.impl.NullableStructWriter;
+import org.apache.arrow.vector.complex.impl.UnionFixedSizeListWriter;
+import org.apache.arrow.vector.complex.impl.UnionListWriter;
+import org.apache.arrow.vector.holders.NullableBigIntHolder;
+import org.apache.arrow.vector.holders.NullableFloat4Holder;
+import org.apache.arrow.vector.holders.NullableFloat8Holder;
+import org.apache.arrow.vector.holders.NullableIntHolder;
+import org.apache.arrow.vector.holders.NullableUInt4Holder;
+import org.apache.arrow.vector.types.FloatingPointPrecision;
+import org.apache.arrow.vector.types.Types;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Ignore;
+import org.junit.Test;
+
+public class TestRangeEqualsVisitor {
+
+ private BufferAllocator allocator;
+
+ @Before
+ public void init() {
+ allocator = new RootAllocator(Long.MAX_VALUE);
+ }
+
+ private static final Charset utf8Charset = Charset.forName("UTF-8");
+ private static final byte[] STR1 = "AAAAA1".getBytes(utf8Charset);
+ private static final byte[] STR2 = "BBBBBBBBB2".getBytes(utf8Charset);
+ private static final byte[] STR3 = "CCCC3".getBytes(utf8Charset);
+
+ @After
+ public void terminate() throws Exception {
+ allocator.close();
+ }
+
+ @Test
+ public void testIntVectorEqualsWithNull() {
+ try (final IntVector vector1 = new IntVector("int", allocator);
+ final IntVector vector2 = new IntVector("int", allocator)) {
+
+ setVector(vector1, 1, 2);
+ setVector(vector2, 1, null);
+
+ assertFalse(VectorEqualsVisitor.vectorEquals(vector1, vector2));
+ }
+ }
+
+ @Test
+ public void testEqualsWithTypeChange() {
+ try (final IntVector vector1 = new IntVector("vector", allocator);
+ final IntVector vector2 = new IntVector("vector", allocator);
+ final BigIntVector vector3 = new BigIntVector("vector", allocator)) {
+
+ setVector(vector1, 1, 2);
+ setVector(vector2, 1, 2);
+
+ RangeEqualsVisitor visitor = new RangeEqualsVisitor(vector1, vector2);
+ Range range = new Range(0, 0, 2);
+ assertTrue(vector1.accept(visitor, range));
+ // visitor left vector changed, will reset and check type again
+ assertFalse(vector3.accept(visitor, range));
+ }
+ }
+
+ @Test
+ public void testBaseFixedWidthVectorRangeEqual() {
+ try (final IntVector vector1 = new IntVector("int", allocator);
+ final IntVector vector2 = new IntVector("int", allocator)) {
+
+ setVector(vector1, 1, 2, 3, 4, 5);
+ setVector(vector2, 11, 2, 3, 4, 55);
+
+ RangeEqualsVisitor visitor = new RangeEqualsVisitor(vector1, vector2);
+ assertTrue(visitor.rangeEquals(new Range(1, 1, 3)));
+ }
+ }
+
+ @Test
+ public void testBaseVariableVectorRangeEquals() {
+ try (final VarCharVector vector1 = new VarCharVector("varchar", allocator);
+ final VarCharVector vector2 = new VarCharVector("varchar", allocator)) {
+
+ setVector(vector1, STR1, STR2, STR3, STR2, STR1);
+ setVector(vector2, STR1, STR2, STR3, STR2, STR1);
+
+ RangeEqualsVisitor visitor = new RangeEqualsVisitor(vector1, vector2);
+ assertTrue(visitor.rangeEquals(new Range(1, 1, 3)));
+ }
+ }
+
+ @Test
+ public void testListVectorWithDifferentChild() {
+ try (final ListVector vector1 = ListVector.empty("list", allocator);
+ final ListVector vector2 = ListVector.empty("list", allocator);) {
+
+ vector1.allocateNew();
+ vector1.initializeChildrenFromFields(
+ Arrays.asList(Field.nullable("child", new ArrowType.Int(32, true))));
+
+ vector2.allocateNew();
+ vector2.initializeChildrenFromFields(
+ Arrays.asList(Field.nullable("child", new ArrowType.Int(64, true))));
+
+ RangeEqualsVisitor visitor = new RangeEqualsVisitor(vector1, vector2);
+ assertFalse(visitor.rangeEquals(new Range(0, 0, 0)));
+ }
+ }
+
+ @Test
+ public void testListVectorRangeEquals() {
+ try (final ListVector vector1 = ListVector.empty("list", allocator);
+ final ListVector vector2 = ListVector.empty("list", allocator);) {
+
+ UnionListWriter writer1 = vector1.getWriter();
+ writer1.allocate();
+
+ //set some values
+ writeListVector(writer1, new int[] {1, 2});
+ writeListVector(writer1, new int[] {3, 4});
+ writeListVector(writer1, new int[] {5, 6});
+ writeListVector(writer1, new int[] {7, 8});
+ writeListVector(writer1, new int[] {9, 10});
+ writer1.setValueCount(5);
+
+ UnionListWriter writer2 = vector2.getWriter();
+ writer2.allocate();
+
+ //set some values
+ writeListVector(writer2, new int[] {0, 0});
+ writeListVector(writer2, new int[] {3, 4});
+ writeListVector(writer2, new int[] {5, 6});
+ writeListVector(writer2, new int[] {7, 8});
+ writeListVector(writer2, new int[] {0, 0});
+ writer2.setValueCount(5);
+
+ RangeEqualsVisitor visitor = new RangeEqualsVisitor(vector1, vector2);
+ assertTrue(visitor.rangeEquals(new Range(1, 1, 3)));
+ }
+ }
+
+ @Test
+ public void testFixedSizeListVectorRangeEquals() {
+ try (final FixedSizeListVector vector1 = FixedSizeListVector.empty("list", 2, allocator);
+ final FixedSizeListVector vector2 = FixedSizeListVector.empty("list", 2, allocator);) {
+
+ UnionFixedSizeListWriter writer1 = vector1.getWriter();
+ writer1.allocate();
+
+ //set some values
+ writeFixedSizeListVector(writer1, new int[] {1, 2});
+ writeFixedSizeListVector(writer1, new int[] {3, 4});
+ writeFixedSizeListVector(writer1, new int[] {5, 6});
+ writeFixedSizeListVector(writer1, new int[] {7, 8});
+ writeFixedSizeListVector(writer1, new int[] {9, 10});
+ writer1.setValueCount(5);
+
+ UnionFixedSizeListWriter writer2 = vector2.getWriter();
+ writer2.allocate();
+
+ //set some values
+ writeFixedSizeListVector(writer2, new int[] {0, 0});
+ writeFixedSizeListVector(writer2, new int[] {3, 4});
+ writeFixedSizeListVector(writer2, new int[] {5, 6});
+ writeFixedSizeListVector(writer2, new int[] {7, 8});
+ writeFixedSizeListVector(writer2, new int[] {0, 0});
+ writer2.setValueCount(5);
+
+ RangeEqualsVisitor visitor = new RangeEqualsVisitor(vector1, vector2);
+ assertTrue(visitor.rangeEquals(new Range(1, 1, 3)));
+ assertFalse(visitor.rangeEquals(new Range(0, 0, 5)));
+ }
+ }
+
+ @Test
+ public void testLargeVariableWidthVectorRangeEquals() {
+ try (final LargeVarCharVector vector1 = new LargeVarCharVector("vector1", allocator);
+ final LargeVarCharVector vector2 = new LargeVarCharVector("vector2", allocator)) {
+ setVector(vector1, "aaa", "bbb", "ccc", null, "ddd");
+ setVector(vector2, "ccc", "aaa", "bbb", null, "ddd");
+
+ RangeEqualsVisitor visitor = new RangeEqualsVisitor(vector1, vector2,
+ (v1, v2) -> new TypeEqualsVisitor(v2, /*check name*/ false, /*check metadata*/ false).equals(v1));
+
+ assertFalse(visitor.rangeEquals(new Range(/*left start*/ 0, /*right start*/ 0, /*length*/ 1)));
+ assertTrue(visitor.rangeEquals(new Range(/*left start*/ 0, /*right start*/ 1, /*length*/ 1)));
+ assertFalse(visitor.rangeEquals(new Range(/*left start*/ 0, /*right start*/ 0, /*length*/ 3)));
+ assertTrue(visitor.rangeEquals(new Range(/*left start*/ 0, /*right start*/ 1, /*length*/ 2)));
+ assertTrue(visitor.rangeEquals(new Range(/*left start*/ 3, /*right start*/ 3, /*length*/ 1)));
+ assertTrue(visitor.rangeEquals(new Range(/*left start*/ 3, /*right start*/ 3, /*length*/ 2)));
+ assertFalse(visitor.rangeEquals(new Range(/*left start*/ 2, /*right start*/ 2, /*length*/ 2)));
+ }
+ }
+
+ @Test
+ public void testStructVectorRangeEquals() {
+ try (final StructVector vector1 = StructVector.empty("struct", allocator);
+ final StructVector vector2 = StructVector.empty("struct", allocator);) {
+ vector1.addOrGet("f0", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class);
+ vector1.addOrGet("f1", FieldType.nullable(new ArrowType.Int(64, true)), BigIntVector.class);
+ vector2.addOrGet("f0", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class);
+ vector2.addOrGet("f1", FieldType.nullable(new ArrowType.Int(64, true)), BigIntVector.class);
+
+ NullableStructWriter writer1 = vector1.getWriter();
+ writer1.allocate();
+
+ writeStructVector(writer1, 1, 10L);
+ writeStructVector(writer1, 2, 20L);
+ writeStructVector(writer1, 3, 30L);
+ writeStructVector(writer1, 4, 40L);
+ writeStructVector(writer1, 5, 50L);
+ writer1.setValueCount(5);
+
+ NullableStructWriter writer2 = vector2.getWriter();
+ writer2.allocate();
+
+ writeStructVector(writer2, 0, 00L);
+ writeStructVector(writer2, 2, 20L);
+ writeStructVector(writer2, 3, 30L);
+ writeStructVector(writer2, 4, 40L);
+ writeStructVector(writer2, 0, 0L);
+ writer2.setValueCount(5);
+
+ RangeEqualsVisitor visitor = new RangeEqualsVisitor(vector1, vector2);
+ assertTrue(visitor.rangeEquals(new Range(1, 1, 3)));
+ }
+ }
+
+ @Test
+ public void testUnionVectorRangeEquals() {
+ try (final UnionVector vector1 = new UnionVector("union", allocator, /* field type */ null, /* call-back */ null);
+ final UnionVector vector2 =
+ new UnionVector("union", allocator, /* field type */ null, /* call-back */ null);) {
+
+ final NullableUInt4Holder uInt4Holder = new NullableUInt4Holder();
+ uInt4Holder.value = 10;
+ uInt4Holder.isSet = 1;
+
+ final NullableIntHolder intHolder = new NullableIntHolder();
+ uInt4Holder.value = 20;
+ uInt4Holder.isSet = 1;
+
+ vector1.setType(0, Types.MinorType.UINT4);
+ vector1.setSafe(0, uInt4Holder);
+
+ vector1.setType(1, Types.MinorType.INT);
+ vector1.setSafe(1, intHolder);
+
+ vector1.setType(2, Types.MinorType.INT);
+ vector1.setSafe(2, intHolder);
+ vector1.setValueCount(3);
+
+ vector2.setType(0, Types.MinorType.UINT4);
+ vector2.setSafe(0, uInt4Holder);
+
+ vector2.setType(1, Types.MinorType.INT);
+ vector2.setSafe(1, intHolder);
+
+ vector2.setType(2, Types.MinorType.INT);
+ vector2.setSafe(2, intHolder);
+ vector2.setValueCount(3);
+
+ RangeEqualsVisitor visitor = new RangeEqualsVisitor(vector1, vector2);
+ assertTrue(visitor.rangeEquals(new Range(1, 1, 2)));
+ }
+ }
+
+ /**
+ * Test comparing two union vectors.
+ * The two vectors are different in total, but have a range with equal values.
+ */
+ @Test
+ public void testUnionVectorSubRangeEquals() {
+ try (final UnionVector vector1 = new UnionVector("union", allocator, null, null);
+ final UnionVector vector2 = new UnionVector("union", allocator, null, null);) {
+
+ final NullableUInt4Holder uInt4Holder = new NullableUInt4Holder();
+ uInt4Holder.value = 10;
+ uInt4Holder.isSet = 1;
+
+ final NullableIntHolder intHolder = new NullableIntHolder();
+ intHolder.value = 20;
+ intHolder.isSet = 1;
+
+ vector1.setType(0, Types.MinorType.UINT4);
+ vector1.setSafe(0, uInt4Holder);
+
+ vector1.setType(1, Types.MinorType.INT);
+ vector1.setSafe(1, intHolder);
+
+ vector1.setType(2, Types.MinorType.INT);
+ vector1.setSafe(2, intHolder);
+
+ vector1.setType(3, Types.MinorType.INT);
+ vector1.setSafe(3, intHolder);
+
+ vector1.setValueCount(4);
+
+ vector2.setType(0, Types.MinorType.UINT4);
+ vector2.setSafe(0, uInt4Holder);
+
+ vector2.setType(1, Types.MinorType.INT);
+ vector2.setSafe(1, intHolder);
+
+ vector2.setType(2, Types.MinorType.INT);
+ vector2.setSafe(2, intHolder);
+
+ vector2.setType(3, Types.MinorType.UINT4);
+ vector2.setSafe(3, uInt4Holder);
+
+ vector2.setValueCount(4);
+
+ RangeEqualsVisitor visitor = new RangeEqualsVisitor(vector1, vector2);
+ assertFalse(visitor.rangeEquals(new Range(0, 0, 4)));
+ assertTrue(visitor.rangeEquals(new Range(1, 1, 2)));
+ }
+ }
+
+ @Test
+ public void testDenseUnionVectorEquals() {
+ final NullableIntHolder intHolder = new NullableIntHolder();
+ intHolder.isSet = 1;
+ intHolder.value = 100;
+
+ final NullableBigIntHolder bigIntHolder = new NullableBigIntHolder();
+ bigIntHolder.isSet = 1;
+ bigIntHolder.value = 200L;
+
+ final NullableFloat4Holder float4Holder = new NullableFloat4Holder();
+ float4Holder.isSet = 1;
+ float4Holder.value = 400F;
+
+ final NullableFloat8Holder float8Holder = new NullableFloat8Holder();
+ float8Holder.isSet = 1;
+ float8Holder.value = 800D;
+
+ try (DenseUnionVector vector1 = new DenseUnionVector("vector1", allocator, null, null);
+ DenseUnionVector vector2 = new DenseUnionVector("vector2", allocator, null, null)) {
+ vector1.allocateNew();
+ vector2.allocateNew();
+
+ // populate vector1: {100, 200L, null, 400F, 800D}
+ byte intTypeId = vector1.registerNewTypeId(Field.nullable("int", Types.MinorType.INT.getType()));
+ byte longTypeId = vector1.registerNewTypeId(Field.nullable("long", Types.MinorType.BIGINT.getType()));
+ byte floatTypeId = vector1.registerNewTypeId(Field.nullable("float", Types.MinorType.FLOAT4.getType()));
+ byte doubleTypeId = vector1.registerNewTypeId(Field.nullable("double", Types.MinorType.FLOAT8.getType()));
+
+ vector1.setTypeId(0, intTypeId);
+ vector1.setSafe(0, intHolder);
+
+ vector1.setTypeId(1, longTypeId);
+ vector1.setSafe(1, bigIntHolder);
+
+ vector1.setTypeId(3, floatTypeId);
+ vector1.setSafe(3, float4Holder);
+
+ vector1.setTypeId(4, doubleTypeId);
+ vector1.setSafe(4, float8Holder);
+
+ vector1.setValueCount(5);
+
+ // populate vector2: {400F, null, 200L, null, 400F, 800D, 100}
+ intTypeId = vector2.registerNewTypeId(Field.nullable("int", Types.MinorType.INT.getType()));
+ longTypeId = vector2.registerNewTypeId(Field.nullable("long", Types.MinorType.BIGINT.getType()));
+ floatTypeId = vector2.registerNewTypeId(Field.nullable("float", Types.MinorType.FLOAT4.getType()));
+ doubleTypeId = vector2.registerNewTypeId(Field.nullable("double", Types.MinorType.FLOAT8.getType()));
+
+ vector2.setTypeId(0, floatTypeId);
+ vector2.setSafe(0, float4Holder);
+
+ vector2.setTypeId(2, longTypeId);
+ vector2.setSafe(2, bigIntHolder);
+
+ vector2.setTypeId(4, floatTypeId);
+ vector2.setSafe(4, float4Holder);
+
+ vector2.setTypeId(5, doubleTypeId);
+ vector2.setSafe(5, float8Holder);
+
+ vector2.setTypeId(6, intTypeId);
+ vector2.setSafe(6, intHolder);
+
+ vector2.setValueCount(7);
+
+ // compare ranges
+ TypeEqualsVisitor typeVisitor =
+ new TypeEqualsVisitor(vector2, /* check name */ false, /* check meta data */ true);
+ RangeEqualsVisitor equalsVisitor =
+ new RangeEqualsVisitor(vector1, vector2, (left, right) -> typeVisitor.equals(left));
+
+ // different ranges {100, 200L} != {400F, null}
+ assertFalse(equalsVisitor.rangeEquals(new Range(0, 0, 2)));
+
+ // different ranges without null {100, 200L} != {400F, null}
+ assertFalse(equalsVisitor.rangeEquals(new Range(3, 5, 2)));
+
+ // equal ranges {200L, null, 400F, 800D}
+ assertTrue(equalsVisitor.rangeEquals(new Range(1, 2, 4)));
+
+ // equal ranges without null {400F, 800D}
+ assertTrue(equalsVisitor.rangeEquals(new Range(3, 4, 2)));
+
+ // equal ranges with only null {null}
+ assertTrue(equalsVisitor.rangeEquals(new Range(2, 3, 1)));
+
+ // equal ranges with single element {100}
+ assertTrue(equalsVisitor.rangeEquals(new Range(0, 6, 1)));
+
+ // different ranges with single element {100} != {200L}
+ assertFalse(equalsVisitor.rangeEquals(new Range(0, 2, 1)));
+ }
+ }
+
+ @Ignore
+ @Test
+ public void testEqualsWithOutTypeCheck() {
+ try (final IntVector intVector = new IntVector("int", allocator);
+ final ZeroVector zeroVector = new ZeroVector("zero")) {
+
+ assertTrue(VectorEqualsVisitor.vectorEquals(intVector, zeroVector, null));
+ assertTrue(VectorEqualsVisitor.vectorEquals(zeroVector, intVector, null));
+ }
+ }
+
+ @Test
+ public void testFloat4ApproxEquals() {
+ try (final Float4Vector vector1 = new Float4Vector("float", allocator);
+ final Float4Vector vector2 = new Float4Vector("float", allocator);
+ final Float4Vector vector3 = new Float4Vector("float", allocator)) {
+
+ final float epsilon = 1.0E-6f;
+ setVector(vector1, 1.1f, 2.2f);
+ setVector(vector2, 1.1f + epsilon / 2, 2.2f + epsilon / 2);
+ setVector(vector3, 1.1f + epsilon * 2, 2.2f + epsilon * 2);
+
+ Range range = new Range(0, 0, vector1.getValueCount());
+
+ ApproxEqualsVisitor visitor12 = new ApproxEqualsVisitor(vector1, vector2, epsilon, epsilon);
+ assertTrue(visitor12.rangeEquals(range));
+
+ ApproxEqualsVisitor visitor13 = new ApproxEqualsVisitor(vector1, vector3, epsilon, epsilon);
+ assertFalse(visitor13.rangeEquals(range));
+ }
+ }
+
+ @Test
+ public void testFloat8ApproxEquals() {
+ try (final Float8Vector vector1 = new Float8Vector("float", allocator);
+ final Float8Vector vector2 = new Float8Vector("float", allocator);
+ final Float8Vector vector3 = new Float8Vector("float", allocator)) {
+
+ final float epsilon = 1.0E-6f;
+ setVector(vector1, 1.1, 2.2);
+ setVector(vector2, 1.1 + epsilon / 2, 2.2 + epsilon / 2);
+ setVector(vector3, 1.1 + epsilon * 2, 2.2 + epsilon * 2);
+
+ Range range = new Range(0, 0, vector1.getValueCount());
+ assertTrue(new ApproxEqualsVisitor(vector1, vector2, epsilon, epsilon).rangeEquals(range));
+ assertFalse(new ApproxEqualsVisitor(vector1, vector3, epsilon, epsilon).rangeEquals(range));
+ }
+ }
+
+ @Test
+ public void testStructVectorApproxEquals() {
+ try (final StructVector right = StructVector.empty("struct", allocator);
+ final StructVector left1 = StructVector.empty("struct", allocator);
+ final StructVector left2 = StructVector.empty("struct", allocator)) {
+ right.addOrGet("f0",
+ FieldType.nullable(new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE)), Float4Vector.class);
+ right.addOrGet("f1",
+ FieldType.nullable(new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE)), Float8Vector.class);
+ left1.addOrGet("f0",
+ FieldType.nullable(new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE)), Float4Vector.class);
+ left1.addOrGet("f1",
+ FieldType.nullable(new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE)), Float8Vector.class);
+ left2.addOrGet("f0",
+ FieldType.nullable(new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE)), Float4Vector.class);
+ left2.addOrGet("f1",
+ FieldType.nullable(new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE)), Float8Vector.class);
+
+ final float epsilon = 1.0E-6f;
+
+ NullableStructWriter rightWriter = right.getWriter();
+ rightWriter.allocate();
+ writeStructVector(rightWriter, 1.1f, 2.2);
+ writeStructVector(rightWriter, 2.02f, 4.04);
+ rightWriter.setValueCount(2);
+
+ NullableStructWriter leftWriter1 = left1.getWriter();
+ leftWriter1.allocate();
+ writeStructVector(leftWriter1, 1.1f + epsilon / 2, 2.2 + epsilon / 2);
+ writeStructVector(leftWriter1, 2.02f - epsilon / 2, 4.04 - epsilon / 2);
+ leftWriter1.setValueCount(2);
+
+ NullableStructWriter leftWriter2 = left2.getWriter();
+ leftWriter2.allocate();
+ writeStructVector(leftWriter2, 1.1f + epsilon * 2, 2.2 + epsilon * 2);
+ writeStructVector(leftWriter2, 2.02f - epsilon * 2, 4.04 - epsilon * 2);
+ leftWriter2.setValueCount(2);
+
+ Range range = new Range(0, 0, right.getValueCount());
+ assertTrue(new ApproxEqualsVisitor(left1, right, epsilon, epsilon).rangeEquals(range));
+ assertFalse(new ApproxEqualsVisitor(left2, right, epsilon, epsilon).rangeEquals(range));
+ }
+ }
+
+ @Test
+ public void testUnionVectorApproxEquals() {
+ try (final UnionVector right = new UnionVector("union", allocator, /* field type */ null, /* call-back */ null);
+ final UnionVector left1 = new UnionVector("union", allocator, /* field type */ null, /* call-back */ null);
+ final UnionVector left2 = new UnionVector("union", allocator, /* field type */ null, /* call-back */ null);) {
+
+ final NullableFloat4Holder float4Holder = new NullableFloat4Holder();
+ float4Holder.value = 1.01f;
+ float4Holder.isSet = 1;
+
+ final NullableFloat8Holder float8Holder = new NullableFloat8Holder();
+ float8Holder.value = 2.02f;
+ float8Holder.isSet = 1;
+
+ final float epsilon = 1.0E-6f;
+
+ right.setType(0, Types.MinorType.FLOAT4);
+ right.setSafe(0, float4Holder);
+ right.setType(1, Types.MinorType.FLOAT8);
+ right.setSafe(1, float8Holder);
+ right.setValueCount(2);
+
+ float4Holder.value += epsilon / 2;
+ float8Holder.value += epsilon / 2;
+
+ left1.setType(0, Types.MinorType.FLOAT4);
+ left1.setSafe(0, float4Holder);
+ left1.setType(1, Types.MinorType.FLOAT8);
+ left1.setSafe(1, float8Holder);
+ left1.setValueCount(2);
+
+ float4Holder.value += epsilon * 2;
+ float8Holder.value += epsilon * 2;
+
+ left2.setType(0, Types.MinorType.FLOAT4);
+ left2.setSafe(0, float4Holder);
+ left2.setType(1, Types.MinorType.FLOAT8);
+ left2.setSafe(1, float8Holder);
+ left2.setValueCount(2);
+
+ Range range = new Range(0, 0, right.getValueCount());
+ assertTrue(new ApproxEqualsVisitor(left1, right, epsilon, epsilon).rangeEquals(range));
+ assertFalse(new ApproxEqualsVisitor(left2, right, epsilon, epsilon).rangeEquals(range));
+ }
+ }
+
+ @Test
+ public void testDenseUnionVectorApproxEquals() {
+ final NullableFloat4Holder float4Holder = new NullableFloat4Holder();
+ float4Holder.isSet = 1;
+
+ final NullableFloat8Holder float8Holder = new NullableFloat8Holder();
+ float8Holder.isSet = 1;
+
+ final float floatEpsilon = 0.02F;
+ final double doubleEpsilon = 0.02;
+
+ try (final DenseUnionVector vector1 = new DenseUnionVector("vector1", allocator, null, null);
+ final DenseUnionVector vector2 = new DenseUnionVector("vector2", allocator, null, null);
+ final DenseUnionVector vector3 = new DenseUnionVector("vector2", allocator, null, null)) {
+
+ vector1.allocateNew();
+ vector2.allocateNew();
+ vector3.allocateNew();
+
+ // populate vector1: {1.0f, 2.0D}
+ byte floatTypeId = vector1.registerNewTypeId(Field.nullable("float", Types.MinorType.FLOAT4.getType()));
+ byte doubleTypeId = vector1.registerNewTypeId(Field.nullable("double", Types.MinorType.FLOAT8.getType()));
+
+ float4Holder.value = 1.0f;
+ vector1.setTypeId(0, floatTypeId);
+ vector1.setSafe(0, float4Holder);
+ float8Holder.value = 2.0D;
+ vector1.setTypeId(1, doubleTypeId);
+ vector1.setSafe(1, float8Holder);
+ vector1.setValueCount(2);
+
+ // populate vector2: {1.01f, 2.01D}
+ floatTypeId = vector2.registerNewTypeId(Field.nullable("float", Types.MinorType.FLOAT4.getType()));
+ doubleTypeId = vector2.registerNewTypeId(Field.nullable("double", Types.MinorType.FLOAT8.getType()));
+
+ float4Holder.value = 1.01f;
+ vector2.setTypeId(0, floatTypeId);
+ vector2.setSafe(0, float4Holder);
+ float8Holder.value = 2.01D;
+ vector2.setTypeId(1, doubleTypeId);
+ vector2.setSafe(1, float8Holder);
+ vector2.setValueCount(2);
+
+ // populate vector3: {1.05f, 2.05D}
+ floatTypeId = vector3.registerNewTypeId(Field.nullable("float", Types.MinorType.FLOAT4.getType()));
+ doubleTypeId = vector3.registerNewTypeId(Field.nullable("double", Types.MinorType.FLOAT8.getType()));
+
+ float4Holder.value = 1.05f;
+ vector3.setTypeId(0, floatTypeId);
+ vector3.setSafe(0, float4Holder);
+ float8Holder.value = 2.05D;
+ vector3.setTypeId(1, doubleTypeId);
+ vector3.setSafe(1, float8Holder);
+ vector3.setValueCount(2);
+
+ // verify comparison results
+ Range range = new Range(0, 0, 2);
+
+ // compare vector1 and vector2
+ ApproxEqualsVisitor approxEqualsVisitor = new ApproxEqualsVisitor(
+ vector1, vector2,
+ new ValueEpsilonEqualizers.Float4EpsilonEqualizer(floatEpsilon),
+ new ValueEpsilonEqualizers.Float8EpsilonEqualizer(doubleEpsilon),
+ (v1, v2) -> new TypeEqualsVisitor(v2, /* check name */ false, /* check meta */ true).equals(v1));
+ assertTrue(approxEqualsVisitor.rangeEquals(range));
+
+ // compare vector1 and vector3
+ approxEqualsVisitor = new ApproxEqualsVisitor(
+ vector1, vector3,
+ new ValueEpsilonEqualizers.Float4EpsilonEqualizer(floatEpsilon),
+ new ValueEpsilonEqualizers.Float8EpsilonEqualizer(doubleEpsilon),
+ (v1, v2) -> new TypeEqualsVisitor(v2, /* check name */ false, /* check meta */ true).equals(v1));
+ assertFalse(approxEqualsVisitor.rangeEquals(range));
+ }
+ }
+
+ @Test
+ public void testListVectorApproxEquals() {
+ try (final ListVector right = ListVector.empty("list", allocator);
+ final ListVector left1 = ListVector.empty("list", allocator);
+ final ListVector left2 = ListVector.empty("list", allocator);) {
+
+ final float epsilon = 1.0E-6f;
+
+ UnionListWriter rightWriter = right.getWriter();
+ rightWriter.allocate();
+ writeListVector(rightWriter, new double[] {1, 2});
+ writeListVector(rightWriter, new double[] {1.01, 2.02});
+ rightWriter.setValueCount(2);
+
+ UnionListWriter leftWriter1 = left1.getWriter();
+ leftWriter1.allocate();
+ writeListVector(leftWriter1, new double[] {1, 2});
+ writeListVector(leftWriter1, new double[] {1.01 + epsilon / 2, 2.02 - epsilon / 2});
+ leftWriter1.setValueCount(2);
+
+ UnionListWriter leftWriter2 = left2.getWriter();
+ leftWriter2.allocate();
+ writeListVector(leftWriter2, new double[] {1, 2});
+ writeListVector(leftWriter2, new double[] {1.01 + epsilon * 2, 2.02 - epsilon * 2});
+ leftWriter2.setValueCount(2);
+
+ Range range = new Range(0, 0, right.getValueCount());
+ assertTrue(new ApproxEqualsVisitor(left1, right, epsilon, epsilon).rangeEquals(range));
+ assertFalse(new ApproxEqualsVisitor(left2, right, epsilon, epsilon).rangeEquals(range));
+ }
+ }
+
+ private void writeStructVector(NullableStructWriter writer, int value1, long value2) {
+ writer.start();
+ writer.integer("f0").writeInt(value1);
+ writer.bigInt("f1").writeBigInt(value2);
+ writer.end();
+ }
+
+ private void writeStructVector(NullableStructWriter writer, float value1, double value2) {
+ writer.start();
+ writer.float4("f0").writeFloat4(value1);
+ writer.float8("f1").writeFloat8(value2);
+ writer.end();
+ }
+
+ private void writeListVector(UnionListWriter writer, int[] values) {
+ writer.startList();
+ for (int v: values) {
+ writer.integer().writeInt(v);
+ }
+ writer.endList();
+ }
+
+ private void writeFixedSizeListVector(UnionFixedSizeListWriter writer, int[] values) {
+ writer.startList();
+ for (int v: values) {
+ writer.integer().writeInt(v);
+ }
+ writer.endList();
+ }
+
+ private void writeListVector(UnionListWriter writer, double[] values) {
+ writer.startList();
+ for (double v: values) {
+ writer.float8().writeFloat8(v);
+ }
+ writer.endList();
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/compare/TestTypeEqualsVisitor.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/compare/TestTypeEqualsVisitor.java
new file mode 100644
index 000000000..c0a3bd89d
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/compare/TestTypeEqualsVisitor.java
@@ -0,0 +1,185 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.compare;
+
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+import java.nio.charset.Charset;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.BigIntVector;
+import org.apache.arrow.vector.Float4Vector;
+import org.apache.arrow.vector.Float8Vector;
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.VarCharVector;
+import org.apache.arrow.vector.complex.DenseUnionVector;
+import org.apache.arrow.vector.complex.ListVector;
+import org.apache.arrow.vector.complex.StructVector;
+import org.apache.arrow.vector.complex.UnionVector;
+import org.apache.arrow.vector.types.Types;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestTypeEqualsVisitor {
+
+ private BufferAllocator allocator;
+
+ @Before
+ public void init() {
+ allocator = new RootAllocator(Long.MAX_VALUE);
+ }
+
+ private static final Charset utf8Charset = Charset.forName("UTF-8");
+ private static final byte[] STR1 = "AAAAA1".getBytes(utf8Charset);
+ private static final byte[] STR2 = "BBBBBBBBB2".getBytes(utf8Charset);
+ private static final byte[] STR3 = "CCCC3".getBytes(utf8Charset);
+
+ @After
+ public void terminate() throws Exception {
+ allocator.close();
+ }
+
+ @Test
+ public void testTypeEqualsWithName() {
+ try (final IntVector right = new IntVector("int", allocator);
+ final IntVector left1 = new IntVector("int", allocator);
+ final IntVector left2 = new IntVector("int2", allocator)) {
+
+ TypeEqualsVisitor visitor = new TypeEqualsVisitor(right);
+ assertTrue(visitor.equals(left1));
+ assertFalse(visitor.equals(left2));
+ }
+ }
+
+ @Test
+ public void testTypeEqualsWithMetadata() {
+ Map<String, String> metadata = new HashMap<>();
+ metadata.put("key1", "value1");
+ FieldType typeWithoutMeta = new FieldType(true, new ArrowType.Int(32, true),
+ null, null);
+ FieldType typeWithMeta = new FieldType(true, new ArrowType.Int(32, true),
+ null, metadata);
+
+ try (IntVector right = (IntVector) typeWithoutMeta.createNewSingleVector("int", allocator, null);
+ IntVector left1 = (IntVector) typeWithoutMeta.createNewSingleVector("int", allocator, null);
+ IntVector left2 = (IntVector) typeWithMeta.createNewSingleVector("int", allocator, null)) {
+
+ TypeEqualsVisitor visitor = new TypeEqualsVisitor(right);
+ assertTrue(visitor.equals(left1));
+ assertFalse(visitor.equals(left2));
+ }
+ }
+
+ @Test
+ public void testListTypeEquals() {
+ try (final ListVector right = ListVector.empty("list", allocator);
+ final ListVector left1 = ListVector.empty("list", allocator);
+ final ListVector left2 = ListVector.empty("list", allocator)) {
+
+ right.addOrGetVector(FieldType.nullable(new ArrowType.Utf8()));
+ left1.addOrGetVector(FieldType.nullable(new ArrowType.Utf8()));
+ left2.addOrGetVector(FieldType.nullable(new ArrowType.FixedSizeBinary(2)));
+
+ TypeEqualsVisitor visitor = new TypeEqualsVisitor(right);
+ assertTrue(visitor.equals(left1));
+ assertFalse(visitor.equals(left2));
+ }
+ }
+
+ @Test
+ public void testStructTypeEquals() {
+ try (final StructVector right = StructVector.empty("struct", allocator);
+ final StructVector left1 = StructVector.empty("struct", allocator);
+ final StructVector left2 = StructVector.empty("struct", allocator)) {
+
+ right.addOrGet("child", FieldType.nullable(new ArrowType.Utf8()), VarCharVector.class);
+ left1.addOrGet("child", FieldType.nullable(new ArrowType.Utf8()), VarCharVector.class);
+ left2.addOrGet("child2", FieldType.nullable(new ArrowType.Utf8()), VarCharVector.class);
+
+ TypeEqualsVisitor visitor = new TypeEqualsVisitor(right);
+ assertTrue(visitor.equals(left1));
+ assertFalse(visitor.equals(left2));
+ }
+ }
+
+ @Test
+ public void testUnionTypeEquals() {
+ try (final UnionVector right = new UnionVector("union", allocator, /* field type */ null, /* call-back */ null);
+ final UnionVector left1 = new UnionVector("union", allocator, /* field type */ null, /* call-back */ null);
+ final UnionVector left2 = new UnionVector("union", allocator, /* field type */ null, /* call-back */ null)) {
+
+ right.addVector(new IntVector("int", allocator));
+ left1.addVector(new IntVector("int", allocator));
+ left2.addVector(new BigIntVector("bigint", allocator));
+
+ TypeEqualsVisitor visitor = new TypeEqualsVisitor(right);
+ assertTrue(visitor.equals(left1));
+ assertFalse(visitor.equals(left2));
+ }
+ }
+
+ @Test
+ public void testDenseUnionTypeEquals() {
+ try (DenseUnionVector vector1 = new DenseUnionVector("vector1", allocator, null, null);
+ DenseUnionVector vector2 = new DenseUnionVector("vector2", allocator, null, null)) {
+ vector1.allocateNew();
+ vector2.allocateNew();
+
+ // set children for vector1
+ byte intTypeId = vector1.registerNewTypeId(Field.nullable("int", Types.MinorType.INT.getType()));
+ byte longTypeId = vector1.registerNewTypeId(Field.nullable("long", Types.MinorType.BIGINT.getType()));
+ byte floatTypeId = vector1.registerNewTypeId(Field.nullable("float", Types.MinorType.FLOAT4.getType()));
+ byte doubleTypeId = vector1.registerNewTypeId(Field.nullable("double", Types.MinorType.FLOAT8.getType()));
+
+ vector1.addVector(floatTypeId, new Float4Vector("", allocator));
+ vector1.addVector(longTypeId, new BigIntVector("", allocator));
+ vector1.addVector(intTypeId, new IntVector("", allocator));
+ vector1.addVector(doubleTypeId, new Float8Vector("", allocator));
+
+ // set children for vector2
+ intTypeId = vector2.registerNewTypeId(Field.nullable("int", Types.MinorType.INT.getType()));
+ longTypeId = vector2.registerNewTypeId(Field.nullable("long", Types.MinorType.BIGINT.getType()));
+ floatTypeId = vector2.registerNewTypeId(Field.nullable("float", Types.MinorType.FLOAT4.getType()));
+ doubleTypeId = vector2.registerNewTypeId(Field.nullable("double", Types.MinorType.FLOAT8.getType()));
+
+ // add vectors in a different order
+ vector2.addVector(intTypeId, new IntVector("", allocator));
+ vector2.addVector(floatTypeId, new Float4Vector("", allocator));
+ vector2.addVector(doubleTypeId, new Float8Vector("", allocator));
+ vector2.addVector(longTypeId, new BigIntVector("", allocator));
+
+ // compare ranges
+ TypeEqualsVisitor typeVisitor =
+ new TypeEqualsVisitor(vector2, /* check name */ false, /* check meta data */ true);
+ assertTrue(typeVisitor.equals(vector1));
+
+ // if we check names, the types should be different
+ typeVisitor =
+ new TypeEqualsVisitor(vector2, /* check name */ true, /* check meta data */ true);
+ assertFalse(typeVisitor.equals(vector1));
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestComplexCopier.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestComplexCopier.java
new file mode 100644
index 000000000..f314a98ee
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestComplexCopier.java
@@ -0,0 +1,763 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.complex.impl;
+
+import static org.junit.Assert.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+
+import java.math.BigDecimal;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.DecimalVector;
+import org.apache.arrow.vector.compare.VectorEqualsVisitor;
+import org.apache.arrow.vector.complex.FixedSizeListVector;
+import org.apache.arrow.vector.complex.ListVector;
+import org.apache.arrow.vector.complex.MapVector;
+import org.apache.arrow.vector.complex.StructVector;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.complex.writer.BaseWriter;
+import org.apache.arrow.vector.complex.writer.BaseWriter.StructWriter;
+import org.apache.arrow.vector.complex.writer.FieldWriter;
+import org.apache.arrow.vector.holders.DecimalHolder;
+import org.apache.arrow.vector.types.Types;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.DecimalUtility;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestComplexCopier {
+
+ private BufferAllocator allocator;
+
+ private static final int COUNT = 100;
+
+ @Before
+ public void init() {
+ allocator = new RootAllocator(Long.MAX_VALUE);
+ }
+
+ @After
+ public void terminate() throws Exception {
+ allocator.close();
+ }
+
+ @Test
+ public void testCopyFixedSizeListVector() {
+ try (FixedSizeListVector from = FixedSizeListVector.empty("v", 3, allocator);
+ FixedSizeListVector to = FixedSizeListVector.empty("v", 3, allocator)) {
+
+ from.addOrGetVector(FieldType.nullable(Types.MinorType.INT.getType()));
+ to.addOrGetVector(FieldType.nullable(Types.MinorType.INT.getType()));
+
+ // populate from vector
+ UnionFixedSizeListWriter writer = from.getWriter();
+ for (int i = 0; i < COUNT; i++) {
+ writer.startList();
+ writer.integer().writeInt(i);
+ writer.integer().writeInt(i * 2);
+ writer.integer().writeInt(i * 3);
+ writer.endList();
+ }
+ from.setValueCount(COUNT);
+ to.setValueCount(COUNT);
+
+ // copy values
+ FieldReader in = from.getReader();
+ FieldWriter out = to.getWriter();
+ for (int i = 0; i < COUNT; i++) {
+ in.setPosition(i);
+ out.setPosition(i);
+ ComplexCopier.copy(in, out);
+ }
+
+ // validate equals
+ assertTrue(VectorEqualsVisitor.vectorEquals(from, to));
+
+ }
+ }
+
+ @Test
+ public void testInvalidCopyFixedSizeListVector() {
+ try (FixedSizeListVector from = FixedSizeListVector.empty("v", 3, allocator);
+ FixedSizeListVector to = FixedSizeListVector.empty("v", 2, allocator)) {
+
+ from.addOrGetVector(FieldType.nullable(Types.MinorType.INT.getType()));
+ to.addOrGetVector(FieldType.nullable(Types.MinorType.INT.getType()));
+
+ // populate from vector
+ UnionFixedSizeListWriter writer = from.getWriter();
+ for (int i = 0; i < COUNT; i++) {
+ writer.startList();
+ writer.integer().writeInt(i);
+ writer.integer().writeInt(i * 2);
+ writer.integer().writeInt(i * 3);
+ writer.endList();
+ }
+ from.setValueCount(COUNT);
+ to.setValueCount(COUNT);
+
+ // copy values
+ FieldReader in = from.getReader();
+ FieldWriter out = to.getWriter();
+ IllegalStateException e = assertThrows(IllegalStateException.class,
+ () -> ComplexCopier.copy(in, out));
+ assertTrue(e.getMessage().contains("greater than listSize"));
+ }
+ }
+
+ @Test
+ public void testCopyMapVector() {
+ try (final MapVector from = MapVector.empty("v", allocator, false);
+ final MapVector to = MapVector.empty("v", allocator, false)) {
+
+ from.allocateNew();
+
+ UnionMapWriter mapWriter = from.getWriter();
+ for (int i = 0; i < COUNT; i++) {
+ mapWriter.setPosition(i);
+ mapWriter.startMap();
+ mapWriter.startEntry();
+ mapWriter.key().integer().writeInt(i);
+ mapWriter.value().integer().writeInt(i);
+ mapWriter.endEntry();
+ mapWriter.startEntry();
+ mapWriter.key().decimal().writeDecimal(BigDecimal.valueOf(i * 2));
+ mapWriter.value().decimal().writeDecimal(BigDecimal.valueOf(i * 2));
+ mapWriter.endEntry();
+ mapWriter.endMap();
+ }
+
+ from.setValueCount(COUNT);
+
+ // copy values
+ FieldReader in = from.getReader();
+ FieldWriter out = to.getWriter();
+ for (int i = 0; i < COUNT; i++) {
+ in.setPosition(i);
+ out.setPosition(i);
+ ComplexCopier.copy(in, out);
+ }
+ to.setValueCount(COUNT);
+
+ // validate equals
+ assertTrue(VectorEqualsVisitor.vectorEquals(from, to));
+ }
+ }
+
+ @Test
+ public void testCopyListVector() {
+ try (ListVector from = ListVector.empty("v", allocator);
+ ListVector to = ListVector.empty("v", allocator)) {
+
+ UnionListWriter listWriter = from.getWriter();
+ listWriter.allocate();
+
+ for (int i = 0; i < COUNT; i++) {
+ listWriter.setPosition(i);
+ listWriter.startList();
+
+ listWriter.integer().writeInt(i);
+ listWriter.integer().writeInt(i * 2);
+
+ listWriter.list().startList();
+ listWriter.list().bigInt().writeBigInt(i);
+ listWriter.list().bigInt().writeBigInt(i * 2);
+ listWriter.list().bigInt().writeBigInt(i * 3);
+ listWriter.list().endList();
+
+ listWriter.list().startList();
+ listWriter.list().decimal().writeDecimal(BigDecimal.valueOf(i * 4));
+ listWriter.list().decimal().writeDecimal(BigDecimal.valueOf(i * 5));
+ listWriter.list().endList();
+ listWriter.endList();
+ }
+ from.setValueCount(COUNT);
+
+ // copy values
+ FieldReader in = from.getReader();
+ FieldWriter out = to.getWriter();
+ for (int i = 0; i < COUNT; i++) {
+ in.setPosition(i);
+ out.setPosition(i);
+ ComplexCopier.copy(in, out);
+ }
+
+ to.setValueCount(COUNT);
+
+ // validate equals
+ assertTrue(VectorEqualsVisitor.vectorEquals(from, to));
+
+ }
+ }
+
+ @Test
+ public void testCopyListVectorToANonEmptyList() {
+ try (ListVector from = ListVector.empty("v", allocator);
+ ListVector to = ListVector.empty("v", allocator)) {
+
+ UnionListWriter listWriter = from.getWriter();
+ listWriter.allocate();
+
+ for (int i = 0; i < COUNT; i++) {
+ listWriter.setPosition(i);
+ listWriter.startList();
+ listWriter.integer().writeInt(i);
+ listWriter.integer().writeInt(i * 2);
+ listWriter.endList();
+ }
+ from.setValueCount(COUNT);
+
+ // copy values
+ FieldReader in = from.getReader();
+ FieldWriter out = to.getWriter();
+ for (int i = 0; i < COUNT; i++) {
+ in.setPosition(i);
+ out.setPosition(i);
+ ComplexCopier.copy(in, out);
+ }
+ to.setValueCount(COUNT);
+ // validate equals
+ assertTrue(VectorEqualsVisitor.vectorEquals(from, to));
+
+ // Copy again to the target vector which is non-empty
+ for (int i = 0; i < COUNT; i++) {
+ in.setPosition(i);
+ out.setPosition(i);
+ ComplexCopier.copy(in, out);
+ }
+ to.setValueCount(COUNT);
+
+ // validate equals
+ assertTrue(VectorEqualsVisitor.vectorEquals(from, to));
+
+ // copy using copyFromSafe method
+ for (int i = 0; i < COUNT; i++) {
+ to.copyFromSafe(i, i, from);
+ }
+ to.setValueCount(COUNT);
+
+ // validate equals
+ assertTrue(VectorEqualsVisitor.vectorEquals(from, to));
+ }
+ }
+
+ @Test
+ public void testCopyListVectorWithNulls() {
+ try (ListVector from = ListVector.empty("v", allocator);
+ ListVector to = ListVector.empty("v", allocator)) {
+
+ UnionListWriter listWriter = from.getWriter();
+ listWriter.allocate();
+
+ // writer null, [null,i,null,i*2,null] alternatively
+ for (int i = 0; i < COUNT; i++) {
+ listWriter.setPosition(i);
+ if (i % 2 == 0) {
+ listWriter.writeNull();
+ continue;
+ }
+ listWriter.startList();
+ listWriter.integer().writeNull();
+ listWriter.integer().writeInt(i);
+ listWriter.integer().writeNull();
+ listWriter.integer().writeInt(i * 2);
+ listWriter.integer().writeNull();
+ listWriter.endList();
+ }
+ from.setValueCount(COUNT);
+
+ // copy values
+ FieldReader in = from.getReader();
+ FieldWriter out = to.getWriter();
+ for (int i = 0; i < COUNT; i++) {
+ in.setPosition(i);
+ out.setPosition(i);
+ ComplexCopier.copy(in, out);
+ }
+
+ to.setValueCount(COUNT);
+
+ // validate equals
+ assertTrue(VectorEqualsVisitor.vectorEquals(from, to));
+ }
+ }
+
+ @Test
+ public void testCopyListOfListVectorWithNulls() {
+ try (ListVector from = ListVector.empty("v", allocator);
+ ListVector to = ListVector.empty("v", allocator);) {
+
+ UnionListWriter listWriter = from.getWriter();
+ listWriter.allocate();
+
+ // write null, [null,[50,100,null,200],null,
+ // [null,50,null,100,null,200,null],null] alternatively
+ for (int i = 0; i < COUNT; i++) {
+ listWriter.setPosition(i);
+ if (i % 2 == 0) {
+ listWriter.writeNull();
+ continue;
+ }
+ listWriter.startList();
+ listWriter.list().writeNull();
+ listWriter.list().startList();
+ listWriter.list().bigInt().writeBigInt(50);
+ listWriter.list().bigInt().writeBigInt(100);
+ listWriter.list().bigInt().writeNull();
+ listWriter.list().bigInt().writeBigInt(200);
+ listWriter.list().endList();
+ listWriter.list().writeNull();
+ listWriter.list().startList();
+ listWriter.list().bigInt().writeNull();
+ listWriter.list().bigInt().writeBigInt(50);
+ listWriter.list().bigInt().writeNull();
+ listWriter.list().bigInt().writeBigInt(100);
+ listWriter.list().bigInt().writeNull();
+ listWriter.list().bigInt().writeBigInt(200);
+ listWriter.list().bigInt().writeNull();
+ listWriter.list().endList();
+ listWriter.list().writeNull();
+ listWriter.endList();
+ }
+ from.setValueCount(COUNT);
+
+ // copy values
+ FieldReader in = from.getReader();
+ FieldWriter out = to.getWriter();
+ for (int i = 0; i < COUNT; i++) {
+ in.setPosition(i);
+ out.setPosition(i);
+ ComplexCopier.copy(in, out);
+ }
+
+ to.setValueCount(COUNT);
+
+ // validate equals
+ assertTrue(VectorEqualsVisitor.vectorEquals(from, to));
+ }
+ }
+
+ @Test
+ public void testCopyListOStructVectorWithNulls() {
+ try (ListVector from = ListVector.empty("v", allocator);
+ ListVector to = ListVector.empty("v", allocator);) {
+
+ UnionListWriter listWriter = from.getWriter();
+ listWriter.allocate();
+
+ // write null, [null,{"f1":1,"f2":2},null,
+ // {"f1":1,"f2":2},null] alternatively
+ for (int i = 0; i < COUNT; i++) {
+ listWriter.setPosition(i);
+ if (i % 2 == 0) {
+ listWriter.writeNull();
+ continue;
+ }
+ listWriter.startList();
+ listWriter.struct().writeNull();
+ listWriter.struct().start();
+ listWriter.struct().integer("f1").writeInt(1);
+ listWriter.struct().integer("f2").writeInt(2);
+ listWriter.struct().integer("f3").writeNull();
+ listWriter.struct().end();
+ listWriter.struct().writeNull();
+ listWriter.struct().start();
+ listWriter.struct().integer("f1").writeInt(1);
+ listWriter.struct().integer("f2").writeInt(2);
+ listWriter.struct().integer("f3").writeNull();
+ listWriter.struct().end();
+ listWriter.struct().writeNull();
+ listWriter.endList();
+ }
+ from.setValueCount(COUNT);
+
+ // copy values
+ FieldReader in = from.getReader();
+ FieldWriter out = to.getWriter();
+ for (int i = 0; i < COUNT; i++) {
+ in.setPosition(i);
+ out.setPosition(i);
+ ComplexCopier.copy(in, out);
+ }
+
+ to.setValueCount(COUNT);
+
+ // validate equals
+ assertTrue(VectorEqualsVisitor.vectorEquals(from, to));
+ }
+ }
+
+ @Test
+ public void testCopyListOfListOfStructVectorWithNulls() {
+ try (ListVector from = ListVector.empty("v", allocator);
+ ListVector to = ListVector.empty("v", allocator);) {
+
+ UnionListWriter listWriter = from.getWriter();
+ listWriter.allocate();
+
+ // write null,
+ // [null,[{"f1":50},null,{"f1":100},null,{"f1":200}],null,
+ // [null,{"f1":50},null,{"f1":100},null,{"f1":200},null],null]
+ // alternatively
+ for (int i = 0; i < COUNT; i++) {
+ listWriter.setPosition(i);
+ if (i % 2 == 0) {
+ listWriter.writeNull();
+ continue;
+ }
+ listWriter.startList();
+ listWriter.list().writeNull();
+ listWriter.list().startList();
+ listWriter.list().struct().start();
+ listWriter.list().struct().bigInt("f1").writeBigInt(50);
+ listWriter.list().struct().end();
+ listWriter.list().struct().writeNull();
+ listWriter.list().struct().start();
+ listWriter.list().struct().bigInt("f1").writeBigInt(100);
+ listWriter.list().struct().end();
+ listWriter.list().struct().writeNull();
+ listWriter.list().struct().start();
+ listWriter.list().struct().bigInt("f1").writeBigInt(200);
+ listWriter.list().struct().end();
+ listWriter.list().endList();
+
+ listWriter.list().writeNull();
+
+ listWriter.list().startList();
+ listWriter.list().struct().writeNull();
+ listWriter.list().struct().start();
+ listWriter.list().struct().bigInt("f1").writeBigInt(50);
+ listWriter.list().struct().end();
+
+ listWriter.list().struct().writeNull();
+ listWriter.list().struct().start();
+ listWriter.list().struct().bigInt("f1").writeBigInt(100);
+ listWriter.list().struct().end();
+
+ listWriter.list().struct().writeNull();
+ listWriter.list().struct().start();
+ listWriter.list().struct().bigInt("f1").writeBigInt(200);
+ listWriter.list().struct().end();
+
+ listWriter.list().struct().writeNull();
+ listWriter.list().endList();
+
+ listWriter.list().writeNull();
+
+ listWriter.endList();
+ }
+ from.setValueCount(COUNT);
+
+ // copy values
+ FieldReader in = from.getReader();
+ FieldWriter out = to.getWriter();
+ for (int i = 0; i < COUNT; i++) {
+ in.setPosition(i);
+ out.setPosition(i);
+ ComplexCopier.copy(in, out);
+ }
+
+ to.setValueCount(COUNT);
+
+ // validate equals
+ assertTrue(VectorEqualsVisitor.vectorEquals(from, to));
+ }
+ }
+
+ @Test
+ public void testMapWithListValue() throws Exception {
+ try (MapVector from = MapVector.empty("map", allocator, false);
+ MapVector to = MapVector.empty("map", allocator, false)) {
+
+ UnionMapWriter mapWriter = from.getWriter();
+ BaseWriter.ListWriter valueWriter;
+
+ /* allocate memory */
+ mapWriter.allocate();
+
+ // write null, [{},
+ // {"value":[]},{"key":1,"value":[null,50,null,100,null,200,null]},
+ // null,{"key":2,"value":[null,75,null,125,null,150,null,175,null]}]
+ // alternatively
+ for (int i = 0; i < COUNT; i++) {
+ mapWriter.setPosition(i);
+ if (i % 2 == 0) {
+ mapWriter.writeNull();
+ continue;
+ }
+
+ mapWriter.startMap();
+
+ mapWriter.startEntry();
+ mapWriter.key().bigInt().writeNull();
+ mapWriter.value().list().writeNull();
+ mapWriter.endEntry();
+
+ mapWriter.startEntry();
+ mapWriter.key().bigInt().writeNull();
+ valueWriter = mapWriter.value().list();
+ valueWriter.startList();
+ valueWriter.endList();
+ mapWriter.endEntry();
+
+ mapWriter.startEntry();
+ mapWriter.key().bigInt().writeBigInt(1);
+ valueWriter = mapWriter.value().list();
+ valueWriter.startList();
+ valueWriter.bigInt().writeNull();
+ valueWriter.bigInt().writeBigInt(50);
+ valueWriter.bigInt().writeNull();
+ valueWriter.bigInt().writeBigInt(100);
+ valueWriter.bigInt().writeNull();
+ valueWriter.bigInt().writeBigInt(200);
+ valueWriter.bigInt().writeNull();
+ valueWriter.endList();
+ mapWriter.endEntry();
+
+ mapWriter.writeNull();
+
+ mapWriter.startEntry();
+ mapWriter.key().bigInt().writeBigInt(2);
+ valueWriter = mapWriter.value().list();
+ valueWriter.startList();
+ valueWriter.bigInt().writeNull();
+ valueWriter.bigInt().writeBigInt(75);
+ valueWriter.bigInt().writeNull();
+ valueWriter.bigInt().writeBigInt(125);
+ valueWriter.bigInt().writeNull();
+ valueWriter.bigInt().writeBigInt(150);
+ valueWriter.bigInt().writeNull();
+ valueWriter.bigInt().writeBigInt(175);
+ valueWriter.bigInt().writeNull();
+ valueWriter.endList();
+ mapWriter.endEntry();
+
+ mapWriter.endMap();
+ }
+ mapWriter.setValueCount(COUNT);
+
+ // copy values
+ FieldReader in = from.getReader();
+ FieldWriter out = to.getWriter();
+ for (int i = 0; i < COUNT; i++) {
+ in.setPosition(i);
+ out.setPosition(i);
+ ComplexCopier.copy(in, out);
+ }
+
+ to.setValueCount(COUNT);
+
+ // validate equals
+ assertTrue(VectorEqualsVisitor.vectorEquals(from, to));
+ }
+ }
+
+ @Test
+ public void testCopyFixedSizedListOfDecimalsVector() {
+ try (FixedSizeListVector from = FixedSizeListVector.empty("v", 4, allocator);
+ FixedSizeListVector to = FixedSizeListVector.empty("v", 4, allocator)) {
+ from.addOrGetVector(FieldType.nullable(new ArrowType.Decimal(3, 0, 128)));
+ to.addOrGetVector(FieldType.nullable(new ArrowType.Decimal(3, 0, 128)));
+
+ DecimalHolder holder = new DecimalHolder();
+ holder.buffer = allocator.buffer(DecimalVector.TYPE_WIDTH);
+ ArrowType arrowType = new ArrowType.Decimal(3, 0, 128);
+
+ // populate from vector
+ UnionFixedSizeListWriter writer = from.getWriter();
+ for (int i = 0; i < COUNT; i++) {
+ writer.startList();
+ writer.decimal().writeDecimal(BigDecimal.valueOf(i));
+
+ DecimalUtility.writeBigDecimalToArrowBuf(new BigDecimal(i * 2), holder.buffer, 0, DecimalVector.TYPE_WIDTH);
+ holder.start = 0;
+ holder.scale = 0;
+ holder.precision = 3;
+ writer.decimal().write(holder);
+
+ DecimalUtility.writeBigDecimalToArrowBuf(new BigDecimal(i * 3), holder.buffer, 0, DecimalVector.TYPE_WIDTH);
+ writer.decimal().writeDecimal(0, holder.buffer, arrowType);
+
+ writer.decimal().writeBigEndianBytesToDecimal(BigDecimal.valueOf(i * 4).unscaledValue().toByteArray(),
+ arrowType);
+
+ writer.endList();
+ }
+ from.setValueCount(COUNT);
+ to.setValueCount(COUNT);
+
+ // copy values
+ FieldReader in = from.getReader();
+ FieldWriter out = to.getWriter();
+ for (int i = 0; i < COUNT; i++) {
+ in.setPosition(i);
+ out.setPosition(i);
+ ComplexCopier.copy(in, out);
+ }
+
+ // validate equals
+ assertTrue(VectorEqualsVisitor.vectorEquals(from, to));
+ holder.buffer.close();
+ }
+ }
+
+ @Test
+ public void testCopyUnionListWithDecimal() {
+ try (ListVector from = ListVector.empty("v", allocator);
+ ListVector to = ListVector.empty("v", allocator)) {
+
+ UnionListWriter listWriter = from.getWriter();
+ listWriter.allocate();
+
+ for (int i = 0; i < COUNT; i++) {
+ listWriter.setPosition(i);
+ listWriter.startList();
+
+ listWriter.decimal().writeDecimal(BigDecimal.valueOf(i * 2));
+ listWriter.integer().writeInt(i);
+ listWriter.decimal().writeBigEndianBytesToDecimal(BigDecimal.valueOf(i * 3).unscaledValue().toByteArray(),
+ new ArrowType.Decimal(3, 0, 128));
+
+ listWriter.endList();
+ }
+ from.setValueCount(COUNT);
+
+ // copy values
+ FieldReader in = from.getReader();
+ FieldWriter out = to.getWriter();
+ for (int i = 0; i < COUNT; i++) {
+ in.setPosition(i);
+ out.setPosition(i);
+ ComplexCopier.copy(in, out);
+ }
+
+ to.setValueCount(COUNT);
+
+ // validate equals
+ assertTrue(VectorEqualsVisitor.vectorEquals(from, to));
+
+ }
+ }
+
+ @Test
+ public void testCopyStructVector() {
+ try (final StructVector from = StructVector.empty("v", allocator);
+ final StructVector to = StructVector.empty("v", allocator)) {
+
+ from.allocateNewSafe();
+
+ NullableStructWriter structWriter = from.getWriter();
+ for (int i = 0; i < COUNT; i++) {
+ structWriter.setPosition(i);
+ structWriter.start();
+ structWriter.integer("int").writeInt(i);
+ structWriter.decimal("dec", 0, 38).writeDecimal(BigDecimal.valueOf(i * 2));
+ StructWriter innerStructWriter = structWriter.struct("struc");
+ innerStructWriter.start();
+ innerStructWriter.integer("innerint").writeInt(i * 3);
+ innerStructWriter.decimal("innerdec", 0, 38).writeDecimal(BigDecimal.valueOf(i * 4));
+ innerStructWriter.decimal("innerdec", 0, 38).writeBigEndianBytesToDecimal(BigDecimal.valueOf(i * 4)
+ .unscaledValue().toByteArray(), new ArrowType.Decimal(3, 0, 128));
+ innerStructWriter.end();
+ structWriter.end();
+ }
+
+ from.setValueCount(COUNT);
+
+ // copy values
+ FieldReader in = from.getReader();
+ FieldWriter out = to.getWriter();
+ for (int i = 0; i < COUNT; i++) {
+ in.setPosition(i);
+ out.setPosition(i);
+ ComplexCopier.copy(in, out);
+ }
+ to.setValueCount(COUNT);
+
+ // validate equals
+ assertTrue(VectorEqualsVisitor.vectorEquals(from, to));
+ }
+ }
+
+ @Test
+ public void testCopyDecimalVectorWrongScale() {
+ try (FixedSizeListVector from = FixedSizeListVector.empty("v", 3, allocator);
+ FixedSizeListVector to = FixedSizeListVector.empty("v", 3, allocator)) {
+ from.addOrGetVector(FieldType.nullable(new ArrowType.Decimal(3, 2, 128)));
+ to.addOrGetVector(FieldType.nullable(new ArrowType.Decimal(3, 1, 128)));
+
+ // populate from vector
+ UnionFixedSizeListWriter writer = from.getWriter();
+ for (int i = 0; i < COUNT; i++) {
+ writer.startList();
+ writer.decimal().writeDecimal(BigDecimal.valueOf(1.23));
+ writer.decimal().writeDecimal(BigDecimal.valueOf(2.45));
+ writer.endList();
+ }
+ from.setValueCount(COUNT);
+ to.setValueCount(COUNT);
+
+ // copy values
+ FieldReader in = from.getReader();
+ FieldWriter out = to.getWriter();
+ UnsupportedOperationException e = assertThrows(UnsupportedOperationException.class,
+ () -> ComplexCopier.copy(in, out));
+ assertTrue(e.getMessage().contains("BigDecimal scale must equal that in the Arrow vector: 2 != 1"));
+ }
+ }
+
+ @Test
+ public void testCopyStructVectorWithNulls() {
+ try (StructVector from = StructVector.empty("v", allocator);
+ StructVector to = StructVector.empty("v", allocator)) {
+
+ NullableStructWriter writer = from.getWriter();
+
+ for (int i = 0; i < COUNT; ++i) {
+ writer.setPosition(i);
+ writer.start();
+ writer.integer("int").writeInt(i);
+ if (i % 3 == 0) {
+ writer.float4("child").writeFloat4(12.3f);
+ } else if (i % 3 == 1) {
+ writer.integer("child").writeInt(123);
+ } else {
+ writer.integer("child").writeNull();
+ }
+ writer.end();
+ }
+ from.setValueCount(COUNT);
+
+ // copy values
+ FieldReader in = from.getReader();
+ FieldWriter out = to.getWriter();
+ for (int i = 0; i < COUNT; i++) {
+ in.setPosition(i);
+ out.setPosition(i);
+ ComplexCopier.copy(in, out);
+ }
+ to.setValueCount(COUNT);
+
+ // validate equals
+ assertTrue(VectorEqualsVisitor.vectorEquals(from, to));
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestPromotableWriter.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestPromotableWriter.java
new file mode 100644
index 000000000..9dce33122
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestPromotableWriter.java
@@ -0,0 +1,167 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.complex.impl;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNull;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.DirtyRootAllocator;
+import org.apache.arrow.vector.complex.ListVector;
+import org.apache.arrow.vector.complex.NonNullableStructVector;
+import org.apache.arrow.vector.complex.StructVector;
+import org.apache.arrow.vector.complex.UnionVector;
+import org.apache.arrow.vector.complex.writer.BaseWriter.StructWriter;
+import org.apache.arrow.vector.types.Types;
+import org.apache.arrow.vector.types.pojo.ArrowType.ArrowTypeID;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestPromotableWriter {
+ private static final String EMPTY_SCHEMA_PATH = "";
+
+ private BufferAllocator allocator;
+
+ @Before
+ public void init() {
+ allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100);
+ }
+
+ @After
+ public void terminate() throws Exception {
+ allocator.close();
+ }
+
+ @Test
+ public void testPromoteToUnion() throws Exception {
+
+ try (final NonNullableStructVector container = NonNullableStructVector.empty(EMPTY_SCHEMA_PATH, allocator);
+ final StructVector v = container.addOrGetStruct("test");
+ final PromotableWriter writer = new PromotableWriter(v, container)) {
+
+ container.allocateNew();
+
+ writer.start();
+
+ writer.setPosition(0);
+ writer.bit("A").writeBit(0);
+
+ writer.setPosition(1);
+ writer.bit("A").writeBit(1);
+
+ writer.decimal("dec", 10, 10);
+
+ writer.setPosition(2);
+ writer.integer("A").writeInt(10);
+
+ // we don't write anything in 3
+
+ writer.setPosition(4);
+ writer.integer("A").writeInt(100);
+
+ writer.end();
+
+ container.setValueCount(5);
+
+ final UnionVector uv = v.getChild("A", UnionVector.class);
+
+ assertFalse("0 shouldn't be null", uv.isNull(0));
+ assertEquals(false, uv.getObject(0));
+
+ assertFalse("1 shouldn't be null", uv.isNull(1));
+ assertEquals(true, uv.getObject(1));
+
+ assertFalse("2 shouldn't be null", uv.isNull(2));
+ assertEquals(10, uv.getObject(2));
+
+ assertNull("3 should be null", uv.getObject(3));
+
+ assertFalse("4 shouldn't be null", uv.isNull(4));
+ assertEquals(100, uv.getObject(4));
+
+ container.clear();
+ container.allocateNew();
+
+ ComplexWriterImpl newWriter = new ComplexWriterImpl(EMPTY_SCHEMA_PATH, container);
+
+ StructWriter newStructWriter = newWriter.rootAsStruct();
+
+ newStructWriter.start();
+
+ newStructWriter.setPosition(2);
+ newStructWriter.integer("A").writeInt(10);
+
+ Field childField1 = container.getField().getChildren().get(0).getChildren().get(0);
+ Field childField2 = container.getField().getChildren().get(0).getChildren().get(1);
+ assertEquals("Child field should be union type: " +
+ childField1.getName(), ArrowTypeID.Union, childField1.getType().getTypeID());
+ assertEquals("Child field should be decimal type: " +
+ childField2.getName(), ArrowTypeID.Decimal, childField2.getType().getTypeID());
+ }
+ }
+
+ @Test
+ public void testNoPromoteToUnionWithNull() throws Exception {
+
+ try (final NonNullableStructVector container = NonNullableStructVector.empty(EMPTY_SCHEMA_PATH, allocator);
+ final StructVector v = container.addOrGetStruct("test");
+ final PromotableWriter writer = new PromotableWriter(v, container)) {
+
+ container.allocateNew();
+
+ writer.start();
+ writer.list("list").startList();
+ writer.list("list").endList();
+ writer.end();
+
+ FieldType childTypeOfListInContainer = container.getField().getChildren().get(0).getChildren().get(0)
+ .getChildren().get(0).getFieldType();
+
+
+ // create a listvector with same type as list in container to, say, hold a copy
+ // this will be a nullvector
+ ListVector lv = ListVector.empty("name", allocator);
+ lv.addOrGetVector(childTypeOfListInContainer);
+ assertEquals(childTypeOfListInContainer.getType(), Types.MinorType.NULL.getType());
+ assertEquals(lv.getChildrenFromFields().get(0).getMinorType().getType(), Types.MinorType.NULL.getType());
+
+ writer.start();
+ writer.list("list").startList();
+ writer.list("list").float4().writeFloat4(1.36f);
+ writer.list("list").endList();
+ writer.end();
+
+ container.setValueCount(2);
+
+ childTypeOfListInContainer = container.getField().getChildren().get(0).getChildren().get(0)
+ .getChildren().get(0).getFieldType();
+
+ // repeat but now the type in container has been changed from null to float
+ // we expect same behaviour from listvector
+ lv.addOrGetVector(childTypeOfListInContainer);
+ assertEquals(childTypeOfListInContainer.getType(), Types.MinorType.FLOAT4.getType());
+ assertEquals(lv.getChildrenFromFields().get(0).getMinorType().getType(), Types.MinorType.FLOAT4.getType());
+
+ lv.close();
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java
new file mode 100644
index 000000000..d4cf6ea89
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java
@@ -0,0 +1,1335 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.complex.writer;
+
+import static org.junit.Assert.*;
+
+import java.math.BigDecimal;
+import java.time.LocalDateTime;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.util.AutoCloseables;
+import org.apache.arrow.vector.BigIntVector;
+import org.apache.arrow.vector.DecimalVector;
+import org.apache.arrow.vector.Float4Vector;
+import org.apache.arrow.vector.Float8Vector;
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.SchemaChangeCallBack;
+import org.apache.arrow.vector.complex.ListVector;
+import org.apache.arrow.vector.complex.MapVector;
+import org.apache.arrow.vector.complex.NonNullableStructVector;
+import org.apache.arrow.vector.complex.StructVector;
+import org.apache.arrow.vector.complex.UnionVector;
+import org.apache.arrow.vector.complex.impl.ComplexWriterImpl;
+import org.apache.arrow.vector.complex.impl.SingleStructReaderImpl;
+import org.apache.arrow.vector.complex.impl.SingleStructWriter;
+import org.apache.arrow.vector.complex.impl.UnionListReader;
+import org.apache.arrow.vector.complex.impl.UnionListWriter;
+import org.apache.arrow.vector.complex.impl.UnionMapReader;
+import org.apache.arrow.vector.complex.impl.UnionReader;
+import org.apache.arrow.vector.complex.impl.UnionWriter;
+import org.apache.arrow.vector.complex.reader.BaseReader.StructReader;
+import org.apache.arrow.vector.complex.reader.BigIntReader;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.complex.reader.Float4Reader;
+import org.apache.arrow.vector.complex.reader.Float8Reader;
+import org.apache.arrow.vector.complex.reader.IntReader;
+import org.apache.arrow.vector.complex.writer.BaseWriter.ComplexWriter;
+import org.apache.arrow.vector.complex.writer.BaseWriter.ListWriter;
+import org.apache.arrow.vector.complex.writer.BaseWriter.MapWriter;
+import org.apache.arrow.vector.complex.writer.BaseWriter.StructWriter;
+import org.apache.arrow.vector.holders.DecimalHolder;
+import org.apache.arrow.vector.holders.IntHolder;
+import org.apache.arrow.vector.holders.NullableTimeStampNanoTZHolder;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.ArrowType.ArrowTypeID;
+import org.apache.arrow.vector.types.pojo.ArrowType.Int;
+import org.apache.arrow.vector.types.pojo.ArrowType.Struct;
+import org.apache.arrow.vector.types.pojo.ArrowType.Timestamp;
+import org.apache.arrow.vector.types.pojo.ArrowType.Union;
+import org.apache.arrow.vector.types.pojo.ArrowType.Utf8;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.CallBack;
+import org.apache.arrow.vector.util.DecimalUtility;
+import org.apache.arrow.vector.util.JsonStringArrayList;
+import org.apache.arrow.vector.util.JsonStringHashMap;
+import org.apache.arrow.vector.util.Text;
+import org.apache.arrow.vector.util.TransferPair;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestComplexWriter {
+
+ private BufferAllocator allocator;
+
+ private static final int COUNT = 100;
+
+ @Before
+ public void init() {
+ allocator = new RootAllocator(Integer.MAX_VALUE);
+ }
+
+ @After
+ public void terminate() throws Exception {
+ allocator.close();
+ }
+
+ @Test
+ public void simpleNestedTypes() {
+ NonNullableStructVector parent = populateStructVector(null);
+ StructReader rootReader = new SingleStructReaderImpl(parent).reader("root");
+ for (int i = 0; i < COUNT; i++) {
+ rootReader.setPosition(i);
+ Assert.assertEquals(i, rootReader.reader("int").readInteger().intValue());
+ Assert.assertEquals(i, rootReader.reader("bigInt").readLong().longValue());
+ }
+
+ parent.close();
+ }
+
+ @Test
+ public void transferPairSchemaChange() {
+ SchemaChangeCallBack callBack1 = new SchemaChangeCallBack();
+ SchemaChangeCallBack callBack2 = new SchemaChangeCallBack();
+ try (NonNullableStructVector parent = populateStructVector(callBack1)) {
+ TransferPair tp = parent.getTransferPair("newVector", allocator, callBack2);
+
+ ComplexWriter writer = new ComplexWriterImpl("newWriter", parent);
+ StructWriter rootWriter = writer.rootAsStruct();
+ IntWriter intWriter = rootWriter.integer("newInt");
+ intWriter.writeInt(1);
+ writer.setValueCount(1);
+
+ assertTrue(callBack1.getSchemaChangedAndReset());
+ // The second vector should not have registered a schema change
+ assertFalse(callBack1.getSchemaChangedAndReset());
+ }
+ }
+
+ private NonNullableStructVector populateStructVector(CallBack callBack) {
+ NonNullableStructVector parent =
+ new NonNullableStructVector("parent", allocator, new FieldType(false, Struct.INSTANCE, null, null), callBack);
+ ComplexWriter writer = new ComplexWriterImpl("root", parent);
+ StructWriter rootWriter = writer.rootAsStruct();
+ IntWriter intWriter = rootWriter.integer("int");
+ BigIntWriter bigIntWriter = rootWriter.bigInt("bigInt");
+ for (int i = 0; i < COUNT; i++) {
+ rootWriter.start();
+ intWriter.writeInt(i);
+ bigIntWriter.writeBigInt(i);
+ rootWriter.end();
+ }
+ writer.setValueCount(COUNT);
+ return parent;
+ }
+
+ @Test
+ public void nullableStruct() {
+ try (NonNullableStructVector structVector = NonNullableStructVector.empty("parent", allocator)) {
+ ComplexWriter writer = new ComplexWriterImpl("root", structVector);
+ StructWriter rootWriter = writer.rootAsStruct();
+ for (int i = 0; i < COUNT; i++) {
+ rootWriter.start();
+ if (i % 2 == 0) {
+ StructWriter structWriter = rootWriter.struct("struct");
+ structWriter.setPosition(i);
+ structWriter.start();
+ structWriter.bigInt("nested").writeBigInt(i);
+ structWriter.end();
+ }
+ rootWriter.end();
+ }
+ writer.setValueCount(COUNT);
+ checkNullableStruct(structVector);
+ }
+ }
+
+ /**
+ * This test is similar to {@link #nullableStruct()} ()} but we get the inner struct writer once at the beginning.
+ */
+ @Test
+ public void nullableStruct2() {
+ try (NonNullableStructVector structVector = NonNullableStructVector.empty("parent", allocator)) {
+ ComplexWriter writer = new ComplexWriterImpl("root", structVector);
+ StructWriter rootWriter = writer.rootAsStruct();
+ StructWriter structWriter = rootWriter.struct("struct");
+
+ for (int i = 0; i < COUNT; i++) {
+ rootWriter.start();
+ if (i % 2 == 0) {
+ structWriter.setPosition(i);
+ structWriter.start();
+ structWriter.bigInt("nested").writeBigInt(i);
+ structWriter.end();
+ }
+ rootWriter.end();
+ }
+ writer.setValueCount(COUNT);
+ checkNullableStruct(structVector);
+ }
+ }
+
+ private void checkNullableStruct(NonNullableStructVector structVector) {
+ StructReader rootReader = new SingleStructReaderImpl(structVector).reader("root");
+ for (int i = 0; i < COUNT; i++) {
+ rootReader.setPosition(i);
+ assertTrue("index is set: " + i, rootReader.isSet());
+ FieldReader struct = rootReader.reader("struct");
+ if (i % 2 == 0) {
+ assertTrue("index is set: " + i, struct.isSet());
+ assertNotNull("index is set: " + i, struct.readObject());
+ assertEquals(i, struct.reader("nested").readLong().longValue());
+ } else {
+ assertFalse("index is not set: " + i, struct.isSet());
+ assertNull("index is not set: " + i, struct.readObject());
+ }
+ }
+ }
+
+ @Test
+ public void testList() {
+ try (NonNullableStructVector parent = NonNullableStructVector.empty("parent", allocator)) {
+ ComplexWriter writer = new ComplexWriterImpl("root", parent);
+ StructWriter rootWriter = writer.rootAsStruct();
+
+ rootWriter.start();
+ rootWriter.bigInt("int").writeBigInt(0);
+ rootWriter.list("list").startList();
+ rootWriter.list("list").bigInt().writeBigInt(0);
+ rootWriter.list("list").endList();
+ rootWriter.end();
+
+ rootWriter.start();
+ rootWriter.bigInt("int").writeBigInt(1);
+ rootWriter.end();
+
+ writer.setValueCount(2);
+
+ StructReader rootReader = new SingleStructReaderImpl(parent).reader("root");
+
+ rootReader.setPosition(0);
+ assertTrue("row 0 list is not set", rootReader.reader("list").isSet());
+ assertEquals(Long.valueOf(0), rootReader.reader("list").reader().readLong());
+
+ rootReader.setPosition(1);
+ assertFalse("row 1 list is set", rootReader.reader("list").isSet());
+ }
+ }
+
+ @Test
+ public void listScalarType() {
+ try (ListVector listVector = ListVector.empty("list", allocator)) {
+ listVector.allocateNew();
+ UnionListWriter listWriter = new UnionListWriter(listVector);
+ for (int i = 0; i < COUNT; i++) {
+ listWriter.startList();
+ for (int j = 0; j < i % 7; j++) {
+ if (j % 2 == 0) {
+ listWriter.writeInt(j);
+ } else {
+ IntHolder holder = new IntHolder();
+ holder.value = j;
+ listWriter.write(holder);
+ }
+ }
+ listWriter.endList();
+ }
+ listWriter.setValueCount(COUNT);
+ UnionListReader listReader = new UnionListReader(listVector);
+ for (int i = 0; i < COUNT; i++) {
+ listReader.setPosition(i);
+ for (int j = 0; j < i % 7; j++) {
+ listReader.next();
+ assertEquals(j, listReader.reader().readInteger().intValue());
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testListScalarNull() {
+ /* Write to a integer list vector
+ * each list of size 8 and having it's data values alternating between null and a non-null.
+ * Read and verify
+ */
+ try (ListVector listVector = ListVector.empty("list", allocator)) {
+ listVector.allocateNew();
+ UnionListWriter listWriter = new UnionListWriter(listVector);
+ for (int i = 0; i < COUNT; i++) {
+ listWriter.startList();
+ for (int j = 0; j < i % 7; j++) {
+ if (j % 2 == 0) {
+ listWriter.writeNull();
+ } else {
+ IntHolder holder = new IntHolder();
+ holder.value = j;
+ listWriter.write(holder);
+ }
+ }
+ listWriter.endList();
+ }
+ listWriter.setValueCount(COUNT);
+ UnionListReader listReader = new UnionListReader(listVector);
+ for (int i = 0; i < COUNT; i++) {
+ listReader.setPosition(i);
+ for (int j = 0; j < i % 7; j++) {
+ listReader.next();
+ if (j % 2 == 0) {
+ assertFalse("index is set: " + j, listReader.reader().isSet());
+ } else {
+ assertTrue("index is not set: " + j, listReader.reader().isSet());
+ assertEquals(j, listReader.reader().readInteger().intValue());
+ }
+ }
+ }
+ }
+ }
+
+ @Test
+ public void listDecimalType() {
+ try (ListVector listVector = ListVector.empty("list", allocator)) {
+ listVector.allocateNew();
+ UnionListWriter listWriter = new UnionListWriter(listVector);
+ DecimalHolder holder = new DecimalHolder();
+ holder.buffer = allocator.buffer(DecimalVector.TYPE_WIDTH);
+ ArrowType arrowType = new ArrowType.Decimal(10, 0, 128);
+ for (int i = 0; i < COUNT; i++) {
+ listWriter.startList();
+ for (int j = 0; j < i % 7; j++) {
+ if (j % 4 == 0) {
+ listWriter.writeDecimal(new BigDecimal(j));
+ } else if (j % 4 == 1) {
+ DecimalUtility.writeBigDecimalToArrowBuf(new BigDecimal(j), holder.buffer, 0, DecimalVector.TYPE_WIDTH);
+ holder.start = 0;
+ holder.scale = 0;
+ holder.precision = 10;
+ listWriter.write(holder);
+ } else if (j % 4 == 2) {
+ DecimalUtility.writeBigDecimalToArrowBuf(new BigDecimal(j), holder.buffer, 0, DecimalVector.TYPE_WIDTH);
+ listWriter.writeDecimal(0, holder.buffer, arrowType);
+ } else {
+ byte[] value = BigDecimal.valueOf(j).unscaledValue().toByteArray();
+ listWriter.writeBigEndianBytesToDecimal(value, arrowType);
+ }
+ }
+ listWriter.endList();
+ }
+ listWriter.setValueCount(COUNT);
+ UnionListReader listReader = new UnionListReader(listVector);
+ for (int i = 0; i < COUNT; i++) {
+ listReader.setPosition(i);
+ for (int j = 0; j < i % 7; j++) {
+ listReader.next();
+ Object expected = new BigDecimal(j);
+ Object actual = listReader.reader().readBigDecimal();
+ assertEquals(expected, actual);
+ }
+ }
+ holder.buffer.close();
+ }
+ }
+
+ @Test
+ public void listScalarTypeNullable() {
+ try (ListVector listVector = ListVector.empty("list", allocator)) {
+ listVector.allocateNew();
+ UnionListWriter listWriter = new UnionListWriter(listVector);
+ for (int i = 0; i < COUNT; i++) {
+ if (i % 2 == 0) {
+ listWriter.setPosition(i);
+ listWriter.startList();
+ for (int j = 0; j < i % 7; j++) {
+ listWriter.writeInt(j);
+ }
+ listWriter.endList();
+ }
+ }
+ listWriter.setValueCount(COUNT);
+ UnionListReader listReader = new UnionListReader(listVector);
+ for (int i = 0; i < COUNT; i++) {
+ listReader.setPosition(i);
+ if (i % 2 == 0) {
+ assertTrue("index is set: " + i, listReader.isSet());
+ assertEquals("correct length at: " + i, i % 7, ((List<?>) listReader.readObject()).size());
+ } else {
+ assertFalse("index is not set: " + i, listReader.isSet());
+ assertNull("index is not set: " + i, listReader.readObject());
+ }
+ }
+ }
+ }
+
+ @Test
+ public void listStructType() {
+ try (ListVector listVector = ListVector.empty("list", allocator)) {
+ listVector.allocateNew();
+ UnionListWriter listWriter = new UnionListWriter(listVector);
+ StructWriter structWriter = listWriter.struct();
+ for (int i = 0; i < COUNT; i++) {
+ listWriter.startList();
+ for (int j = 0; j < i % 7; j++) {
+ structWriter.start();
+ structWriter.integer("int").writeInt(j);
+ structWriter.bigInt("bigInt").writeBigInt(j);
+ structWriter.end();
+ }
+ listWriter.endList();
+ }
+ listWriter.setValueCount(COUNT);
+ UnionListReader listReader = new UnionListReader(listVector);
+ for (int i = 0; i < COUNT; i++) {
+ listReader.setPosition(i);
+ for (int j = 0; j < i % 7; j++) {
+ listReader.next();
+ Assert.assertEquals("record: " + i, j, listReader.reader().reader("int").readInteger().intValue());
+ Assert.assertEquals(j, listReader.reader().reader("bigInt").readLong().longValue());
+ }
+ }
+ }
+ }
+
+ @Test
+ public void listListType() {
+ try (ListVector listVector = ListVector.empty("list", allocator)) {
+ listVector.allocateNew();
+ UnionListWriter listWriter = new UnionListWriter(listVector);
+ for (int i = 0; i < COUNT; i++) {
+ listWriter.startList();
+ for (int j = 0; j < i % 7; j++) {
+ ListWriter innerListWriter = listWriter.list();
+ innerListWriter.startList();
+ for (int k = 0; k < i % 13; k++) {
+ innerListWriter.integer().writeInt(k);
+ }
+ innerListWriter.endList();
+ }
+ listWriter.endList();
+ }
+ listWriter.setValueCount(COUNT);
+ checkListOfLists(listVector);
+ }
+ }
+
+ /**
+ * This test is similar to {@link #listListType()} but we get the inner list writer once at the beginning.
+ */
+ @Test
+ public void listListType2() {
+ try (ListVector listVector = ListVector.empty("list", allocator)) {
+ listVector.allocateNew();
+ UnionListWriter listWriter = new UnionListWriter(listVector);
+ ListWriter innerListWriter = listWriter.list();
+
+ for (int i = 0; i < COUNT; i++) {
+ listWriter.startList();
+ for (int j = 0; j < i % 7; j++) {
+ innerListWriter.startList();
+ for (int k = 0; k < i % 13; k++) {
+ innerListWriter.integer().writeInt(k);
+ }
+ innerListWriter.endList();
+ }
+ listWriter.endList();
+ }
+ listWriter.setValueCount(COUNT);
+ checkListOfLists(listVector);
+ }
+ }
+
+ private void checkListOfLists(final ListVector listVector) {
+ UnionListReader listReader = new UnionListReader(listVector);
+ for (int i = 0; i < COUNT; i++) {
+ listReader.setPosition(i);
+ for (int j = 0; j < i % 7; j++) {
+ listReader.next();
+ FieldReader innerListReader = listReader.reader();
+ for (int k = 0; k < i % 13; k++) {
+ innerListReader.next();
+ Assert.assertEquals("record: " + i, k, innerListReader.reader().readInteger().intValue());
+ }
+ }
+ }
+ }
+
+ @Test
+ public void unionListListType() {
+ try (ListVector listVector = ListVector.empty("list", allocator)) {
+ listVector.allocateNew();
+ UnionListWriter listWriter = new UnionListWriter(listVector);
+ for (int i = 0; i < COUNT; i++) {
+ listWriter.startList();
+ for (int j = 0; j < i % 7; j++) {
+ ListWriter innerListWriter = listWriter.list();
+ innerListWriter.startList();
+ for (int k = 0; k < i % 13; k++) {
+ if (k % 2 == 0) {
+ innerListWriter.integer().writeInt(k);
+ } else {
+ innerListWriter.bigInt().writeBigInt(k);
+ }
+ }
+ innerListWriter.endList();
+ }
+ listWriter.endList();
+ }
+ listWriter.setValueCount(COUNT);
+ checkUnionList(listVector);
+ }
+ }
+
+ /**
+ * This test is similar to {@link #unionListListType()} but we get the inner list writer once at the beginning.
+ */
+ @Test
+ public void unionListListType2() {
+ try (ListVector listVector = ListVector.empty("list", allocator)) {
+ listVector.allocateNew();
+ UnionListWriter listWriter = new UnionListWriter(listVector);
+ ListWriter innerListWriter = listWriter.list();
+
+ for (int i = 0; i < COUNT; i++) {
+ listWriter.startList();
+ for (int j = 0; j < i % 7; j++) {
+ innerListWriter.startList();
+ for (int k = 0; k < i % 13; k++) {
+ if (k % 2 == 0) {
+ innerListWriter.integer().writeInt(k);
+ } else {
+ innerListWriter.bigInt().writeBigInt(k);
+ }
+ }
+ innerListWriter.endList();
+ }
+ listWriter.endList();
+ }
+ listWriter.setValueCount(COUNT);
+ checkUnionList(listVector);
+ }
+ }
+
+ private void checkUnionList(ListVector listVector) {
+ UnionListReader listReader = new UnionListReader(listVector);
+ for (int i = 0; i < COUNT; i++) {
+ listReader.setPosition(i);
+ for (int j = 0; j < i % 7; j++) {
+ listReader.next();
+ FieldReader innerListReader = listReader.reader();
+ for (int k = 0; k < i % 13; k++) {
+ innerListReader.next();
+ if (k % 2 == 0) {
+ Assert.assertEquals("record: " + i, k, innerListReader.reader().readInteger().intValue());
+ } else {
+ Assert.assertEquals("record: " + i, k, innerListReader.reader().readLong().longValue());
+ }
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testListMapType() {
+ try (ListVector listVector = ListVector.empty("list", allocator)) {
+ listVector.allocateNew();
+ UnionListWriter listWriter = new UnionListWriter(listVector);
+ MapWriter innerMapWriter = listWriter.map(true);
+
+ for (int i = 0; i < COUNT; i++) {
+ listWriter.startList();
+ for (int j = 0; j < i % 7; j++) {
+ innerMapWriter.startMap();
+ for (int k = 0; k < i % 13; k++) {
+ innerMapWriter.startEntry();
+ innerMapWriter.key().integer().writeInt(k);
+ if (k % 2 == 0) {
+ innerMapWriter.value().bigInt().writeBigInt(k);
+ }
+ innerMapWriter.endEntry();
+ }
+ innerMapWriter.endMap();
+ }
+ listWriter.endList();
+ }
+ listWriter.setValueCount(COUNT);
+ checkListMap(listVector);
+
+ // Verify that the map vector has keysSorted = true
+ MapVector mapVector = (MapVector) listVector.getDataVector();
+ ArrowType arrowType = mapVector.getField().getFieldType().getType();
+ assertTrue(((ArrowType.Map) arrowType).getKeysSorted());
+ }
+ }
+
+ private void checkListMap(ListVector listVector) {
+ UnionListReader listReader = new UnionListReader(listVector);
+ for (int i = 0; i < COUNT; i++) {
+ listReader.setPosition(i);
+ for (int j = 0; j < i % 7; j++) {
+ listReader.next();
+ UnionMapReader mapReader = (UnionMapReader) listReader.reader();
+ for (int k = 0; k < i % 13; k++) {
+ mapReader.next();
+ Assert.assertEquals("record key: " + i, k, mapReader.key().readInteger().intValue());
+ if (k % 2 == 0) {
+ Assert.assertEquals("record value: " + i, k, mapReader.value().readLong().longValue());
+ } else {
+ Assert.assertNull("record value: " + i, mapReader.value().readLong());
+ }
+ }
+ }
+ }
+ }
+
+ @Test
+ public void simpleUnion() {
+ UnionVector vector = new UnionVector("union", allocator, /* field type */ null, /* call-back */ null);
+ UnionWriter unionWriter = new UnionWriter(vector);
+ unionWriter.allocate();
+ for (int i = 0; i < COUNT; i++) {
+ unionWriter.setPosition(i);
+ if (i % 2 == 0) {
+ unionWriter.writeInt(i);
+ } else {
+ unionWriter.writeFloat4((float) i);
+ }
+ }
+ vector.setValueCount(COUNT);
+ UnionReader unionReader = new UnionReader(vector);
+ for (int i = 0; i < COUNT; i++) {
+ unionReader.setPosition(i);
+ if (i % 2 == 0) {
+ Assert.assertEquals(i, i, unionReader.readInteger());
+ } else {
+ Assert.assertEquals((float) i, unionReader.readFloat(), 1e-12);
+ }
+ }
+ vector.close();
+ }
+
+ @Test
+ public void promotableWriter() {
+ try (NonNullableStructVector parent = NonNullableStructVector.empty("parent", allocator)) {
+
+ ComplexWriter writer = new ComplexWriterImpl("root", parent);
+ StructWriter rootWriter = writer.rootAsStruct();
+ for (int i = 0; i < 100; i++) {
+ BigIntWriter bigIntWriter = rootWriter.bigInt("a");
+ bigIntWriter.setPosition(i);
+ bigIntWriter.writeBigInt(i);
+ }
+ Field field = parent.getField().getChildren().get(0).getChildren().get(0);
+ Assert.assertEquals("a", field.getName());
+ Assert.assertEquals(Int.TYPE_TYPE, field.getType().getTypeID());
+ Int intType = (Int) field.getType();
+
+ Assert.assertEquals(64, intType.getBitWidth());
+ Assert.assertTrue(intType.getIsSigned());
+ for (int i = 100; i < 200; i++) {
+ VarCharWriter varCharWriter = rootWriter.varChar("a");
+ varCharWriter.setPosition(i);
+ byte[] bytes = Integer.toString(i).getBytes();
+ ArrowBuf tempBuf = allocator.buffer(bytes.length);
+ tempBuf.setBytes(0, bytes);
+ varCharWriter.writeVarChar(0, bytes.length, tempBuf);
+ tempBuf.close();
+ }
+ field = parent.getField().getChildren().get(0).getChildren().get(0);
+ Assert.assertEquals("a", field.getName());
+ Assert.assertEquals(Union.TYPE_TYPE, field.getType().getTypeID());
+ Assert.assertEquals(Int.TYPE_TYPE, field.getChildren().get(0).getType().getTypeID());
+ Assert.assertEquals(Utf8.TYPE_TYPE, field.getChildren().get(1).getType().getTypeID());
+ StructReader rootReader = new SingleStructReaderImpl(parent).reader("root");
+ for (int i = 0; i < 100; i++) {
+ rootReader.setPosition(i);
+ FieldReader reader = rootReader.reader("a");
+ Long value = reader.readLong();
+ Assert.assertNotNull("index: " + i, value);
+ Assert.assertEquals(i, value.intValue());
+ }
+ for (int i = 100; i < 200; i++) {
+ rootReader.setPosition(i);
+ FieldReader reader = rootReader.reader("a");
+ Text value = reader.readText();
+ Assert.assertEquals(Integer.toString(i), value.toString());
+ }
+ }
+ }
+
+ /**
+ * Even without writing to the writer, the union schema is created correctly.
+ */
+ @Test
+ public void promotableWriterSchema() {
+ try (NonNullableStructVector parent = NonNullableStructVector.empty("parent", allocator)) {
+ ComplexWriter writer = new ComplexWriterImpl("root", parent);
+ StructWriter rootWriter = writer.rootAsStruct();
+ rootWriter.bigInt("a");
+ rootWriter.varChar("a");
+
+ Field field = parent.getField().getChildren().get(0).getChildren().get(0);
+ Assert.assertEquals("a", field.getName());
+ Assert.assertEquals(ArrowTypeID.Union, field.getType().getTypeID());
+
+ Assert.assertEquals(ArrowTypeID.Int, field.getChildren().get(0).getType().getTypeID());
+ Int intType = (Int) field.getChildren().get(0).getType();
+ Assert.assertEquals(64, intType.getBitWidth());
+ Assert.assertTrue(intType.getIsSigned());
+ Assert.assertEquals(ArrowTypeID.Utf8, field.getChildren().get(1).getType().getTypeID());
+ }
+ }
+
+ private Set<String> getFieldNames(List<Field> fields) {
+ Set<String> fieldNames = new HashSet<>();
+ for (Field field : fields) {
+ fieldNames.add(field.getName());
+ if (!field.getChildren().isEmpty()) {
+ for (String name : getFieldNames(field.getChildren())) {
+ fieldNames.add(field.getName() + "::" + name);
+ }
+ }
+ }
+ return fieldNames;
+ }
+
+ @Test
+ public void structWriterMixedCaseFieldNames() {
+ try (NonNullableStructVector parent = NonNullableStructVector.empty("parent", allocator)) {
+ // test case-sensitive StructWriter
+ ComplexWriter writer = new ComplexWriterImpl("rootCaseSensitive", parent, false, true);
+ StructWriter rootWriterCaseSensitive = writer.rootAsStruct();
+ rootWriterCaseSensitive.bigInt("int_field");
+ rootWriterCaseSensitive.bigInt("Int_Field");
+ rootWriterCaseSensitive.float4("float_field");
+ rootWriterCaseSensitive.float4("Float_Field");
+ StructWriter structFieldWriterCaseSensitive = rootWriterCaseSensitive.struct("struct_field");
+ structFieldWriterCaseSensitive.varChar("char_field");
+ structFieldWriterCaseSensitive.varChar("Char_Field");
+ ListWriter listFieldWriterCaseSensitive = rootWriterCaseSensitive.list("list_field");
+ StructWriter listStructFieldWriterCaseSensitive = listFieldWriterCaseSensitive.struct();
+ listStructFieldWriterCaseSensitive.bit("bit_field");
+ listStructFieldWriterCaseSensitive.bit("Bit_Field");
+
+ List<Field> fieldsCaseSensitive = parent.getField().getChildren().get(0).getChildren();
+ Set<String> fieldNamesCaseSensitive = getFieldNames(fieldsCaseSensitive);
+ Assert.assertEquals(11, fieldNamesCaseSensitive.size());
+ Assert.assertTrue(fieldNamesCaseSensitive.contains("int_field"));
+ Assert.assertTrue(fieldNamesCaseSensitive.contains("Int_Field"));
+ Assert.assertTrue(fieldNamesCaseSensitive.contains("float_field"));
+ Assert.assertTrue(fieldNamesCaseSensitive.contains("Float_Field"));
+ Assert.assertTrue(fieldNamesCaseSensitive.contains("struct_field"));
+ Assert.assertTrue(fieldNamesCaseSensitive.contains("struct_field::char_field"));
+ Assert.assertTrue(fieldNamesCaseSensitive.contains("struct_field::Char_Field"));
+ Assert.assertTrue(fieldNamesCaseSensitive.contains("list_field"));
+ Assert.assertTrue(fieldNamesCaseSensitive.contains("list_field::$data$"));
+ Assert.assertTrue(fieldNamesCaseSensitive.contains("list_field::$data$::bit_field"));
+ Assert.assertTrue(fieldNamesCaseSensitive.contains("list_field::$data$::Bit_Field"));
+
+ // test case-insensitive StructWriter
+ ComplexWriter writerCaseInsensitive = new ComplexWriterImpl("rootCaseInsensitive", parent, false, false);
+ StructWriter rootWriterCaseInsensitive = writerCaseInsensitive.rootAsStruct();
+
+ rootWriterCaseInsensitive.bigInt("int_field");
+ rootWriterCaseInsensitive.bigInt("Int_Field");
+ rootWriterCaseInsensitive.float4("float_field");
+ rootWriterCaseInsensitive.float4("Float_Field");
+ StructWriter structFieldWriterCaseInsensitive = rootWriterCaseInsensitive.struct("struct_field");
+ structFieldWriterCaseInsensitive.varChar("char_field");
+ structFieldWriterCaseInsensitive.varChar("Char_Field");
+ ListWriter listFieldWriterCaseInsensitive = rootWriterCaseInsensitive.list("list_field");
+ StructWriter listStructFieldWriterCaseInsensitive = listFieldWriterCaseInsensitive.struct();
+ listStructFieldWriterCaseInsensitive.bit("bit_field");
+ listStructFieldWriterCaseInsensitive.bit("Bit_Field");
+
+ List<Field> fieldsCaseInsensitive = parent.getField().getChildren().get(1).getChildren();
+ Set<String> fieldNamesCaseInsensitive = getFieldNames(fieldsCaseInsensitive);
+ Assert.assertEquals(7, fieldNamesCaseInsensitive.size());
+ Assert.assertTrue(fieldNamesCaseInsensitive.contains("int_field"));
+ Assert.assertTrue(fieldNamesCaseInsensitive.contains("float_field"));
+ Assert.assertTrue(fieldNamesCaseInsensitive.contains("struct_field"));
+ Assert.assertTrue(fieldNamesCaseInsensitive.contains("struct_field::char_field"));
+ Assert.assertTrue(fieldNamesCaseSensitive.contains("list_field"));
+ Assert.assertTrue(fieldNamesCaseSensitive.contains("list_field::$data$"));
+ Assert.assertTrue(fieldNamesCaseSensitive.contains("list_field::$data$::bit_field"));
+ }
+ }
+
+ @Test
+ public void timeStampSecWriter() throws Exception {
+ // test values
+ final long expectedSecs = 981173106L;
+ final LocalDateTime expectedSecDateTime = LocalDateTime.of(2001, 2, 3, 4, 5, 6, 0);
+
+ try (NonNullableStructVector parent = NonNullableStructVector.empty("parent", allocator)) {
+ // write
+
+ ComplexWriter writer = new ComplexWriterImpl("root", parent);
+ StructWriter rootWriter = writer.rootAsStruct();
+
+ {
+ TimeStampSecWriter timeStampSecWriter = rootWriter.timeStampSec("sec");
+ timeStampSecWriter.setPosition(0);
+ timeStampSecWriter.writeTimeStampSec(expectedSecs);
+ }
+ {
+ TimeStampSecTZWriter timeStampSecTZWriter = rootWriter.timeStampSecTZ("secTZ", "UTC");
+ timeStampSecTZWriter.setPosition(1);
+ timeStampSecTZWriter.writeTimeStampSecTZ(expectedSecs);
+ }
+ // schema
+ List<Field> children = parent.getField().getChildren().get(0).getChildren();
+ checkTimestampField(children.get(0), "sec");
+ checkTimestampTZField(children.get(1), "secTZ", "UTC");
+
+ // read
+ StructReader rootReader = new SingleStructReaderImpl(parent).reader("root");
+ {
+ FieldReader secReader = rootReader.reader("sec");
+ secReader.setPosition(0);
+ LocalDateTime secDateTime = secReader.readLocalDateTime();
+ Assert.assertEquals(expectedSecDateTime, secDateTime);
+ long secLong = secReader.readLong();
+ Assert.assertEquals(expectedSecs, secLong);
+ }
+ {
+ FieldReader secTZReader = rootReader.reader("secTZ");
+ secTZReader.setPosition(1);
+ long secTZLong = secTZReader.readLong();
+ Assert.assertEquals(expectedSecs, secTZLong);
+ }
+ }
+ }
+
+ @Test
+ public void timeStampMilliWriters() throws Exception {
+ // test values
+ final long expectedMillis = 981173106123L;
+ final LocalDateTime expectedMilliDateTime = LocalDateTime.of(2001, 2, 3, 4, 5, 6, 123 * 1_000_000);
+
+ try (NonNullableStructVector parent = NonNullableStructVector.empty("parent", allocator);) {
+ // write
+ ComplexWriter writer = new ComplexWriterImpl("root", parent);
+ StructWriter rootWriter = writer.rootAsStruct();
+ {
+ TimeStampMilliWriter timeStampWriter = rootWriter.timeStampMilli("milli");
+ timeStampWriter.setPosition(0);
+ timeStampWriter.writeTimeStampMilli(expectedMillis);
+ }
+ String tz = "UTC";
+ {
+ TimeStampMilliTZWriter timeStampTZWriter = rootWriter.timeStampMilliTZ("milliTZ", tz);
+ timeStampTZWriter.setPosition(0);
+ timeStampTZWriter.writeTimeStampMilliTZ(expectedMillis);
+ }
+ // schema
+ List<Field> children = parent.getField().getChildren().get(0).getChildren();
+ checkTimestampField(children.get(0), "milli");
+ checkTimestampTZField(children.get(1), "milliTZ", tz);
+
+ // read
+ StructReader rootReader = new SingleStructReaderImpl(parent).reader("root");
+
+ {
+ FieldReader milliReader = rootReader.reader("milli");
+ milliReader.setPosition(0);
+ LocalDateTime milliDateTime = milliReader.readLocalDateTime();
+ Assert.assertEquals(expectedMilliDateTime, milliDateTime);
+ long milliLong = milliReader.readLong();
+ Assert.assertEquals(expectedMillis, milliLong);
+ }
+ {
+ FieldReader milliTZReader = rootReader.reader("milliTZ");
+ milliTZReader.setPosition(0);
+ long milliTZLong = milliTZReader.readLong();
+ Assert.assertEquals(expectedMillis, milliTZLong);
+ }
+ }
+ }
+
+ private void checkTimestampField(Field field, String name) {
+ Assert.assertEquals(name, field.getName());
+ Assert.assertEquals(ArrowType.Timestamp.TYPE_TYPE, field.getType().getTypeID());
+ }
+
+ private void checkTimestampTZField(Field field, String name, String tz) {
+ checkTimestampField(field, name);
+ Assert.assertEquals(tz, ((Timestamp) field.getType()).getTimezone());
+ }
+
+ @Test
+ public void timeStampMicroWriters() throws Exception {
+ // test values
+ final long expectedMicros = 981173106123456L;
+ final LocalDateTime expectedMicroDateTime = LocalDateTime.of(2001, 2, 3, 4, 5, 6, 123456 * 1000);
+
+ try (NonNullableStructVector parent = NonNullableStructVector.empty("parent", allocator)) {
+ // write
+ ComplexWriter writer = new ComplexWriterImpl("root", parent);
+ StructWriter rootWriter = writer.rootAsStruct();
+
+ {
+ TimeStampMicroWriter timeStampMicroWriter = rootWriter.timeStampMicro("micro");
+ timeStampMicroWriter.setPosition(0);
+ timeStampMicroWriter.writeTimeStampMicro(expectedMicros);
+ }
+ String tz = "UTC";
+ {
+ TimeStampMicroTZWriter timeStampMicroWriter = rootWriter.timeStampMicroTZ("microTZ", tz);
+ timeStampMicroWriter.setPosition(1);
+ timeStampMicroWriter.writeTimeStampMicroTZ(expectedMicros);
+ }
+
+ // schema
+ List<Field> children = parent.getField().getChildren().get(0).getChildren();
+ checkTimestampField(children.get(0), "micro");
+ checkTimestampTZField(children.get(1), "microTZ", tz);
+
+ // read
+ StructReader rootReader = new SingleStructReaderImpl(parent).reader("root");
+ {
+ FieldReader microReader = rootReader.reader("micro");
+ microReader.setPosition(0);
+ LocalDateTime microDateTime = microReader.readLocalDateTime();
+ Assert.assertEquals(expectedMicroDateTime, microDateTime);
+ long microLong = microReader.readLong();
+ Assert.assertEquals(expectedMicros, microLong);
+ }
+ {
+ FieldReader microReader = rootReader.reader("microTZ");
+ microReader.setPosition(1);
+ long microLong = microReader.readLong();
+ Assert.assertEquals(expectedMicros, microLong);
+ }
+ }
+ }
+
+ @Test
+ public void timeStampNanoWriters() throws Exception {
+ // test values
+ final long expectedNanos = 981173106123456789L;
+ final LocalDateTime expectedNanoDateTime = LocalDateTime.of(2001, 2, 3, 4, 5, 6, 123456789);
+
+ try (NonNullableStructVector parent = NonNullableStructVector.empty("parent", allocator)) {
+ // write
+ ComplexWriter writer = new ComplexWriterImpl("root", parent);
+ StructWriter rootWriter = writer.rootAsStruct();
+
+ {
+ TimeStampNanoWriter timeStampNanoWriter = rootWriter.timeStampNano("nano");
+ timeStampNanoWriter.setPosition(0);
+ timeStampNanoWriter.writeTimeStampNano(expectedNanos);
+ }
+ String tz = "UTC";
+ {
+ TimeStampNanoTZWriter timeStampNanoWriter = rootWriter.timeStampNanoTZ("nanoTZ", tz);
+ timeStampNanoWriter.setPosition(0);
+ timeStampNanoWriter.writeTimeStampNanoTZ(expectedNanos);
+ }
+ // schema
+ List<Field> children = parent.getField().getChildren().get(0).getChildren();
+ checkTimestampField(children.get(0), "nano");
+ checkTimestampTZField(children.get(1), "nanoTZ", tz);
+ // read
+ StructReader rootReader = new SingleStructReaderImpl(parent).reader("root");
+
+ {
+ FieldReader nanoReader = rootReader.reader("nano");
+ nanoReader.setPosition(0);
+ LocalDateTime nanoDateTime = nanoReader.readLocalDateTime();
+ Assert.assertEquals(expectedNanoDateTime, nanoDateTime);
+ long nanoLong = nanoReader.readLong();
+ Assert.assertEquals(expectedNanos, nanoLong);
+ }
+ {
+ FieldReader nanoReader = rootReader.reader("nanoTZ");
+ nanoReader.setPosition(0);
+ long nanoLong = nanoReader.readLong();
+ Assert.assertEquals(expectedNanos, nanoLong);
+ NullableTimeStampNanoTZHolder h = new NullableTimeStampNanoTZHolder();
+ nanoReader.read(h);
+ Assert.assertEquals(expectedNanos, h.value);
+ }
+ }
+
+ }
+
+ @Test
+ public void fixedSizeBinaryWriters() throws Exception {
+ // test values
+ int numValues = 10;
+ int byteWidth = 9;
+ byte[][] values = new byte[numValues][byteWidth];
+ for (int i = 0; i < numValues; i++) {
+ for (int j = 0; j < byteWidth; j++) {
+ values[i][j] = ((byte) i);
+ }
+ }
+ ArrowBuf[] bufs = new ArrowBuf[numValues];
+ for (int i = 0; i < numValues; i++) {
+ bufs[i] = allocator.buffer(byteWidth);
+ bufs[i].setBytes(0, values[i]);
+ }
+
+ try (NonNullableStructVector parent = NonNullableStructVector.empty("parent", allocator)) {
+ // write
+ ComplexWriter writer = new ComplexWriterImpl("root", parent);
+ StructWriter rootWriter = writer.rootAsStruct();
+
+ String fieldName = "fixedSizeBinary";
+ FixedSizeBinaryWriter fixedSizeBinaryWriter = rootWriter.fixedSizeBinary(fieldName, byteWidth);
+ for (int i = 0; i < numValues; i++) {
+ fixedSizeBinaryWriter.setPosition(i);
+ fixedSizeBinaryWriter.writeFixedSizeBinary(bufs[i]);
+ }
+
+ // schema
+ List<Field> children = parent.getField().getChildren().get(0).getChildren();
+ Assert.assertEquals(fieldName, children.get(0).getName());
+ Assert.assertEquals(ArrowType.FixedSizeBinary.TYPE_TYPE, children.get(0).getType().getTypeID());
+
+ // read
+ StructReader rootReader = new SingleStructReaderImpl(parent).reader("root");
+
+ FieldReader fixedSizeBinaryReader = rootReader.reader(fieldName);
+ for (int i = 0; i < numValues; i++) {
+ fixedSizeBinaryReader.setPosition(i);
+ byte[] readValues = fixedSizeBinaryReader.readByteArray();
+ Assert.assertArrayEquals(values[i], readValues);
+ }
+ }
+
+ AutoCloseables.close(bufs);
+ }
+
+ @Test
+ public void complexCopierWithList() {
+ try (NonNullableStructVector parent = NonNullableStructVector.empty("parent", allocator)) {
+ ComplexWriter writer = new ComplexWriterImpl("root", parent);
+ StructWriter rootWriter = writer.rootAsStruct();
+ ListWriter listWriter = rootWriter.list("list");
+ StructWriter innerStructWriter = listWriter.struct();
+ IntWriter outerIntWriter = listWriter.integer();
+ rootWriter.start();
+ listWriter.startList();
+ outerIntWriter.writeInt(1);
+ outerIntWriter.writeInt(2);
+ innerStructWriter.start();
+ IntWriter intWriter = innerStructWriter.integer("a");
+ intWriter.writeInt(1);
+ innerStructWriter.end();
+ innerStructWriter.start();
+ intWriter = innerStructWriter.integer("a");
+ intWriter.writeInt(2);
+ innerStructWriter.end();
+ listWriter.endList();
+ rootWriter.end();
+ writer.setValueCount(1);
+
+ StructVector structVector = (StructVector) parent.getChild("root");
+ TransferPair tp = structVector.getTransferPair(allocator);
+ tp.splitAndTransfer(0, 1);
+ NonNullableStructVector toStructVector = (NonNullableStructVector) tp.getTo();
+ JsonStringHashMap<?, ?> toMapValue = (JsonStringHashMap<?, ?>) toStructVector.getObject(0);
+ JsonStringArrayList<?> object = (JsonStringArrayList<?>) toMapValue.get("list");
+ assertEquals(1, object.get(0));
+ assertEquals(2, object.get(1));
+ JsonStringHashMap<?, ?> innerStruct = (JsonStringHashMap<?, ?>) object.get(2);
+ assertEquals(1, innerStruct.get("a"));
+ innerStruct = (JsonStringHashMap<?, ?>) object.get(3);
+ assertEquals(2, innerStruct.get("a"));
+ toStructVector.close();
+ }
+ }
+
+ @Test
+ public void testSingleStructWriter1() {
+ /* initialize a SingleStructWriter with empty StructVector and then lazily
+ * create all vectors with expected initialCapacity.
+ */
+ try (NonNullableStructVector parent = NonNullableStructVector.empty("parent", allocator)) {
+ SingleStructWriter singleStructWriter = new SingleStructWriter(parent);
+
+ int initialCapacity = 1024;
+ singleStructWriter.setInitialCapacity(initialCapacity);
+
+ IntWriter intWriter = singleStructWriter.integer("intField");
+ BigIntWriter bigIntWriter = singleStructWriter.bigInt("bigIntField");
+ Float4Writer float4Writer = singleStructWriter.float4("float4Field");
+ Float8Writer float8Writer = singleStructWriter.float8("float8Field");
+ ListWriter listWriter = singleStructWriter.list("listField");
+ MapWriter mapWriter = singleStructWriter.map("mapField", false);
+
+ int intValue = 100;
+ long bigIntValue = 10000;
+ float float4Value = 100.5f;
+ double float8Value = 100.375;
+
+ for (int i = 0; i < initialCapacity; i++) {
+ singleStructWriter.start();
+
+ intWriter.writeInt(intValue + i);
+ bigIntWriter.writeBigInt(bigIntValue + (long) i);
+ float4Writer.writeFloat4(float4Value + (float) i);
+ float8Writer.writeFloat8(float8Value + (double) i);
+
+ listWriter.setPosition(i);
+ listWriter.startList();
+ listWriter.integer().writeInt(intValue + i);
+ listWriter.integer().writeInt(intValue + i + 1);
+ listWriter.integer().writeInt(intValue + i + 2);
+ listWriter.integer().writeInt(intValue + i + 3);
+ listWriter.endList();
+
+ mapWriter.setPosition(i);
+ mapWriter.startMap();
+ mapWriter.startEntry();
+ mapWriter.key().integer().writeInt(intValue + i);
+ mapWriter.value().integer().writeInt(intValue + i + 1);
+ mapWriter.endEntry();
+ mapWriter.startEntry();
+ mapWriter.key().integer().writeInt(intValue + i + 2);
+ mapWriter.value().integer().writeInt(intValue + i + 3);
+ mapWriter.endEntry();
+ mapWriter.endMap();
+
+ singleStructWriter.end();
+ }
+
+ IntVector intVector = (IntVector) parent.getChild("intField");
+ BigIntVector bigIntVector = (BigIntVector) parent.getChild("bigIntField");
+ Float4Vector float4Vector = (Float4Vector) parent.getChild("float4Field");
+ Float8Vector float8Vector = (Float8Vector) parent.getChild("float8Field");
+
+ int capacity = singleStructWriter.getValueCapacity();
+ assertTrue(capacity >= initialCapacity && capacity < initialCapacity * 2);
+ capacity = intVector.getValueCapacity();
+ assertTrue(capacity >= initialCapacity && capacity < initialCapacity * 2);
+ capacity = bigIntVector.getValueCapacity();
+ assertTrue(capacity >= initialCapacity && capacity < initialCapacity * 2);
+ capacity = float4Vector.getValueCapacity();
+ assertTrue(capacity >= initialCapacity && capacity < initialCapacity * 2);
+ capacity = float8Vector.getValueCapacity();
+ assertTrue(capacity >= initialCapacity && capacity < initialCapacity * 2);
+
+ StructReader singleStructReader = new SingleStructReaderImpl(parent);
+
+ IntReader intReader = singleStructReader.reader("intField");
+ BigIntReader bigIntReader = singleStructReader.reader("bigIntField");
+ Float4Reader float4Reader = singleStructReader.reader("float4Field");
+ Float8Reader float8Reader = singleStructReader.reader("float8Field");
+ UnionListReader listReader = (UnionListReader) singleStructReader.reader("listField");
+ UnionMapReader mapReader = (UnionMapReader) singleStructReader.reader("mapField");
+
+ for (int i = 0; i < initialCapacity; i++) {
+ intReader.setPosition(i);
+ bigIntReader.setPosition(i);
+ float4Reader.setPosition(i);
+ float8Reader.setPosition(i);
+ listReader.setPosition(i);
+ mapReader.setPosition(i);
+
+ assertEquals(intValue + i, intReader.readInteger().intValue());
+ assertEquals(bigIntValue + (long) i, bigIntReader.readLong().longValue());
+ assertEquals(float4Value + (float) i, float4Reader.readFloat().floatValue(), 0);
+ assertEquals(float8Value + (double) i, float8Reader.readDouble().doubleValue(), 0);
+
+ for (int j = 0; j < 4; j++) {
+ listReader.next();
+ assertEquals(intValue + i + j, listReader.reader().readInteger().intValue());
+ }
+
+ for (int k = 0; k < 4; k += 2) {
+ mapReader.next();
+ assertEquals(intValue + k + i, mapReader.key().readInteger().intValue());
+ assertEquals(intValue + k + i + 1, mapReader.value().readInteger().intValue());
+ }
+ }
+ }
+
+
+ }
+
+ @Test
+ public void testListWriterWithNulls() {
+ try (ListVector listVector = ListVector.empty("list", allocator)) {
+ listVector.setInitialCapacity(COUNT);
+ listVector.allocateNew();
+ listVector.getValidityBuffer().setOne(0, (int) listVector.getValidityBuffer().capacity());
+
+ UnionListWriter listWriter = listVector.getWriter();
+
+ // expected listVector : [[null], null, [2, 4], null, [null], null, [6, 12], ...]
+ for (int i = 0; i < COUNT; i++) {
+ listWriter.setPosition(i);
+ if (i % 2 == 0) {
+ listWriter.startList();
+ if (i % 4 == 0) {
+ listWriter.integer().writeNull();
+ } else {
+ listWriter.integer().writeInt(i);
+ listWriter.integer().writeInt(i * 2);
+ }
+ listWriter.endList();
+ } else {
+ listWriter.writeNull();
+ }
+ }
+ listVector.setValueCount(COUNT);
+
+ UnionListReader listReader = new UnionListReader(listVector);
+ for (int i = 0; i < COUNT; i++) {
+ listReader.setPosition(i);
+ if (i % 2 == 0) {
+ Assert.assertTrue(listReader.isSet());
+ listReader.next();
+ if (i % 4 == 0) {
+ Assert.assertNull(listReader.reader().readInteger());
+ } else {
+ Assert.assertEquals(i, listReader.reader().readInteger().intValue());
+ listReader.next();
+ Assert.assertEquals(i * 2, listReader.reader().readInteger().intValue());
+ }
+ } else {
+ Assert.assertFalse(listReader.isSet());
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testListOfListWriterWithNulls() {
+ try (ListVector listVector = ListVector.empty("listoflist", allocator)) {
+ listVector.setInitialCapacity(COUNT);
+ listVector.allocateNew();
+ listVector.getValidityBuffer().setOne(0, (int) listVector.getValidityBuffer().capacity());
+
+ UnionListWriter listWriter = listVector.getWriter();
+
+ // create list : [ [null], null, [[null, 2, 4]], null, [null], null, [[null, 6, 12]], ... ]
+ for (int i = 0; i < COUNT; i++) {
+ listWriter.setPosition(i);
+ if (i % 2 == 0) {
+ listWriter.startList();
+ if (i % 4 == 0) {
+ listWriter.list().writeNull();
+ } else {
+ listWriter.list().startList();
+ listWriter.list().integer().writeNull();
+ listWriter.list().integer().writeInt(i);
+ listWriter.list().integer().writeInt(i * 2);
+ listWriter.list().endList();
+ }
+ listWriter.endList();
+ } else {
+ listWriter.writeNull();
+ }
+ }
+ listVector.setValueCount(COUNT);
+
+ UnionListReader listReader = new UnionListReader(listVector);
+ for (int i = 0; i < COUNT; i++) {
+ listReader.setPosition(i);
+ if (i % 2 == 0) {
+ Assert.assertTrue(listReader.isSet());
+ listReader.next();
+ if (i % 4 == 0) {
+ Assert.assertFalse(listReader.reader().isSet());
+ } else {
+ listReader.reader().next();
+ Assert.assertFalse(listReader.reader().reader().isSet());
+ listReader.reader().next();
+ Assert.assertEquals(i, listReader.reader().reader().readInteger().intValue());
+ listReader.reader().next();
+ Assert.assertEquals(i * 2, listReader.reader().reader().readInteger().intValue());
+ }
+ } else {
+ Assert.assertFalse(listReader.isSet());
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testListOfListOfListWriterWithNulls() {
+ try (ListVector listVector = ListVector.empty("listoflistoflist", allocator)) {
+ listVector.setInitialCapacity(COUNT);
+ listVector.allocateNew();
+ listVector.getValidityBuffer().setOne(0, (int) listVector.getValidityBuffer().capacity());
+
+ UnionListWriter listWriter = listVector.getWriter();
+
+ // create list : [ null, [null], [[null]], [[[null, 1, 2]]], null, [null], ...
+ for (int i = 0; i < COUNT; i++) {
+ listWriter.setPosition(i);
+ if (i % 4 == 0) {
+ listWriter.writeNull();
+ } else {
+ listWriter.startList();
+ if (i % 4 == 1) {
+ listWriter.list().writeNull();
+ } else if (i % 4 == 2) {
+ listWriter.list().startList();
+ listWriter.list().list().writeNull();
+ listWriter.list().endList();
+ } else {
+ listWriter.list().startList();
+ listWriter.list().list().startList();
+ listWriter.list().list().integer().writeNull();
+ listWriter.list().list().integer().writeInt(i);
+ listWriter.list().list().integer().writeInt(i * 2);
+ listWriter.list().list().endList();
+ listWriter.list().endList();
+ }
+ listWriter.endList();
+ }
+ }
+ listVector.setValueCount(COUNT);
+
+ UnionListReader listReader = new UnionListReader(listVector);
+ for (int i = 0; i < COUNT; i++) {
+ listReader.setPosition(i);
+ if (i % 4 == 0) {
+ Assert.assertFalse(listReader.isSet());
+ } else {
+ Assert.assertTrue(listReader.isSet());
+ listReader.next();
+ if (i % 4 == 1) {
+ Assert.assertFalse(listReader.reader().isSet());
+ } else if (i % 4 == 2) {
+ listReader.reader().next();
+ Assert.assertFalse(listReader.reader().reader().isSet());
+ } else {
+ listReader.reader().next();
+ listReader.reader().reader().next();
+ Assert.assertFalse(listReader.reader().reader().reader().isSet());
+ listReader.reader().reader().next();
+ Assert.assertEquals(i, listReader.reader().reader().reader().readInteger().intValue());
+ listReader.reader().reader().next();
+ Assert.assertEquals(i * 2, listReader.reader().reader().reader().readInteger().intValue());
+ }
+ }
+ }
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/ipc/BaseFileTest.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/ipc/BaseFileTest.java
new file mode 100644
index 000000000..8663c0c49
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/ipc/BaseFileTest.java
@@ -0,0 +1,849 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.ipc;
+
+import static org.apache.arrow.vector.TestUtils.newVarCharVector;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+import java.math.BigDecimal;
+import java.math.BigInteger;
+import java.nio.charset.StandardCharsets;
+import java.time.LocalDateTime;
+import java.time.LocalTime;
+import java.time.ZoneId;
+import java.time.ZoneOffset;
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.util.Collections2;
+import org.apache.arrow.vector.BigIntVector;
+import org.apache.arrow.vector.DateMilliVector;
+import org.apache.arrow.vector.DecimalVector;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.NullVector;
+import org.apache.arrow.vector.TimeMilliVector;
+import org.apache.arrow.vector.UInt1Vector;
+import org.apache.arrow.vector.UInt2Vector;
+import org.apache.arrow.vector.UInt4Vector;
+import org.apache.arrow.vector.UInt8Vector;
+import org.apache.arrow.vector.VarBinaryVector;
+import org.apache.arrow.vector.VarCharVector;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.complex.ListVector;
+import org.apache.arrow.vector.complex.MapVector;
+import org.apache.arrow.vector.complex.StructVector;
+import org.apache.arrow.vector.complex.impl.ComplexWriterImpl;
+import org.apache.arrow.vector.complex.impl.UnionListWriter;
+import org.apache.arrow.vector.complex.impl.UnionMapReader;
+import org.apache.arrow.vector.complex.impl.UnionMapWriter;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.complex.writer.BaseWriter.ComplexWriter;
+import org.apache.arrow.vector.complex.writer.BaseWriter.ListWriter;
+import org.apache.arrow.vector.complex.writer.BaseWriter.StructWriter;
+import org.apache.arrow.vector.complex.writer.BigIntWriter;
+import org.apache.arrow.vector.complex.writer.DateMilliWriter;
+import org.apache.arrow.vector.complex.writer.Float4Writer;
+import org.apache.arrow.vector.complex.writer.IntWriter;
+import org.apache.arrow.vector.complex.writer.TimeMilliWriter;
+import org.apache.arrow.vector.complex.writer.TimeStampMilliTZWriter;
+import org.apache.arrow.vector.complex.writer.TimeStampMilliWriter;
+import org.apache.arrow.vector.complex.writer.TimeStampNanoWriter;
+import org.apache.arrow.vector.complex.writer.UInt1Writer;
+import org.apache.arrow.vector.complex.writer.UInt2Writer;
+import org.apache.arrow.vector.complex.writer.UInt4Writer;
+import org.apache.arrow.vector.complex.writer.UInt8Writer;
+import org.apache.arrow.vector.dictionary.Dictionary;
+import org.apache.arrow.vector.dictionary.DictionaryEncoder;
+import org.apache.arrow.vector.dictionary.DictionaryProvider;
+import org.apache.arrow.vector.holders.NullableTimeStampMilliHolder;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.DictionaryEncoding;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.JsonStringArrayList;
+import org.apache.arrow.vector.util.Text;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Helps testing the file formats.
+ */
+public class BaseFileTest {
+ private static final Logger LOGGER = LoggerFactory.getLogger(BaseFileTest.class);
+ protected static final int COUNT = 10;
+ protected BufferAllocator allocator;
+
+ @Before
+ public void init() {
+ allocator = new RootAllocator(Integer.MAX_VALUE);
+ }
+
+ @After
+ public void tearDown() {
+ allocator.close();
+ }
+
+
+ private static short [] uint1Values = new short[]{0, 255, 1, 128, 2};
+ private static char [] uint2Values = new char[]{0, Character.MAX_VALUE, 1, Short.MAX_VALUE * 2, 2};
+ private static long [] uint4Values = new long[]{0, Integer.MAX_VALUE + 1L, 1, Integer.MAX_VALUE * 2L, 2};
+ private static BigInteger[] uint8Values = new BigInteger[]{BigInteger.valueOf(0),
+ BigInteger.valueOf(Long.MAX_VALUE).multiply(BigInteger.valueOf(2)), BigInteger.valueOf(2),
+ BigInteger.valueOf(Long.MAX_VALUE).add(BigInteger.valueOf(1)), BigInteger.valueOf(2)};
+
+ protected void writeData(int count, StructVector parent) {
+ ComplexWriter writer = new ComplexWriterImpl("root", parent);
+ StructWriter rootWriter = writer.rootAsStruct();
+ IntWriter intWriter = rootWriter.integer("int");
+ UInt1Writer uint1Writer = rootWriter.uInt1("uint1");
+ UInt2Writer uint2Writer = rootWriter.uInt2("uint2");
+ UInt4Writer uint4Writer = rootWriter.uInt4("uint4");
+ UInt8Writer uint8Writer = rootWriter.uInt8("uint8");
+ BigIntWriter bigIntWriter = rootWriter.bigInt("bigInt");
+ Float4Writer float4Writer = rootWriter.float4("float");
+ for (int i = 0; i < count; i++) {
+ intWriter.setPosition(i);
+ intWriter.writeInt(i);
+ uint1Writer.setPosition(i);
+ // TODO: Fix add safe write methods on uint methods.
+ uint1Writer.setPosition(i);
+ uint1Writer.writeUInt1((byte) uint1Values[i % uint1Values.length] );
+ uint2Writer.setPosition(i);
+ uint2Writer.writeUInt2((char) uint2Values[i % uint2Values.length] );
+ uint4Writer.setPosition(i);
+ uint4Writer.writeUInt4((int) uint4Values[i % uint4Values.length] );
+ uint8Writer.setPosition(i);
+ uint8Writer.writeUInt8(uint8Values[i % uint8Values.length].longValue());
+ bigIntWriter.setPosition(i);
+ bigIntWriter.writeBigInt(i);
+ float4Writer.setPosition(i);
+ float4Writer.writeFloat4(i == 0 ? Float.NaN : i);
+ }
+ writer.setValueCount(count);
+ }
+
+
+ protected void validateContent(int count, VectorSchemaRoot root) {
+ for (int i = 0; i < count; i++) {
+ Assert.assertEquals(i, root.getVector("int").getObject(i));
+ Assert.assertEquals((Short) uint1Values[i % uint1Values.length],
+ ((UInt1Vector) root.getVector("uint1")).getObjectNoOverflow(i));
+ Assert.assertEquals("Failed for index: " + i, (Character) uint2Values[i % uint2Values.length],
+ (Character) ((UInt2Vector) root.getVector("uint2")).get(i));
+ Assert.assertEquals("Failed for index: " + i, (Long) uint4Values[i % uint4Values.length],
+ ((UInt4Vector) root.getVector("uint4")).getObjectNoOverflow(i));
+ Assert.assertEquals("Failed for index: " + i, uint8Values[i % uint8Values.length],
+ ((UInt8Vector) root.getVector("uint8")).getObjectNoOverflow(i));
+ Assert.assertEquals(Long.valueOf(i), root.getVector("bigInt").getObject(i));
+ Assert.assertEquals(i == 0 ? Float.NaN : i, root.getVector("float").getObject(i));
+ }
+ }
+
+ protected void writeComplexData(int count, StructVector parent) {
+ ArrowBuf varchar = allocator.buffer(3);
+ varchar.readerIndex(0);
+ varchar.setByte(0, 'a');
+ varchar.setByte(1, 'b');
+ varchar.setByte(2, 'c');
+ varchar.writerIndex(3);
+ ComplexWriter writer = new ComplexWriterImpl("root", parent);
+ StructWriter rootWriter = writer.rootAsStruct();
+ IntWriter intWriter = rootWriter.integer("int");
+ BigIntWriter bigIntWriter = rootWriter.bigInt("bigInt");
+ ListWriter listWriter = rootWriter.list("list");
+ StructWriter structWriter = rootWriter.struct("struct");
+ for (int i = 0; i < count; i++) {
+ if (i % 5 != 3) {
+ intWriter.setPosition(i);
+ intWriter.writeInt(i);
+ }
+ bigIntWriter.setPosition(i);
+ bigIntWriter.writeBigInt(i);
+ listWriter.setPosition(i);
+ listWriter.startList();
+ for (int j = 0; j < i % 3; j++) {
+ listWriter.varChar().writeVarChar(0, 3, varchar);
+ }
+ listWriter.endList();
+ structWriter.setPosition(i);
+ structWriter.start();
+ structWriter.timeStampMilli("timestamp").writeTimeStampMilli(i);
+ structWriter.end();
+ }
+ writer.setValueCount(count);
+ varchar.getReferenceManager().release();
+ }
+
+ public void printVectors(List<FieldVector> vectors) {
+ for (FieldVector vector : vectors) {
+ LOGGER.debug(vector.getField().getName());
+ int valueCount = vector.getValueCount();
+ for (int i = 0; i < valueCount; i++) {
+ LOGGER.debug(String.valueOf(vector.getObject(i)));
+ }
+ }
+ }
+
+ protected void validateComplexContent(int count, VectorSchemaRoot root) {
+ Assert.assertEquals(count, root.getRowCount());
+ printVectors(root.getFieldVectors());
+ for (int i = 0; i < count; i++) {
+
+ Object intVal = root.getVector("int").getObject(i);
+ if (i % 5 != 3) {
+ Assert.assertEquals(i, intVal);
+ } else {
+ Assert.assertNull(intVal);
+ }
+ Assert.assertEquals(Long.valueOf(i), root.getVector("bigInt").getObject(i));
+ Assert.assertEquals(i % 3, ((List<?>) root.getVector("list").getObject(i)).size());
+ NullableTimeStampMilliHolder h = new NullableTimeStampMilliHolder();
+ FieldReader structReader = root.getVector("struct").getReader();
+ structReader.setPosition(i);
+ structReader.reader("timestamp").read(h);
+ Assert.assertEquals(i, h.value);
+ }
+ }
+
+ private LocalDateTime makeDateTimeFromCount(int i) {
+ return LocalDateTime.of(2000 + i, 1 + i, 1 + i, i, i, i, i * 100_000_000 + i);
+ }
+
+ protected void writeDateTimeData(int count, StructVector parent) {
+ Assert.assertTrue(count < 100);
+ ComplexWriter writer = new ComplexWriterImpl("root", parent);
+ StructWriter rootWriter = writer.rootAsStruct();
+ DateMilliWriter dateWriter = rootWriter.dateMilli("date");
+ TimeMilliWriter timeWriter = rootWriter.timeMilli("time");
+ TimeStampMilliWriter timeStampMilliWriter = rootWriter.timeStampMilli("timestamp-milli");
+ TimeStampMilliTZWriter timeStampMilliTZWriter = rootWriter.timeStampMilliTZ("timestamp-milliTZ", "Europe/Paris");
+ TimeStampNanoWriter timeStampNanoWriter = rootWriter.timeStampNano("timestamp-nano");
+ for (int i = 0; i < count; i++) {
+ LocalDateTime dt = makeDateTimeFromCount(i);
+ // Number of days in milliseconds since epoch, stored as 64-bit integer, only date part is used
+ dateWriter.setPosition(i);
+ long dateLong = dt.toLocalDate().atStartOfDay().toInstant(ZoneOffset.UTC).toEpochMilli();
+ dateWriter.writeDateMilli(dateLong);
+ // Time is a value in milliseconds since midnight, stored as 32-bit integer
+ timeWriter.setPosition(i);
+ int milliOfDay = (int) java.util.concurrent.TimeUnit.NANOSECONDS.toMillis(dt.toLocalTime().toNanoOfDay());
+ timeWriter.writeTimeMilli(milliOfDay);
+ // Timestamp as milliseconds since the epoch, stored as 64-bit integer
+ timeStampMilliWriter.setPosition(i);
+ timeStampMilliWriter.writeTimeStampMilli(dt.toInstant(ZoneOffset.UTC).toEpochMilli());
+ // Timestamp as milliseconds since epoch with timezone
+ timeStampMilliTZWriter.setPosition(i);
+ timeStampMilliTZWriter.writeTimeStampMilliTZ(dt.atZone(ZoneId.of("Europe/Paris")).toInstant().toEpochMilli());
+ // Timestamp as nanoseconds since epoch
+ timeStampNanoWriter.setPosition(i);
+ long tsNanos = dt.toInstant(ZoneOffset.UTC).toEpochMilli() * 1_000_000 + i; // need to add back in nano val
+ timeStampNanoWriter.writeTimeStampNano(tsNanos);
+ }
+ writer.setValueCount(count);
+ }
+
+ protected void validateDateTimeContent(int count, VectorSchemaRoot root) {
+ Assert.assertEquals(count, root.getRowCount());
+ printVectors(root.getFieldVectors());
+ for (int i = 0; i < count; i++) {
+ LocalDateTime dt = makeDateTimeFromCount(i);
+ LocalDateTime dtMilli = dt.minusNanos(i);
+ LocalDateTime dateVal = ((DateMilliVector) root.getVector("date")).getObject(i);
+ LocalDateTime dateExpected = dt.toLocalDate().atStartOfDay();
+ Assert.assertEquals(dateExpected, dateVal);
+ LocalTime timeVal = ((TimeMilliVector) root.getVector("time")).getObject(i).toLocalTime();
+ Assert.assertEquals(dtMilli.toLocalTime(), timeVal);
+ Object timestampMilliVal = root.getVector("timestamp-milli").getObject(i);
+ Assert.assertEquals(dtMilli, timestampMilliVal);
+ Object timestampMilliTZVal = root.getVector("timestamp-milliTZ").getObject(i);
+ Assert.assertEquals(dt.atZone(ZoneId.of("Europe/Paris")).toInstant().toEpochMilli(), timestampMilliTZVal);
+ Object timestampNanoVal = root.getVector("timestamp-nano").getObject(i);
+ Assert.assertEquals(dt, timestampNanoVal);
+ }
+ }
+
+ protected VectorSchemaRoot writeFlatDictionaryData(
+ BufferAllocator bufferAllocator,
+ DictionaryProvider.MapDictionaryProvider provider) {
+
+ // Define dictionaries and add to provider
+ VarCharVector dictionary1Vector = newVarCharVector("D1", bufferAllocator);
+ dictionary1Vector.allocateNewSafe();
+ dictionary1Vector.set(0, "foo".getBytes(StandardCharsets.UTF_8));
+ dictionary1Vector.set(1, "bar".getBytes(StandardCharsets.UTF_8));
+ dictionary1Vector.set(2, "baz".getBytes(StandardCharsets.UTF_8));
+ dictionary1Vector.setValueCount(3);
+
+ Dictionary dictionary1 = new Dictionary(dictionary1Vector, new DictionaryEncoding(1L, false, null));
+ provider.put(dictionary1);
+
+ VarCharVector dictionary2Vector = newVarCharVector("D2", bufferAllocator);
+ dictionary2Vector.allocateNewSafe();
+ dictionary2Vector.set(0, "micro".getBytes(StandardCharsets.UTF_8));
+ dictionary2Vector.set(1, "small".getBytes(StandardCharsets.UTF_8));
+ dictionary2Vector.set(2, "large".getBytes(StandardCharsets.UTF_8));
+ dictionary2Vector.setValueCount(3);
+
+ Dictionary dictionary2 = new Dictionary(dictionary2Vector, new DictionaryEncoding(2L, false, null));
+ provider.put(dictionary2);
+
+ // Populate the vectors
+ VarCharVector vector1A = newVarCharVector("varcharA", bufferAllocator);
+ vector1A.allocateNewSafe();
+ vector1A.set(0, "foo".getBytes(StandardCharsets.UTF_8));
+ vector1A.set(1, "bar".getBytes(StandardCharsets.UTF_8));
+ vector1A.set(3, "baz".getBytes(StandardCharsets.UTF_8));
+ vector1A.set(4, "bar".getBytes(StandardCharsets.UTF_8));
+ vector1A.set(5, "baz".getBytes(StandardCharsets.UTF_8));
+ vector1A.setValueCount(6);
+
+ FieldVector encodedVector1A = (FieldVector) DictionaryEncoder.encode(vector1A, dictionary1);
+ vector1A.close(); // Done with this vector after encoding
+
+ // Write this vector using indices instead of encoding
+ IntVector encodedVector1B = new IntVector("varcharB", bufferAllocator);
+ encodedVector1B.allocateNewSafe();
+ encodedVector1B.set(0, 2); // "baz"
+ encodedVector1B.set(1, 1); // "bar"
+ encodedVector1B.set(2, 2); // "baz"
+ encodedVector1B.set(4, 1); // "bar"
+ encodedVector1B.set(5, 0); // "foo"
+ encodedVector1B.setValueCount(6);
+
+ VarCharVector vector2 = newVarCharVector("sizes", bufferAllocator);
+ vector2.allocateNewSafe();
+ vector2.set(1, "large".getBytes(StandardCharsets.UTF_8));
+ vector2.set(2, "small".getBytes(StandardCharsets.UTF_8));
+ vector2.set(3, "small".getBytes(StandardCharsets.UTF_8));
+ vector2.set(4, "large".getBytes(StandardCharsets.UTF_8));
+ vector2.setValueCount(6);
+
+ FieldVector encodedVector2 = (FieldVector) DictionaryEncoder.encode(vector2, dictionary2);
+ vector2.close(); // Done with this vector after encoding
+
+ List<Field> fields = Arrays.asList(encodedVector1A.getField(), encodedVector1B.getField(),
+ encodedVector2.getField());
+ List<FieldVector> vectors = Collections2.asImmutableList(encodedVector1A, encodedVector1B, encodedVector2);
+
+ return new VectorSchemaRoot(fields, vectors, encodedVector1A.getValueCount());
+ }
+
+ protected void validateFlatDictionary(VectorSchemaRoot root, DictionaryProvider provider) {
+ FieldVector vector1A = root.getVector("varcharA");
+ Assert.assertNotNull(vector1A);
+
+ DictionaryEncoding encoding1A = vector1A.getField().getDictionary();
+ Assert.assertNotNull(encoding1A);
+ Assert.assertEquals(1L, encoding1A.getId());
+
+ Assert.assertEquals(6, vector1A.getValueCount());
+ Assert.assertEquals(0, vector1A.getObject(0));
+ Assert.assertEquals(1, vector1A.getObject(1));
+ Assert.assertEquals(null, vector1A.getObject(2));
+ Assert.assertEquals(2, vector1A.getObject(3));
+ Assert.assertEquals(1, vector1A.getObject(4));
+ Assert.assertEquals(2, vector1A.getObject(5));
+
+ FieldVector vector1B = root.getVector("varcharB");
+ Assert.assertNotNull(vector1B);
+
+ DictionaryEncoding encoding1B = vector1A.getField().getDictionary();
+ Assert.assertNotNull(encoding1B);
+ Assert.assertTrue(encoding1A.equals(encoding1B));
+ Assert.assertEquals(1L, encoding1B.getId());
+
+ Assert.assertEquals(6, vector1B.getValueCount());
+ Assert.assertEquals(2, vector1B.getObject(0));
+ Assert.assertEquals(1, vector1B.getObject(1));
+ Assert.assertEquals(2, vector1B.getObject(2));
+ Assert.assertEquals(null, vector1B.getObject(3));
+ Assert.assertEquals(1, vector1B.getObject(4));
+ Assert.assertEquals(0, vector1B.getObject(5));
+
+ FieldVector vector2 = root.getVector("sizes");
+ Assert.assertNotNull(vector2);
+
+ DictionaryEncoding encoding2 = vector2.getField().getDictionary();
+ Assert.assertNotNull(encoding2);
+ Assert.assertEquals(2L, encoding2.getId());
+
+ Assert.assertEquals(6, vector2.getValueCount());
+ Assert.assertEquals(null, vector2.getObject(0));
+ Assert.assertEquals(2, vector2.getObject(1));
+ Assert.assertEquals(1, vector2.getObject(2));
+ Assert.assertEquals(1, vector2.getObject(3));
+ Assert.assertEquals(2, vector2.getObject(4));
+ Assert.assertEquals(null, vector2.getObject(5));
+
+ Dictionary dictionary1 = provider.lookup(1L);
+ Assert.assertNotNull(dictionary1);
+ VarCharVector dictionaryVector = ((VarCharVector) dictionary1.getVector());
+ Assert.assertEquals(3, dictionaryVector.getValueCount());
+ Assert.assertEquals(new Text("foo"), dictionaryVector.getObject(0));
+ Assert.assertEquals(new Text("bar"), dictionaryVector.getObject(1));
+ Assert.assertEquals(new Text("baz"), dictionaryVector.getObject(2));
+
+ Dictionary dictionary2 = provider.lookup(2L);
+ Assert.assertNotNull(dictionary2);
+ dictionaryVector = ((VarCharVector) dictionary2.getVector());
+ Assert.assertEquals(3, dictionaryVector.getValueCount());
+ Assert.assertEquals(new Text("micro"), dictionaryVector.getObject(0));
+ Assert.assertEquals(new Text("small"), dictionaryVector.getObject(1));
+ Assert.assertEquals(new Text("large"), dictionaryVector.getObject(2));
+ }
+
+ protected VectorSchemaRoot writeNestedDictionaryData(
+ BufferAllocator bufferAllocator,
+ DictionaryProvider.MapDictionaryProvider provider) {
+
+ // Define the dictionary and add to the provider
+ VarCharVector dictionaryVector = newVarCharVector("D2", bufferAllocator);
+ dictionaryVector.allocateNewSafe();
+ dictionaryVector.set(0, "foo".getBytes(StandardCharsets.UTF_8));
+ dictionaryVector.set(1, "bar".getBytes(StandardCharsets.UTF_8));
+ dictionaryVector.setValueCount(2);
+
+ Dictionary dictionary = new Dictionary(dictionaryVector, new DictionaryEncoding(2L, false, null));
+ provider.put(dictionary);
+
+ // Write the vector data using dictionary indices
+ ListVector listVector = ListVector.empty("list", bufferAllocator);
+ DictionaryEncoding encoding = dictionary.getEncoding();
+ listVector.addOrGetVector(new FieldType(true, encoding.getIndexType(), encoding));
+ listVector.allocateNew();
+ UnionListWriter listWriter = new UnionListWriter(listVector);
+ listWriter.startList();
+ listWriter.writeInt(0);
+ listWriter.writeInt(1);
+ listWriter.endList();
+ listWriter.startList();
+ listWriter.writeInt(0);
+ listWriter.endList();
+ listWriter.startList();
+ listWriter.writeInt(1);
+ listWriter.endList();
+ listWriter.setValueCount(3);
+
+ List<Field> fields = Collections2.asImmutableList(listVector.getField());
+ List<FieldVector> vectors = Collections2.asImmutableList(listVector);
+ return new VectorSchemaRoot(fields, vectors, 3);
+ }
+
+ protected void validateNestedDictionary(VectorSchemaRoot root, DictionaryProvider provider) {
+ FieldVector vector = root.getFieldVectors().get(0);
+ Assert.assertNotNull(vector);
+ Assert.assertNull(vector.getField().getDictionary());
+ Field nestedField = vector.getField().getChildren().get(0);
+
+ DictionaryEncoding encoding = nestedField.getDictionary();
+ Assert.assertNotNull(encoding);
+ Assert.assertEquals(2L, encoding.getId());
+ Assert.assertEquals(new ArrowType.Int(32, true), encoding.getIndexType());
+
+ Assert.assertEquals(3, vector.getValueCount());
+ Assert.assertEquals(Arrays.asList(0, 1), vector.getObject(0));
+ Assert.assertEquals(Arrays.asList(0), vector.getObject(1));
+ Assert.assertEquals(Arrays.asList(1), vector.getObject(2));
+
+ Dictionary dictionary = provider.lookup(2L);
+ Assert.assertNotNull(dictionary);
+ VarCharVector dictionaryVector = ((VarCharVector) dictionary.getVector());
+ Assert.assertEquals(2, dictionaryVector.getValueCount());
+ Assert.assertEquals(new Text("foo"), dictionaryVector.getObject(0));
+ Assert.assertEquals(new Text("bar"), dictionaryVector.getObject(1));
+ }
+
+ protected VectorSchemaRoot writeDecimalData(BufferAllocator bufferAllocator) {
+ DecimalVector decimalVector1 = new DecimalVector("decimal1", bufferAllocator, 10, 3);
+ DecimalVector decimalVector2 = new DecimalVector("decimal2", bufferAllocator, 4, 2);
+ DecimalVector decimalVector3 = new DecimalVector("decimal3", bufferAllocator, 16, 8);
+
+ int count = 10;
+ decimalVector1.allocateNew(count);
+ decimalVector2.allocateNew(count);
+ decimalVector3.allocateNew(count);
+
+ for (int i = 0; i < count; i++) {
+ decimalVector1.setSafe(i, new BigDecimal(BigInteger.valueOf(i), 3));
+ decimalVector2.setSafe(i, new BigDecimal(BigInteger.valueOf(i * (1 << 10)), 2));
+ decimalVector3.setSafe(i, new BigDecimal(BigInteger.valueOf(i * 1111111111111111L), 8));
+ }
+
+ decimalVector1.setValueCount(count);
+ decimalVector2.setValueCount(count);
+ decimalVector3.setValueCount(count);
+
+ List<Field> fields = Collections2.asImmutableList(decimalVector1.getField(), decimalVector2.getField(),
+ decimalVector3.getField());
+ List<FieldVector> vectors = Collections2.asImmutableList(decimalVector1, decimalVector2, decimalVector3);
+ return new VectorSchemaRoot(fields, vectors, count);
+ }
+
+ protected void validateDecimalData(VectorSchemaRoot root) {
+ DecimalVector decimalVector1 = (DecimalVector) root.getVector("decimal1");
+ DecimalVector decimalVector2 = (DecimalVector) root.getVector("decimal2");
+ DecimalVector decimalVector3 = (DecimalVector) root.getVector("decimal3");
+ int count = 10;
+ Assert.assertEquals(count, root.getRowCount());
+
+ for (int i = 0; i < count; i++) {
+ // Verify decimal 1 vector
+ BigDecimal readValue = decimalVector1.getObject(i);
+ ArrowType.Decimal type = (ArrowType.Decimal) decimalVector1.getField().getType();
+ BigDecimal genValue = new BigDecimal(BigInteger.valueOf(i), type.getScale());
+ Assert.assertEquals(genValue, readValue);
+
+ // Verify decimal 2 vector
+ readValue = decimalVector2.getObject(i);
+ type = (ArrowType.Decimal) decimalVector2.getField().getType();
+ genValue = new BigDecimal(BigInteger.valueOf(i * (1 << 10)), type.getScale());
+ Assert.assertEquals(genValue, readValue);
+
+ // Verify decimal 3 vector
+ readValue = decimalVector3.getObject(i);
+ type = (ArrowType.Decimal) decimalVector3.getField().getType();
+ genValue = new BigDecimal(BigInteger.valueOf(i * 1111111111111111L), type.getScale());
+ Assert.assertEquals(genValue, readValue);
+ }
+ }
+
+ protected VectorSchemaRoot writeNullData(int valueCount) {
+ NullVector nullVector1 = new NullVector("vector1");
+ NullVector nullVector2 = new NullVector("vector2");
+ nullVector1.setValueCount(valueCount);
+ nullVector2.setValueCount(valueCount);
+
+ List<Field> fields = Collections2.asImmutableList(nullVector1.getField(), nullVector2.getField());
+ List<FieldVector> vectors = Collections2.asImmutableList(nullVector1, nullVector2);
+ return new VectorSchemaRoot(fields, vectors, valueCount);
+ }
+
+ protected void validateNullData(VectorSchemaRoot root, int valueCount) {
+
+ NullVector vector1 = (NullVector) root.getFieldVectors().get(0);
+ NullVector vector2 = (NullVector) root.getFieldVectors().get(1);
+
+ assertEquals(valueCount, vector1.getValueCount());
+ assertEquals(valueCount, vector2.getValueCount());
+ }
+
+ public void validateUnionData(int count, VectorSchemaRoot root) {
+ FieldReader unionReader = root.getVector("union").getReader();
+ for (int i = 0; i < count; i++) {
+ unionReader.setPosition(i);
+ switch (i % 4) {
+ case 0:
+ Assert.assertEquals(i, unionReader.readInteger().intValue());
+ break;
+ case 1:
+ Assert.assertEquals(i, unionReader.readLong().longValue());
+ break;
+ case 2:
+ Assert.assertEquals(i % 3, unionReader.size());
+ break;
+ case 3:
+ NullableTimeStampMilliHolder h = new NullableTimeStampMilliHolder();
+ unionReader.reader("timestamp").read(h);
+ Assert.assertEquals(i, h.value);
+ break;
+ default:
+ assert false : "Unexpected value in switch statement: " + i;
+ }
+ }
+ }
+
+ public void writeUnionData(int count, StructVector parent) {
+ ArrowBuf varchar = allocator.buffer(3);
+ varchar.readerIndex(0);
+ varchar.setByte(0, 'a');
+ varchar.setByte(1, 'b');
+ varchar.setByte(2, 'c');
+ varchar.writerIndex(3);
+ ComplexWriter writer = new ComplexWriterImpl("root", parent);
+ StructWriter rootWriter = writer.rootAsStruct();
+ IntWriter intWriter = rootWriter.integer("union");
+ BigIntWriter bigIntWriter = rootWriter.bigInt("union");
+ ListWriter listWriter = rootWriter.list("union");
+ StructWriter structWriter = rootWriter.struct("union");
+ for (int i = 0; i < count; i++) {
+ switch (i % 4) {
+ case 0:
+ intWriter.setPosition(i);
+ intWriter.writeInt(i);
+ break;
+ case 1:
+ bigIntWriter.setPosition(i);
+ bigIntWriter.writeBigInt(i);
+ break;
+ case 2:
+ listWriter.setPosition(i);
+ listWriter.startList();
+ for (int j = 0; j < i % 3; j++) {
+ listWriter.varChar().writeVarChar(0, 3, varchar);
+ }
+ listWriter.endList();
+ break;
+ case 3:
+ structWriter.setPosition(i);
+ structWriter.start();
+ structWriter.timeStampMilli("timestamp").writeTimeStampMilli(i);
+ structWriter.end();
+ break;
+ default:
+ assert false : "Unexpected value in switch statement: " + i;
+ }
+ }
+ writer.setValueCount(count);
+ varchar.getReferenceManager().release();
+ }
+
+ protected void writeVarBinaryData(int count, StructVector parent) {
+ Assert.assertTrue(count < 100);
+ ComplexWriter writer = new ComplexWriterImpl("root", parent);
+ StructWriter rootWriter = writer.rootAsStruct();
+ ListWriter listWriter = rootWriter.list("list");
+ ArrowBuf varbin = allocator.buffer(count);
+ for (int i = 0; i < count; i++) {
+ varbin.setByte(i, i);
+ listWriter.setPosition(i);
+ listWriter.startList();
+ for (int j = 0; j < i % 3; j++) {
+ listWriter.varBinary().writeVarBinary(0, i + 1, varbin);
+ }
+ listWriter.endList();
+ }
+ writer.setValueCount(count);
+ varbin.getReferenceManager().release();
+ }
+
+ protected void validateVarBinary(int count, VectorSchemaRoot root) {
+ Assert.assertEquals(count, root.getRowCount());
+ ListVector listVector = (ListVector) root.getVector("list");
+ byte[] expectedArray = new byte[count];
+ int numVarBinaryValues = 0;
+ for (int i = 0; i < count; i++) {
+ expectedArray[i] = (byte) i;
+ List<?> objList = listVector.getObject(i);
+ if (i % 3 == 0) {
+ Assert.assertTrue(objList.isEmpty());
+ } else {
+ byte[] expected = Arrays.copyOfRange(expectedArray, 0, i + 1);
+ for (int j = 0; j < i % 3; j++) {
+ byte[] result = (byte[]) objList.get(j);
+ Assert.assertArrayEquals(result, expected);
+ numVarBinaryValues++;
+ }
+ }
+ }
+
+ // ListVector lastSet should be the index of last value + 1
+ Assert.assertEquals(listVector.getLastSet(), count - 1);
+
+ // VarBinaryVector lastSet should be the index of last value
+ VarBinaryVector binaryVector = (VarBinaryVector) listVector.getChildrenFromFields().get(0);
+ Assert.assertEquals(binaryVector.getLastSet(), numVarBinaryValues - 1);
+ }
+
+ protected void writeBatchData(ArrowWriter writer, IntVector vector, VectorSchemaRoot root) throws IOException {
+ writer.start();
+
+ vector.setNull(0);
+ vector.setSafe(1, 1);
+ vector.setSafe(2, 2);
+ vector.setNull(3);
+ vector.setSafe(4, 1);
+ vector.setValueCount(5);
+ root.setRowCount(5);
+ writer.writeBatch();
+
+ vector.setNull(0);
+ vector.setSafe(1, 1);
+ vector.setSafe(2, 2);
+ vector.setValueCount(3);
+ root.setRowCount(3);
+ writer.writeBatch();
+
+ writer.end();
+ }
+
+ protected void validateBatchData(ArrowReader reader, IntVector vector) throws IOException {
+ reader.loadNextBatch();
+
+ assertEquals(vector.getValueCount(), 5);
+ assertTrue(vector.isNull(0));
+ assertEquals(vector.get(1), 1);
+ assertEquals(vector.get(2), 2);
+ assertTrue(vector.isNull(3));
+ assertEquals(vector.get(4), 1);
+
+ reader.loadNextBatch();
+
+ assertEquals(vector.getValueCount(), 3);
+ assertTrue(vector.isNull(0));
+ assertEquals(vector.get(1), 1);
+ assertEquals(vector.get(2), 2);
+ }
+
+ protected VectorSchemaRoot writeMapData(BufferAllocator bufferAllocator) {
+ MapVector mapVector = MapVector.empty("map", bufferAllocator, false);
+ MapVector sortedMapVector = MapVector.empty("mapSorted", bufferAllocator, true);
+ mapVector.allocateNew();
+ sortedMapVector.allocateNew();
+ UnionMapWriter mapWriter = mapVector.getWriter();
+ UnionMapWriter sortedMapWriter = sortedMapVector.getWriter();
+
+ final int count = 10;
+ for (int i = 0; i < count; i++) {
+ // Write mapVector with NULL values
+ // i == 1 is a NULL
+ if (i != 1) {
+ mapWriter.setPosition(i);
+ mapWriter.startMap();
+ // i == 3 is an empty map
+ if (i != 3) {
+ for (int j = 0; j < i + 1; j++) {
+ mapWriter.startEntry();
+ mapWriter.key().bigInt().writeBigInt(j);
+ // i == 5 maps to a NULL value
+ if (i != 5) {
+ mapWriter.value().integer().writeInt(j);
+ }
+ mapWriter.endEntry();
+ }
+ }
+ mapWriter.endMap();
+ }
+ // Write sortedMapVector
+ sortedMapWriter.setPosition(i);
+ sortedMapWriter.startMap();
+ for (int j = 0; j < i + 1; j++) {
+ sortedMapWriter.startEntry();
+ sortedMapWriter.key().bigInt().writeBigInt(j);
+ sortedMapWriter.value().integer().writeInt(j);
+ sortedMapWriter.endEntry();
+ }
+ sortedMapWriter.endMap();
+ }
+ mapWriter.setValueCount(COUNT);
+ sortedMapWriter.setValueCount(COUNT);
+
+ List<Field> fields = Collections2.asImmutableList(mapVector.getField(), sortedMapVector.getField());
+ List<FieldVector> vectors = Collections2.asImmutableList(mapVector, sortedMapVector);
+ return new VectorSchemaRoot(fields, vectors, count);
+ }
+
+ protected void validateMapData(VectorSchemaRoot root) {
+ MapVector mapVector = (MapVector) root.getVector("map");
+ MapVector sortedMapVector = (MapVector) root.getVector("mapSorted");
+
+ final int count = 10;
+ Assert.assertEquals(count, root.getRowCount());
+
+ UnionMapReader mapReader = new UnionMapReader(mapVector);
+ UnionMapReader sortedMapReader = new UnionMapReader(sortedMapVector);
+ for (int i = 0; i < count; i++) {
+ // Read mapVector with NULL values
+ mapReader.setPosition(i);
+ if (i == 1) {
+ assertFalse(mapReader.isSet());
+ } else {
+ if (i == 3) {
+ JsonStringArrayList<?> result = (JsonStringArrayList<?>) mapReader.readObject();
+ assertTrue(result.isEmpty());
+ } else {
+ for (int j = 0; j < i + 1; j++) {
+ mapReader.next();
+ assertEquals(j, mapReader.key().readLong().longValue());
+ if (i == 5) {
+ assertFalse(mapReader.value().isSet());
+ } else {
+ assertEquals(j, mapReader.value().readInteger().intValue());
+ }
+ }
+ }
+ }
+ // Read sortedMapVector
+ sortedMapReader.setPosition(i);
+ for (int j = 0; j < i + 1; j++) {
+ sortedMapReader.next();
+ assertEquals(j, sortedMapReader.key().readLong().longValue());
+ assertEquals(j, sortedMapReader.value().readInteger().intValue());
+ }
+ }
+ }
+
+ protected VectorSchemaRoot writeListAsMapData(BufferAllocator bufferAllocator) {
+ ListVector mapEntryList = ListVector.empty("entryList", bufferAllocator);
+ FieldType mapEntryType = new FieldType(false, ArrowType.Struct.INSTANCE, null, null);
+ StructVector mapEntryData = new StructVector("entryData", bufferAllocator, mapEntryType, null);
+ mapEntryData.addOrGet("myKey", new FieldType(false, new ArrowType.Int(64, true), null), BigIntVector.class);
+ mapEntryData.addOrGet("myValue", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class);
+ mapEntryList.initializeChildrenFromFields(Collections2.asImmutableList(mapEntryData.getField()));
+ UnionListWriter entryWriter = mapEntryList.getWriter();
+ entryWriter.allocate();
+
+ final int count = 10;
+ for (int i = 0; i < count; i++) {
+ entryWriter.setPosition(i);
+ entryWriter.startList();
+ for (int j = 0; j < i + 1; j++) {
+ entryWriter.struct().start();
+ entryWriter.struct().bigInt("myKey").writeBigInt(j);
+ entryWriter.struct().integer("myValue").writeInt(j);
+ entryWriter.struct().end();
+ }
+ entryWriter.endList();
+ }
+ entryWriter.setValueCount(COUNT);
+
+ MapVector mapVector = MapVector.empty("map", bufferAllocator, false);
+ mapEntryList.makeTransferPair(mapVector).transfer();
+
+ List<Field> fields = Collections2.asImmutableList(mapVector.getField());
+ List<FieldVector> vectors = Collections2.asImmutableList(mapVector);
+ return new VectorSchemaRoot(fields, vectors, count);
+ }
+
+ protected void validateListAsMapData(VectorSchemaRoot root) {
+ MapVector sortedMapVector = (MapVector) root.getVector("map");
+
+ final int count = 10;
+ Assert.assertEquals(count, root.getRowCount());
+
+ UnionMapReader sortedMapReader = new UnionMapReader(sortedMapVector);
+ sortedMapReader.setKeyValueNames("myKey", "myValue");
+ for (int i = 0; i < count; i++) {
+ sortedMapReader.setPosition(i);
+ for (int j = 0; j < i + 1; j++) {
+ sortedMapReader.next();
+ assertEquals(j, sortedMapReader.key().readLong().longValue());
+ assertEquals(j, sortedMapReader.value().readInteger().intValue());
+ }
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/ipc/ITTestIPCWithLargeArrowBuffers.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/ipc/ITTestIPCWithLargeArrowBuffers.java
new file mode 100644
index 000000000..d3c91fd14
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/ipc/ITTestIPCWithLargeArrowBuffers.java
@@ -0,0 +1,187 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.ipc;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.Map;
+
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.BigIntVector;
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.dictionary.Dictionary;
+import org.apache.arrow.vector.dictionary.DictionaryProvider;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.DictionaryEncoding;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Integration test for reading/writing {@link org.apache.arrow.vector.VectorSchemaRoot} with
+ * large (more than 2GB) buffers by {@link ArrowReader} and {@link ArrowWriter}..
+ * To run this test, please make sure there is at least 8GB free memory, and 8GB
+ * free.disk space in the system.
+ */
+public class ITTestIPCWithLargeArrowBuffers {
+
+ private static final Logger logger = LoggerFactory.getLogger(ITTestIPCWithLargeArrowBuffers.class);
+
+ // 4GB buffer size
+ static final long BUFFER_SIZE = 4 * 1024 * 1024 * 1024L;
+
+ static final int DICTIONARY_VECTOR_SIZE = (int) (BUFFER_SIZE / BigIntVector.TYPE_WIDTH);
+
+ static final int ENCODED_VECTOR_SIZE = (int) (BUFFER_SIZE / IntVector.TYPE_WIDTH);
+
+ static final String FILE_NAME = "largeArrowData.data";
+
+ static final long DICTIONARY_ID = 123L;
+
+ static final ArrowType.Int ENCODED_VECTOR_TYPE = new ArrowType.Int(32, true);
+
+ static final DictionaryEncoding DICTIONARY_ENCODING =
+ new DictionaryEncoding(DICTIONARY_ID, false, ENCODED_VECTOR_TYPE);
+
+ static final FieldType ENCODED_FIELD_TYPE =
+ new FieldType(true, ENCODED_VECTOR_TYPE, DICTIONARY_ENCODING, null);
+
+ static final Field ENCODED_VECTOR_FIELD = new Field("encoded vector", ENCODED_FIELD_TYPE, null);
+
+ private void testWriteLargeArrowData(boolean streamMode) throws IOException {
+ // simulate encoding big int as int
+ try (RootAllocator allocator = new RootAllocator(Long.MAX_VALUE);
+ BigIntVector dictVector = new BigIntVector("dic vector", allocator);
+ FileOutputStream out = new FileOutputStream(FILE_NAME);
+ IntVector encodedVector = (IntVector) ENCODED_VECTOR_FIELD.createVector(allocator)) {
+
+ // prepare dictionary provider.
+ DictionaryProvider.MapDictionaryProvider provider = new DictionaryProvider.MapDictionaryProvider();
+ Dictionary dictionary = new Dictionary(dictVector, DICTIONARY_ENCODING);
+ provider.put(dictionary);
+
+ // populate the dictionary vector
+ dictVector.allocateNew(DICTIONARY_VECTOR_SIZE);
+ for (int i = 0; i < DICTIONARY_VECTOR_SIZE; i++) {
+ dictVector.set(i, i);
+ }
+ dictVector.setValueCount(DICTIONARY_VECTOR_SIZE);
+ assertTrue(dictVector.getDataBuffer().capacity() > Integer.MAX_VALUE);
+ logger.trace("Populating dictionary vector finished");
+
+ // populate the encoded vector
+ encodedVector.allocateNew(ENCODED_VECTOR_SIZE);
+ for (int i = 0; i < ENCODED_VECTOR_SIZE; i++) {
+ encodedVector.set(i, i % DICTIONARY_VECTOR_SIZE);
+ }
+ encodedVector.setValueCount(ENCODED_VECTOR_SIZE);
+ assertTrue(encodedVector.getDataBuffer().capacity() > Integer.MAX_VALUE);
+ logger.trace("Populating encoded vector finished");
+
+ // build vector schema root and write data.
+ try (VectorSchemaRoot root =
+ new VectorSchemaRoot(
+ Arrays.asList(ENCODED_VECTOR_FIELD), Arrays.asList(encodedVector), ENCODED_VECTOR_SIZE);
+ ArrowWriter writer = streamMode ?
+ new ArrowStreamWriter(root, provider, out) :
+ new ArrowFileWriter(root, provider, out.getChannel())) {
+ writer.start();
+ writer.writeBatch();
+ writer.end();
+ logger.trace("Writing data finished");
+ }
+ }
+
+ assertTrue(new File(FILE_NAME).exists());
+ }
+
+ private void testReadLargeArrowData(boolean streamMode) throws IOException {
+ try (RootAllocator allocator = new RootAllocator(Long.MAX_VALUE);
+ FileInputStream in = new FileInputStream(FILE_NAME);
+ ArrowReader reader = streamMode ?
+ new ArrowStreamReader(in, allocator) :
+ new ArrowFileReader(in.getChannel(), allocator)) {
+
+ // verify schema
+ Schema readSchema = reader.getVectorSchemaRoot().getSchema();
+ assertEquals(1, readSchema.getFields().size());
+ assertEquals(ENCODED_VECTOR_FIELD, readSchema.getFields().get(0));
+ logger.trace("Verifying schema finished");
+
+ // verify vector schema root
+ assertTrue(reader.loadNextBatch());
+ VectorSchemaRoot root = reader.getVectorSchemaRoot();
+
+ assertEquals(ENCODED_VECTOR_SIZE, root.getRowCount());
+ assertEquals(1, root.getFieldVectors().size());
+ assertTrue(root.getFieldVectors().get(0) instanceof IntVector);
+
+ IntVector encodedVector = (IntVector) root.getVector(0);
+ for (int i = 0; i < ENCODED_VECTOR_SIZE; i++) {
+ assertEquals(i % DICTIONARY_VECTOR_SIZE, encodedVector.get(i));
+ }
+ logger.trace("Verifying encoded vector finished");
+
+ // verify dictionary
+ Map<Long, Dictionary> dictVectors = reader.getDictionaryVectors();
+ assertEquals(1, dictVectors.size());
+ Dictionary dictionary = dictVectors.get(DICTIONARY_ID);
+ assertNotNull(dictionary);
+
+ assertTrue(dictionary.getVector() instanceof BigIntVector);
+ BigIntVector dictVector = (BigIntVector) dictionary.getVector();
+ assertEquals(DICTIONARY_VECTOR_SIZE, dictVector.getValueCount());
+ for (int i = 0; i < DICTIONARY_VECTOR_SIZE; i++) {
+ assertEquals(i, dictVector.get(i));
+ }
+ logger.trace("Verifying dictionary vector finished");
+
+ // ensure no more data available
+ assertFalse(reader.loadNextBatch());
+ } finally {
+ File dataFile = new File(FILE_NAME);
+ dataFile.delete();
+ assertFalse(dataFile.exists());
+ }
+ }
+
+ @Test
+ public void testIPC() throws IOException {
+ logger.trace("Start testing reading/writing large arrow stream data");
+ testWriteLargeArrowData(true);
+ testReadLargeArrowData(true);
+ logger.trace("Finish testing reading/writing large arrow stream data");
+
+ logger.trace("Start testing reading/writing large arrow file data");
+ testWriteLargeArrowData(false);
+ testReadLargeArrowData(false);
+ logger.trace("Finish testing reading/writing large arrow file data");
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/ipc/MessageSerializerTest.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/ipc/MessageSerializerTest.java
new file mode 100644
index 000000000..11b8d4fad
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/ipc/MessageSerializerTest.java
@@ -0,0 +1,247 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.ipc;
+
+import static java.util.Arrays.asList;
+import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt;
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.channels.Channels;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.ipc.message.ArrowFieldNode;
+import org.apache.arrow.vector.ipc.message.ArrowMessage;
+import org.apache.arrow.vector.ipc.message.ArrowRecordBatch;
+import org.apache.arrow.vector.ipc.message.IpcOption;
+import org.apache.arrow.vector.ipc.message.MessageSerializer;
+import org.apache.arrow.vector.types.MetadataVersion;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.DictionaryEncoding;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.ExpectedException;
+
+public class MessageSerializerTest {
+
+ public static ArrowBuf buf(BufferAllocator alloc, byte[] bytes) {
+ ArrowBuf buffer = alloc.buffer(bytes.length);
+ buffer.writeBytes(bytes);
+ return buffer;
+ }
+
+ public static byte[] array(ArrowBuf buf) {
+ byte[] bytes = new byte[checkedCastToInt(buf.readableBytes())];
+ buf.readBytes(bytes);
+ return bytes;
+ }
+
+ private int intToByteRoundtrip(int v, byte[] bytes) {
+ MessageSerializer.intToBytes(v, bytes);
+ return MessageSerializer.bytesToInt(bytes);
+ }
+
+ @Test
+ public void testIntToBytes() {
+ byte[] bytes = new byte[4];
+ int[] values = new int[]{1, 15, 1 << 8, 1 << 16, Integer.MAX_VALUE};
+ for (int v : values) {
+ assertEquals(intToByteRoundtrip(v, bytes), v);
+ }
+ }
+
+ @Test
+ public void testWriteMessageBufferAligned() throws IOException {
+ ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
+ WriteChannel out = new WriteChannel(Channels.newChannel(outputStream));
+
+ // This is not a valid Arrow Message, only to test writing and alignment
+ ByteBuffer buffer = ByteBuffer.allocate(8).order(ByteOrder.nativeOrder());
+ buffer.putInt(1);
+ buffer.putInt(2);
+ buffer.flip();
+
+ int bytesWritten = MessageSerializer.writeMessageBuffer(out, 8, buffer);
+ assertEquals(16, bytesWritten);
+
+ buffer.rewind();
+ buffer.putInt(3);
+ buffer.flip();
+ bytesWritten = MessageSerializer.writeMessageBuffer(out, 4, buffer);
+ assertEquals(16, bytesWritten);
+
+ ByteArrayInputStream inputStream = new ByteArrayInputStream(outputStream.toByteArray());
+ ReadChannel in = new ReadChannel(Channels.newChannel(inputStream));
+ ByteBuffer result = ByteBuffer.allocate(32).order(ByteOrder.nativeOrder());
+ in.readFully(result);
+ result.rewind();
+
+ // First message continuation, size, and 2 int values
+ assertEquals(MessageSerializer.IPC_CONTINUATION_TOKEN, result.getInt());
+ // mesage length is represented in little endian
+ result.order(ByteOrder.LITTLE_ENDIAN);
+ assertEquals(8, result.getInt());
+ result.order(ByteOrder.nativeOrder());
+ assertEquals(1, result.getInt());
+ assertEquals(2, result.getInt());
+
+ // Second message continuation, size, 1 int value and 4 bytes padding
+ assertEquals(MessageSerializer.IPC_CONTINUATION_TOKEN, result.getInt());
+ // mesage length is represented in little endian
+ result.order(ByteOrder.LITTLE_ENDIAN);
+ assertEquals(8, result.getInt());
+ result.order(ByteOrder.nativeOrder());
+ assertEquals(3, result.getInt());
+ assertEquals(0, result.getInt());
+ }
+
+ @Test
+ public void testSchemaMessageSerialization() throws IOException {
+ Schema schema = testSchema();
+ ByteArrayOutputStream out = new ByteArrayOutputStream();
+ long size = MessageSerializer.serialize(
+ new WriteChannel(Channels.newChannel(out)), schema);
+ assertEquals(size, out.toByteArray().length);
+
+ ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray());
+ Schema deserialized = MessageSerializer.deserializeSchema(
+ new ReadChannel(Channels.newChannel(in)));
+ assertEquals(schema, deserialized);
+ assertEquals(1, deserialized.getFields().size());
+ }
+
+ @Test
+ public void testSchemaDictionaryMessageSerialization() throws IOException {
+ DictionaryEncoding dictionary = new DictionaryEncoding(9L, false, new ArrowType.Int(8, true));
+ Field field = new Field("test", new FieldType(true, ArrowType.Utf8.INSTANCE, dictionary, null), null);
+ Schema schema = new Schema(Collections.singletonList(field));
+ ByteArrayOutputStream out = new ByteArrayOutputStream();
+ long size = MessageSerializer.serialize(new WriteChannel(Channels.newChannel(out)), schema);
+ assertEquals(size, out.toByteArray().length);
+
+ ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray());
+ Schema deserialized = MessageSerializer.deserializeSchema(new ReadChannel(Channels.newChannel(in)));
+ assertEquals(schema, deserialized);
+ }
+
+ @Rule
+ public ExpectedException expectedEx = ExpectedException.none();
+
+ @Test
+ public void testSerializeRecordBatchV4() throws IOException {
+ byte[] validity = new byte[]{(byte) 255, 0};
+ // second half is "undefined"
+ byte[] values = new byte[]{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
+
+ BufferAllocator alloc = new RootAllocator(Long.MAX_VALUE);
+ ArrowBuf validityb = buf(alloc, validity);
+ ArrowBuf valuesb = buf(alloc, values);
+
+ ArrowRecordBatch batch = new ArrowRecordBatch(
+ 16, asList(new ArrowFieldNode(16, 8)), asList(validityb, valuesb));
+
+ // avoid writing legacy ipc format by default
+ IpcOption option = new IpcOption(false, MetadataVersion.V4);
+ ByteArrayOutputStream out = new ByteArrayOutputStream();
+ MessageSerializer.serialize(new WriteChannel(Channels.newChannel(out)), batch, option);
+
+ ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray());
+ ReadChannel channel = new ReadChannel(Channels.newChannel(in));
+ ArrowMessage deserialized = MessageSerializer.deserializeMessageBatch(channel, alloc);
+ assertEquals(ArrowRecordBatch.class, deserialized.getClass());
+ verifyBatch((ArrowRecordBatch) deserialized, validity, values);
+ }
+
+ @Test
+ public void testSerializeRecordBatchV5() throws Exception {
+ byte[] validity = new byte[]{(byte) 255, 0};
+ // second half is "undefined"
+ byte[] values = new byte[]{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
+
+ BufferAllocator alloc = new RootAllocator(Long.MAX_VALUE);
+ ArrowBuf validityb = buf(alloc, validity);
+ ArrowBuf valuesb = buf(alloc, values);
+
+ ArrowRecordBatch batch = new ArrowRecordBatch(
+ 16, asList(new ArrowFieldNode(16, 8)), asList(validityb, valuesb));
+
+ // avoid writing legacy ipc format by default
+ IpcOption option = new IpcOption(false, MetadataVersion.V5);
+ ByteArrayOutputStream out = new ByteArrayOutputStream();
+ MessageSerializer.serialize(new WriteChannel(Channels.newChannel(out)), batch, option);
+ validityb.close();
+ valuesb.close();
+ batch.close();
+
+ {
+ ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray());
+ ReadChannel channel = new ReadChannel(Channels.newChannel(in));
+ ArrowMessage deserialized = MessageSerializer.deserializeMessageBatch(channel, alloc);
+ assertEquals(ArrowRecordBatch.class, deserialized.getClass());
+ verifyBatch((ArrowRecordBatch) deserialized, validity, values);
+ deserialized.close();
+ }
+
+ {
+ byte[] validBytes = out.toByteArray();
+ byte[] missingBytes = Arrays.copyOfRange(validBytes, /*from=*/0, validBytes.length - 1);
+
+ ByteArrayInputStream in = new ByteArrayInputStream(missingBytes);
+ ReadChannel channel = new ReadChannel(Channels.newChannel(in));
+
+ assertThrows(IOException.class, () -> MessageSerializer.deserializeMessageBatch(channel, alloc));
+ }
+
+ alloc.close();
+ }
+
+ public static Schema testSchema() {
+ return new Schema(asList(new Field(
+ "testField", FieldType.nullable(new ArrowType.Int(8, true)), Collections.<Field>emptyList())));
+ }
+
+ // Verifies batch contents matching test schema.
+ public static void verifyBatch(ArrowRecordBatch batch, byte[] validity, byte[] values) {
+ assertTrue(batch != null);
+ List<ArrowFieldNode> nodes = batch.getNodes();
+ assertEquals(1, nodes.size());
+ ArrowFieldNode node = nodes.get(0);
+ assertEquals(16, node.getLength());
+ assertEquals(8, node.getNullCount());
+ List<ArrowBuf> buffers = batch.getBuffers();
+ assertEquals(2, buffers.size());
+ assertArrayEquals(validity, MessageSerializerTest.array(buffers.get(0)));
+ assertArrayEquals(values, MessageSerializerTest.array(buffers.get(1)));
+ }
+
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowFile.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowFile.java
new file mode 100644
index 000000000..4fb582278
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowFile.java
@@ -0,0 +1,134 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.ipc;
+
+import static java.nio.channels.Channels.newChannel;
+import static org.apache.arrow.vector.TestUtils.newVarCharVector;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.nio.charset.StandardCharsets;
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.util.Collections2;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.VarCharVector;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.complex.StructVector;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class TestArrowFile extends BaseFileTest {
+ private static final Logger LOGGER = LoggerFactory.getLogger(TestArrowFile.class);
+
+ @Test
+ public void testWrite() throws IOException {
+ File file = new File("target/mytest_write.arrow");
+ int count = COUNT;
+ try (
+ BufferAllocator vectorAllocator = allocator.newChildAllocator("original vectors", 0, Integer.MAX_VALUE);
+ StructVector parent = StructVector.empty("parent", vectorAllocator)) {
+ writeData(count, parent);
+ write(parent.getChild("root"), file, new ByteArrayOutputStream());
+ }
+ }
+
+ @Test
+ public void testWriteComplex() throws IOException {
+ File file = new File("target/mytest_write_complex.arrow");
+ int count = COUNT;
+ try (
+ BufferAllocator vectorAllocator = allocator.newChildAllocator("original vectors", 0, Integer.MAX_VALUE);
+ StructVector parent = StructVector.empty("parent", vectorAllocator)) {
+ writeComplexData(count, parent);
+ FieldVector root = parent.getChild("root");
+ validateComplexContent(count, new VectorSchemaRoot(root));
+ write(root, file, new ByteArrayOutputStream());
+ }
+ }
+
+ /**
+ * Writes the contents of parents to file. If outStream is non-null, also writes it
+ * to outStream in the streaming serialized format.
+ */
+ private void write(FieldVector parent, File file, OutputStream outStream) throws IOException {
+ VectorSchemaRoot root = new VectorSchemaRoot(parent);
+
+ try (FileOutputStream fileOutputStream = new FileOutputStream(file);
+ ArrowFileWriter arrowWriter = new ArrowFileWriter(root, null, fileOutputStream.getChannel());) {
+ LOGGER.debug("writing schema: " + root.getSchema());
+ arrowWriter.start();
+ arrowWriter.writeBatch();
+ arrowWriter.end();
+ }
+
+ // Also try serializing to the stream writer.
+ if (outStream != null) {
+ try (ArrowStreamWriter arrowWriter = new ArrowStreamWriter(root, null, outStream)) {
+ arrowWriter.start();
+ arrowWriter.writeBatch();
+ arrowWriter.end();
+ }
+ }
+ }
+
+ @Test
+ public void testFileStreamHasEos() throws IOException {
+
+ try (VarCharVector vector1 = newVarCharVector("varchar1", allocator)) {
+ vector1.allocateNewSafe();
+ vector1.set(0, "foo".getBytes(StandardCharsets.UTF_8));
+ vector1.set(1, "bar".getBytes(StandardCharsets.UTF_8));
+ vector1.set(3, "baz".getBytes(StandardCharsets.UTF_8));
+ vector1.set(4, "bar".getBytes(StandardCharsets.UTF_8));
+ vector1.set(5, "baz".getBytes(StandardCharsets.UTF_8));
+ vector1.setValueCount(6);
+
+ List<Field> fields = Arrays.asList(vector1.getField());
+ List<FieldVector> vectors = Collections2.asImmutableList(vector1);
+ VectorSchemaRoot root = new VectorSchemaRoot(fields, vectors, vector1.getValueCount());
+
+ // write data
+ ByteArrayOutputStream out = new ByteArrayOutputStream();
+ ArrowFileWriter writer = new ArrowFileWriter(root, null, newChannel(out));
+ writer.start();
+ writer.writeBatch();
+ writer.end();
+
+ byte[] bytes = out.toByteArray();
+ byte[] bytesWithoutMagic = new byte[bytes.length - 8];
+ System.arraycopy(bytes, 8, bytesWithoutMagic, 0, bytesWithoutMagic.length);
+
+ try (ArrowStreamReader reader = new ArrowStreamReader(new ByteArrayInputStream(bytesWithoutMagic), allocator)) {
+ assertTrue(reader.loadNextBatch());
+ // here will throw exception if read footer instead of eos.
+ assertFalse(reader.loadNextBatch());
+ }
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowFooter.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowFooter.java
new file mode 100644
index 000000000..38c65bdde
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowFooter.java
@@ -0,0 +1,68 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.ipc;
+
+import static java.util.Arrays.asList;
+import static org.junit.Assert.assertEquals;
+
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+import org.apache.arrow.flatbuf.Footer;
+import org.apache.arrow.vector.ipc.message.ArrowBlock;
+import org.apache.arrow.vector.ipc.message.ArrowFooter;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.junit.Test;
+
+import com.google.flatbuffers.FlatBufferBuilder;
+
+public class TestArrowFooter {
+
+ @Test
+ public void test() {
+ Schema schema = new Schema(asList(
+ new Field("a", FieldType.nullable(new ArrowType.Int(8, true)), Collections.<Field>emptyList())
+ ));
+ ArrowFooter footer =
+ new ArrowFooter(schema, Collections.<ArrowBlock>emptyList(), Collections.<ArrowBlock>emptyList());
+ ArrowFooter newFooter = roundTrip(footer);
+ assertEquals(footer, newFooter);
+
+ List<ArrowBlock> ids = new ArrayList<>();
+ ids.add(new ArrowBlock(0, 1, 2));
+ ids.add(new ArrowBlock(4, 5, 6));
+ footer = new ArrowFooter(schema, ids, ids);
+ assertEquals(footer, roundTrip(footer));
+ }
+
+
+ private ArrowFooter roundTrip(ArrowFooter footer) {
+ FlatBufferBuilder builder = new FlatBufferBuilder();
+ int i = footer.writeTo(builder);
+ builder.finish(i);
+ ByteBuffer dataBuffer = builder.dataBuffer();
+ ArrowFooter newFooter = new ArrowFooter(Footer.getRootAsFooter(dataBuffer));
+ return newFooter;
+ }
+
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowReaderWriter.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowReaderWriter.java
new file mode 100644
index 000000000..1167819de
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowReaderWriter.java
@@ -0,0 +1,882 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.ipc;
+
+import static java.nio.channels.Channels.newChannel;
+import static java.util.Arrays.asList;
+import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt;
+import static org.apache.arrow.vector.TestUtils.newVarCharVector;
+import static org.apache.arrow.vector.TestUtils.newVector;
+import static org.apache.arrow.vector.testing.ValueVectorDataPopulator.setVector;
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.channels.Channels;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.function.BiFunction;
+import java.util.stream.Collectors;
+
+import org.apache.arrow.flatbuf.FieldNode;
+import org.apache.arrow.flatbuf.Message;
+import org.apache.arrow.flatbuf.RecordBatch;
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.util.AutoCloseables;
+import org.apache.arrow.util.Collections2;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.NullVector;
+import org.apache.arrow.vector.TestUtils;
+import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.VarCharVector;
+import org.apache.arrow.vector.VectorLoader;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.VectorUnloader;
+import org.apache.arrow.vector.compare.Range;
+import org.apache.arrow.vector.compare.RangeEqualsVisitor;
+import org.apache.arrow.vector.compare.TypeEqualsVisitor;
+import org.apache.arrow.vector.compare.VectorEqualsVisitor;
+import org.apache.arrow.vector.complex.StructVector;
+import org.apache.arrow.vector.dictionary.Dictionary;
+import org.apache.arrow.vector.dictionary.DictionaryEncoder;
+import org.apache.arrow.vector.dictionary.DictionaryProvider;
+import org.apache.arrow.vector.ipc.message.ArrowBlock;
+import org.apache.arrow.vector.ipc.message.ArrowDictionaryBatch;
+import org.apache.arrow.vector.ipc.message.ArrowFieldNode;
+import org.apache.arrow.vector.ipc.message.ArrowRecordBatch;
+import org.apache.arrow.vector.ipc.message.IpcOption;
+import org.apache.arrow.vector.ipc.message.MessageSerializer;
+import org.apache.arrow.vector.types.MetadataVersion;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.DictionaryEncoding;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.apache.arrow.vector.util.ByteArrayReadableSeekableByteChannel;
+import org.apache.arrow.vector.util.DictionaryUtility;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestArrowReaderWriter {
+
+ private BufferAllocator allocator;
+
+ private VarCharVector dictionaryVector1;
+ private VarCharVector dictionaryVector2;
+ private VarCharVector dictionaryVector3;
+ private StructVector dictionaryVector4;
+
+ private Dictionary dictionary1;
+ private Dictionary dictionary2;
+ private Dictionary dictionary3;
+ private Dictionary dictionary4;
+
+ private Schema schema;
+ private Schema encodedSchema;
+
+ @Before
+ public void init() {
+ allocator = new RootAllocator(Long.MAX_VALUE);
+
+ dictionaryVector1 = newVarCharVector("D1", allocator);
+ setVector(dictionaryVector1,
+ "foo".getBytes(StandardCharsets.UTF_8),
+ "bar".getBytes(StandardCharsets.UTF_8),
+ "baz".getBytes(StandardCharsets.UTF_8));
+
+ dictionaryVector2 = newVarCharVector("D2", allocator);
+ setVector(dictionaryVector2,
+ "aa".getBytes(StandardCharsets.UTF_8),
+ "bb".getBytes(StandardCharsets.UTF_8),
+ "cc".getBytes(StandardCharsets.UTF_8));
+
+ dictionaryVector3 = newVarCharVector("D3", allocator);
+ setVector(dictionaryVector3,
+ "foo".getBytes(StandardCharsets.UTF_8),
+ "bar".getBytes(StandardCharsets.UTF_8),
+ "baz".getBytes(StandardCharsets.UTF_8),
+ "aa".getBytes(StandardCharsets.UTF_8),
+ "bb".getBytes(StandardCharsets.UTF_8),
+ "cc".getBytes(StandardCharsets.UTF_8));
+
+ dictionaryVector4 = newVector(StructVector.class, "D4", MinorType.STRUCT, allocator);
+ final Map<String, List<Integer>> dictionaryValues4 = new HashMap<>();
+ dictionaryValues4.put("a", Arrays.asList(1, 2, 3));
+ dictionaryValues4.put("b", Arrays.asList(4, 5, 6));
+ setVector(dictionaryVector4, dictionaryValues4);
+
+ dictionary1 = new Dictionary(dictionaryVector1,
+ new DictionaryEncoding(/*id=*/1L, /*ordered=*/false, /*indexType=*/null));
+ dictionary2 = new Dictionary(dictionaryVector2,
+ new DictionaryEncoding(/*id=*/2L, /*ordered=*/false, /*indexType=*/null));
+ dictionary3 = new Dictionary(dictionaryVector3,
+ new DictionaryEncoding(/*id=*/1L, /*ordered=*/false, /*indexType=*/null));
+ dictionary4 = new Dictionary(dictionaryVector4,
+ new DictionaryEncoding(/*id=*/3L, /*ordered=*/false, /*indexType=*/null));
+ }
+
+ @After
+ public void terminate() throws Exception {
+ dictionaryVector1.close();
+ dictionaryVector2.close();
+ dictionaryVector3.close();
+ dictionaryVector4.close();
+ allocator.close();
+ }
+
+ ArrowBuf buf(byte[] bytes) {
+ ArrowBuf buffer = allocator.buffer(bytes.length);
+ buffer.writeBytes(bytes);
+ return buffer;
+ }
+
+ byte[] array(ArrowBuf buf) {
+ byte[] bytes = new byte[checkedCastToInt(buf.readableBytes())];
+ buf.readBytes(bytes);
+ return bytes;
+ }
+
+ @Test
+ public void test() throws IOException {
+ Schema schema = new Schema(asList(new Field("testField", FieldType.nullable(new ArrowType.Int(8, true)),
+ Collections.<Field>emptyList())));
+ ArrowType type = schema.getFields().get(0).getType();
+ FieldVector vector = TestUtils.newVector(FieldVector.class, "testField", type, allocator);
+ vector.initializeChildrenFromFields(schema.getFields().get(0).getChildren());
+
+ byte[] validity = new byte[] {(byte) 255, 0};
+ // second half is "undefined"
+ byte[] values = new byte[] {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
+
+ ByteArrayOutputStream out = new ByteArrayOutputStream();
+ try (VectorSchemaRoot root = new VectorSchemaRoot(schema.getFields(), asList(vector), 16);
+ ArrowFileWriter writer = new ArrowFileWriter(root, null, newChannel(out))) {
+ ArrowBuf validityb = buf(validity);
+ ArrowBuf valuesb = buf(values);
+ ArrowRecordBatch batch = new ArrowRecordBatch(16, asList(new ArrowFieldNode(16, 8)), asList(validityb, valuesb));
+ VectorLoader loader = new VectorLoader(root);
+ loader.load(batch);
+ writer.writeBatch();
+
+ validityb.close();
+ valuesb.close();
+ batch.close();
+ }
+
+ byte[] byteArray = out.toByteArray();
+
+ try (SeekableReadChannel channel = new SeekableReadChannel(new ByteArrayReadableSeekableByteChannel(byteArray));
+ ArrowFileReader reader = new ArrowFileReader(channel, allocator)) {
+ Schema readSchema = reader.getVectorSchemaRoot().getSchema();
+ assertEquals(schema, readSchema);
+ // TODO: dictionaries
+ List<ArrowBlock> recordBatches = reader.getRecordBlocks();
+ assertEquals(1, recordBatches.size());
+ reader.loadNextBatch();
+ VectorUnloader unloader = new VectorUnloader(reader.getVectorSchemaRoot());
+ ArrowRecordBatch recordBatch = unloader.getRecordBatch();
+ List<ArrowFieldNode> nodes = recordBatch.getNodes();
+ assertEquals(1, nodes.size());
+ ArrowFieldNode node = nodes.get(0);
+ assertEquals(16, node.getLength());
+ assertEquals(8, node.getNullCount());
+ List<ArrowBuf> buffers = recordBatch.getBuffers();
+ assertEquals(2, buffers.size());
+ assertArrayEquals(validity, array(buffers.get(0)));
+ assertArrayEquals(values, array(buffers.get(1)));
+
+ // Read just the header. This demonstrates being able to read without need to
+ // deserialize the buffer.
+ ByteBuffer headerBuffer = ByteBuffer.allocate(recordBatches.get(0).getMetadataLength());
+ headerBuffer.put(byteArray, (int) recordBatches.get(0).getOffset(), headerBuffer.capacity());
+ // new format prefix_size ==8
+ headerBuffer.position(8);
+ Message messageFB = Message.getRootAsMessage(headerBuffer);
+ RecordBatch recordBatchFB = (RecordBatch) messageFB.header(new RecordBatch());
+ assertEquals(2, recordBatchFB.buffersLength());
+ assertEquals(1, recordBatchFB.nodesLength());
+ FieldNode nodeFB = recordBatchFB.nodes(0);
+ assertEquals(16, nodeFB.length());
+ assertEquals(8, nodeFB.nullCount());
+
+ recordBatch.close();
+ }
+ }
+
+ @Test
+ public void testWriteReadNullVector() throws IOException {
+
+ int valueCount = 3;
+
+ NullVector nullVector = new NullVector("vector");
+ nullVector.setValueCount(valueCount);
+
+ Schema schema = new Schema(asList(nullVector.getField()));
+
+ ByteArrayOutputStream out = new ByteArrayOutputStream();
+ try (VectorSchemaRoot root = new VectorSchemaRoot(schema.getFields(), asList(nullVector), valueCount);
+ ArrowFileWriter writer = new ArrowFileWriter(root, null, newChannel(out))) {
+ ArrowRecordBatch batch = new ArrowRecordBatch(valueCount,
+ asList(new ArrowFieldNode(valueCount, 0)),
+ Collections.emptyList());
+ VectorLoader loader = new VectorLoader(root);
+ loader.load(batch);
+ writer.writeBatch();
+ }
+
+ byte[] byteArray = out.toByteArray();
+
+ try (SeekableReadChannel channel = new SeekableReadChannel(new ByteArrayReadableSeekableByteChannel(byteArray));
+ ArrowFileReader reader = new ArrowFileReader(channel, allocator)) {
+ Schema readSchema = reader.getVectorSchemaRoot().getSchema();
+ assertEquals(schema, readSchema);
+ List<ArrowBlock> recordBatches = reader.getRecordBlocks();
+ assertEquals(1, recordBatches.size());
+
+ assertTrue(reader.loadNextBatch());
+ assertEquals(1, reader.getVectorSchemaRoot().getFieldVectors().size());
+
+ NullVector readNullVector = (NullVector) reader.getVectorSchemaRoot().getFieldVectors().get(0);
+ assertEquals(valueCount, readNullVector.getValueCount());
+ }
+ }
+
+ @Test
+ public void testWriteReadWithDictionaries() throws IOException {
+ DictionaryProvider.MapDictionaryProvider provider = new DictionaryProvider.MapDictionaryProvider();
+ provider.put(dictionary1);
+
+ VarCharVector vector1 = newVarCharVector("varchar1", allocator);
+ vector1.allocateNewSafe();
+ vector1.set(0, "foo".getBytes(StandardCharsets.UTF_8));
+ vector1.set(1, "bar".getBytes(StandardCharsets.UTF_8));
+ vector1.set(3, "baz".getBytes(StandardCharsets.UTF_8));
+ vector1.set(4, "bar".getBytes(StandardCharsets.UTF_8));
+ vector1.set(5, "baz".getBytes(StandardCharsets.UTF_8));
+ vector1.setValueCount(6);
+ FieldVector encodedVector1 = (FieldVector) DictionaryEncoder.encode(vector1, dictionary1);
+ vector1.close();
+
+ VarCharVector vector2 = newVarCharVector("varchar2", allocator);
+ vector2.allocateNewSafe();
+ vector2.set(0, "bar".getBytes(StandardCharsets.UTF_8));
+ vector2.set(1, "baz".getBytes(StandardCharsets.UTF_8));
+ vector2.set(2, "foo".getBytes(StandardCharsets.UTF_8));
+ vector2.set(3, "foo".getBytes(StandardCharsets.UTF_8));
+ vector2.set(4, "foo".getBytes(StandardCharsets.UTF_8));
+ vector2.set(5, "bar".getBytes(StandardCharsets.UTF_8));
+ vector2.setValueCount(6);
+ FieldVector encodedVector2 = (FieldVector) DictionaryEncoder.encode(vector2, dictionary1);
+ vector2.close();
+
+ List<Field> fields = Arrays.asList(encodedVector1.getField(), encodedVector2.getField());
+ List<FieldVector> vectors = Collections2.asImmutableList(encodedVector1, encodedVector2);
+ try (VectorSchemaRoot root = new VectorSchemaRoot(fields, vectors, encodedVector1.getValueCount());
+ ByteArrayOutputStream out = new ByteArrayOutputStream();
+ ArrowFileWriter writer = new ArrowFileWriter(root, provider, newChannel(out));) {
+
+ writer.start();
+ writer.writeBatch();
+ writer.end();
+
+ try (SeekableReadChannel channel = new SeekableReadChannel(
+ new ByteArrayReadableSeekableByteChannel(out.toByteArray()));
+ ArrowFileReader reader = new ArrowFileReader(channel, allocator)) {
+ Schema readSchema = reader.getVectorSchemaRoot().getSchema();
+ assertEquals(root.getSchema(), readSchema);
+ assertEquals(1, reader.getDictionaryBlocks().size());
+ assertEquals(1, reader.getRecordBlocks().size());
+
+ reader.loadNextBatch();
+ assertEquals(2, reader.getVectorSchemaRoot().getFieldVectors().size());
+ }
+ }
+ }
+
+ @Test
+ public void testWriteReadWithStructDictionaries() throws IOException {
+ DictionaryProvider.MapDictionaryProvider provider =
+ new DictionaryProvider.MapDictionaryProvider();
+ provider.put(dictionary4);
+
+ try (final StructVector vector =
+ newVector(StructVector.class, "D4", MinorType.STRUCT, allocator)) {
+ final Map<String, List<Integer>> values = new HashMap<>();
+ // Index: 0, 2, 1, 2, 1, 0, 0
+ values.put("a", Arrays.asList(1, 3, 2, 3, 2, 1, 1));
+ values.put("b", Arrays.asList(4, 6, 5, 6, 5, 4, 4));
+ setVector(vector, values);
+ FieldVector encodedVector = (FieldVector) DictionaryEncoder.encode(vector, dictionary4);
+
+ List<Field> fields = Arrays.asList(encodedVector.getField());
+ List<FieldVector> vectors = Collections2.asImmutableList(encodedVector);
+ try (
+ VectorSchemaRoot root =
+ new VectorSchemaRoot(fields, vectors, encodedVector.getValueCount());
+ ByteArrayOutputStream out = new ByteArrayOutputStream();
+ ArrowFileWriter writer = new ArrowFileWriter(root, provider, newChannel(out));) {
+
+ writer.start();
+ writer.writeBatch();
+ writer.end();
+
+ try (
+ SeekableReadChannel channel = new SeekableReadChannel(
+ new ByteArrayReadableSeekableByteChannel(out.toByteArray()));
+ ArrowFileReader reader = new ArrowFileReader(channel, allocator)) {
+ final VectorSchemaRoot readRoot = reader.getVectorSchemaRoot();
+ final Schema readSchema = readRoot.getSchema();
+ assertEquals(root.getSchema(), readSchema);
+ assertEquals(1, reader.getDictionaryBlocks().size());
+ assertEquals(1, reader.getRecordBlocks().size());
+
+ reader.loadNextBatch();
+ assertEquals(1, readRoot.getFieldVectors().size());
+ assertEquals(1, reader.getDictionaryVectors().size());
+
+ // Read the encoded vector and check it
+ final FieldVector readEncoded = readRoot.getVector(0);
+ assertEquals(encodedVector.getValueCount(), readEncoded.getValueCount());
+ assertTrue(new RangeEqualsVisitor(encodedVector, readEncoded)
+ .rangeEquals(new Range(0, 0, encodedVector.getValueCount())));
+
+ // Read the dictionary
+ final Map<Long, Dictionary> readDictionaryMap = reader.getDictionaryVectors();
+ final Dictionary readDictionary =
+ readDictionaryMap.get(readEncoded.getField().getDictionary().getId());
+ assertNotNull(readDictionary);
+
+ // Assert the dictionary vector is correct
+ final FieldVector readDictionaryVector = readDictionary.getVector();
+ assertEquals(dictionaryVector4.getValueCount(), readDictionaryVector.getValueCount());
+ final BiFunction<ValueVector, ValueVector, Boolean> typeComparatorIgnoreName =
+ (v1, v2) -> new TypeEqualsVisitor(v1, false, true).equals(v2);
+ assertTrue("Dictionary vectors are not equal",
+ new RangeEqualsVisitor(dictionaryVector4, readDictionaryVector,
+ typeComparatorIgnoreName)
+ .rangeEquals(new Range(0, 0, dictionaryVector4.getValueCount())));
+
+ // Assert the decoded vector is correct
+ try (final ValueVector readVector =
+ DictionaryEncoder.decode(readEncoded, readDictionary)) {
+ assertEquals(vector.getValueCount(), readVector.getValueCount());
+ assertTrue("Decoded vectors are not equal",
+ new RangeEqualsVisitor(vector, readVector, typeComparatorIgnoreName)
+ .rangeEquals(new Range(0, 0, vector.getValueCount())));
+ }
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testEmptyStreamInFileIPC() throws IOException {
+
+ DictionaryProvider.MapDictionaryProvider provider = new DictionaryProvider.MapDictionaryProvider();
+ provider.put(dictionary1);
+
+ VarCharVector vector = newVarCharVector("varchar", allocator);
+ vector.allocateNewSafe();
+ vector.set(0, "foo".getBytes(StandardCharsets.UTF_8));
+ vector.set(1, "bar".getBytes(StandardCharsets.UTF_8));
+ vector.set(3, "baz".getBytes(StandardCharsets.UTF_8));
+ vector.set(4, "bar".getBytes(StandardCharsets.UTF_8));
+ vector.set(5, "baz".getBytes(StandardCharsets.UTF_8));
+ vector.setValueCount(6);
+
+ FieldVector encodedVector1A = (FieldVector) DictionaryEncoder.encode(vector, dictionary1);
+ vector.close();
+
+ List<Field> fields = Arrays.asList(encodedVector1A.getField());
+ List<FieldVector> vectors = Collections2.asImmutableList(encodedVector1A);
+
+ try (VectorSchemaRoot root = new VectorSchemaRoot(fields, vectors, encodedVector1A.getValueCount());
+ ByteArrayOutputStream out = new ByteArrayOutputStream();
+ ArrowFileWriter writer = new ArrowFileWriter(root, provider, newChannel(out))) {
+
+ writer.start();
+ writer.end();
+
+ try (SeekableReadChannel channel = new SeekableReadChannel(
+ new ByteArrayReadableSeekableByteChannel(out.toByteArray()));
+ ArrowFileReader reader = new ArrowFileReader(channel, allocator)) {
+ Schema readSchema = reader.getVectorSchemaRoot().getSchema();
+ assertEquals(root.getSchema(), readSchema);
+ assertEquals(1, reader.getDictionaryVectors().size());
+ assertEquals(0, reader.getDictionaryBlocks().size());
+ assertEquals(0, reader.getRecordBlocks().size());
+ }
+ }
+
+ }
+
+ @Test
+ public void testEmptyStreamInStreamingIPC() throws IOException {
+
+ DictionaryProvider.MapDictionaryProvider provider = new DictionaryProvider.MapDictionaryProvider();
+ provider.put(dictionary1);
+
+ VarCharVector vector = newVarCharVector("varchar", allocator);
+ vector.allocateNewSafe();
+ vector.set(0, "foo".getBytes(StandardCharsets.UTF_8));
+ vector.set(1, "bar".getBytes(StandardCharsets.UTF_8));
+ vector.set(3, "baz".getBytes(StandardCharsets.UTF_8));
+ vector.set(4, "bar".getBytes(StandardCharsets.UTF_8));
+ vector.set(5, "baz".getBytes(StandardCharsets.UTF_8));
+ vector.setValueCount(6);
+
+ FieldVector encodedVector = (FieldVector) DictionaryEncoder.encode(vector, dictionary1);
+ vector.close();
+
+ List<Field> fields = Arrays.asList(encodedVector.getField());
+ try (VectorSchemaRoot root =
+ new VectorSchemaRoot(fields, Arrays.asList(encodedVector), encodedVector.getValueCount());
+ ByteArrayOutputStream out = new ByteArrayOutputStream();
+ ArrowStreamWriter writer = new ArrowStreamWriter(root, provider, newChannel(out))) {
+
+ writer.start();
+ writer.end();
+
+
+ try (ArrowStreamReader reader = new ArrowStreamReader(
+ new ByteArrayReadableSeekableByteChannel(out.toByteArray()), allocator)) {
+ Schema readSchema = reader.getVectorSchemaRoot().getSchema();
+ assertEquals(root.getSchema(), readSchema);
+ assertEquals(1, reader.getDictionaryVectors().size());
+ assertFalse(reader.loadNextBatch());
+ }
+ }
+
+ }
+
+ @Test
+ public void testDictionaryReplacement() throws Exception {
+ VarCharVector vector1 = newVarCharVector("varchar1", allocator);
+ setVector(vector1,
+ "foo".getBytes(StandardCharsets.UTF_8),
+ "bar".getBytes(StandardCharsets.UTF_8),
+ "baz".getBytes(StandardCharsets.UTF_8),
+ "bar".getBytes(StandardCharsets.UTF_8));
+
+ FieldVector encodedVector1 = (FieldVector) DictionaryEncoder.encode(vector1, dictionary1);
+
+ VarCharVector vector2 = newVarCharVector("varchar2", allocator);
+ setVector(vector2,
+ "foo".getBytes(StandardCharsets.UTF_8),
+ "foo".getBytes(StandardCharsets.UTF_8),
+ "foo".getBytes(StandardCharsets.UTF_8),
+ "foo".getBytes(StandardCharsets.UTF_8));
+
+ FieldVector encodedVector2 = (FieldVector) DictionaryEncoder.encode(vector2, dictionary1);
+
+ DictionaryProvider.MapDictionaryProvider provider = new DictionaryProvider.MapDictionaryProvider();
+ provider.put(dictionary1);
+ List<Field> schemaFields = new ArrayList<>();
+ schemaFields.add(DictionaryUtility.toMessageFormat(encodedVector1.getField(), provider, new HashSet<>()));
+ schemaFields.add(DictionaryUtility.toMessageFormat(encodedVector2.getField(), provider, new HashSet<>()));
+ Schema schema = new Schema(schemaFields);
+
+ ByteArrayOutputStream outStream = new ByteArrayOutputStream();
+ WriteChannel out = new WriteChannel(newChannel(outStream));
+
+ // write schema
+ MessageSerializer.serialize(out, schema);
+
+ List<AutoCloseable> closeableList = new ArrayList<>();
+
+ // write non-delta dictionary with id=1
+ serializeDictionaryBatch(out, dictionary3, false, closeableList);
+
+ // write non-delta dictionary with id=1
+ serializeDictionaryBatch(out, dictionary1, false, closeableList);
+
+ // write recordBatch2
+ serializeRecordBatch(out, Arrays.asList(encodedVector1, encodedVector2), closeableList);
+
+ // write eos
+ out.writeIntLittleEndian(0);
+
+ try (ArrowStreamReader reader = new ArrowStreamReader(
+ new ByteArrayReadableSeekableByteChannel(outStream.toByteArray()), allocator)) {
+ assertEquals(1, reader.getDictionaryVectors().size());
+ assertTrue(reader.loadNextBatch());
+ FieldVector dictionaryVector = reader.getDictionaryVectors().get(1L).getVector();
+ // make sure the delta dictionary is concatenated.
+ assertTrue(VectorEqualsVisitor.vectorEquals(dictionaryVector, dictionaryVector1, null));
+ assertFalse(reader.loadNextBatch());
+ }
+
+ vector1.close();
+ vector2.close();
+ AutoCloseables.close(closeableList);
+ }
+
+ @Test
+ public void testDeltaDictionary() throws Exception {
+ VarCharVector vector1 = newVarCharVector("varchar1", allocator);
+ setVector(vector1,
+ "foo".getBytes(StandardCharsets.UTF_8),
+ "bar".getBytes(StandardCharsets.UTF_8),
+ "baz".getBytes(StandardCharsets.UTF_8),
+ "bar".getBytes(StandardCharsets.UTF_8));
+
+ FieldVector encodedVector1 = (FieldVector) DictionaryEncoder.encode(vector1, dictionary1);
+
+ VarCharVector vector2 = newVarCharVector("varchar2", allocator);
+ setVector(vector2,
+ "foo".getBytes(StandardCharsets.UTF_8),
+ "aa".getBytes(StandardCharsets.UTF_8),
+ "bb".getBytes(StandardCharsets.UTF_8),
+ "cc".getBytes(StandardCharsets.UTF_8));
+
+ FieldVector encodedVector2 = (FieldVector) DictionaryEncoder.encode(vector2, dictionary3);
+
+ DictionaryProvider.MapDictionaryProvider provider = new DictionaryProvider.MapDictionaryProvider();
+ provider.put(dictionary1);
+ provider.put(dictionary3);
+ List<Field> schemaFields = new ArrayList<>();
+ schemaFields.add(DictionaryUtility.toMessageFormat(encodedVector1.getField(), provider, new HashSet<>()));
+ schemaFields.add(DictionaryUtility.toMessageFormat(encodedVector2.getField(), provider, new HashSet<>()));
+ Schema schema = new Schema(schemaFields);
+
+ ByteArrayOutputStream outStream = new ByteArrayOutputStream();
+ WriteChannel out = new WriteChannel(newChannel(outStream));
+
+ // write schema
+ MessageSerializer.serialize(out, schema);
+
+ List<AutoCloseable> closeableList = new ArrayList<>();
+
+ // write non-delta dictionary with id=1
+ serializeDictionaryBatch(out, dictionary1, false, closeableList);
+
+ // write delta dictionary with id=1
+ Dictionary deltaDictionary =
+ new Dictionary(dictionaryVector2, new DictionaryEncoding(1L, false, null));
+ serializeDictionaryBatch(out, deltaDictionary, true, closeableList);
+ deltaDictionary.getVector().close();
+
+ // write recordBatch2
+ serializeRecordBatch(out, Arrays.asList(encodedVector1, encodedVector2), closeableList);
+
+ // write eos
+ out.writeIntLittleEndian(0);
+
+ try (ArrowStreamReader reader = new ArrowStreamReader(
+ new ByteArrayReadableSeekableByteChannel(outStream.toByteArray()), allocator)) {
+ assertEquals(1, reader.getDictionaryVectors().size());
+ assertTrue(reader.loadNextBatch());
+ FieldVector dictionaryVector = reader.getDictionaryVectors().get(1L).getVector();
+ // make sure the delta dictionary is concatenated.
+ assertTrue(VectorEqualsVisitor.vectorEquals(dictionaryVector, dictionaryVector3, null));
+ assertFalse(reader.loadNextBatch());
+ }
+
+ vector1.close();
+ vector2.close();
+ AutoCloseables.close(closeableList);
+
+ }
+
+ private void serializeDictionaryBatch(
+ WriteChannel out,
+ Dictionary dictionary,
+ boolean isDelta,
+ List<AutoCloseable> closeables) throws IOException {
+
+ FieldVector dictVector = dictionary.getVector();
+ VectorSchemaRoot root = new VectorSchemaRoot(
+ Collections.singletonList(dictVector.getField()),
+ Collections.singletonList(dictVector),
+ dictVector.getValueCount());
+ ArrowDictionaryBatch batch =
+ new ArrowDictionaryBatch(dictionary.getEncoding().getId(), new VectorUnloader(root).getRecordBatch(), isDelta);
+ MessageSerializer.serialize(out, batch);
+ closeables.add(batch);
+ closeables.add(root);
+ }
+
+ private void serializeRecordBatch(
+ WriteChannel out,
+ List<FieldVector> vectors,
+ List<AutoCloseable> closeables) throws IOException {
+
+ List<Field> fields = vectors.stream().map(v -> v.getField()).collect(Collectors.toList());
+ VectorSchemaRoot root = new VectorSchemaRoot(
+ fields,
+ vectors,
+ vectors.get(0).getValueCount());
+ VectorUnloader unloader = new VectorUnloader(root);
+ ArrowRecordBatch batch = unloader.getRecordBatch();
+ MessageSerializer.serialize(out, batch);
+ closeables.add(batch);
+ closeables.add(root);
+ }
+
+ @Test
+ public void testReadInterleavedData() throws IOException {
+ List<ArrowRecordBatch> batches = createRecordBatches();
+
+ ByteArrayOutputStream outStream = new ByteArrayOutputStream();
+ WriteChannel out = new WriteChannel(newChannel(outStream));
+
+ // write schema
+ MessageSerializer.serialize(out, schema);
+
+ // write dictionary1
+ FieldVector dictVector1 = dictionary1.getVector();
+ VectorSchemaRoot dictRoot1 = new VectorSchemaRoot(
+ Collections.singletonList(dictVector1.getField()),
+ Collections.singletonList(dictVector1),
+ dictVector1.getValueCount());
+ ArrowDictionaryBatch dictionaryBatch1 =
+ new ArrowDictionaryBatch(1, new VectorUnloader(dictRoot1).getRecordBatch());
+ MessageSerializer.serialize(out, dictionaryBatch1);
+ dictionaryBatch1.close();
+ dictRoot1.close();
+
+ // write recordBatch1
+ MessageSerializer.serialize(out, batches.get(0));
+
+ // write dictionary2
+ FieldVector dictVector2 = dictionary2.getVector();
+ VectorSchemaRoot dictRoot2 = new VectorSchemaRoot(
+ Collections.singletonList(dictVector2.getField()),
+ Collections.singletonList(dictVector2),
+ dictVector2.getValueCount());
+ ArrowDictionaryBatch dictionaryBatch2 =
+ new ArrowDictionaryBatch(2, new VectorUnloader(dictRoot2).getRecordBatch());
+ MessageSerializer.serialize(out, dictionaryBatch2);
+ dictionaryBatch2.close();
+ dictRoot2.close();
+
+ // write recordBatch1
+ MessageSerializer.serialize(out, batches.get(1));
+
+ // write eos
+ out.writeIntLittleEndian(0);
+
+ try (ArrowStreamReader reader = new ArrowStreamReader(
+ new ByteArrayReadableSeekableByteChannel(outStream.toByteArray()), allocator)) {
+ Schema readSchema = reader.getVectorSchemaRoot().getSchema();
+ assertEquals(encodedSchema, readSchema);
+ assertEquals(2, reader.getDictionaryVectors().size());
+ assertTrue(reader.loadNextBatch());
+ assertTrue(reader.loadNextBatch());
+ assertFalse(reader.loadNextBatch());
+ }
+
+ batches.forEach(batch -> batch.close());
+ }
+
+ private List<ArrowRecordBatch> createRecordBatches() {
+ List<ArrowRecordBatch> batches = new ArrayList<>();
+
+ DictionaryProvider.MapDictionaryProvider provider = new DictionaryProvider.MapDictionaryProvider();
+ provider.put(dictionary1);
+ provider.put(dictionary2);
+
+ VarCharVector vectorA1 = newVarCharVector("varcharA1", allocator);
+ vectorA1.allocateNewSafe();
+ vectorA1.set(0, "foo".getBytes(StandardCharsets.UTF_8));
+ vectorA1.set(1, "bar".getBytes(StandardCharsets.UTF_8));
+ vectorA1.set(3, "baz".getBytes(StandardCharsets.UTF_8));
+ vectorA1.set(4, "bar".getBytes(StandardCharsets.UTF_8));
+ vectorA1.set(5, "baz".getBytes(StandardCharsets.UTF_8));
+ vectorA1.setValueCount(6);
+
+ VarCharVector vectorA2 = newVarCharVector("varcharA2", allocator);
+ vectorA2.setValueCount(6);
+ FieldVector encodedVectorA1 = (FieldVector) DictionaryEncoder.encode(vectorA1, dictionary1);
+ vectorA1.close();
+ FieldVector encodedVectorA2 = (FieldVector) DictionaryEncoder.encode(vectorA1, dictionary2);
+ vectorA2.close();
+
+ List<Field> fields = Arrays.asList(encodedVectorA1.getField(), encodedVectorA2.getField());
+ List<FieldVector> vectors = Collections2.asImmutableList(encodedVectorA1, encodedVectorA2);
+ VectorSchemaRoot root = new VectorSchemaRoot(fields, vectors, encodedVectorA1.getValueCount());
+ VectorUnloader unloader = new VectorUnloader(root);
+ batches.add(unloader.getRecordBatch());
+ root.close();
+
+ VarCharVector vectorB1 = newVarCharVector("varcharB1", allocator);
+ vectorB1.setValueCount(6);
+
+ VarCharVector vectorB2 = newVarCharVector("varcharB2", allocator);
+ vectorB2.allocateNew();
+ vectorB2.setValueCount(6);
+ vectorB2.set(0, "aa".getBytes(StandardCharsets.UTF_8));
+ vectorB2.set(1, "aa".getBytes(StandardCharsets.UTF_8));
+ vectorB2.set(3, "bb".getBytes(StandardCharsets.UTF_8));
+ vectorB2.set(4, "bb".getBytes(StandardCharsets.UTF_8));
+ vectorB2.set(5, "cc".getBytes(StandardCharsets.UTF_8));
+ vectorB2.setValueCount(6);
+ FieldVector encodedVectorB1 = (FieldVector) DictionaryEncoder.encode(vectorB1, dictionary1);
+ vectorB1.close();
+ FieldVector encodedVectorB2 = (FieldVector) DictionaryEncoder.encode(vectorB2, dictionary2);
+ vectorB2.close();
+
+ List<Field> fieldsB = Arrays.asList(encodedVectorB1.getField(), encodedVectorB2.getField());
+ List<FieldVector> vectorsB = Collections2.asImmutableList(encodedVectorB1, encodedVectorB2);
+ VectorSchemaRoot rootB = new VectorSchemaRoot(fieldsB, vectorsB, 6);
+ VectorUnloader unloaderB = new VectorUnloader(rootB);
+ batches.add(unloaderB.getRecordBatch());
+ rootB.close();
+
+ List<Field> schemaFields = new ArrayList<>();
+ schemaFields.add(DictionaryUtility.toMessageFormat(encodedVectorA1.getField(), provider, new HashSet<>()));
+ schemaFields.add(DictionaryUtility.toMessageFormat(encodedVectorA2.getField(), provider, new HashSet<>()));
+ schema = new Schema(schemaFields);
+
+ encodedSchema = new Schema(Arrays.asList(encodedVectorA1.getField(), encodedVectorA2.getField()));
+
+ return batches;
+ }
+
+ @Test
+ public void testLegacyIpcBackwardsCompatibility() throws Exception {
+ Schema schema = new Schema(asList(Field.nullable("field", new ArrowType.Int(32, true))));
+ IntVector vector = new IntVector("vector", allocator);
+ final int valueCount = 2;
+ vector.setValueCount(valueCount);
+ vector.setSafe(0, 1);
+ vector.setSafe(1, 2);
+ ArrowRecordBatch batch = new ArrowRecordBatch(valueCount, asList(new ArrowFieldNode(valueCount, 0)),
+ asList(vector.getValidityBuffer(), vector.getDataBuffer()));
+
+ ByteArrayOutputStream outStream = new ByteArrayOutputStream();
+ WriteChannel out = new WriteChannel(newChannel(outStream));
+
+ // write legacy ipc format
+ IpcOption option = new IpcOption(true, MetadataVersion.DEFAULT);
+ MessageSerializer.serialize(out, schema, option);
+ MessageSerializer.serialize(out, batch);
+
+ ReadChannel in = new ReadChannel(newChannel(new ByteArrayInputStream(outStream.toByteArray())));
+ Schema readSchema = MessageSerializer.deserializeSchema(in);
+ assertEquals(schema, readSchema);
+ ArrowRecordBatch readBatch = MessageSerializer.deserializeRecordBatch(in, allocator);
+ assertEquals(batch.getLength(), readBatch.getLength());
+ assertEquals(batch.computeBodyLength(), readBatch.computeBodyLength());
+ readBatch.close();
+
+ // write ipc format with continuation
+ option = IpcOption.DEFAULT;
+ MessageSerializer.serialize(out, schema, option);
+ MessageSerializer.serialize(out, batch);
+
+ ReadChannel in2 = new ReadChannel(newChannel(new ByteArrayInputStream(outStream.toByteArray())));
+ Schema readSchema2 = MessageSerializer.deserializeSchema(in2);
+ assertEquals(schema, readSchema2);
+ ArrowRecordBatch readBatch2 = MessageSerializer.deserializeRecordBatch(in2, allocator);
+ assertEquals(batch.getLength(), readBatch2.getLength());
+ assertEquals(batch.computeBodyLength(), readBatch2.computeBodyLength());
+ readBatch2.close();
+
+ batch.close();
+ vector.close();
+ }
+
+ @Test
+ public void testChannelReadFully() throws IOException {
+ final ByteBuffer buf = ByteBuffer.allocate(4).order(ByteOrder.nativeOrder());
+ buf.putInt(200);
+ buf.rewind();
+
+ try (ReadChannel channel = new ReadChannel(Channels.newChannel(new ByteArrayInputStream(buf.array())));
+ ArrowBuf arrBuf = allocator.buffer(8)) {
+ arrBuf.setInt(0, 100);
+ arrBuf.writerIndex(4);
+ assertEquals(4, arrBuf.writerIndex());
+
+ long n = channel.readFully(arrBuf, 4);
+ assertEquals(4, n);
+ assertEquals(8, arrBuf.writerIndex());
+
+ assertEquals(100, arrBuf.getInt(0));
+ assertEquals(200, arrBuf.getInt(4));
+ }
+ }
+
+ @Test
+ public void testChannelReadFullyEos() throws IOException {
+ final ByteBuffer buf = ByteBuffer.allocate(4).order(ByteOrder.nativeOrder());
+ buf.putInt(10);
+ buf.rewind();
+
+ try (ReadChannel channel = new ReadChannel(Channels.newChannel(new ByteArrayInputStream(buf.array())));
+ ArrowBuf arrBuf = allocator.buffer(8)) {
+ int n = channel.readFully(arrBuf.nioBuffer(0, 8));
+ assertEquals(4, n);
+
+ // the input has only 4 bytes, so the number of bytes read should be 4
+ assertEquals(4, channel.bytesRead());
+
+ // the first 4 bytes have been read successfully.
+ assertEquals(10, arrBuf.getInt(0));
+ }
+ }
+
+ @Test
+ public void testCustomMetaData() throws IOException {
+
+ VarCharVector vector = newVarCharVector("varchar1", allocator);
+
+ List<Field> fields = Arrays.asList(vector.getField());
+ List<FieldVector> vectors = Collections2.asImmutableList(vector);
+ Map<String, String> metadata = new HashMap<>();
+ metadata.put("key1", "value1");
+ metadata.put("key2", "value2");
+ try (VectorSchemaRoot root = new VectorSchemaRoot(fields, vectors, vector.getValueCount());
+ ByteArrayOutputStream out = new ByteArrayOutputStream();
+ ArrowFileWriter writer = new ArrowFileWriter(root, null, newChannel(out), metadata);) {
+
+ writer.start();
+ writer.end();
+
+ try (SeekableReadChannel channel = new SeekableReadChannel(
+ new ByteArrayReadableSeekableByteChannel(out.toByteArray()));
+ ArrowFileReader reader = new ArrowFileReader(channel, allocator)) {
+ reader.getVectorSchemaRoot();
+
+ Map<String, String> readMeta = reader.getMetaData();
+ assertEquals(2, readMeta.size());
+ assertEquals("value1", readMeta.get("key1"));
+ assertEquals("value2", readMeta.get("key2"));
+ }
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowStream.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowStream.java
new file mode 100644
index 000000000..9348cd3a6
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowStream.java
@@ -0,0 +1,147 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.ipc;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.nio.channels.Channels;
+import java.util.Collections;
+
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.TinyIntVector;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.junit.Assert;
+import org.junit.Test;
+
+public class TestArrowStream extends BaseFileTest {
+ @Test
+ public void testEmptyStream() throws IOException {
+ Schema schema = MessageSerializerTest.testSchema();
+ VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator);
+
+ // Write the stream.
+ ByteArrayOutputStream out = new ByteArrayOutputStream();
+ ArrowStreamWriter writer = new ArrowStreamWriter(root, null, out);
+ writer.close();
+ Assert.assertTrue(out.size() > 0);
+
+ ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray());
+ try (ArrowStreamReader reader = new ArrowStreamReader(in, allocator)) {
+ assertEquals(schema, reader.getVectorSchemaRoot().getSchema());
+ // Empty should return false
+ Assert.assertFalse(reader.loadNextBatch());
+ assertEquals(0, reader.getVectorSchemaRoot().getRowCount());
+ Assert.assertFalse(reader.loadNextBatch());
+ assertEquals(0, reader.getVectorSchemaRoot().getRowCount());
+ }
+ }
+
+ @Test
+ public void testStreamZeroLengthBatch() throws IOException {
+ ByteArrayOutputStream os = new ByteArrayOutputStream();
+
+ try (IntVector vector = new IntVector("foo", allocator);) {
+ Schema schema = new Schema(Collections.singletonList(vector.getField()));
+ try (VectorSchemaRoot root =
+ new VectorSchemaRoot(schema, Collections.singletonList(vector), vector.getValueCount());
+ ArrowStreamWriter writer = new ArrowStreamWriter(root, null, Channels.newChannel(os));) {
+ vector.setValueCount(0);
+ root.setRowCount(0);
+ writer.writeBatch();
+ writer.end();
+ }
+ }
+
+ ByteArrayInputStream in = new ByteArrayInputStream(os.toByteArray());
+
+ try (ArrowStreamReader reader = new ArrowStreamReader(in, allocator);) {
+ VectorSchemaRoot root = reader.getVectorSchemaRoot();
+ IntVector vector = (IntVector) root.getFieldVectors().get(0);
+ reader.loadNextBatch();
+ assertEquals(vector.getValueCount(), 0);
+ assertEquals(root.getRowCount(), 0);
+ }
+ }
+
+ @Test
+ public void testReadWrite() throws IOException {
+ Schema schema = MessageSerializerTest.testSchema();
+ try (VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) {
+ int numBatches = 1;
+
+ root.getFieldVectors().get(0).allocateNew();
+ TinyIntVector vector = (TinyIntVector) root.getFieldVectors().get(0);
+ for (int i = 0; i < 16; i++) {
+ vector.set(i, i < 8 ? 1 : 0, (byte) (i + 1));
+ }
+ vector.setValueCount(16);
+ root.setRowCount(16);
+
+ ByteArrayOutputStream out = new ByteArrayOutputStream();
+ long bytesWritten = 0;
+ try (ArrowStreamWriter writer = new ArrowStreamWriter(root, null, out)) {
+ writer.start();
+ for (int i = 0; i < numBatches; i++) {
+ writer.writeBatch();
+ }
+ writer.end();
+ bytesWritten = writer.bytesWritten();
+ }
+
+ ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray());
+ try (ArrowStreamReader reader = new ArrowStreamReader(in, allocator)) {
+ Schema readSchema = reader.getVectorSchemaRoot().getSchema();
+ assertEquals(schema, readSchema);
+ for (int i = 0; i < numBatches; i++) {
+ assertTrue(reader.loadNextBatch());
+ }
+ // TODO figure out why reader isn't getting padding bytes
+ assertEquals(bytesWritten, reader.bytesRead() + 8);
+ assertFalse(reader.loadNextBatch());
+ assertEquals(0, reader.getVectorSchemaRoot().getRowCount());
+ }
+ }
+ }
+
+ @Test
+ public void testReadWriteMultipleBatches() throws IOException {
+ ByteArrayOutputStream os = new ByteArrayOutputStream();
+
+ try (IntVector vector = new IntVector("foo", allocator);) {
+ Schema schema = new Schema(Collections.singletonList(vector.getField()));
+ try (VectorSchemaRoot root =
+ new VectorSchemaRoot(schema, Collections.singletonList(vector), vector.getValueCount());
+ ArrowStreamWriter writer = new ArrowStreamWriter(root, null, Channels.newChannel(os));) {
+ writeBatchData(writer, vector, root);
+ }
+ }
+
+ ByteArrayInputStream in = new ByteArrayInputStream(os.toByteArray());
+
+ try (ArrowStreamReader reader = new ArrowStreamReader(in, allocator);) {
+ IntVector vector = (IntVector) reader.getVectorSchemaRoot().getFieldVectors().get(0);
+ validateBatchData(reader, vector);
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowStreamPipe.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowStreamPipe.java
new file mode 100644
index 000000000..422a63f57
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowStreamPipe.java
@@ -0,0 +1,161 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.ipc;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+import java.nio.channels.Pipe;
+import java.nio.channels.ReadableByteChannel;
+import java.nio.channels.WritableByteChannel;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.TinyIntVector;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.ipc.ArrowStreamReader;
+import org.apache.arrow.vector.ipc.ArrowStreamWriter;
+import org.apache.arrow.vector.ipc.MessageSerializerTest;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.junit.Assert;
+import org.junit.Test;
+
+public class TestArrowStreamPipe {
+ Schema schema = MessageSerializerTest.testSchema();
+ BufferAllocator alloc = new RootAllocator(Long.MAX_VALUE);
+
+ private final class WriterThread extends Thread {
+
+ private final int numBatches;
+ private final ArrowStreamWriter writer;
+ private final VectorSchemaRoot root;
+
+ public WriterThread(int numBatches, WritableByteChannel sinkChannel)
+ throws IOException {
+ this.numBatches = numBatches;
+ BufferAllocator allocator = alloc.newChildAllocator("writer thread", 0, Integer.MAX_VALUE);
+ root = VectorSchemaRoot.create(schema, allocator);
+ writer = new ArrowStreamWriter(root, null, sinkChannel);
+ }
+
+ @Override
+ public void run() {
+ try {
+ writer.start();
+ for (int j = 0; j < numBatches; j++) {
+ root.getFieldVectors().get(0).allocateNew();
+ TinyIntVector vector = (TinyIntVector) root.getFieldVectors().get(0);
+ // Send a changing batch id first
+ vector.set(0, j);
+ for (int i = 1; i < 16; i++) {
+ vector.set(i, i < 8 ? 1 : 0, (byte) (i + 1));
+ }
+ vector.setValueCount(16);
+ root.setRowCount(16);
+
+ writer.writeBatch();
+ }
+ writer.close();
+ root.close();
+ } catch (IOException e) {
+ e.printStackTrace();
+ Assert.fail(e.toString()); // have to explicitly fail since we're in a separate thread
+ }
+ }
+
+ public long bytesWritten() {
+ return writer.bytesWritten();
+ }
+ }
+
+ private final class ReaderThread extends Thread {
+ private int batchesRead = 0;
+ private final ArrowStreamReader reader;
+ private final BufferAllocator alloc = new RootAllocator(Long.MAX_VALUE);
+ private boolean done = false;
+
+ public ReaderThread(ReadableByteChannel sourceChannel)
+ throws IOException {
+ reader = new ArrowStreamReader(sourceChannel, alloc) {
+
+ @Override
+ public boolean loadNextBatch() throws IOException {
+ if (super.loadNextBatch()) {
+ batchesRead++;
+ } else {
+ done = true;
+ return false;
+ }
+ VectorSchemaRoot root = getVectorSchemaRoot();
+ Assert.assertEquals(16, root.getRowCount());
+ TinyIntVector vector = (TinyIntVector) root.getFieldVectors().get(0);
+ Assert.assertEquals((byte) (batchesRead - 1), vector.get(0));
+ for (int i = 1; i < 16; i++) {
+ if (i < 8) {
+ Assert.assertEquals((byte) (i + 1), vector.get(i));
+ } else {
+ Assert.assertTrue(vector.isNull(i));
+ }
+ }
+
+ return true;
+ }
+ };
+ }
+
+ @Override
+ public void run() {
+ try {
+ assertEquals(schema, reader.getVectorSchemaRoot().getSchema());
+ while (!done) {
+ assertTrue(reader.loadNextBatch() != done);
+ }
+ reader.close();
+ } catch (IOException e) {
+ e.printStackTrace();
+ Assert.fail(e.toString()); // have to explicitly fail since we're in a separate thread
+ }
+ }
+
+ public int getBatchesRead() {
+ return batchesRead;
+ }
+
+ public long bytesRead() {
+ return reader.bytesRead();
+ }
+ }
+
+ // Starts up a producer and consumer thread to read/write batches.
+ @Test
+ public void pipeTest() throws IOException, InterruptedException {
+ final int NUM_BATCHES = 10;
+ Pipe pipe = Pipe.open();
+ WriterThread writer = new WriterThread(NUM_BATCHES, pipe.sink());
+ ReaderThread reader = new ReaderThread(pipe.source());
+
+ writer.start();
+ reader.start();
+ reader.join();
+ writer.join();
+
+ assertEquals(NUM_BATCHES, reader.getBatchesRead());
+ assertEquals(writer.bytesWritten(), reader.bytesRead());
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestJSONFile.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestJSONFile.java
new file mode 100644
index 000000000..f0aa226e2
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestJSONFile.java
@@ -0,0 +1,458 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.ipc;
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.File;
+import java.io.IOException;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.UInt1Vector;
+import org.apache.arrow.vector.UInt4Vector;
+import org.apache.arrow.vector.UInt8Vector;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.complex.StructVector;
+import org.apache.arrow.vector.complex.impl.ComplexWriterImpl;
+import org.apache.arrow.vector.complex.writer.BaseWriter;
+import org.apache.arrow.vector.dictionary.DictionaryProvider;
+import org.apache.arrow.vector.dictionary.DictionaryProvider.MapDictionaryProvider;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.apache.arrow.vector.util.Validator;
+import org.junit.Assert;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class TestJSONFile extends BaseFileTest {
+ private static final Logger LOGGER = LoggerFactory.getLogger(TestJSONFile.class);
+
+ @Test
+ public void testNoBatches() throws IOException {
+ File file = new File("target/no_batches.json");
+
+ try (BufferAllocator originalVectorAllocator =
+ allocator.newChildAllocator("original vectors", 0, Integer.MAX_VALUE);
+ StructVector parent = StructVector.empty("parent", originalVectorAllocator)) {
+ BaseWriter.ComplexWriter writer = new ComplexWriterImpl("root", parent);
+ BaseWriter.StructWriter rootWriter = writer.rootAsStruct();
+ rootWriter.integer("int");
+ rootWriter.uInt1("uint1");
+ rootWriter.bigInt("bigInt");
+ rootWriter.float4("float");
+ JsonFileWriter jsonWriter = new JsonFileWriter(file, JsonFileWriter.config().pretty(true));
+ jsonWriter.start(new VectorSchemaRoot(parent.getChild("root")).getSchema(), null);
+ jsonWriter.close();
+ }
+
+ // read
+ try (
+ BufferAllocator readerAllocator = allocator.newChildAllocator("reader", 0, Integer.MAX_VALUE);
+ JsonFileReader reader = new JsonFileReader(file, readerAllocator)
+ ) {
+ Schema schema = reader.start();
+ LOGGER.debug("reading schema: " + schema);
+ }
+ }
+
+ @Test
+ public void testWriteRead() throws IOException {
+ File file = new File("target/mytest.json");
+ int count = COUNT;
+
+ // write
+ try (BufferAllocator originalVectorAllocator =
+ allocator.newChildAllocator("original vectors", 0, Integer.MAX_VALUE);
+ StructVector parent = StructVector.empty("parent", originalVectorAllocator)) {
+ writeData(count, parent);
+ writeJSON(file, new VectorSchemaRoot(parent.getChild("root")), null);
+ }
+
+ // read
+ try (
+ BufferAllocator readerAllocator = allocator.newChildAllocator("reader", 0, Integer.MAX_VALUE);
+ JsonFileReader reader = new JsonFileReader(file, readerAllocator)
+ ) {
+ Schema schema = reader.start();
+ LOGGER.debug("reading schema: " + schema);
+
+ // initialize vectors
+ try (VectorSchemaRoot root = reader.read();) {
+ validateContent(count, root);
+ }
+ }
+ }
+
+ @Test
+ public void testWriteReadComplexJSON() throws IOException {
+ File file = new File("target/mytest_complex.json");
+ int count = COUNT;
+
+ // write
+ try (
+ BufferAllocator originalVectorAllocator = allocator.newChildAllocator("original vectors", 0, Integer.MAX_VALUE);
+ StructVector parent = StructVector.empty("parent", originalVectorAllocator)) {
+ writeComplexData(count, parent);
+ writeJSON(file, new VectorSchemaRoot(parent.getChild("root")), null);
+ }
+
+ // read
+ try (
+ BufferAllocator readerAllocator = allocator.newChildAllocator("reader", 0, Integer.MAX_VALUE);
+ JsonFileReader reader = new JsonFileReader(file, readerAllocator);
+ ) {
+ Schema schema = reader.start();
+ LOGGER.debug("reading schema: " + schema);
+
+ // initialize vectors
+ try (VectorSchemaRoot root = reader.read();) {
+ validateComplexContent(count, root);
+ }
+ }
+ }
+
+ @Test
+ public void testWriteComplexJSON() throws IOException {
+ File file = new File("target/mytest_write_complex.json");
+ int count = COUNT;
+ try (
+ BufferAllocator vectorAllocator = allocator.newChildAllocator("original vectors", 0, Integer.MAX_VALUE);
+ StructVector parent = StructVector.empty("parent", vectorAllocator)) {
+ writeComplexData(count, parent);
+ VectorSchemaRoot root = new VectorSchemaRoot(parent.getChild("root"));
+ validateComplexContent(root.getRowCount(), root);
+ writeJSON(file, root, null);
+ }
+ }
+
+ public void writeJSON(File file, VectorSchemaRoot root, DictionaryProvider provider) throws IOException {
+ JsonFileWriter writer = new JsonFileWriter(file, JsonFileWriter.config().pretty(true));
+ writer.start(root.getSchema(), provider);
+ writer.write(root);
+ writer.close();
+ }
+
+
+ @Test
+ public void testWriteReadUnionJSON() throws IOException {
+ File file = new File("target/mytest_write_union.json");
+ int count = COUNT;
+ try (
+ BufferAllocator vectorAllocator = allocator.newChildAllocator("original vectors", 0, Integer.MAX_VALUE);
+ StructVector parent = StructVector.empty("parent", vectorAllocator)) {
+ writeUnionData(count, parent);
+ printVectors(parent.getChildrenFromFields());
+
+ try (VectorSchemaRoot root = new VectorSchemaRoot(parent.getChild("root"))) {
+ validateUnionData(count, root);
+ writeJSON(file, root, null);
+
+ // read
+ try (BufferAllocator readerAllocator = allocator.newChildAllocator("reader", 0, Integer.MAX_VALUE)) {
+ JsonFileReader reader = new JsonFileReader(file, readerAllocator);
+
+ Schema schema = reader.start();
+ LOGGER.debug("reading schema: " + schema);
+
+ try (VectorSchemaRoot rootFromJson = reader.read();) {
+ validateUnionData(count, rootFromJson);
+ Validator.compareVectorSchemaRoot(root, rootFromJson);
+ }
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testWriteReadDateTimeJSON() throws IOException {
+ File file = new File("target/mytest_datetime.json");
+ int count = COUNT;
+
+ // write
+ try (
+ BufferAllocator vectorAllocator = allocator.newChildAllocator("original vectors", 0, Integer.MAX_VALUE);
+ StructVector parent = StructVector.empty("parent", vectorAllocator)) {
+
+ writeDateTimeData(count, parent);
+
+ printVectors(parent.getChildrenFromFields());
+
+ VectorSchemaRoot root = new VectorSchemaRoot(parent.getChild("root"));
+ validateDateTimeContent(count, root);
+
+ writeJSON(file, new VectorSchemaRoot(parent.getChild("root")), null);
+ }
+
+ // read
+ try (
+ BufferAllocator readerAllocator = allocator.newChildAllocator("reader", 0, Integer.MAX_VALUE);
+ JsonFileReader reader = new JsonFileReader(file, readerAllocator)
+ ) {
+ Schema schema = reader.start();
+ LOGGER.debug("reading schema: " + schema);
+
+ // initialize vectors
+ try (VectorSchemaRoot root = reader.read();) {
+ validateDateTimeContent(count, root);
+ }
+ }
+ }
+
+ @Test
+ public void testWriteReadDictionaryJSON() throws IOException {
+ File file = new File("target/mytest_dictionary.json");
+
+ // write
+ try (
+ BufferAllocator vectorAllocator = allocator.newChildAllocator("original vectors", 0, Integer.MAX_VALUE)
+ ) {
+ MapDictionaryProvider provider = new MapDictionaryProvider();
+
+ try (VectorSchemaRoot root = writeFlatDictionaryData(vectorAllocator, provider)) {
+ printVectors(root.getFieldVectors());
+ validateFlatDictionary(root, provider);
+ writeJSON(file, root, provider);
+ }
+
+ // Need to close dictionary vectors
+ for (long id : provider.getDictionaryIds()) {
+ provider.lookup(id).getVector().close();
+ }
+ }
+
+ // read
+ try (
+ BufferAllocator readerAllocator = allocator.newChildAllocator("reader", 0, Integer.MAX_VALUE);
+ JsonFileReader reader = new JsonFileReader(file, readerAllocator)
+ ) {
+ Schema schema = reader.start();
+ LOGGER.debug("reading schema: " + schema);
+
+ // initialize vectors
+ try (VectorSchemaRoot root = reader.read();) {
+ validateFlatDictionary(root, reader);
+ }
+ }
+ }
+
+ @Test
+ public void testWriteReadNestedDictionaryJSON() throws IOException {
+ File file = new File("target/mytest_dict_nested.json");
+
+ // data being written:
+ // [['foo', 'bar'], ['foo'], ['bar']] -> [[0, 1], [0], [1]]
+
+ // write
+ try (
+ BufferAllocator vectorAllocator = allocator.newChildAllocator("original vectors", 0, Integer.MAX_VALUE)
+ ) {
+ MapDictionaryProvider provider = new MapDictionaryProvider();
+
+ try (VectorSchemaRoot root = writeNestedDictionaryData(vectorAllocator, provider)) {
+ printVectors(root.getFieldVectors());
+ validateNestedDictionary(root, provider);
+ writeJSON(file, root, provider);
+ }
+
+ // Need to close dictionary vectors
+ for (long id : provider.getDictionaryIds()) {
+ provider.lookup(id).getVector().close();
+ }
+ }
+
+ // read
+ try (
+ BufferAllocator readerAllocator = allocator.newChildAllocator("reader", 0, Integer.MAX_VALUE);
+ JsonFileReader reader = new JsonFileReader(file, readerAllocator)
+ ) {
+ Schema schema = reader.start();
+ LOGGER.debug("reading schema: " + schema);
+
+ // initialize vectors
+ try (VectorSchemaRoot root = reader.read();) {
+ validateNestedDictionary(root, reader);
+ }
+ }
+ }
+
+ @Test
+ public void testWriteReadDecimalJSON() throws IOException {
+ File file = new File("target/mytest_decimal.json");
+
+ // write
+ try (BufferAllocator vectorAllocator = allocator.newChildAllocator("original vectors", 0, Integer.MAX_VALUE);
+ VectorSchemaRoot root = writeDecimalData(vectorAllocator)) {
+ printVectors(root.getFieldVectors());
+ validateDecimalData(root);
+ writeJSON(file, root, null);
+ }
+
+ // read
+ try (
+ BufferAllocator readerAllocator = allocator.newChildAllocator("reader", 0, Integer.MAX_VALUE);
+ JsonFileReader reader = new JsonFileReader(file, readerAllocator)
+ ) {
+ Schema schema = reader.start();
+ LOGGER.debug("reading schema: " + schema);
+
+ // initialize vectors
+ try (VectorSchemaRoot root = reader.read();) {
+ validateDecimalData(root);
+ }
+ }
+ }
+
+ @Test
+ public void testSetStructLength() throws IOException {
+ File file = new File("../../docs/source/format/integration_json_examples/struct.json");
+ if (!file.exists()) {
+ file = new File("../docs/source/format/integration_json_examples/struct.json");
+ }
+ try (
+ BufferAllocator readerAllocator = allocator.newChildAllocator("reader", 0, Integer.MAX_VALUE);
+ JsonFileReader reader = new JsonFileReader(file, readerAllocator)
+ ) {
+ Schema schema = reader.start();
+ LOGGER.debug("reading schema: " + schema);
+
+ // initialize vectors
+ try (VectorSchemaRoot root = reader.read();) {
+ FieldVector vector = root.getVector("struct_nullable");
+ Assert.assertEquals(7, vector.getValueCount());
+ }
+ }
+ }
+
+ @Test
+ public void testWriteReadVarBinJSON() throws IOException {
+ File file = new File("target/mytest_varbin.json");
+ int count = COUNT;
+
+ // write
+ try (
+ BufferAllocator vectorAllocator = allocator.newChildAllocator("original vectors", 0, Integer.MAX_VALUE);
+ StructVector parent = StructVector.empty("parent", vectorAllocator)) {
+ writeVarBinaryData(count, parent);
+ VectorSchemaRoot root = new VectorSchemaRoot(parent.getChild("root"));
+ validateVarBinary(count, root);
+ writeJSON(file, new VectorSchemaRoot(parent.getChild("root")), null);
+ }
+
+ // read
+ try (BufferAllocator readerAllocator = allocator.newChildAllocator("reader", 0, Integer.MAX_VALUE);
+ JsonFileReader reader = new JsonFileReader(file, readerAllocator)) {
+ Schema schema = reader.start();
+ LOGGER.debug("reading schema: " + schema);
+
+ // initialize vectors
+ try (VectorSchemaRoot root = reader.read();) {
+ validateVarBinary(count, root);
+ }
+ }
+ }
+
+ @Test
+ public void testWriteReadMapJSON() throws IOException {
+ File file = new File("target/mytest_map.json");
+
+ // write
+ try (BufferAllocator vectorAllocator = allocator.newChildAllocator("original vectors", 0, Integer.MAX_VALUE);
+ VectorSchemaRoot root = writeMapData(vectorAllocator)) {
+ printVectors(root.getFieldVectors());
+ validateMapData(root);
+ writeJSON(file, root, null);
+ }
+
+ // read
+ try (BufferAllocator readerAllocator = allocator.newChildAllocator("reader", 0, Integer.MAX_VALUE);
+ JsonFileReader reader = new JsonFileReader(file, readerAllocator)) {
+ Schema schema = reader.start();
+ LOGGER.debug("reading schema: " + schema);
+
+ // initialize vectors
+ try (VectorSchemaRoot root = reader.read();) {
+ validateMapData(root);
+ }
+ }
+ }
+
+ @Test
+ public void testWriteReadNullJSON() throws IOException {
+ File file = new File("target/mytest_null.json");
+ int valueCount = 10;
+
+ // write
+ try (BufferAllocator vectorAllocator = allocator.newChildAllocator("original vectors", 0, Integer.MAX_VALUE);
+ VectorSchemaRoot root = writeNullData(valueCount)) {
+ printVectors(root.getFieldVectors());
+ validateNullData(root, valueCount);
+ writeJSON(file, root, null);
+ }
+
+ // read
+ try (
+ BufferAllocator readerAllocator = allocator.newChildAllocator("reader", 0, Integer.MAX_VALUE);
+ JsonFileReader reader = new JsonFileReader(file, readerAllocator)
+ ) {
+
+ Schema schema = reader.start();
+ LOGGER.debug("reading schema: " + schema);
+
+ // initialize vectors
+ try (VectorSchemaRoot root = reader.read();) {
+ validateNullData(root, valueCount);
+ }
+ }
+ }
+
+ @Test
+ public void testNoOverFlowWithUINT() {
+ try (final UInt8Vector uInt8Vector = new UInt8Vector("uint8", allocator);
+ final UInt4Vector uInt4Vector = new UInt4Vector("uint4", allocator);
+ final UInt1Vector uInt1Vector = new UInt1Vector("uint1", allocator)) {
+
+ long[] longValues = new long[]{Long.MIN_VALUE, Long.MAX_VALUE, -1L};
+ uInt8Vector.allocateNew(3);
+ uInt8Vector.setValueCount(3);
+ for (int i = 0; i < longValues.length; i++) {
+ uInt8Vector.set(i, longValues[i]);
+ long readValue = uInt8Vector.getObjectNoOverflow(i).longValue();
+ assertEquals(readValue, longValues[i]);
+ }
+
+ int[] intValues = new int[]{Integer.MIN_VALUE, Integer.MAX_VALUE, -1};
+ uInt4Vector.allocateNew(3);
+ uInt4Vector.setValueCount(3);
+ for (int i = 0; i < intValues.length; i++) {
+ uInt4Vector.set(i, intValues[i]);
+ int actualValue = (int) UInt4Vector.getNoOverflow(uInt4Vector.getDataBuffer(), i);
+ assertEquals(intValues[i], actualValue);
+ }
+
+ byte[] byteValues = new byte[]{Byte.MIN_VALUE, Byte.MAX_VALUE, -1};
+ uInt1Vector.allocateNew(3);
+ uInt1Vector.setValueCount(3);
+ for (int i = 0; i < byteValues.length; i++) {
+ uInt1Vector.set(i, byteValues[i]);
+ byte actualValue = (byte) UInt1Vector.getNoOverflow(uInt1Vector.getDataBuffer(), i);
+ assertEquals(byteValues[i], actualValue);
+ }
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestRoundTrip.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestRoundTrip.java
new file mode 100644
index 000000000..5f57e90f6
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestRoundTrip.java
@@ -0,0 +1,628 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.ipc;
+
+import static org.apache.arrow.vector.dictionary.DictionaryProvider.MapDictionaryProvider;
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.nio.channels.Channels;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.function.BiConsumer;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.util.Collections2;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.FixedSizeBinaryVector;
+import org.apache.arrow.vector.Float4Vector;
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.TinyIntVector;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.VectorUnloader;
+import org.apache.arrow.vector.complex.FixedSizeListVector;
+import org.apache.arrow.vector.complex.StructVector;
+import org.apache.arrow.vector.dictionary.DictionaryProvider;
+import org.apache.arrow.vector.ipc.message.ArrowBlock;
+import org.apache.arrow.vector.ipc.message.ArrowBuffer;
+import org.apache.arrow.vector.ipc.message.ArrowRecordBatch;
+import org.apache.arrow.vector.ipc.message.IpcOption;
+import org.apache.arrow.vector.ipc.message.MessageMetadataResult;
+import org.apache.arrow.vector.ipc.message.MessageSerializer;
+import org.apache.arrow.vector.types.FloatingPointPrecision;
+import org.apache.arrow.vector.types.MetadataVersion;
+import org.apache.arrow.vector.types.Types;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.junit.AfterClass;
+import org.junit.Assume;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+@RunWith(Parameterized.class)
+public class TestRoundTrip extends BaseFileTest {
+ private static final Logger LOGGER = LoggerFactory.getLogger(TestRoundTrip.class);
+ private static BufferAllocator allocator;
+ private final String name;
+ private final IpcOption writeOption;
+
+ public TestRoundTrip(String name, IpcOption writeOption) {
+ this.name = name;
+ this.writeOption = writeOption;
+ }
+
+ @Parameterized.Parameters(name = "options = {0}")
+ public static Collection<Object[]> getWriteOption() {
+ final IpcOption legacy = new IpcOption(true, MetadataVersion.V4);
+ final IpcOption version4 = new IpcOption(false, MetadataVersion.V4);
+ return Arrays.asList(
+ new Object[] {"V4Legacy", legacy},
+ new Object[] {"V4", version4},
+ new Object[] {"V5", IpcOption.DEFAULT}
+ );
+ }
+
+ @BeforeClass
+ public static void setUpClass() {
+ allocator = new RootAllocator(Integer.MAX_VALUE);
+ }
+
+ @AfterClass
+ public static void tearDownClass() {
+ allocator.close();
+ }
+
+ @Test
+ public void testStruct() throws Exception {
+ try (final BufferAllocator originalVectorAllocator =
+ allocator.newChildAllocator("original vectors", 0, allocator.getLimit());
+ final StructVector parent = StructVector.empty("parent", originalVectorAllocator)) {
+ writeData(COUNT, parent);
+ roundTrip(
+ new VectorSchemaRoot(parent.getChild("root")),
+ /* dictionaryProvider */null,
+ TestRoundTrip::writeSingleBatch,
+ validateFileBatches(new int[] {COUNT}, this::validateContent),
+ validateStreamBatches(new int[] {COUNT}, this::validateContent));
+ }
+ }
+
+ @Test
+ public void testComplex() throws Exception {
+ try (final BufferAllocator originalVectorAllocator =
+ allocator.newChildAllocator("original vectors", 0, allocator.getLimit());
+ final StructVector parent = StructVector.empty("parent", originalVectorAllocator)) {
+ writeComplexData(COUNT, parent);
+ roundTrip(
+ new VectorSchemaRoot(parent.getChild("root")),
+ /* dictionaryProvider */null,
+ TestRoundTrip::writeSingleBatch,
+ validateFileBatches(new int[] {COUNT}, this::validateComplexContent),
+ validateStreamBatches(new int[] {COUNT}, this::validateComplexContent));
+ }
+ }
+
+ @Test
+ public void testMultipleRecordBatches() throws Exception {
+ int[] counts = {10, 5};
+ try (final BufferAllocator originalVectorAllocator =
+ allocator.newChildAllocator("original vectors", 0, allocator.getLimit());
+ final StructVector parent = StructVector.empty("parent", originalVectorAllocator)) {
+ writeData(counts[0], parent);
+ roundTrip(
+ new VectorSchemaRoot(parent.getChild("root")),
+ /* dictionaryProvider */null,
+ (root, writer) -> {
+ writer.start();
+ parent.allocateNew();
+ writeData(counts[0], parent);
+ root.setRowCount(counts[0]);
+ writer.writeBatch();
+
+ parent.allocateNew();
+ // if we write the same data we don't catch that the metadata is stored in the wrong order.
+ writeData(counts[1], parent);
+ root.setRowCount(counts[1]);
+ writer.writeBatch();
+
+ writer.end();
+ },
+ validateFileBatches(counts, this::validateContent),
+ validateStreamBatches(counts, this::validateContent));
+ }
+ }
+
+ @Test
+ public void testUnionV4() throws Exception {
+ Assume.assumeTrue(writeOption.metadataVersion == MetadataVersion.V4);
+ final File temp = File.createTempFile("arrow-test-" + name + "-", ".arrow");
+ temp.deleteOnExit();
+ final ByteArrayOutputStream memoryStream = new ByteArrayOutputStream();
+
+ try (final BufferAllocator originalVectorAllocator =
+ allocator.newChildAllocator("original vectors", 0, allocator.getLimit());
+ final StructVector parent = StructVector.empty("parent", originalVectorAllocator)) {
+ writeUnionData(COUNT, parent);
+ final VectorSchemaRoot root = new VectorSchemaRoot(parent.getChild("root"));
+ IllegalArgumentException e = assertThrows(IllegalArgumentException.class, () -> {
+ try (final FileOutputStream fileStream = new FileOutputStream(temp)) {
+ new ArrowFileWriter(root, null, fileStream.getChannel(), writeOption);
+ new ArrowStreamWriter(root, null, Channels.newChannel(memoryStream), writeOption);
+ }
+ });
+ assertTrue(e.getMessage(), e.getMessage().contains("Cannot write union with V4 metadata"));
+ e = assertThrows(IllegalArgumentException.class, () -> {
+ new ArrowStreamWriter(root, null, Channels.newChannel(memoryStream), writeOption);
+ });
+ assertTrue(e.getMessage(), e.getMessage().contains("Cannot write union with V4 metadata"));
+ }
+ }
+
+ @Test
+ public void testUnionV5() throws Exception {
+ Assume.assumeTrue(writeOption.metadataVersion == MetadataVersion.V5);
+ try (final BufferAllocator originalVectorAllocator =
+ allocator.newChildAllocator("original vectors", 0, allocator.getLimit());
+ final StructVector parent = StructVector.empty("parent", originalVectorAllocator)) {
+ writeUnionData(COUNT, parent);
+ VectorSchemaRoot root = new VectorSchemaRoot(parent.getChild("root"));
+ validateUnionData(COUNT, root);
+ roundTrip(
+ root,
+ /* dictionaryProvider */null,
+ TestRoundTrip::writeSingleBatch,
+ validateFileBatches(new int[] {COUNT}, this::validateUnionData),
+ validateStreamBatches(new int[] {COUNT}, this::validateUnionData));
+ }
+ }
+
+ @Test
+ public void testTiny() throws Exception {
+ try (final VectorSchemaRoot root = VectorSchemaRoot.create(MessageSerializerTest.testSchema(), allocator)) {
+ root.getFieldVectors().get(0).allocateNew();
+ int count = 16;
+ TinyIntVector vector = (TinyIntVector) root.getFieldVectors().get(0);
+ for (int i = 0; i < count; i++) {
+ vector.set(i, i < 8 ? 1 : 0, (byte) (i + 1));
+ }
+ vector.setValueCount(count);
+ root.setRowCount(count);
+
+ roundTrip(
+ root,
+ /* dictionaryProvider */null,
+ TestRoundTrip::writeSingleBatch,
+ validateFileBatches(new int[] {count}, this::validateTinyData),
+ validateStreamBatches(new int[] {count}, this::validateTinyData));
+ }
+ }
+
+ private void validateTinyData(int count, VectorSchemaRoot root) {
+ assertEquals(count, root.getRowCount());
+ TinyIntVector vector = (TinyIntVector) root.getFieldVectors().get(0);
+ for (int i = 0; i < count; i++) {
+ if (i < 8) {
+ assertEquals((byte) (i + 1), vector.get(i));
+ } else {
+ assertTrue(vector.isNull(i));
+ }
+ }
+ }
+
+ @Test
+ public void testMetadata() throws Exception {
+ List<Field> childFields = new ArrayList<>();
+ childFields.add(new Field("varchar-child", new FieldType(true, ArrowType.Utf8.INSTANCE, null, metadata(1)), null));
+ childFields.add(new Field("float-child",
+ new FieldType(true, new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE), null, metadata(2)), null));
+ childFields.add(new Field("int-child", new FieldType(false, new ArrowType.Int(32, true), null, metadata(3)), null));
+ childFields.add(new Field("list-child", new FieldType(true, ArrowType.List.INSTANCE, null, metadata(4)),
+ Collections2.asImmutableList(new Field("l1", FieldType.nullable(new ArrowType.Int(16, true)), null))));
+ Field field = new Field("meta", new FieldType(true, ArrowType.Struct.INSTANCE, null, metadata(0)), childFields);
+ Map<String, String> metadata = new HashMap<>();
+ metadata.put("s1", "v1");
+ metadata.put("s2", "v2");
+ Schema originalSchema = new Schema(Collections2.asImmutableList(field), metadata);
+ assertEquals(metadata, originalSchema.getCustomMetadata());
+
+ try (final BufferAllocator originalVectorAllocator =
+ allocator.newChildAllocator("original vectors", 0, allocator.getLimit());
+ final StructVector vector = (StructVector) field.createVector(originalVectorAllocator)) {
+ vector.allocateNewSafe();
+ vector.setValueCount(0);
+
+ List<FieldVector> vectors = Collections2.asImmutableList(vector);
+ VectorSchemaRoot root = new VectorSchemaRoot(originalSchema, vectors, 0);
+
+ BiConsumer<Integer, VectorSchemaRoot> validate = (count, readRoot) -> {
+ Schema schema = readRoot.getSchema();
+ assertEquals(originalSchema, schema);
+ assertEquals(originalSchema.getCustomMetadata(), schema.getCustomMetadata());
+ Field top = schema.getFields().get(0);
+ assertEquals(metadata(0), top.getMetadata());
+ for (int i = 0; i < 4; i++) {
+ assertEquals(metadata(i + 1), top.getChildren().get(i).getMetadata());
+ }
+ };
+ roundTrip(
+ root,
+ /* dictionaryProvider */null,
+ TestRoundTrip::writeSingleBatch,
+ validateFileBatches(new int[] {0}, validate),
+ validateStreamBatches(new int[] {0}, validate));
+ }
+ }
+
+ private Map<String, String> metadata(int i) {
+ Map<String, String> map = new HashMap<>();
+ map.put("k_" + i, "v_" + i);
+ map.put("k2_" + i, "v2_" + i);
+ return Collections.unmodifiableMap(map);
+ }
+
+ @Test
+ public void testFlatDictionary() throws Exception {
+ AtomicInteger numDictionaryBlocksWritten = new AtomicInteger();
+ MapDictionaryProvider provider = new MapDictionaryProvider();
+ try (final BufferAllocator originalVectorAllocator =
+ allocator.newChildAllocator("original vectors", 0, allocator.getLimit());
+ final VectorSchemaRoot root = writeFlatDictionaryData(originalVectorAllocator, provider)) {
+ roundTrip(
+ root,
+ provider,
+ (ignored, writer) -> {
+ writer.start();
+ writer.writeBatch();
+ writer.end();
+ if (writer instanceof ArrowFileWriter) {
+ numDictionaryBlocksWritten.set(((ArrowFileWriter) writer).getDictionaryBlocks().size());
+ }
+ },
+ (fileReader) -> {
+ VectorSchemaRoot readRoot = fileReader.getVectorSchemaRoot();
+ Schema schema = readRoot.getSchema();
+ LOGGER.debug("reading schema: " + schema);
+ assertTrue(fileReader.loadNextBatch());
+ validateFlatDictionary(readRoot, fileReader);
+ assertEquals(numDictionaryBlocksWritten.get(), fileReader.getDictionaryBlocks().size());
+ },
+ (streamReader) -> {
+ VectorSchemaRoot readRoot = streamReader.getVectorSchemaRoot();
+ Schema schema = readRoot.getSchema();
+ LOGGER.debug("reading schema: " + schema);
+ assertTrue(streamReader.loadNextBatch());
+ validateFlatDictionary(readRoot, streamReader);
+ });
+
+ // Need to close dictionary vectors
+ for (long id : provider.getDictionaryIds()) {
+ provider.lookup(id).getVector().close();
+ }
+ }
+ }
+
+ @Test
+ public void testNestedDictionary() throws Exception {
+ AtomicInteger numDictionaryBlocksWritten = new AtomicInteger();
+ MapDictionaryProvider provider = new MapDictionaryProvider();
+ // data being written:
+ // [['foo', 'bar'], ['foo'], ['bar']] -> [[0, 1], [0], [1]]
+ try (final BufferAllocator originalVectorAllocator =
+ allocator.newChildAllocator("original vectors", 0, allocator.getLimit());
+ final VectorSchemaRoot root = writeNestedDictionaryData(originalVectorAllocator, provider)) {
+ CheckedConsumer<ArrowReader> validateDictionary = (streamReader) -> {
+ VectorSchemaRoot readRoot = streamReader.getVectorSchemaRoot();
+ Schema schema = readRoot.getSchema();
+ LOGGER.debug("reading schema: " + schema);
+ assertTrue(streamReader.loadNextBatch());
+ validateNestedDictionary(readRoot, streamReader);
+ };
+ roundTrip(
+ root,
+ provider,
+ (ignored, writer) -> {
+ writer.start();
+ writer.writeBatch();
+ writer.end();
+ if (writer instanceof ArrowFileWriter) {
+ numDictionaryBlocksWritten.set(((ArrowFileWriter) writer).getDictionaryBlocks().size());
+ }
+ },
+ validateDictionary,
+ validateDictionary);
+
+ // Need to close dictionary vectors
+ for (long id : provider.getDictionaryIds()) {
+ provider.lookup(id).getVector().close();
+ }
+ }
+ }
+
+ @Test
+ public void testFixedSizeBinary() throws Exception {
+ final int count = 10;
+ final int typeWidth = 11;
+ byte[][] byteValues = new byte[count][typeWidth];
+ for (int i = 0; i < count; i++) {
+ for (int j = 0; j < typeWidth; j++) {
+ byteValues[i][j] = ((byte) i);
+ }
+ }
+
+ BiConsumer<Integer, VectorSchemaRoot> validator = (expectedCount, root) -> {
+ for (int i = 0; i < expectedCount; i++) {
+ assertArrayEquals(byteValues[i], ((byte[]) root.getVector("fixed-binary").getObject(i)));
+ }
+ };
+
+ try (final BufferAllocator originalVectorAllocator =
+ allocator.newChildAllocator("original vectors", 0, allocator.getLimit());
+ final StructVector parent = StructVector.empty("parent", originalVectorAllocator)) {
+ FixedSizeBinaryVector fixedSizeBinaryVector = parent.addOrGet("fixed-binary",
+ FieldType.nullable(new ArrowType.FixedSizeBinary(typeWidth)), FixedSizeBinaryVector.class);
+ parent.allocateNew();
+ for (int i = 0; i < count; i++) {
+ fixedSizeBinaryVector.set(i, byteValues[i]);
+ }
+ parent.setValueCount(count);
+
+ roundTrip(
+ new VectorSchemaRoot(parent),
+ /* dictionaryProvider */null,
+ TestRoundTrip::writeSingleBatch,
+ validateFileBatches(new int[] {count}, validator),
+ validateStreamBatches(new int[] {count}, validator));
+ }
+ }
+
+ @Test
+ public void testFixedSizeList() throws Exception {
+ BiConsumer<Integer, VectorSchemaRoot> validator = (expectedCount, root) -> {
+ for (int i = 0; i < expectedCount; i++) {
+ assertEquals(Collections2.asImmutableList(i + 0.1f, i + 10.1f), root.getVector("float-pairs")
+ .getObject(i));
+ assertEquals(i, root.getVector("ints").getObject(i));
+ }
+ };
+
+ try (final BufferAllocator originalVectorAllocator =
+ allocator.newChildAllocator("original vectors", 0, allocator.getLimit());
+ final StructVector parent = StructVector.empty("parent", originalVectorAllocator)) {
+ FixedSizeListVector tuples = parent.addOrGet("float-pairs",
+ FieldType.nullable(new ArrowType.FixedSizeList(2)), FixedSizeListVector.class);
+ Float4Vector floats = (Float4Vector) tuples.addOrGetVector(FieldType.nullable(Types.MinorType.FLOAT4.getType()))
+ .getVector();
+ IntVector ints = parent.addOrGet("ints", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class);
+ parent.allocateNew();
+ for (int i = 0; i < COUNT; i++) {
+ tuples.setNotNull(i);
+ floats.set(i * 2, i + 0.1f);
+ floats.set(i * 2 + 1, i + 10.1f);
+ ints.set(i, i);
+ }
+ parent.setValueCount(COUNT);
+
+ roundTrip(
+ new VectorSchemaRoot(parent),
+ /* dictionaryProvider */null,
+ TestRoundTrip::writeSingleBatch,
+ validateFileBatches(new int[] {COUNT}, validator),
+ validateStreamBatches(new int[] {COUNT}, validator));
+ }
+ }
+
+ @Test
+ public void testVarBinary() throws Exception {
+ try (final BufferAllocator originalVectorAllocator =
+ allocator.newChildAllocator("original vectors", 0, allocator.getLimit());
+ final StructVector parent = StructVector.empty("parent", originalVectorAllocator)) {
+ writeVarBinaryData(COUNT, parent);
+ VectorSchemaRoot root = new VectorSchemaRoot(parent.getChild("root"));
+ validateVarBinary(COUNT, root);
+
+ roundTrip(
+ root,
+ /* dictionaryProvider */null,
+ TestRoundTrip::writeSingleBatch,
+ validateFileBatches(new int[]{COUNT}, this::validateVarBinary),
+ validateStreamBatches(new int[]{COUNT}, this::validateVarBinary));
+ }
+ }
+
+ @Test
+ public void testReadWriteMultipleBatches() throws IOException {
+ File file = new File("target/mytest_nulls_multibatch.arrow");
+ int numBlocksWritten = 0;
+
+ try (IntVector vector = new IntVector("foo", allocator);) {
+ Schema schema = new Schema(Collections.singletonList(vector.getField()));
+ try (FileOutputStream fileOutputStream = new FileOutputStream(file);
+ VectorSchemaRoot root =
+ new VectorSchemaRoot(schema, Collections.singletonList((FieldVector) vector), vector.getValueCount());
+ ArrowFileWriter writer = new ArrowFileWriter(root, null, fileOutputStream.getChannel(), writeOption)) {
+ writeBatchData(writer, vector, root);
+ numBlocksWritten = writer.getRecordBlocks().size();
+ }
+ }
+
+ try (FileInputStream fileInputStream = new FileInputStream(file);
+ ArrowFileReader reader = new ArrowFileReader(fileInputStream.getChannel(), allocator);) {
+ IntVector vector = (IntVector) reader.getVectorSchemaRoot().getFieldVectors().get(0);
+ validateBatchData(reader, vector);
+ assertEquals(numBlocksWritten, reader.getRecordBlocks().size());
+ }
+ }
+
+ @Test
+ public void testMap() throws Exception {
+ try (final BufferAllocator originalVectorAllocator =
+ allocator.newChildAllocator("original vectors", 0, allocator.getLimit());
+ final VectorSchemaRoot root = writeMapData(originalVectorAllocator)) {
+ roundTrip(
+ root,
+ /* dictionaryProvider */null,
+ TestRoundTrip::writeSingleBatch,
+ validateFileBatches(new int[]{root.getRowCount()}, (count, readRoot) -> validateMapData(readRoot)),
+ validateStreamBatches(new int[]{root.getRowCount()}, (count, readRoot) -> validateMapData(readRoot)));
+ }
+ }
+
+ @Test
+ public void testListAsMap() throws Exception {
+ try (final BufferAllocator originalVectorAllocator =
+ allocator.newChildAllocator("original vectors", 0, allocator.getLimit());
+ final VectorSchemaRoot root = writeListAsMapData(originalVectorAllocator)) {
+ roundTrip(
+ root,
+ /* dictionaryProvider */null,
+ TestRoundTrip::writeSingleBatch,
+ validateFileBatches(new int[]{root.getRowCount()}, (count, readRoot) -> validateListAsMapData(readRoot)),
+ validateStreamBatches(new int[]{root.getRowCount()}, (count, readRoot) -> validateListAsMapData(readRoot)));
+ }
+ }
+
+ // Generic test helpers
+
+ private static void writeSingleBatch(VectorSchemaRoot root, ArrowWriter writer) throws IOException {
+ writer.start();
+ writer.writeBatch();
+ writer.end();
+ }
+
+ private CheckedConsumer<ArrowFileReader> validateFileBatches(
+ int[] counts, BiConsumer<Integer, VectorSchemaRoot> validator) {
+ return (arrowReader) -> {
+ VectorSchemaRoot root = arrowReader.getVectorSchemaRoot();
+ VectorUnloader unloader = new VectorUnloader(root);
+ Schema schema = root.getSchema();
+ LOGGER.debug("reading schema: " + schema);
+ int i = 0;
+ List<ArrowBlock> recordBatches = arrowReader.getRecordBlocks();
+ assertEquals(counts.length, recordBatches.size());
+ long previousOffset = 0;
+ for (ArrowBlock rbBlock : recordBatches) {
+ assertTrue(rbBlock.getOffset() + " > " + previousOffset, rbBlock.getOffset() > previousOffset);
+ previousOffset = rbBlock.getOffset();
+ arrowReader.loadRecordBatch(rbBlock);
+ assertEquals("RB #" + i, counts[i], root.getRowCount());
+ validator.accept(counts[i], root);
+ try (final ArrowRecordBatch batch = unloader.getRecordBatch()) {
+ List<ArrowBuffer> buffersLayout = batch.getBuffersLayout();
+ for (ArrowBuffer arrowBuffer : buffersLayout) {
+ assertEquals(0, arrowBuffer.getOffset() % 8);
+ }
+ }
+ ++i;
+ }
+ };
+ }
+
+ private CheckedConsumer<ArrowStreamReader> validateStreamBatches(
+ int[] counts, BiConsumer<Integer, VectorSchemaRoot> validator) {
+ return (arrowReader) -> {
+ VectorSchemaRoot root = arrowReader.getVectorSchemaRoot();
+ VectorUnloader unloader = new VectorUnloader(root);
+ Schema schema = root.getSchema();
+ LOGGER.debug("reading schema: " + schema);
+ int i = 0;
+
+ for (int n = 0; n < counts.length; n++) {
+ assertTrue(arrowReader.loadNextBatch());
+ assertEquals("RB #" + i, counts[i], root.getRowCount());
+ validator.accept(counts[i], root);
+ try (final ArrowRecordBatch batch = unloader.getRecordBatch()) {
+ final List<ArrowBuffer> buffersLayout = batch.getBuffersLayout();
+ for (ArrowBuffer arrowBuffer : buffersLayout) {
+ assertEquals(0, arrowBuffer.getOffset() % 8);
+ }
+ }
+ ++i;
+ }
+ assertFalse(arrowReader.loadNextBatch());
+ };
+ }
+
+ @FunctionalInterface
+ interface CheckedConsumer<T> {
+ void accept(T t) throws Exception;
+ }
+
+ @FunctionalInterface
+ interface CheckedBiConsumer<T, U> {
+ void accept(T t, U u) throws Exception;
+ }
+
+ private void roundTrip(VectorSchemaRoot root, DictionaryProvider provider,
+ CheckedBiConsumer<VectorSchemaRoot, ArrowWriter> writer,
+ CheckedConsumer<? super ArrowFileReader> fileValidator,
+ CheckedConsumer<? super ArrowStreamReader> streamValidator) throws Exception {
+ final File temp = File.createTempFile("arrow-test-" + name + "-", ".arrow");
+ temp.deleteOnExit();
+ final ByteArrayOutputStream memoryStream = new ByteArrayOutputStream();
+ final Map<String, String> metadata = new HashMap<>();
+ metadata.put("foo", "bar");
+ try (final FileOutputStream fileStream = new FileOutputStream(temp);
+ final ArrowFileWriter fileWriter =
+ new ArrowFileWriter(root, provider, fileStream.getChannel(), metadata, writeOption);
+ final ArrowStreamWriter streamWriter =
+ new ArrowStreamWriter(root, provider, Channels.newChannel(memoryStream), writeOption)) {
+ writer.accept(root, fileWriter);
+ writer.accept(root, streamWriter);
+ }
+
+ MessageMetadataResult metadataResult = MessageSerializer.readMessage(
+ new ReadChannel(Channels.newChannel(new ByteArrayInputStream(memoryStream.toByteArray()))));
+ assertNotNull(metadataResult);
+ assertEquals(writeOption.metadataVersion.toFlatbufID(), metadataResult.getMessage().version());
+
+ try (
+ BufferAllocator readerAllocator = allocator.newChildAllocator("reader", 0, allocator.getLimit());
+ FileInputStream fileInputStream = new FileInputStream(temp);
+ ByteArrayInputStream inputStream = new ByteArrayInputStream(memoryStream.toByteArray());
+ ArrowFileReader fileReader = new ArrowFileReader(fileInputStream.getChannel(), readerAllocator);
+ ArrowStreamReader streamReader = new ArrowStreamReader(inputStream, readerAllocator)) {
+ fileValidator.accept(fileReader);
+ streamValidator.accept(streamReader);
+ assertEquals(writeOption.metadataVersion, fileReader.getFooter().getMetadataVersion());
+ assertEquals(metadata, fileReader.getMetaData());
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestUIntDictionaryRoundTrip.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestUIntDictionaryRoundTrip.java
new file mode 100644
index 000000000..6aa7a0c6d
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestUIntDictionaryRoundTrip.java
@@ -0,0 +1,246 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.ipc;
+
+import static org.apache.arrow.vector.testing.ValueVectorDataPopulator.setVector;
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.nio.channels.Channels;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Map;
+import java.util.function.ToIntBiFunction;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.UInt1Vector;
+import org.apache.arrow.vector.UInt2Vector;
+import org.apache.arrow.vector.UInt4Vector;
+import org.apache.arrow.vector.UInt8Vector;
+import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.VarCharVector;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.dictionary.Dictionary;
+import org.apache.arrow.vector.dictionary.DictionaryProvider;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.DictionaryEncoding;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.apache.arrow.vector.util.ByteArrayReadableSeekableByteChannel;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
+/**
+ * Test the round-trip of dictionary encoding,
+ * with unsigned integer as indices.
+ */
+@RunWith(Parameterized.class)
+public class TestUIntDictionaryRoundTrip {
+
+ private final boolean streamMode;
+
+ public TestUIntDictionaryRoundTrip(boolean streamMode) {
+ this.streamMode = streamMode;
+ }
+
+ private BufferAllocator allocator;
+
+ private DictionaryProvider.MapDictionaryProvider dictionaryProvider;
+
+ @Before
+ public void init() {
+ allocator = new RootAllocator(Long.MAX_VALUE);
+ dictionaryProvider = new DictionaryProvider.MapDictionaryProvider();
+ }
+
+ @After
+ public void terminate() throws Exception {
+ allocator.close();
+ }
+
+ private byte[] writeData(FieldVector encodedVector) throws IOException {
+ ByteArrayOutputStream out = new ByteArrayOutputStream();
+ VectorSchemaRoot root =
+ new VectorSchemaRoot(
+ Arrays.asList(encodedVector.getField()), Arrays.asList(encodedVector), encodedVector.getValueCount());
+ try (ArrowWriter writer = streamMode ?
+ new ArrowStreamWriter(root, dictionaryProvider, out) :
+ new ArrowFileWriter(root, dictionaryProvider, Channels.newChannel(out))) {
+ writer.start();
+ writer.writeBatch();
+ writer.end();
+
+ return out.toByteArray();
+ }
+ }
+
+ private void readData(
+ byte[] data,
+ Field expectedField,
+ ToIntBiFunction<ValueVector, Integer> valGetter,
+ long dictionaryID,
+ int[] expectedIndices,
+ String[] expectedDictItems) throws IOException {
+ try (ArrowReader reader = streamMode ?
+ new ArrowStreamReader(new ByteArrayInputStream(data), allocator) :
+ new ArrowFileReader(new SeekableReadChannel(new ByteArrayReadableSeekableByteChannel(data)), allocator)) {
+
+ // verify schema
+ Schema readSchema = reader.getVectorSchemaRoot().getSchema();
+ assertEquals(1, readSchema.getFields().size());
+ assertEquals(expectedField, readSchema.getFields().get(0));
+
+ // verify vector schema root
+ assertTrue(reader.loadNextBatch());
+ VectorSchemaRoot root = reader.getVectorSchemaRoot();
+
+ assertEquals(1, root.getFieldVectors().size());
+ ValueVector encodedVector = root.getVector(0);
+ assertEquals(expectedIndices.length, encodedVector.getValueCount());
+
+ for (int i = 0; i < expectedIndices.length; i++) {
+ assertEquals(expectedIndices[i], valGetter.applyAsInt(encodedVector, i));
+ }
+
+ // verify dictionary
+ Map<Long, Dictionary> dictVectors = reader.getDictionaryVectors();
+ assertEquals(1, dictVectors.size());
+ Dictionary dictionary = dictVectors.get(dictionaryID);
+ assertNotNull(dictionary);
+
+ assertTrue(dictionary.getVector() instanceof VarCharVector);
+ VarCharVector dictVector = (VarCharVector) dictionary.getVector();
+ assertEquals(expectedDictItems.length, dictVector.getValueCount());
+ for (int i = 0; i < dictVector.getValueCount(); i++) {
+ assertArrayEquals(expectedDictItems[i].getBytes(), dictVector.get(i));
+ }
+ }
+ }
+
+ private ValueVector createEncodedVector(int bitWidth, VarCharVector dictionaryVector) {
+ final DictionaryEncoding dictionaryEncoding =
+ new DictionaryEncoding(bitWidth, false, new ArrowType.Int(bitWidth, false));
+ Dictionary dictionary = new Dictionary(dictionaryVector, dictionaryEncoding);
+ dictionaryProvider.put(dictionary);
+
+ final FieldType type =
+ new FieldType(true, dictionaryEncoding.getIndexType(), dictionaryEncoding, null);
+ final Field field = new Field("encoded", type, null);
+ return field.createVector(allocator);
+ }
+
+ @Test
+ public void testUInt1RoundTrip() throws IOException {
+ final int vectorLength = UInt1Vector.MAX_UINT1 & UInt1Vector.PROMOTION_MASK;
+ try (VarCharVector dictionaryVector = new VarCharVector("dictionary", allocator);
+ UInt1Vector encodedVector1 = (UInt1Vector) createEncodedVector(8, dictionaryVector)) {
+ int[] indices = new int[vectorLength];
+ String[] dictionaryItems = new String[vectorLength];
+ for (int i = 0; i < vectorLength; i++) {
+ encodedVector1.setSafe(i, (byte) i);
+ indices[i] = i;
+ dictionaryItems[i] = String.valueOf(i);
+ }
+ encodedVector1.setValueCount(vectorLength);
+ setVector(dictionaryVector, dictionaryItems);
+ byte[] data = writeData(encodedVector1);
+ readData(
+ data, encodedVector1.getField(), (vector, index) -> (int) ((UInt1Vector) vector).getValueAsLong(index),
+ 8L, indices, dictionaryItems);
+ }
+ }
+
+ @Test
+ public void testUInt2RoundTrip() throws IOException {
+ try (VarCharVector dictionaryVector = new VarCharVector("dictionary", allocator);
+ UInt2Vector encodedVector2 = (UInt2Vector) createEncodedVector(16, dictionaryVector)) {
+ int[] indices = new int[]{1, 3, 5, 7, 9, UInt2Vector.MAX_UINT2};
+ String[] dictItems = new String[UInt2Vector.MAX_UINT2];
+ for (int i = 0; i < UInt2Vector.MAX_UINT2; i++) {
+ dictItems[i] = String.valueOf(i);
+ }
+
+ setVector(encodedVector2, (char) 1, (char) 3, (char) 5, (char) 7, (char) 9, UInt2Vector.MAX_UINT2);
+ setVector(dictionaryVector, dictItems);
+
+ byte[] data = writeData(encodedVector2);
+ readData(data, encodedVector2.getField(), (vector, index) -> (int) ((UInt2Vector) vector).getValueAsLong(index),
+ 16L, indices, dictItems);
+ }
+ }
+
+ @Test
+ public void testUInt4RoundTrip() throws IOException {
+ final int dictLength = 10;
+ try (VarCharVector dictionaryVector = new VarCharVector("dictionary", allocator);
+ UInt4Vector encodedVector4 = (UInt4Vector) createEncodedVector(32, dictionaryVector)) {
+ int[] indices = new int[]{1, 3, 5, 7, 9};
+ String[] dictItems = new String[dictLength];
+ for (int i = 0; i < dictLength; i++) {
+ dictItems[i] = String.valueOf(i);
+ }
+
+ setVector(encodedVector4, 1, 3, 5, 7, 9);
+ setVector(dictionaryVector, dictItems);
+
+ setVector(encodedVector4, 1, 3, 5, 7, 9);
+ byte[] data = writeData(encodedVector4);
+ readData(data, encodedVector4.getField(), (vector, index) -> (int) ((UInt4Vector) vector).getValueAsLong(index),
+ 32L, indices, dictItems);
+ }
+ }
+
+ @Test
+ public void testUInt8RoundTrip() throws IOException {
+ final int dictLength = 10;
+ try (VarCharVector dictionaryVector = new VarCharVector("dictionary", allocator);
+ UInt8Vector encodedVector8 = (UInt8Vector) createEncodedVector(64, dictionaryVector)) {
+ int[] indices = new int[]{1, 3, 5, 7, 9};
+ String[] dictItems = new String[dictLength];
+ for (int i = 0; i < dictLength; i++) {
+ dictItems[i] = String.valueOf(i);
+ }
+
+ setVector(encodedVector8, 1L, 3L, 5L, 7L, 9L);
+ setVector(dictionaryVector, dictItems);
+
+ byte[] data = writeData(encodedVector8);
+ readData(data, encodedVector8.getField(), (vector, index) -> (int) ((UInt8Vector) vector).getValueAsLong(index),
+ 64L, indices, dictItems);
+ }
+ }
+
+ @Parameterized.Parameters(name = "stream mode = {0}")
+ public static Collection<Object[]> getRepeat() {
+ return Arrays.asList(
+ new Object[]{true},
+ new Object[]{false}
+ );
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/ipc/message/TestMessageMetadataResult.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/ipc/message/TestMessageMetadataResult.java
new file mode 100644
index 000000000..ee5361547
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/ipc/message/TestMessageMetadataResult.java
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.ipc.message;
+
+import static org.junit.Assert.assertEquals;
+
+import java.nio.ByteBuffer;
+
+import org.junit.Test;
+
+public class TestMessageMetadataResult {
+
+ @Test
+ public void getMessageLength_returnsConstructValue() {
+ // This API is used by spark.
+ MessageMetadataResult result = new MessageMetadataResult(1, ByteBuffer.allocate(0),
+ new org.apache.arrow.flatbuf.Message());
+ assertEquals(result.getMessageLength(), 1);
+ }
+
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/pojo/TestConvert.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/pojo/TestConvert.java
new file mode 100644
index 000000000..5cc0d0800
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/pojo/TestConvert.java
@@ -0,0 +1,169 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.pojo;
+
+import static org.apache.arrow.vector.types.FloatingPointPrecision.DOUBLE;
+import static org.apache.arrow.vector.types.FloatingPointPrecision.SINGLE;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.util.Collections2;
+import org.apache.arrow.vector.complex.FixedSizeListVector;
+import org.apache.arrow.vector.complex.ListVector;
+import org.apache.arrow.vector.types.TimeUnit;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.UnionMode;
+import org.apache.arrow.vector.types.pojo.ArrowType.FloatingPoint;
+import org.apache.arrow.vector.types.pojo.ArrowType.Int;
+import org.apache.arrow.vector.types.pojo.ArrowType.List;
+import org.apache.arrow.vector.types.pojo.ArrowType.Struct;
+import org.apache.arrow.vector.types.pojo.ArrowType.Timestamp;
+import org.apache.arrow.vector.types.pojo.ArrowType.Union;
+import org.apache.arrow.vector.types.pojo.ArrowType.Utf8;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.junit.Test;
+
+import com.google.flatbuffers.FlatBufferBuilder;
+
+/**
+ * Test conversion between Flatbuf and Pojo field representations.
+ */
+public class TestConvert {
+
+ @Test
+ public void simple() {
+ Field initialField = new Field("a", FieldType.nullable(new Int(32, true)), null);
+ run(initialField);
+ }
+
+ @Test
+ public void complex() {
+ java.util.List<Field> children = new ArrayList<>();
+ children.add(new Field("child1", FieldType.nullable(Utf8.INSTANCE), null));
+ children.add(new Field("child2", FieldType.nullable(new FloatingPoint(SINGLE)), Collections.emptyList()));
+
+ Field initialField = new Field("a", FieldType.nullable(Struct.INSTANCE), children);
+ run(initialField);
+ }
+
+ @Test
+ public void list() throws Exception {
+ java.util.List<Field> children = new ArrayList<>();
+ try (BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE);
+ ListVector writeVector = ListVector.empty("list", allocator);
+ FixedSizeListVector writeFixedVector = FixedSizeListVector.empty("fixedlist", 5, allocator)) {
+ Field listVectorField = writeVector.getField();
+ children.add(listVectorField);
+ Field listFixedVectorField = writeFixedVector.getField();
+ children.add(listFixedVectorField);
+ }
+
+ Field initialField = new Field("a", FieldType.nullable(Struct.INSTANCE), children);
+ java.util.List<Field> parent = new ArrayList<>();
+ parent.add(initialField);
+ FlatBufferBuilder builder = new FlatBufferBuilder();
+ builder.finish(initialField.getField(builder));
+ org.apache.arrow.flatbuf.Field flatBufField = org.apache.arrow.flatbuf.Field.getRootAsField(builder.dataBuffer());
+ Field finalField = Field.convertField(flatBufField);
+ assertEquals(initialField, finalField);
+ assertFalse(finalField.toString().contains("[DEFAULT]"));
+
+ Schema initialSchema = new Schema(parent);
+ String jsonSchema = initialSchema.toJson();
+ String modifiedSchema = jsonSchema.replace("$data$", "[DEFAULT]");
+
+ Schema tempSchema = Schema.fromJSON(modifiedSchema);
+ FlatBufferBuilder schemaBuilder = new FlatBufferBuilder();
+ org.apache.arrow.vector.types.pojo.Schema schema =
+ new org.apache.arrow.vector.types.pojo.Schema(tempSchema.getFields());
+ schemaBuilder.finish(schema.getSchema(schemaBuilder));
+ Schema finalSchema = Schema.deserialize(ByteBuffer.wrap(schemaBuilder.sizedByteArray()));
+ assertFalse(finalSchema.toString().contains("[DEFAULT]"));
+ }
+
+ @Test
+ public void schema() {
+ java.util.List<Field> children = new ArrayList<>();
+ children.add(new Field("child1", FieldType.nullable(Utf8.INSTANCE), null));
+ children.add(new Field("child2", FieldType.nullable(new FloatingPoint(SINGLE)), Collections.emptyList()));
+ Schema initialSchema = new Schema(children);
+ run(initialSchema);
+ }
+
+ @Test
+ public void schemaMetadata() {
+ java.util.List<Field> children = new ArrayList<>();
+ children.add(new Field("child1", FieldType.nullable(Utf8.INSTANCE), null));
+ children.add(new Field("child2", FieldType.nullable(new FloatingPoint(SINGLE)), Collections.emptyList()));
+ Map<String, String> metadata = new HashMap<>();
+ metadata.put("key1", "value1");
+ metadata.put("key2", "value2");
+ Schema initialSchema = new Schema(children, metadata);
+ run(initialSchema);
+ }
+
+ @Test
+ public void nestedSchema() {
+ java.util.List<Field> children = new ArrayList<>();
+ children.add(new Field("child1", FieldType.nullable(Utf8.INSTANCE), null));
+ children.add(new Field("child2", FieldType.nullable(new FloatingPoint(SINGLE)), Collections.emptyList()));
+ children.add(new Field("child3", FieldType.nullable(new Struct()), Collections2.asImmutableList(
+ new Field("child3.1", FieldType.nullable(Utf8.INSTANCE), null),
+ new Field("child3.2", FieldType.nullable(new FloatingPoint(DOUBLE)), Collections.emptyList())
+ )));
+ children.add(new Field("child4", FieldType.nullable(new List()), Collections2.asImmutableList(
+ new Field("child4.1", FieldType.nullable(Utf8.INSTANCE), null)
+ )));
+ children.add(new Field("child5", FieldType.nullable(
+ new Union(UnionMode.Sparse, new int[] {MinorType.TIMESTAMPMILLI.ordinal(), MinorType.FLOAT8.ordinal()})),
+ Collections2.asImmutableList(
+ new Field("child5.1", FieldType.nullable(new Timestamp(TimeUnit.MILLISECOND, null)), null),
+ new Field("child5.2", FieldType.nullable(new FloatingPoint(DOUBLE)), Collections.emptyList()),
+ new Field("child5.3", FieldType.nullable(new Timestamp(TimeUnit.MILLISECOND, "UTC")), null)
+ )));
+ Schema initialSchema = new Schema(children);
+ run(initialSchema);
+ }
+
+ private void run(Field initialField) {
+ FlatBufferBuilder builder = new FlatBufferBuilder();
+ builder.finish(initialField.getField(builder));
+ org.apache.arrow.flatbuf.Field flatBufField = org.apache.arrow.flatbuf.Field.getRootAsField(builder.dataBuffer());
+ Field finalField = Field.convertField(flatBufField);
+ assertEquals(initialField, finalField);
+ }
+
+ private void run(Schema initialSchema) {
+ FlatBufferBuilder builder = new FlatBufferBuilder();
+ builder.finish(initialSchema.getSchema(builder));
+ org.apache.arrow.flatbuf.Schema flatBufSchema =
+ org.apache.arrow.flatbuf.Schema.getRootAsSchema(builder.dataBuffer());
+ Schema finalSchema = Schema.convertSchema(flatBufSchema);
+ assertEquals(initialSchema, finalSchema);
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/testing/RandomDataGenerator.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/testing/RandomDataGenerator.java
new file mode 100644
index 000000000..4b1094d28
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/testing/RandomDataGenerator.java
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.testing;
+
+import java.util.Random;
+import java.util.function.Supplier;
+
+/**
+ * Utility for generating random data.
+ */
+public class RandomDataGenerator {
+
+ static final Random random = new Random(0);
+
+ public static final Supplier<Byte> TINY_INT_GENERATOR = () -> (byte) random.nextInt();
+
+ public static final Supplier<Short> SMALL_INT_GENERATOR = () -> (short) random.nextInt();
+
+ public static final Supplier<Integer> INT_GENERATOR = () -> random.nextInt();
+
+ public static final Supplier<Long> LONG_GENERATOR = () -> random.nextLong();
+
+ public static final Supplier<Float> FLOAT_GENERATOR = () -> random.nextFloat();
+
+ public static final Supplier<Double> DOUBLE_GENERATOR = () -> random.nextDouble();
+
+ private RandomDataGenerator() {
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/testing/TestValueVectorPopulator.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/testing/TestValueVectorPopulator.java
new file mode 100644
index 000000000..f5d15e2c6
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/testing/TestValueVectorPopulator.java
@@ -0,0 +1,604 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.testing;
+
+import static junit.framework.TestCase.assertTrue;
+import static org.apache.arrow.vector.testing.ValueVectorDataPopulator.setVector;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.BigIntVector;
+import org.apache.arrow.vector.BitVector;
+import org.apache.arrow.vector.DateDayVector;
+import org.apache.arrow.vector.DateMilliVector;
+import org.apache.arrow.vector.DecimalVector;
+import org.apache.arrow.vector.DurationVector;
+import org.apache.arrow.vector.FixedSizeBinaryVector;
+import org.apache.arrow.vector.Float4Vector;
+import org.apache.arrow.vector.Float8Vector;
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.IntervalYearVector;
+import org.apache.arrow.vector.SmallIntVector;
+import org.apache.arrow.vector.TimeMicroVector;
+import org.apache.arrow.vector.TimeMilliVector;
+import org.apache.arrow.vector.TimeNanoVector;
+import org.apache.arrow.vector.TimeSecVector;
+import org.apache.arrow.vector.TimeStampMicroVector;
+import org.apache.arrow.vector.TimeStampMilliVector;
+import org.apache.arrow.vector.TimeStampNanoVector;
+import org.apache.arrow.vector.TimeStampSecVector;
+import org.apache.arrow.vector.TinyIntVector;
+import org.apache.arrow.vector.UInt1Vector;
+import org.apache.arrow.vector.UInt2Vector;
+import org.apache.arrow.vector.UInt4Vector;
+import org.apache.arrow.vector.UInt8Vector;
+import org.apache.arrow.vector.VarBinaryVector;
+import org.apache.arrow.vector.VarCharVector;
+import org.apache.arrow.vector.compare.VectorEqualsVisitor;
+import org.apache.arrow.vector.testing.ValueVectorDataPopulator;
+import org.apache.arrow.vector.types.TimeUnit;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestValueVectorPopulator {
+
+ private BufferAllocator allocator;
+
+ @Before
+ public void init() {
+ allocator = new RootAllocator(Long.MAX_VALUE);
+ }
+
+ @After
+ public void terminate() throws Exception {
+ allocator.close();
+ }
+
+ @Test
+ public void testPopulateBigIntVector() {
+ try (final BigIntVector vector1 = new BigIntVector("vector", allocator);
+ final BigIntVector vector2 = new BigIntVector("vector", allocator)) {
+
+ vector1.allocateNew(10);
+ for (int i = 0; i < 10; i++) {
+ if (i % 2 == 0) {
+ vector1.setNull(i);
+ } else {
+ vector1.set(i, i);
+ }
+ }
+ vector1.setValueCount(10);
+
+ setVector(vector2, null, 1L, null, 3L, null, 5L, null, 7L, null, 9L);
+ assertTrue(VectorEqualsVisitor.vectorEquals(vector1, vector2));
+ }
+ }
+
+ @Test
+ public void testPopulateBitVector() {
+ try (final BitVector vector1 = new BitVector("vector", allocator);
+ final BitVector vector2 = new BitVector("vector", allocator)) {
+
+ vector1.allocateNew(10);
+ for (int i = 0; i < 10; i++) {
+ if (i % 2 == 0) {
+ vector1.setNull(i);
+ } else {
+ vector1.set(i, i > 5 ? 0 : 1);
+ }
+ }
+ vector1.setValueCount(10);
+
+ setVector(vector2, null, 1, null, 1, null, 0, null, 0, null, 0);
+ assertTrue(VectorEqualsVisitor.vectorEquals(vector1, vector2));
+ }
+ }
+
+ @Test
+ public void testPopulateDateDayVector() {
+ try (final DateDayVector vector1 = new DateDayVector("vector", allocator);
+ final DateDayVector vector2 = new DateDayVector("vector", allocator)) {
+
+ vector1.allocateNew(10);
+ for (int i = 0; i < 10; i++) {
+ if (i % 2 == 0) {
+ vector1.setNull(i);
+ } else {
+ vector1.set(i, i * 10);
+ }
+ }
+ vector1.setValueCount(10);
+
+ setVector(vector2, null, 10, null, 30, null, 50, null, 70, null, 90);
+ assertTrue(VectorEqualsVisitor.vectorEquals(vector1, vector2));
+ }
+ }
+
+ @Test
+ public void testPopulateDateMilliVector() {
+ try (final DateMilliVector vector1 = new DateMilliVector("vector", allocator);
+ final DateMilliVector vector2 = new DateMilliVector("vector", allocator)) {
+
+ vector1.allocateNew(10);
+ for (int i = 0; i < 10; i++) {
+ if (i % 2 == 0) {
+ vector1.setNull(i);
+ } else {
+ vector1.set(i, i * 1000);
+ }
+ }
+ vector1.setValueCount(10);
+
+ setVector(vector2, null, 1000L, null, 3000L, null, 5000L, null, 7000L, null, 9000L);
+ assertTrue(VectorEqualsVisitor.vectorEquals(vector1, vector2));
+ }
+ }
+
+ @Test
+ public void testPopulateDecimalVector() {
+ try (final DecimalVector vector1 = new DecimalVector("vector", allocator, 10, 3);
+ final DecimalVector vector2 = new DecimalVector("vector", allocator, 10, 3)) {
+
+ vector1.allocateNew(10);
+ for (int i = 0; i < 10; i++) {
+ if (i % 2 == 0) {
+ vector1.setNull(i);
+ } else {
+ vector1.set(i, i);
+ }
+ }
+ vector1.setValueCount(10);
+
+ setVector(vector2, null, 1L, null, 3L, null, 5L, null, 7L, null, 9L);
+ assertTrue(VectorEqualsVisitor.vectorEquals(vector1, vector2));
+ }
+ }
+
+ @Test
+ public void testPopulateDurationVector() {
+ final FieldType fieldType = FieldType.nullable(new ArrowType.Duration(TimeUnit.SECOND));
+ try (final DurationVector vector1 = new DurationVector("vector", fieldType, allocator);
+ final DurationVector vector2 = new DurationVector("vector", fieldType, allocator)) {
+
+ vector1.allocateNew(10);
+ for (int i = 0; i < 10; i++) {
+ if (i % 2 == 0) {
+ vector1.setNull(i);
+ } else {
+ vector1.set(i, i);
+ }
+ }
+ vector1.setValueCount(10);
+
+ setVector(vector2, null, 1L, null, 3L, null, 5L, null, 7L, null, 9L);
+
+ assertTrue(VectorEqualsVisitor.vectorEquals(vector1, vector2));
+ }
+ }
+
+ @Test
+ public void testPopulateFixedSizeBinaryVector() {
+ try (final FixedSizeBinaryVector vector1 = new FixedSizeBinaryVector("vector", allocator, 5);
+ final FixedSizeBinaryVector vector2 = new FixedSizeBinaryVector("vector", allocator, 5)) {
+
+ vector1.allocateNew(10);
+ for (int i = 0; i < 10; i++) {
+ if (i % 2 == 0) {
+ vector1.setNull(i);
+ } else {
+ vector1.set(i, ("test" + i).getBytes());
+ }
+ }
+ vector1.setValueCount(10);
+
+ setVector(vector2, null, "test1".getBytes(), null, "test3".getBytes(), null, "test5".getBytes(), null,
+ "test7".getBytes(), null, "test9".getBytes());
+ assertTrue(VectorEqualsVisitor.vectorEquals(vector1, vector2));
+ }
+ }
+
+ @Test
+ public void testPopulateFloat4Vector() {
+ try (final Float4Vector vector1 = new Float4Vector("vector", allocator);
+ final Float4Vector vector2 = new Float4Vector("vector", allocator)) {
+
+ vector1.allocateNew(10);
+ for (int i = 0; i < 10; i++) {
+ if (i % 2 == 0) {
+ vector1.setNull(i);
+ } else {
+ vector1.set(i, i);
+ }
+ }
+ vector1.setValueCount(10);
+ setVector(vector2, null, 1f, null, 3f, null, 5f, null, 7f, null, 9f);
+ assertTrue(VectorEqualsVisitor.vectorEquals(vector1, vector2));
+ }
+ }
+
+ @Test
+ public void testPopulateFloat8Vector() {
+ try (final Float8Vector vector1 = new Float8Vector("vector", allocator);
+ final Float8Vector vector2 = new Float8Vector("vector", allocator)) {
+
+ vector1.allocateNew(10);
+ for (int i = 0; i < 10; i++) {
+ if (i % 2 == 0) {
+ vector1.setNull(i);
+ } else {
+ vector1.set(i, i);
+ }
+ }
+ vector1.setValueCount(10);
+ setVector(vector2, null, 1d, null, 3d, null, 5d, null, 7d, null, 9d);
+ assertTrue(VectorEqualsVisitor.vectorEquals(vector1, vector2));
+ }
+ }
+
+ @Test
+ public void testPopulateIntVector() {
+ try (final IntVector vector1 = new IntVector("vector", allocator);
+ final IntVector vector2 = new IntVector("vector", allocator)) {
+
+ vector1.allocateNew(10);
+ for (int i = 0; i < 10; i++) {
+ if (i % 2 == 0) {
+ vector1.setNull(i);
+ } else {
+ vector1.set(i, i);
+ }
+ }
+ vector1.setValueCount(10);
+
+ ValueVectorDataPopulator.setVector(vector2, null, 1, null, 3, null, 5, null, 7, null, 9);
+ assertTrue(VectorEqualsVisitor.vectorEquals(vector1, vector2));
+ }
+ }
+
+ @Test
+ public void testPopulateSmallIntVector() {
+ try (final SmallIntVector vector1 = new SmallIntVector("vector", allocator);
+ final SmallIntVector vector2 = new SmallIntVector("vector", allocator)) {
+
+ vector1.allocateNew(10);
+ for (int i = 0; i < 10; i++) {
+ if (i % 2 == 0) {
+ vector1.setNull(i);
+ } else {
+ vector1.set(i, i);
+ }
+ }
+ vector1.setValueCount(10);
+
+ ValueVectorDataPopulator.setVector(vector2, null, (short) 1, null, (short) 3, null, (short) 5,
+ null, (short) 7, null, (short) 9);
+ assertTrue(VectorEqualsVisitor.vectorEquals(vector1, vector2));
+ }
+ }
+
+ @Test
+ public void testPopulateIntervalDayVector() {
+ try (final IntervalYearVector vector1 = new IntervalYearVector("vector", allocator);
+ final IntervalYearVector vector2 = new IntervalYearVector("vector", allocator)) {
+
+ vector1.allocateNew(10);
+ for (int i = 0; i < 10; i++) {
+ if (i % 2 == 0) {
+ vector1.setNull(i);
+ } else {
+ vector1.set(i, i);
+ }
+ }
+ vector1.setValueCount(10);
+
+ ValueVectorDataPopulator.setVector(vector2, null, 1, null, 3, null, 5, null, 7, null, 9);
+ assertTrue(VectorEqualsVisitor.vectorEquals(vector1, vector2));
+ }
+ }
+
+ @Test
+ public void testPopulateTimeMicroVector() {
+ try (final TimeMicroVector vector1 = new TimeMicroVector("vector", allocator);
+ final TimeMicroVector vector2 = new TimeMicroVector("vector", allocator)) {
+
+ vector1.allocateNew(10);
+ for (int i = 0; i < 10; i++) {
+ if (i % 2 == 0) {
+ vector1.setNull(i);
+ } else {
+ vector1.set(i, i * 10000);
+ }
+ }
+ vector1.setValueCount(10);
+ setVector(vector2, null, 10000L, null, 30000L, null, 50000L, null, 70000L, null, 90000L);
+ assertTrue(VectorEqualsVisitor.vectorEquals(vector1, vector2));
+ }
+ }
+
+ @Test
+ public void testPopulateTimeMilliVector() {
+ try (final TimeMilliVector vector1 = new TimeMilliVector("vector", allocator);
+ final TimeMilliVector vector2 = new TimeMilliVector("vector", allocator)) {
+
+ vector1.allocateNew(10);
+ for (int i = 0; i < 10; i++) {
+ if (i % 2 == 0) {
+ vector1.setNull(i);
+ } else {
+ vector1.set(i, i * 100);
+ }
+ }
+ vector1.setValueCount(10);
+ setVector(vector2, null, 100, null, 300, null, 500, null, 700, null, 900);
+ assertTrue(VectorEqualsVisitor.vectorEquals(vector1, vector2));
+ }
+ }
+
+ @Test
+ public void testPopulateTimeNanoVector() {
+ try (final TimeNanoVector vector1 = new TimeNanoVector("vector", allocator);
+ final TimeNanoVector vector2 = new TimeNanoVector("vector", allocator)) {
+
+ vector1.allocateNew(10);
+ for (int i = 0; i < 10; i++) {
+ if (i % 2 == 0) {
+ vector1.setNull(i);
+ } else {
+ vector1.set(i, i * 10000);
+ }
+ }
+ vector1.setValueCount(10);
+ setVector(vector2, null, 10000L, null, 30000L, null, 50000L, null, 70000L, null, 90000L);
+ assertTrue(VectorEqualsVisitor.vectorEquals(vector1, vector2));
+ }
+ }
+
+ @Test
+ public void testPopulateTimeSecVector() {
+ try (final TimeSecVector vector1 = new TimeSecVector("vector", allocator);
+ final TimeSecVector vector2 = new TimeSecVector("vector", allocator)) {
+
+ vector1.allocateNew(10);
+ for (int i = 0; i < 10; i++) {
+ if (i % 2 == 0) {
+ vector1.setNull(i);
+ } else {
+ vector1.set(i, i * 100);
+ }
+ }
+ vector1.setValueCount(10);
+ setVector(vector2, null, 100, null, 300, null, 500, null, 700, null, 900);
+ assertTrue(VectorEqualsVisitor.vectorEquals(vector1, vector2));
+ }
+ }
+
+ @Test
+ public void testPopulateTimeStampMicroVector() {
+ try (final TimeStampMicroVector vector1 = new TimeStampMicroVector("vector", allocator);
+ final TimeStampMicroVector vector2 = new TimeStampMicroVector("vector", allocator)) {
+
+ vector1.allocateNew(10);
+ for (int i = 0; i < 10; i++) {
+ if (i % 2 == 0) {
+ vector1.setNull(i);
+ } else {
+ vector1.set(i, i * 10000);
+ }
+ }
+ vector1.setValueCount(10);
+ setVector(vector2, null, 10000L, null, 30000L, null, 50000L, null, 70000L, null, 90000L);
+ assertTrue(VectorEqualsVisitor.vectorEquals(vector1, vector2));
+ }
+ }
+
+ @Test
+ public void testPopulateTimeStampMilliVector() {
+ try (final TimeStampMilliVector vector1 = new TimeStampMilliVector("vector", allocator);
+ final TimeStampMilliVector vector2 = new TimeStampMilliVector("vector", allocator)) {
+
+ vector1.allocateNew(10);
+ for (int i = 0; i < 10; i++) {
+ if (i % 2 == 0) {
+ vector1.setNull(i);
+ } else {
+ vector1.set(i, i * 10000);
+ }
+ }
+ vector1.setValueCount(10);
+ setVector(vector2, null, 10000L, null, 30000L, null, 50000L, null, 70000L, null, 90000L);
+ assertTrue(VectorEqualsVisitor.vectorEquals(vector1, vector2));
+ }
+ }
+
+ @Test
+ public void testPopulateTimeStampNanoVector() {
+ try (final TimeStampNanoVector vector1 = new TimeStampNanoVector("vector", allocator);
+ final TimeStampNanoVector vector2 = new TimeStampNanoVector("vector", allocator)) {
+
+ vector1.allocateNew(10);
+ for (int i = 0; i < 10; i++) {
+ if (i % 2 == 0) {
+ vector1.setNull(i);
+ } else {
+ vector1.set(i, i * 10000);
+ }
+ }
+ vector1.setValueCount(10);
+ setVector(vector2, null, 10000L, null, 30000L, null, 50000L, null, 70000L, null, 90000L);
+ assertTrue(VectorEqualsVisitor.vectorEquals(vector1, vector2));
+ }
+ }
+
+ @Test
+ public void testPopulateTimeStampSecVector() {
+ try (final TimeStampSecVector vector1 = new TimeStampSecVector("vector", allocator);
+ final TimeStampSecVector vector2 = new TimeStampSecVector("vector", allocator)) {
+
+ vector1.allocateNew(10);
+ for (int i = 0; i < 10; i++) {
+ if (i % 2 == 0) {
+ vector1.setNull(i);
+ } else {
+ vector1.set(i, i * 100);
+ }
+ }
+ vector1.setValueCount(10);
+ setVector(vector2, null, 100L, null, 300L, null, 500L, null, 700L, null, 900L);
+ assertTrue(VectorEqualsVisitor.vectorEquals(vector1, vector2));
+ }
+ }
+
+ @Test
+ public void testPopulateTinyIntVector() {
+ try (final TinyIntVector vector1 = new TinyIntVector("vector", allocator);
+ final TinyIntVector vector2 = new TinyIntVector("vector", allocator)) {
+
+ vector1.allocateNew(10);
+ for (int i = 0; i < 10; i++) {
+ if (i % 2 == 0) {
+ vector1.setNull(i);
+ } else {
+ vector1.set(i, i);
+ }
+ }
+ vector1.setValueCount(10);
+ setVector(vector2, null, (byte) 1, null, (byte) 3, null, (byte) 5, null, (byte) 7, null, (byte) 9);
+ assertTrue(VectorEqualsVisitor.vectorEquals(vector1, vector2));
+ }
+ }
+
+ @Test
+ public void testPopulateUInt1Vector() {
+ try (final UInt1Vector vector1 = new UInt1Vector("vector", allocator);
+ final UInt1Vector vector2 = new UInt1Vector("vector", allocator)) {
+
+ vector1.allocateNew(10);
+ for (int i = 0; i < 10; i++) {
+ if (i % 2 == 0) {
+ vector1.setNull(i);
+ } else {
+ vector1.set(i, i);
+ }
+ }
+ vector1.setValueCount(10);
+ setVector(vector2, null, (byte) 1, null, (byte) 3, null, (byte) 5, null, (byte) 7, null, (byte) 9);
+ assertTrue(VectorEqualsVisitor.vectorEquals(vector1, vector2));
+ }
+ }
+
+ @Test
+ public void testPopulateUInt2Vector() {
+ try (final UInt2Vector vector1 = new UInt2Vector("vector", allocator);
+ final UInt2Vector vector2 = new UInt2Vector("vector", allocator)) {
+
+ vector1.allocateNew(10);
+ for (int i = 0; i < 10; i++) {
+ if (i % 2 == 0) {
+ vector1.setNull(i);
+ } else {
+ vector1.set(i, i);
+ }
+ }
+ vector1.setValueCount(10);
+ setVector(vector2, null, (char) 1, null, (char) 3, null, (char) 5, null, (char) 7, null, (char) 9);
+ assertTrue(VectorEqualsVisitor.vectorEquals(vector1, vector2));
+ }
+ }
+
+ @Test
+ public void testPopulateUInt4Vector() {
+ try (final UInt4Vector vector1 = new UInt4Vector("vector", allocator);
+ final UInt4Vector vector2 = new UInt4Vector("vector", allocator)) {
+
+ vector1.allocateNew(10);
+ for (int i = 0; i < 10; i++) {
+ if (i % 2 == 0) {
+ vector1.setNull(i);
+ } else {
+ vector1.set(i, i);
+ }
+ }
+ vector1.setValueCount(10);
+ setVector(vector2, null, 1, null, 3, null, 5, null, 7, null, 9);
+ assertTrue(VectorEqualsVisitor.vectorEquals(vector1, vector2));
+ }
+ }
+
+ @Test
+ public void testPopulateUInt8Vector() {
+ try (final UInt8Vector vector1 = new UInt8Vector("vector", allocator);
+ final UInt8Vector vector2 = new UInt8Vector("vector", allocator)) {
+
+ vector1.allocateNew(10);
+ for (int i = 0; i < 10; i++) {
+ if (i % 2 == 0) {
+ vector1.setNull(i);
+ } else {
+ vector1.set(i, i);
+ }
+ }
+ vector1.setValueCount(10);
+ setVector(vector2, null, 1L, null, 3L, null, 5L, null, 7L, null, 9L);
+ assertTrue(VectorEqualsVisitor.vectorEquals(vector1, vector2));
+ }
+ }
+
+ @Test
+ public void testPopulateVarBinaryVector() {
+ try (final VarBinaryVector vector1 = new VarBinaryVector("vector", allocator);
+ final VarBinaryVector vector2 = new VarBinaryVector("vector", allocator)) {
+
+ vector1.allocateNew(10);
+ for (int i = 0; i < 10; i++) {
+ if (i % 2 == 0) {
+ vector1.setNull(i);
+ } else {
+ vector1.set(i, ("test" + i).getBytes());
+ }
+ }
+ vector1.setValueCount(10);
+
+ setVector(vector2, null, "test1".getBytes(), null, "test3".getBytes(), null, "test5".getBytes(), null,
+ "test7".getBytes(), null, "test9".getBytes());
+ assertTrue(VectorEqualsVisitor.vectorEquals(vector1, vector2));
+ }
+ }
+
+ @Test
+ public void testPopulateVarCharVector() {
+ try (final VarCharVector vector1 = new VarCharVector("vector", allocator);
+ final VarCharVector vector2 = new VarCharVector("vector", allocator)) {
+
+ vector1.allocateNew(10);
+ for (int i = 0; i < 10; i++) {
+ if (i % 2 == 0) {
+ vector1.setNull(i);
+ } else {
+ vector1.set(i, ("test" + i).getBytes());
+ }
+ }
+ vector1.setValueCount(10);
+
+ setVector(vector2, null, "test1", null, "test3", null, "test5", null, "test7", null, "test9");
+ assertTrue(VectorEqualsVisitor.vectorEquals(vector1, vector2));
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/testing/ValueVectorDataPopulator.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/testing/ValueVectorDataPopulator.java
new file mode 100644
index 000000000..15d6a5cf9
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/testing/ValueVectorDataPopulator.java
@@ -0,0 +1,708 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.testing;
+
+import static org.junit.Assert.assertEquals;
+
+import java.nio.charset.StandardCharsets;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+
+import org.apache.arrow.vector.BigIntVector;
+import org.apache.arrow.vector.BitVector;
+import org.apache.arrow.vector.BitVectorHelper;
+import org.apache.arrow.vector.DateDayVector;
+import org.apache.arrow.vector.DateMilliVector;
+import org.apache.arrow.vector.DecimalVector;
+import org.apache.arrow.vector.DurationVector;
+import org.apache.arrow.vector.FixedSizeBinaryVector;
+import org.apache.arrow.vector.Float4Vector;
+import org.apache.arrow.vector.Float8Vector;
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.IntervalDayVector;
+import org.apache.arrow.vector.IntervalYearVector;
+import org.apache.arrow.vector.LargeVarCharVector;
+import org.apache.arrow.vector.SmallIntVector;
+import org.apache.arrow.vector.TimeMicroVector;
+import org.apache.arrow.vector.TimeMilliVector;
+import org.apache.arrow.vector.TimeNanoVector;
+import org.apache.arrow.vector.TimeSecVector;
+import org.apache.arrow.vector.TimeStampMicroTZVector;
+import org.apache.arrow.vector.TimeStampMicroVector;
+import org.apache.arrow.vector.TimeStampMilliTZVector;
+import org.apache.arrow.vector.TimeStampMilliVector;
+import org.apache.arrow.vector.TimeStampNanoTZVector;
+import org.apache.arrow.vector.TimeStampNanoVector;
+import org.apache.arrow.vector.TimeStampSecTZVector;
+import org.apache.arrow.vector.TimeStampSecVector;
+import org.apache.arrow.vector.TinyIntVector;
+import org.apache.arrow.vector.UInt1Vector;
+import org.apache.arrow.vector.UInt2Vector;
+import org.apache.arrow.vector.UInt4Vector;
+import org.apache.arrow.vector.UInt8Vector;
+import org.apache.arrow.vector.VarBinaryVector;
+import org.apache.arrow.vector.VarCharVector;
+import org.apache.arrow.vector.complex.BaseRepeatedValueVector;
+import org.apache.arrow.vector.complex.FixedSizeListVector;
+import org.apache.arrow.vector.complex.LargeListVector;
+import org.apache.arrow.vector.complex.ListVector;
+import org.apache.arrow.vector.complex.StructVector;
+import org.apache.arrow.vector.holders.IntervalDayHolder;
+import org.apache.arrow.vector.types.Types;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.FieldType;
+
+/**
+ * Utility for populating {@link org.apache.arrow.vector.ValueVector}.
+ */
+public class ValueVectorDataPopulator {
+
+ private ValueVectorDataPopulator(){}
+
+ /**
+ * Populate values for BigIntVector.
+ */
+ public static void setVector(BigIntVector vector, Long... values) {
+ final int length = values.length;
+ vector.allocateNew(length);
+ for (int i = 0; i < length; i++) {
+ if (values[i] != null) {
+ vector.set(i, values[i]);
+ }
+ }
+ vector.setValueCount(length);
+ }
+
+ /**
+ * Populate values for BitVector.
+ */
+ public static void setVector(BitVector vector, Integer... values) {
+ final int length = values.length;
+ vector.allocateNew(length);
+ for (int i = 0; i < length; i++) {
+ if (values[i] != null) {
+ vector.set(i, values[i]);
+ }
+ }
+ vector.setValueCount(length);
+ }
+
+ /**
+ * Populate values for DateDayVector.
+ * @param values numbers of days since UNIX epoch
+ */
+ public static void setVector(DateDayVector vector, Integer... values) {
+ final int length = values.length;
+ vector.allocateNew(length);
+ for (int i = 0; i < length; i++) {
+ if (values[i] != null) {
+ vector.set(i, values[i]);
+ }
+ }
+ vector.setValueCount(length);
+ }
+
+ /**
+ * Populate values for DateMilliVector.
+ * @param values numbers of milliseconds since UNIX epoch
+ */
+ public static void setVector(DateMilliVector vector, Long... values) {
+ final int length = values.length;
+ vector.allocateNew(length);
+ for (int i = 0; i < length; i++) {
+ if (values[i] != null) {
+ vector.set(i, values[i]);
+ }
+ }
+ vector.setValueCount(length);
+ }
+
+ /**
+ * Populate values for DecimalVector.
+ */
+ public static void setVector(DecimalVector vector, Long... values) {
+ final int length = values.length;
+ vector.allocateNew(length);
+ for (int i = 0; i < length; i++) {
+ if (values[i] != null) {
+ vector.set(i, values[i]);
+ }
+ }
+ vector.setValueCount(length);
+ }
+
+ /**
+ * Populate values for DurationVector.
+ * @param values values of elapsed time in either seconds, milliseconds, microseconds or nanoseconds.
+ */
+ public static void setVector(DurationVector vector, Long... values) {
+ final int length = values.length;
+ vector.allocateNew(length);
+ for (int i = 0; i < length; i++) {
+ if (values[i] != null) {
+ vector.set(i, values[i]);
+ }
+ }
+ vector.setValueCount(length);
+ }
+
+ /**
+ * Populate values for FixedSizeBinaryVector.
+ */
+ public static void setVector(FixedSizeBinaryVector vector, byte[]... values) {
+ final int length = values.length;
+ vector.allocateNewSafe();
+ for (int i = 0; i < length; i++) {
+ if (values[i] != null) {
+ vector.set(i, values[i]);
+ }
+ }
+ vector.setValueCount(length);
+ }
+
+ /**
+ * Populate values for Float4Vector.
+ */
+ public static void setVector(Float4Vector vector, Float... values) {
+ final int length = values.length;
+ vector.allocateNew(length);
+ for (int i = 0; i < length; i++) {
+ if (values[i] != null) {
+ vector.set(i, values[i]);
+ }
+ }
+ vector.setValueCount(length);
+ }
+
+ /**
+ * Populate values for Float8Vector.
+ */
+ public static void setVector(Float8Vector vector, Double... values) {
+ final int length = values.length;
+ vector.allocateNew(length);
+ for (int i = 0; i < length; i++) {
+ if (values[i] != null) {
+ vector.set(i, values[i]);
+ }
+ }
+ vector.setValueCount(length);
+ }
+
+ /**
+ * Populate values for IntVector.
+ */
+ public static void setVector(IntVector vector, Integer... values) {
+ final int length = values.length;
+ vector.allocateNew(length);
+ for (int i = 0; i < length; i++) {
+ if (values[i] != null) {
+ vector.set(i, values[i]);
+ }
+ }
+ vector.setValueCount(length);
+ }
+
+ /**
+ * Populate values for IntervalDayVector.
+ * @param values holders witch holds days and milliseconds values which represents interval in SQL style.
+ */
+ public static void setVector(IntervalDayVector vector, IntervalDayHolder... values) {
+ final int length = values.length;
+ vector.allocateNew(length);
+ for (int i = 0; i < length; i++) {
+ if (values[i] != null) {
+ vector.set(i, values[i].days, values[i].milliseconds);
+ }
+ }
+ vector.setValueCount(length);
+ }
+
+ /**
+ * Populate values for IntervalYearVector.
+ * @param values total month intervals in SQL style.
+ */
+ public static void setVector(IntervalYearVector vector, Integer... values) {
+ final int length = values.length;
+ vector.allocateNew(length);
+ for (int i = 0; i < length; i++) {
+ if (values[i] != null) {
+ vector.set(i, values[i]);
+ }
+ }
+ vector.setValueCount(length);
+ }
+
+ /**
+ * Populate values for SmallIntVector.
+ */
+ public static void setVector(SmallIntVector vector, Short... values) {
+ final int length = values.length;
+ vector.allocateNew(length);
+ for (int i = 0; i < length; i++) {
+ if (values[i] != null) {
+ vector.set(i, values[i]);
+ }
+ }
+ vector.setValueCount(length);
+ }
+
+ /**
+ * Populate values for TimeMicroVector.
+ * @param values numbers of microseconds since UNIX epoch
+ */
+ public static void setVector(TimeMicroVector vector, Long... values) {
+ final int length = values.length;
+ vector.allocateNew(length);
+ for (int i = 0; i < length; i++) {
+ if (values[i] != null) {
+ vector.set(i, values[i]);
+ }
+ }
+ vector.setValueCount(length);
+ }
+
+ /**
+ * Populate values for TimeMicroVector.
+ * @param values numbers of milliseconds since UNIX epoch
+ */
+ public static void setVector(TimeMilliVector vector, Integer... values) {
+ final int length = values.length;
+ vector.allocateNew(length);
+ for (int i = 0; i < length; i++) {
+ if (values[i] != null) {
+ vector.set(i, values[i]);
+ }
+ }
+ vector.setValueCount(length);
+ }
+
+ /**
+ * Populate values for TimeNanoVector.
+ * @param values numbers of nanoseconds since UNIX epoch
+ */
+ public static void setVector(TimeNanoVector vector, Long... values) {
+ final int length = values.length;
+ vector.allocateNew(length);
+ for (int i = 0; i < length; i++) {
+ if (values[i] != null) {
+ vector.set(i, values[i]);
+ }
+ }
+ vector.setValueCount(length);
+ }
+
+ /**
+ * Populate values for TimeSecVector.
+ * @param values numbers of seconds since UNIX epoch
+ */
+ public static void setVector(TimeSecVector vector, Integer... values) {
+ final int length = values.length;
+ vector.allocateNew(length);
+ for (int i = 0; i < length; i++) {
+ if (values[i] != null) {
+ vector.set(i, values[i]);
+ }
+ }
+ vector.setValueCount(length);
+ }
+
+ /**
+ * Populate values for TimeStampMicroTZVector.
+ * @param values numbers of microseconds since UNIX epoch
+ */
+ public static void setVector(TimeStampMicroTZVector vector, Long... values) {
+ final int length = values.length;
+ vector.allocateNew(length);
+ for (int i = 0; i < length; i++) {
+ if (values[i] != null) {
+ vector.set(i, values[i]);
+ }
+ }
+ vector.setValueCount(length);
+ }
+
+ /**
+ * Populate values for TimeStampMicroVector.
+ * @param values numbers of microseconds since UNIX epoch
+ */
+ public static void setVector(TimeStampMicroVector vector, Long... values) {
+ final int length = values.length;
+ vector.allocateNew(length);
+ for (int i = 0; i < length; i++) {
+ if (values[i] != null) {
+ vector.set(i, values[i]);
+ }
+ }
+ vector.setValueCount(length);
+ }
+
+ /**
+ * Populate values for TimeStampMilliTZVector.
+ * @param values numbers of milliseconds since UNIX epoch
+ */
+ public static void setVector(TimeStampMilliTZVector vector, Long... values) {
+ final int length = values.length;
+ vector.allocateNew(length);
+ for (int i = 0; i < length; i++) {
+ if (values[i] != null) {
+ vector.set(i, values[i]);
+ }
+ }
+ vector.setValueCount(length);
+ }
+
+ /**
+ * Populate values for TimeStampMilliVector.
+ * @param values numbers of milliseconds since UNIX epoch
+ */
+ public static void setVector(TimeStampMilliVector vector, Long... values) {
+ final int length = values.length;
+ vector.allocateNew(length);
+ for (int i = 0; i < length; i++) {
+ if (values[i] != null) {
+ vector.set(i, values[i]);
+ }
+ }
+ vector.setValueCount(length);
+ }
+
+ /**
+ * Populate values for TimeStampNanoTZVector.
+ * @param values numbers of nanoseconds since UNIX epoch
+ */
+ public static void setVector(TimeStampNanoTZVector vector, Long... values) {
+ final int length = values.length;
+ vector.allocateNew(length);
+ for (int i = 0; i < length; i++) {
+ if (values[i] != null) {
+ vector.set(i, values[i]);
+ }
+ }
+ vector.setValueCount(length);
+ }
+
+ /**
+ * Populate values for TimeStampNanoVector.
+ * @param values numbers of nanoseconds since UNIX epoch
+ */
+ public static void setVector(TimeStampNanoVector vector, Long... values) {
+ final int length = values.length;
+ vector.allocateNew(length);
+ for (int i = 0; i < length; i++) {
+ if (values[i] != null) {
+ vector.set(i, values[i]);
+ }
+ }
+ vector.setValueCount(length);
+ }
+
+ /**
+ * Populate values for TimeStampSecTZVector.
+ * @param values numbers of seconds since UNIX epoch
+ */
+ public static void setVector(TimeStampSecTZVector vector, Long... values) {
+ final int length = values.length;
+ vector.allocateNew(length);
+ for (int i = 0; i < length; i++) {
+ if (values[i] != null) {
+ vector.set(i, values[i]);
+ }
+ }
+ vector.setValueCount(length);
+ }
+
+ /**
+ * Populate values for TimeStampSecVector.
+ * @param values numbers of seconds since UNIX epoch
+ */
+ public static void setVector(TimeStampSecVector vector, Long... values) {
+ final int length = values.length;
+ vector.allocateNew(length);
+ for (int i = 0; i < length; i++) {
+ if (values[i] != null) {
+ vector.set(i, values[i]);
+ }
+ }
+ vector.setValueCount(length);
+ }
+
+ /**
+ * Populate values for TinyIntVector.
+ */
+ public static void setVector(TinyIntVector vector, Byte... values) {
+ final int length = values.length;
+ vector.allocateNew(length);
+ for (int i = 0; i < length; i++) {
+ if (values[i] != null) {
+ vector.set(i, values[i]);
+ }
+ }
+ vector.setValueCount(length);
+ }
+
+ /**
+ * Populate values for UInt1Vector.
+ */
+ public static void setVector(UInt1Vector vector, Byte... values) {
+ final int length = values.length;
+ vector.allocateNew(length);
+ for (int i = 0; i < length; i++) {
+ if (values[i] != null) {
+ vector.set(i, values[i]);
+ }
+ }
+ vector.setValueCount(length);
+ }
+
+ /**
+ * Populate values for UInt2Vector.
+ */
+ public static void setVector(UInt2Vector vector, Character... values) {
+ final int length = values.length;
+ vector.allocateNew(length);
+ for (int i = 0; i < length; i++) {
+ if (values[i] != null) {
+ vector.set(i, values[i]);
+ }
+ }
+ vector.setValueCount(length);
+ }
+
+ /**
+ * Populate values for UInt4Vector.
+ */
+ public static void setVector(UInt4Vector vector, Integer... values) {
+ final int length = values.length;
+ vector.allocateNew(length);
+ for (int i = 0; i < length; i++) {
+ if (values[i] != null) {
+ vector.set(i, values[i]);
+ }
+ }
+ vector.setValueCount(length);
+ }
+
+ /**
+ * Populate values for UInt8Vector.
+ */
+ public static void setVector(UInt8Vector vector, Long... values) {
+ final int length = values.length;
+ vector.allocateNew(length);
+ for (int i = 0; i < length; i++) {
+ if (values[i] != null) {
+ vector.set(i, values[i]);
+ }
+ }
+ vector.setValueCount(length);
+ }
+
+ /**
+ * Populate values for VarBinaryVector.
+ */
+ public static void setVector(VarBinaryVector vector, byte[]... values) {
+ final int length = values.length;
+ vector.allocateNewSafe();
+ for (int i = 0; i < length; i++) {
+ if (values[i] != null) {
+ vector.set(i, values[i]);
+ }
+ }
+ vector.setValueCount(length);
+ }
+
+ /**
+ * Populate values for VarCharVector.
+ */
+ public static void setVector(VarCharVector vector, byte[]... values) {
+ final int length = values.length;
+ vector.allocateNewSafe();
+ for (int i = 0; i < length; i++) {
+ if (values[i] != null) {
+ vector.set(i, values[i]);
+ }
+ }
+ vector.setValueCount(length);
+ }
+
+ /**
+ * Populate values for LargeVarCharVector.
+ */
+ public static void setVector(LargeVarCharVector vector, byte[]... values) {
+ final int length = values.length;
+ vector.allocateNewSafe();
+ for (int i = 0; i < length; i++) {
+ if (values[i] != null) {
+ vector.set(i, values[i]);
+ }
+ }
+ vector.setValueCount(length);
+ }
+
+ /**
+ * Populate values for VarCharVector.
+ */
+ public static void setVector(VarCharVector vector, String... values) {
+ final int length = values.length;
+ vector.allocateNewSafe();
+ for (int i = 0; i < length; i++) {
+ if (values[i] != null) {
+ vector.setSafe(i, values[i].getBytes(StandardCharsets.UTF_8));
+ }
+ }
+ vector.setValueCount(length);
+ }
+
+ /**
+ * Populate values for LargeVarCharVector.
+ */
+ public static void setVector(LargeVarCharVector vector, String... values) {
+ final int length = values.length;
+ vector.allocateNewSafe();
+ for (int i = 0; i < length; i++) {
+ if (values[i] != null) {
+ vector.setSafe(i, values[i].getBytes(StandardCharsets.UTF_8));
+ }
+ }
+ vector.setValueCount(length);
+ }
+
+ /**
+ * Populate values for {@link ListVector}.
+ */
+ public static void setVector(ListVector vector, List<Integer>... values) {
+ vector.allocateNewSafe();
+ Types.MinorType type = Types.MinorType.INT;
+ vector.addOrGetVector(FieldType.nullable(type.getType()));
+
+ IntVector dataVector = (IntVector) vector.getDataVector();
+ dataVector.allocateNew();
+
+ // set underlying vectors
+ int curPos = 0;
+ vector.getOffsetBuffer().setInt(0, curPos);
+ for (int i = 0; i < values.length; i++) {
+ if (values[i] == null) {
+ BitVectorHelper.unsetBit(vector.getValidityBuffer(), i);
+ } else {
+ BitVectorHelper.setBit(vector.getValidityBuffer(), i);
+ for (int value : values[i]) {
+ dataVector.setSafe(curPos, value);
+ curPos += 1;
+ }
+ }
+ vector.getOffsetBuffer().setInt((i + 1) * BaseRepeatedValueVector.OFFSET_WIDTH, curPos);
+ }
+ dataVector.setValueCount(curPos);
+ vector.setLastSet(values.length - 1);
+ vector.setValueCount(values.length);
+ }
+
+ /**
+ * Populate values for {@link LargeListVector}.
+ */
+ public static void setVector(LargeListVector vector, List<Integer>... values) {
+ vector.allocateNewSafe();
+ Types.MinorType type = Types.MinorType.INT;
+ vector.addOrGetVector(FieldType.nullable(type.getType()));
+
+ IntVector dataVector = (IntVector) vector.getDataVector();
+ dataVector.allocateNew();
+
+ // set underlying vectors
+ int curPos = 0;
+ vector.getOffsetBuffer().setLong(0, curPos);
+ for (int i = 0; i < values.length; i++) {
+ if (values[i] == null) {
+ BitVectorHelper.unsetBit(vector.getValidityBuffer(), i);
+ } else {
+ BitVectorHelper.setBit(vector.getValidityBuffer(), i);
+ for (int value : values[i]) {
+ dataVector.setSafe(curPos, value);
+ curPos += 1;
+ }
+ }
+ vector.getOffsetBuffer().setLong((long) (i + 1) * LargeListVector.OFFSET_WIDTH, curPos);
+ }
+ dataVector.setValueCount(curPos);
+ vector.setLastSet(values.length - 1);
+ vector.setValueCount(values.length);
+ }
+
+ /**
+ * Populate values for {@link FixedSizeListVector}.
+ */
+ public static void setVector(FixedSizeListVector vector, List<Integer>... values) {
+ vector.allocateNewSafe();
+ for (int i = 0; i < values.length; i++) {
+ if (values[i] != null) {
+ assertEquals(vector.getListSize(), values[i].size());
+ }
+ }
+
+ Types.MinorType type = Types.MinorType.INT;
+ vector.addOrGetVector(FieldType.nullable(type.getType()));
+
+ IntVector dataVector = (IntVector) vector.getDataVector();
+ dataVector.allocateNew();
+
+ // set underlying vectors
+ int curPos = 0;
+ for (int i = 0; i < values.length; i++) {
+ if (values[i] == null) {
+ BitVectorHelper.unsetBit(vector.getValidityBuffer(), i);
+ } else {
+ BitVectorHelper.setBit(vector.getValidityBuffer(), i);
+ for (int value : values[i]) {
+ dataVector.setSafe(curPos, value);
+ curPos += 1;
+ }
+ }
+ }
+ dataVector.setValueCount(curPos);
+ vector.setValueCount(values.length);
+ }
+
+ /**
+ * Populate values for {@link StructVector}.
+ */
+ public static void setVector(StructVector vector, Map<String, List<Integer>> values) {
+ vector.allocateNewSafe();
+
+ int valueCount = 0;
+ for (final Entry<String, List<Integer>> entry : values.entrySet()) {
+ // Add the child
+ final IntVector child = vector.addOrGet(entry.getKey(),
+ FieldType.nullable(MinorType.INT.getType()), IntVector.class);
+
+ // Write the values to the child
+ child.allocateNew();
+ final List<Integer> v = entry.getValue();
+ for (int i = 0; i < v.size(); i++) {
+ if (v.get(i) != null) {
+ child.set(i, v.get(i));
+ vector.setIndexDefined(i);
+ } else {
+ child.setNull(i);
+ }
+ }
+ valueCount = Math.max(valueCount, v.size());
+ }
+ vector.setValueCount(valueCount);
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestExtensionType.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestExtensionType.java
new file mode 100644
index 000000000..8b2743210
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestExtensionType.java
@@ -0,0 +1,420 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.types.pojo;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+
+import java.io.File;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.channels.FileChannel;
+import java.nio.channels.SeekableByteChannel;
+import java.nio.channels.WritableByteChannel;
+import java.nio.file.Files;
+import java.nio.file.Paths;
+import java.nio.file.StandardOpenOption;
+import java.util.Collections;
+import java.util.UUID;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.memory.util.hash.ArrowBufHasher;
+import org.apache.arrow.vector.ExtensionTypeVector;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.FixedSizeBinaryVector;
+import org.apache.arrow.vector.Float4Vector;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.compare.Range;
+import org.apache.arrow.vector.compare.RangeEqualsVisitor;
+import org.apache.arrow.vector.complex.StructVector;
+import org.apache.arrow.vector.ipc.ArrowFileReader;
+import org.apache.arrow.vector.ipc.ArrowFileWriter;
+import org.apache.arrow.vector.types.FloatingPointPrecision;
+import org.apache.arrow.vector.types.pojo.ArrowType.ExtensionType;
+import org.apache.arrow.vector.util.VectorBatchAppender;
+import org.apache.arrow.vector.validate.ValidateVectorVisitor;
+import org.junit.Assert;
+import org.junit.Test;
+
+public class TestExtensionType {
+ /**
+ * Test that a custom UUID type can be round-tripped through a temporary file.
+ */
+ @Test
+ public void roundtripUuid() throws IOException {
+ ExtensionTypeRegistry.register(new UuidType());
+ final Schema schema = new Schema(Collections.singletonList(Field.nullable("a", new UuidType())));
+ try (final BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE);
+ final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) {
+ UUID u1 = UUID.randomUUID();
+ UUID u2 = UUID.randomUUID();
+ UuidVector vector = (UuidVector) root.getVector("a");
+ vector.setValueCount(2);
+ vector.set(0, u1);
+ vector.set(1, u2);
+ root.setRowCount(2);
+
+ final File file = File.createTempFile("uuidtest", ".arrow");
+ try (final WritableByteChannel channel = FileChannel
+ .open(Paths.get(file.getAbsolutePath()), StandardOpenOption.WRITE);
+ final ArrowFileWriter writer = new ArrowFileWriter(root, null, channel)) {
+ writer.start();
+ writer.writeBatch();
+ writer.end();
+ }
+
+ try (final SeekableByteChannel channel = Files.newByteChannel(Paths.get(file.getAbsolutePath()));
+ final ArrowFileReader reader = new ArrowFileReader(channel, allocator)) {
+ reader.loadNextBatch();
+ final VectorSchemaRoot readerRoot = reader.getVectorSchemaRoot();
+ Assert.assertEquals(root.getSchema(), readerRoot.getSchema());
+
+ final Field field = readerRoot.getSchema().getFields().get(0);
+ final UuidType expectedType = new UuidType();
+ Assert.assertEquals(field.getMetadata().get(ExtensionType.EXTENSION_METADATA_KEY_NAME),
+ expectedType.extensionName());
+ Assert.assertEquals(field.getMetadata().get(ExtensionType.EXTENSION_METADATA_KEY_METADATA),
+ expectedType.serialize());
+
+ final ExtensionTypeVector deserialized = (ExtensionTypeVector) readerRoot.getFieldVectors().get(0);
+ Assert.assertEquals(vector.getValueCount(), deserialized.getValueCount());
+ for (int i = 0; i < vector.getValueCount(); i++) {
+ Assert.assertEquals(vector.isNull(i), deserialized.isNull(i));
+ if (!vector.isNull(i)) {
+ Assert.assertEquals(vector.getObject(i), deserialized.getObject(i));
+ }
+ }
+ }
+ }
+ }
+
+ /**
+ * Test that a custom UUID type can be read as its underlying type.
+ */
+ @Test
+ public void readUnderlyingType() throws IOException {
+ ExtensionTypeRegistry.register(new UuidType());
+ final Schema schema = new Schema(Collections.singletonList(Field.nullable("a", new UuidType())));
+ try (final BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE);
+ final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) {
+ UUID u1 = UUID.randomUUID();
+ UUID u2 = UUID.randomUUID();
+ UuidVector vector = (UuidVector) root.getVector("a");
+ vector.setValueCount(2);
+ vector.set(0, u1);
+ vector.set(1, u2);
+ root.setRowCount(2);
+
+ final File file = File.createTempFile("uuidtest", ".arrow");
+ try (final WritableByteChannel channel = FileChannel
+ .open(Paths.get(file.getAbsolutePath()), StandardOpenOption.WRITE);
+ final ArrowFileWriter writer = new ArrowFileWriter(root, null, channel)) {
+ writer.start();
+ writer.writeBatch();
+ writer.end();
+ }
+
+ ExtensionTypeRegistry.unregister(new UuidType());
+
+ try (final SeekableByteChannel channel = Files.newByteChannel(Paths.get(file.getAbsolutePath()));
+ final ArrowFileReader reader = new ArrowFileReader(channel, allocator)) {
+ reader.loadNextBatch();
+ final VectorSchemaRoot readerRoot = reader.getVectorSchemaRoot();
+ Assert.assertEquals(1, readerRoot.getSchema().getFields().size());
+ Assert.assertEquals("a", readerRoot.getSchema().getFields().get(0).getName());
+ Assert.assertTrue(readerRoot.getSchema().getFields().get(0).getType() instanceof ArrowType.FixedSizeBinary);
+ Assert.assertEquals(16,
+ ((ArrowType.FixedSizeBinary) readerRoot.getSchema().getFields().get(0).getType()).getByteWidth());
+
+ final Field field = readerRoot.getSchema().getFields().get(0);
+ final UuidType expectedType = new UuidType();
+ Assert.assertEquals(field.getMetadata().get(ExtensionType.EXTENSION_METADATA_KEY_NAME),
+ expectedType.extensionName());
+ Assert.assertEquals(field.getMetadata().get(ExtensionType.EXTENSION_METADATA_KEY_METADATA),
+ expectedType.serialize());
+
+ final FixedSizeBinaryVector deserialized = (FixedSizeBinaryVector) readerRoot.getFieldVectors().get(0);
+ Assert.assertEquals(vector.getValueCount(), deserialized.getValueCount());
+ for (int i = 0; i < vector.getValueCount(); i++) {
+ Assert.assertEquals(vector.isNull(i), deserialized.isNull(i));
+ if (!vector.isNull(i)) {
+ final UUID uuid = vector.getObject(i);
+ final ByteBuffer bb = ByteBuffer.allocate(16);
+ bb.putLong(uuid.getMostSignificantBits());
+ bb.putLong(uuid.getLeastSignificantBits());
+ Assert.assertArrayEquals(bb.array(), deserialized.get(i));
+ }
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testNullCheck() {
+ NullPointerException e = assertThrows(NullPointerException.class,
+ () -> {
+ try (final BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE);
+ final ExtensionTypeVector vector = new UuidVector("uuid", allocator, null)) {
+ vector.getField();
+ vector.allocateNewSafe();
+ }
+ });
+ assertTrue(e.getMessage().contains("underlyingVector can not be null."));
+ }
+
+ /**
+ * Test that a custom Location type can be round-tripped through a temporary file.
+ */
+ @Test
+ public void roundtripLocation() throws IOException {
+ ExtensionTypeRegistry.register(new LocationType());
+ final Schema schema = new Schema(Collections.singletonList(Field.nullable("location", new LocationType())));
+ try (final BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE);
+ final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) {
+ LocationVector vector = (LocationVector) root.getVector("location");
+ vector.allocateNew();
+ vector.set(0, 34.073814f, -118.240784f);
+ vector.set(2, 37.768056f, -122.3875f);
+ vector.set(3, 40.739716f, -73.840782f);
+ vector.setValueCount(4);
+ root.setRowCount(4);
+
+ final File file = File.createTempFile("locationtest", ".arrow");
+ try (final WritableByteChannel channel = FileChannel
+ .open(Paths.get(file.getAbsolutePath()), StandardOpenOption.WRITE);
+ final ArrowFileWriter writer = new ArrowFileWriter(root, null, channel)) {
+ writer.start();
+ writer.writeBatch();
+ writer.end();
+ }
+
+ try (final SeekableByteChannel channel = Files.newByteChannel(Paths.get(file.getAbsolutePath()));
+ final ArrowFileReader reader = new ArrowFileReader(channel, allocator)) {
+ reader.loadNextBatch();
+ final VectorSchemaRoot readerRoot = reader.getVectorSchemaRoot();
+ Assert.assertEquals(root.getSchema(), readerRoot.getSchema());
+
+ final Field field = readerRoot.getSchema().getFields().get(0);
+ final LocationType expectedType = new LocationType();
+ Assert.assertEquals(field.getMetadata().get(ExtensionType.EXTENSION_METADATA_KEY_NAME),
+ expectedType.extensionName());
+ Assert.assertEquals(field.getMetadata().get(ExtensionType.EXTENSION_METADATA_KEY_METADATA),
+ expectedType.serialize());
+
+ final ExtensionTypeVector deserialized = (ExtensionTypeVector) readerRoot.getFieldVectors().get(0);
+ Assert.assertTrue(deserialized instanceof LocationVector);
+ Assert.assertEquals(deserialized.getName(), "location");
+ StructVector deserStruct = (StructVector) deserialized.getUnderlyingVector();
+ Assert.assertNotNull(deserStruct.getChild("Latitude"));
+ Assert.assertNotNull(deserStruct.getChild("Longitude"));
+ Assert.assertEquals(vector.getValueCount(), deserialized.getValueCount());
+ for (int i = 0; i < vector.getValueCount(); i++) {
+ Assert.assertEquals(vector.isNull(i), deserialized.isNull(i));
+ if (!vector.isNull(i)) {
+ Assert.assertEquals(vector.getObject(i), deserialized.getObject(i));
+ }
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testVectorCompare() {
+ UuidType uuidType = new UuidType();
+ ExtensionTypeRegistry.register(uuidType);
+ try (final BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE);
+ UuidVector a1 = (UuidVector) uuidType.getNewVector("a", FieldType.nullable(uuidType), allocator);
+ UuidVector a2 = (UuidVector) uuidType.getNewVector("a", FieldType.nullable(uuidType), allocator);
+ UuidVector bb = (UuidVector) uuidType.getNewVector("a", FieldType.nullable(uuidType), allocator)
+ ) {
+ UUID u1 = UUID.randomUUID();
+ UUID u2 = UUID.randomUUID();
+
+ // Test out type and vector validation visitors for an ExtensionTypeVector
+ ValidateVectorVisitor validateVisitor = new ValidateVectorVisitor();
+ validateVisitor.visit(a1, null);
+
+ a1.setValueCount(2);
+ a1.set(0, u1);
+ a1.set(1, u2);
+
+ a2.setValueCount(2);
+ a2.set(0, u1);
+ a2.set(1, u2);
+
+ bb.setValueCount(2);
+ bb.set(0, u2);
+ bb.set(1, u1);
+
+ Range range = new Range(0, 0, a1.getValueCount());
+ RangeEqualsVisitor visitor = new RangeEqualsVisitor(a1, a2);
+ assertTrue(visitor.rangeEquals(range));
+
+ visitor = new RangeEqualsVisitor(a1, bb);
+ assertFalse(visitor.rangeEquals(range));
+
+ // Test out vector appender
+ VectorBatchAppender.batchAppend(a1, a2, bb);
+ assertEquals(a1.getValueCount(), 6);
+ validateVisitor.visit(a1, null);
+ }
+ }
+
+ static class UuidType extends ExtensionType {
+
+ @Override
+ public ArrowType storageType() {
+ return new ArrowType.FixedSizeBinary(16);
+ }
+
+ @Override
+ public String extensionName() {
+ return "uuid";
+ }
+
+ @Override
+ public boolean extensionEquals(ExtensionType other) {
+ return other instanceof UuidType;
+ }
+
+ @Override
+ public ArrowType deserialize(ArrowType storageType, String serializedData) {
+ if (!storageType.equals(storageType())) {
+ throw new UnsupportedOperationException("Cannot construct UuidType from underlying type " + storageType);
+ }
+ return new UuidType();
+ }
+
+ @Override
+ public String serialize() {
+ return "";
+ }
+
+ @Override
+ public FieldVector getNewVector(String name, FieldType fieldType, BufferAllocator allocator) {
+ return new UuidVector(name, allocator, new FixedSizeBinaryVector(name, allocator, 16));
+ }
+ }
+
+ static class UuidVector extends ExtensionTypeVector<FixedSizeBinaryVector> {
+
+ public UuidVector(String name, BufferAllocator allocator, FixedSizeBinaryVector underlyingVector) {
+ super(name, allocator, underlyingVector);
+ }
+
+ @Override
+ public UUID getObject(int index) {
+ final ByteBuffer bb = ByteBuffer.wrap(getUnderlyingVector().getObject(index));
+ return new UUID(bb.getLong(), bb.getLong());
+ }
+
+ @Override
+ public int hashCode(int index) {
+ return hashCode(index, null);
+ }
+
+ @Override
+ public int hashCode(int index, ArrowBufHasher hasher) {
+ return getUnderlyingVector().hashCode(index, hasher);
+ }
+
+ public void set(int index, UUID uuid) {
+ ByteBuffer bb = ByteBuffer.allocate(16);
+ bb.putLong(uuid.getMostSignificantBits());
+ bb.putLong(uuid.getLeastSignificantBits());
+ getUnderlyingVector().set(index, bb.array());
+ }
+ }
+
+ static class LocationType extends ExtensionType {
+
+ @Override
+ public ArrowType storageType() {
+ return Struct.INSTANCE;
+ }
+
+ @Override
+ public String extensionName() {
+ return "location";
+ }
+
+ @Override
+ public boolean extensionEquals(ExtensionType other) {
+ return other instanceof LocationType;
+ }
+
+ @Override
+ public ArrowType deserialize(ArrowType storageType, String serializedData) {
+ if (!storageType.equals(storageType())) {
+ throw new UnsupportedOperationException("Cannot construct LocationType from underlying type " + storageType);
+ }
+ return new LocationType();
+ }
+
+ @Override
+ public String serialize() {
+ return "";
+ }
+
+ @Override
+ public FieldVector getNewVector(String name, FieldType fieldType, BufferAllocator allocator) {
+ return new LocationVector(name, allocator);
+ }
+ }
+
+ static class LocationVector extends ExtensionTypeVector<StructVector> {
+
+ private static StructVector buildUnderlyingVector(String name, BufferAllocator allocator) {
+ final StructVector underlyingVector =
+ new StructVector(name, allocator, FieldType.nullable(ArrowType.Struct.INSTANCE), null);
+ underlyingVector.addOrGet("Latitude",
+ FieldType.nullable(new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE)), Float4Vector.class);
+ underlyingVector.addOrGet("Longitude",
+ FieldType.nullable(new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE)), Float4Vector.class);
+ return underlyingVector;
+ }
+
+ public LocationVector(String name, BufferAllocator allocator) {
+ super(name, allocator, buildUnderlyingVector(name, allocator));
+ }
+
+ @Override
+ public int hashCode(int index) {
+ return hashCode(index, null);
+ }
+
+ @Override
+ public int hashCode(int index, ArrowBufHasher hasher) {
+ return getUnderlyingVector().hashCode(index, hasher);
+ }
+
+ @Override
+ public java.util.Map<String, ?> getObject(int index) {
+ return getUnderlyingVector().getObject(index);
+ }
+
+ public void set(int index, float latitude, float longitude) {
+ getUnderlyingVector().getChild("Latitude", Float4Vector.class).set(index, latitude);
+ getUnderlyingVector().getChild("Longitude", Float4Vector.class).set(index, longitude);
+ getUnderlyingVector().setIndexDefined(index);
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestField.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestField.java
new file mode 100644
index 000000000..bc984fa64
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestField.java
@@ -0,0 +1,63 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.types.pojo;
+
+import static org.apache.arrow.vector.types.pojo.Schema.METADATA_KEY;
+import static org.apache.arrow.vector.types.pojo.Schema.METADATA_VALUE;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.arrow.vector.types.pojo.ArrowType.Int;
+import org.junit.Test;
+
+public class TestField {
+
+ private static Field field(String name, boolean nullable, ArrowType type, Map<String, String> metadata) {
+ return new Field(name, new FieldType(nullable, type, null, metadata), Collections.emptyList());
+ }
+
+ @Test
+ public void testMetadata() throws IOException {
+ Map<String, String> metadata = new HashMap<>(1);
+ metadata.put("testKey", "testValue");
+
+ Schema schema = new Schema(Collections.singletonList(
+ field("a", false, new Int(8, true), metadata)
+ ));
+
+ String json = schema.toJson();
+ Schema actual = Schema.fromJSON(json);
+
+ jsonContains(json, "\"" + METADATA_KEY + "\" : \"testKey\"", "\"" + METADATA_VALUE + "\" : \"testValue\"");
+
+ Map<String, String> actualMetadata = actual.getFields().get(0).getMetadata();
+ assertEquals(1, actualMetadata.size());
+ assertEquals("testValue", actualMetadata.get("testKey"));
+ }
+
+ private void jsonContains(String json, String... strings) {
+ for (String string : strings) {
+ assertTrue(json + " contains " + string, json.contains(string));
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestSchema.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestSchema.java
new file mode 100644
index 000000000..0e5375865
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestSchema.java
@@ -0,0 +1,254 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.types.pojo;
+
+import static java.util.Arrays.asList;
+import static org.apache.arrow.vector.types.pojo.Schema.METADATA_KEY;
+import static org.apache.arrow.vector.types.pojo.Schema.METADATA_VALUE;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.arrow.vector.types.DateUnit;
+import org.apache.arrow.vector.types.FloatingPointPrecision;
+import org.apache.arrow.vector.types.IntervalUnit;
+import org.apache.arrow.vector.types.TimeUnit;
+import org.apache.arrow.vector.types.UnionMode;
+import org.apache.arrow.vector.types.pojo.ArrowType.Binary;
+import org.apache.arrow.vector.types.pojo.ArrowType.Bool;
+import org.apache.arrow.vector.types.pojo.ArrowType.Date;
+import org.apache.arrow.vector.types.pojo.ArrowType.Decimal;
+import org.apache.arrow.vector.types.pojo.ArrowType.Duration;
+import org.apache.arrow.vector.types.pojo.ArrowType.FixedSizeBinary;
+import org.apache.arrow.vector.types.pojo.ArrowType.FloatingPoint;
+import org.apache.arrow.vector.types.pojo.ArrowType.Int;
+import org.apache.arrow.vector.types.pojo.ArrowType.Interval;
+import org.apache.arrow.vector.types.pojo.ArrowType.List;
+import org.apache.arrow.vector.types.pojo.ArrowType.Null;
+import org.apache.arrow.vector.types.pojo.ArrowType.Struct;
+import org.apache.arrow.vector.types.pojo.ArrowType.Time;
+import org.apache.arrow.vector.types.pojo.ArrowType.Timestamp;
+import org.apache.arrow.vector.types.pojo.ArrowType.Union;
+import org.apache.arrow.vector.types.pojo.ArrowType.Utf8;
+import org.junit.Test;
+
+public class TestSchema {
+
+ private static Field field(String name, boolean nullable, ArrowType type, Field... children) {
+ return new Field(name, new FieldType(nullable, type, null, null), asList(children));
+ }
+
+ private static Field field(String name, ArrowType type, Field... children) {
+ return field(name, true, type, children);
+ }
+
+ @Test
+ public void testComplex() throws IOException {
+ Schema schema = new Schema(asList(
+ field("a", false, new Int(8, true)),
+ field("b", new Struct(),
+ field("c", new Int(16, true)),
+ field("d", new Utf8())),
+ field("e", new List(), field(null, new Date(DateUnit.MILLISECOND))),
+ field("f", new FloatingPoint(FloatingPointPrecision.SINGLE)),
+ field("g", new Timestamp(TimeUnit.MILLISECOND, "UTC")),
+ field("h", new Timestamp(TimeUnit.MICROSECOND, null)),
+ field("i", new Interval(IntervalUnit.DAY_TIME)),
+ field("j", new ArrowType.Duration(TimeUnit.SECOND))
+ ));
+ roundTrip(schema);
+ assertEquals(
+ "Schema<a: Int(8, true) not null, b: Struct<c: Int(16, true), d: Utf8>, e: List<Date(MILLISECOND)>, " +
+ "f: FloatingPoint(SINGLE), g: Timestamp(MILLISECOND, UTC), h: Timestamp(MICROSECOND, null), " +
+ "i: Interval(DAY_TIME), j: Duration(SECOND)>",
+ schema.toString());
+ }
+
+ @Test
+ public void testAll() throws IOException {
+ Schema schema = new Schema(asList(
+ field("a", false, new Null()),
+ field("b", new Struct(), field("ba", new Null())),
+ field("c", new List(), field("ca", new Null())),
+ field("d", new Union(UnionMode.Sparse, new int[] {1, 2, 3}), field("da", new Null())),
+ field("e", new Int(8, true)),
+ field("f", new FloatingPoint(FloatingPointPrecision.SINGLE)),
+ field("g", new Utf8()),
+ field("h", new Binary()),
+ field("i", new Bool()),
+ field("j", new Decimal(5, 5, 128)),
+ field("k", new Date(DateUnit.DAY)),
+ field("l", new Date(DateUnit.MILLISECOND)),
+ field("m", new Time(TimeUnit.SECOND, 32)),
+ field("n", new Time(TimeUnit.MILLISECOND, 32)),
+ field("o", new Time(TimeUnit.MICROSECOND, 64)),
+ field("p", new Time(TimeUnit.NANOSECOND, 64)),
+ field("q", new Timestamp(TimeUnit.MILLISECOND, "UTC")),
+ field("r", new Timestamp(TimeUnit.MICROSECOND, null)),
+ field("s", new Interval(IntervalUnit.DAY_TIME)),
+ field("t", new FixedSizeBinary(100)),
+ field("u", new Duration(TimeUnit.SECOND)),
+ field("v", new Duration(TimeUnit.MICROSECOND))
+ ));
+ roundTrip(schema);
+ }
+
+ @Test
+ public void testUnion() throws IOException {
+ Schema schema = new Schema(asList(
+ field("d", new Union(UnionMode.Sparse, new int[] {1, 2, 3}), field("da", new Null()))
+ ));
+ roundTrip(schema);
+ contains(schema, "Sparse");
+ }
+
+ @Test
+ public void testDate() throws IOException {
+ Schema schema = new Schema(asList(
+ field("a", new Date(DateUnit.DAY)),
+ field("b", new Date(DateUnit.MILLISECOND))
+ ));
+ roundTrip(schema);
+ assertEquals(
+ "Schema<a: Date(DAY), b: Date(MILLISECOND)>",
+ schema.toString());
+ }
+
+ @Test
+ public void testTime() throws IOException {
+ Schema schema = new Schema(asList(
+ field("a", new Time(TimeUnit.SECOND, 32)),
+ field("b", new Time(TimeUnit.MILLISECOND, 32)),
+ field("c", new Time(TimeUnit.MICROSECOND, 64)),
+ field("d", new Time(TimeUnit.NANOSECOND, 64))
+ ));
+ roundTrip(schema);
+ assertEquals(
+ "Schema<a: Time(SECOND, 32), b: Time(MILLISECOND, 32), c: Time(MICROSECOND, 64), d: Time(NANOSECOND, 64)>",
+ schema.toString());
+ }
+
+ @Test
+ public void testTS() throws IOException {
+ Schema schema = new Schema(asList(
+ field("a", new Timestamp(TimeUnit.SECOND, "UTC")),
+ field("b", new Timestamp(TimeUnit.MILLISECOND, "UTC")),
+ field("c", new Timestamp(TimeUnit.MICROSECOND, "UTC")),
+ field("d", new Timestamp(TimeUnit.NANOSECOND, "UTC")),
+ field("e", new Timestamp(TimeUnit.SECOND, null)),
+ field("f", new Timestamp(TimeUnit.MILLISECOND, null)),
+ field("g", new Timestamp(TimeUnit.MICROSECOND, null)),
+ field("h", new Timestamp(TimeUnit.NANOSECOND, null))
+ ));
+ roundTrip(schema);
+ assertEquals(
+ "Schema<a: Timestamp(SECOND, UTC), b: Timestamp(MILLISECOND, UTC), c: Timestamp(MICROSECOND, UTC), " +
+ "d: Timestamp(NANOSECOND, UTC), e: Timestamp(SECOND, null), f: Timestamp(MILLISECOND, null), " +
+ "g: Timestamp(MICROSECOND, null), h: Timestamp(NANOSECOND, null)>",
+ schema.toString());
+ }
+
+ @Test
+ public void testInterval() throws IOException {
+ Schema schema = new Schema(asList(
+ field("a", new Interval(IntervalUnit.YEAR_MONTH)),
+ field("b", new Interval(IntervalUnit.DAY_TIME))
+ ));
+ roundTrip(schema);
+ contains(schema, "YEAR_MONTH", "DAY_TIME");
+ }
+
+ @Test
+ public void testRoundTripDurationInterval() throws IOException {
+ Schema schema = new Schema(asList(
+ field("a", new Duration(TimeUnit.SECOND)),
+ field("b", new Duration(TimeUnit.MILLISECOND)),
+ field("c", new Duration(TimeUnit.MICROSECOND)),
+ field("d", new Duration(TimeUnit.NANOSECOND))
+ ));
+ roundTrip(schema);
+ contains(schema, "SECOND", "MILLI", "MICRO", "NANO");
+ }
+
+ @Test
+ public void testFP() throws IOException {
+ Schema schema = new Schema(asList(
+ field("a", new FloatingPoint(FloatingPointPrecision.HALF)),
+ field("b", new FloatingPoint(FloatingPointPrecision.SINGLE)),
+ field("c", new FloatingPoint(FloatingPointPrecision.DOUBLE))
+ ));
+ roundTrip(schema);
+ contains(schema, "HALF", "SINGLE", "DOUBLE");
+ }
+
+ @Test
+ public void testMetadata() throws IOException {
+ Map<String, String> metadata = new HashMap<>(1);
+ metadata.put("testKey", "testValue");
+
+ java.util.List<Field> fields = asList(
+ field("a", false, new Int(8, true)),
+ field("b", new Struct(),
+ field("c", new Int(16, true)),
+ field("d", new Utf8())),
+ field("e", new List(), field(null, new Date(DateUnit.MILLISECOND)))
+ );
+ Schema schema = new Schema(fields, metadata);
+ roundTrip(schema);
+ contains(schema, "\"" + METADATA_KEY + "\" : \"testKey\"", "\"" + METADATA_VALUE + "\" : \"testValue\"");
+ }
+
+ private void roundTrip(Schema schema) throws IOException {
+ String json = schema.toJson();
+ Schema actual = Schema.fromJSON(json);
+ assertEquals(schema.toJson(), actual.toJson());
+ assertEquals(schema, actual);
+ validateFieldsHashcode(schema.getFields(), actual.getFields());
+ assertEquals(schema.hashCode(), actual.hashCode());
+ }
+
+ private void validateFieldsHashcode(java.util.List<Field> schemaFields, java.util.List<Field> actualFields) {
+ assertEquals(schemaFields.size(), actualFields.size());
+ if (schemaFields.size() == 0) {
+ return;
+ }
+ for (int i = 0; i < schemaFields.size(); i++) {
+ Field schemaField = schemaFields.get(i);
+ Field actualField = actualFields.get(i);
+ validateFieldsHashcode(schemaField.getChildren(), actualField.getChildren());
+ validateHashCode(schemaField.getType(), actualField.getType());
+ validateHashCode(schemaField, actualField);
+ }
+ }
+
+ private void validateHashCode(Object o1, Object o2) {
+ assertEquals(o1, o2);
+ assertEquals(o1 + " == " + o2, o1.hashCode(), o2.hashCode());
+ }
+
+ private void contains(Schema schema, String... s) {
+ String json = schema.toJson();
+ for (String string : s) {
+ assertTrue(json + " contains " + string, json.contains(string));
+ }
+ }
+
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/util/DecimalUtilityTest.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/util/DecimalUtilityTest.java
new file mode 100644
index 000000000..804092ed9
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/util/DecimalUtilityTest.java
@@ -0,0 +1,127 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.util;
+
+import java.math.BigDecimal;
+import java.math.BigInteger;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.junit.Assert;
+import org.junit.Test;
+
+public class DecimalUtilityTest {
+ private static final BigInteger[] MAX_BIG_INT = new BigInteger[]{BigInteger.valueOf(10).pow(38)
+ .subtract(java.math.BigInteger.ONE), java.math.BigInteger.valueOf(10).pow(76)};
+ private static final BigInteger[] MIN_BIG_INT = new BigInteger[]{MAX_BIG_INT[0].multiply(BigInteger.valueOf(-1)),
+ MAX_BIG_INT[1].multiply(BigInteger.valueOf(-1))};
+
+ @Test
+ public void testSetLongInDecimalArrowBuf() {
+ int[] byteLengths = new int[]{16, 32};
+ for (int x = 0; x < 2; x++) {
+ try (BufferAllocator allocator = new RootAllocator(128);
+ ArrowBuf buf = allocator.buffer(byteLengths[x]);
+ ) {
+ int [] intValues = new int [] {Integer.MAX_VALUE, Integer.MIN_VALUE, 0};
+ for (int val : intValues) {
+ buf.clear();
+ DecimalUtility.writeLongToArrowBuf((long) val, buf, 0, byteLengths[x]);
+ BigDecimal actual = DecimalUtility.getBigDecimalFromArrowBuf(buf, 0, 0, byteLengths[x]);
+ BigDecimal expected = BigDecimal.valueOf(val);
+ Assert.assertEquals(expected, actual);
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testSetByteArrayInDecimalArrowBuf() {
+ int[] byteLengths = new int[]{16, 32};
+ for (int x = 0; x < 2; x++) {
+ try (BufferAllocator allocator = new RootAllocator(128);
+ ArrowBuf buf = allocator.buffer(byteLengths[x]);
+ ) {
+ int [] intValues = new int [] {Integer.MAX_VALUE, Integer.MIN_VALUE, 0};
+ for (int val : intValues) {
+ buf.clear();
+ DecimalUtility.writeByteArrayToArrowBuf(BigInteger.valueOf(val).toByteArray(), buf, 0, byteLengths[x]);
+ BigDecimal actual = DecimalUtility.getBigDecimalFromArrowBuf(buf, 0, 0, byteLengths[x]);
+ BigDecimal expected = BigDecimal.valueOf(val);
+ Assert.assertEquals(expected, actual);
+ }
+
+ long [] longValues = new long[] {Long.MIN_VALUE, 0 , Long.MAX_VALUE};
+ for (long val : longValues) {
+ buf.clear();
+ DecimalUtility.writeByteArrayToArrowBuf(BigInteger.valueOf(val).toByteArray(), buf, 0, byteLengths[x]);
+ BigDecimal actual = DecimalUtility.getBigDecimalFromArrowBuf(buf, 0, 0, byteLengths[x]);
+ BigDecimal expected = BigDecimal.valueOf(val);
+ Assert.assertEquals(expected, actual);
+ }
+
+ BigInteger [] decimals = new BigInteger[] {MAX_BIG_INT[x], new BigInteger("0"), MIN_BIG_INT[x]};
+ for (BigInteger val : decimals) {
+ buf.clear();
+ DecimalUtility.writeByteArrayToArrowBuf(val.toByteArray(), buf, 0, byteLengths[x]);
+ BigDecimal actual = DecimalUtility.getBigDecimalFromArrowBuf(buf, 0, 0, byteLengths[x]);
+ BigDecimal expected = new BigDecimal(val);
+ Assert.assertEquals(expected, actual);
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testSetBigDecimalInDecimalArrowBuf() {
+ int[] byteLengths = new int[]{16, 32};
+ for (int x = 0; x < 2; x++) {
+ try (BufferAllocator allocator = new RootAllocator(128);
+ ArrowBuf buf = allocator.buffer(byteLengths[x]);
+ ) {
+ int [] intValues = new int [] {Integer.MAX_VALUE, Integer.MIN_VALUE, 0};
+ for (int val : intValues) {
+ buf.clear();
+ DecimalUtility.writeBigDecimalToArrowBuf(BigDecimal.valueOf(val), buf, 0, byteLengths[x]);
+ BigDecimal actual = DecimalUtility.getBigDecimalFromArrowBuf(buf, 0, 0, byteLengths[x]);
+ BigDecimal expected = BigDecimal.valueOf(val);
+ Assert.assertEquals(expected, actual);
+ }
+
+ long [] longValues = new long[] {Long.MIN_VALUE, 0 , Long.MAX_VALUE};
+ for (long val : longValues) {
+ buf.clear();
+ DecimalUtility.writeBigDecimalToArrowBuf(BigDecimal.valueOf(val), buf, 0, byteLengths[x]);
+ BigDecimal actual = DecimalUtility.getBigDecimalFromArrowBuf(buf, 0, 0, byteLengths[x]);
+ BigDecimal expected = BigDecimal.valueOf(val);
+ Assert.assertEquals(expected, actual);
+ }
+
+ BigInteger [] decimals = new BigInteger[] {MAX_BIG_INT[x], new BigInteger("0"), MIN_BIG_INT[x]};
+ for (BigInteger val : decimals) {
+ buf.clear();
+ DecimalUtility.writeBigDecimalToArrowBuf(new BigDecimal(val), buf, 0, byteLengths[x]);
+ BigDecimal actual = DecimalUtility.getBigDecimalFromArrowBuf(buf, 0, 0, byteLengths[x]);
+ BigDecimal expected = new BigDecimal(val);
+ Assert.assertEquals(expected, actual);
+ }
+ }
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/util/TestDataSizeRoundingUtil.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/util/TestDataSizeRoundingUtil.java
new file mode 100644
index 000000000..4138ea9d7
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/util/TestDataSizeRoundingUtil.java
@@ -0,0 +1,76 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.util;
+
+import static org.junit.Assert.assertEquals;
+
+import org.junit.Test;
+
+/**
+ * Test cases for {@link DataSizeRoundingUtil}.
+ */
+public class TestDataSizeRoundingUtil {
+
+ @Test
+ public void testRoundUpTo8MultipleInt() {
+ assertEquals(0, DataSizeRoundingUtil.roundUpTo8Multiple(0));
+ assertEquals(16, DataSizeRoundingUtil.roundUpTo8Multiple(9));
+ assertEquals(24, DataSizeRoundingUtil.roundUpTo8Multiple(20));
+ assertEquals(128, DataSizeRoundingUtil.roundUpTo8Multiple(128));
+ }
+
+ @Test
+ public void testRoundUpTo8MultipleLong() {
+ assertEquals(0L, DataSizeRoundingUtil.roundUpTo8Multiple(0L));
+ assertEquals(40L, DataSizeRoundingUtil.roundUpTo8Multiple(37L));
+ assertEquals(32L, DataSizeRoundingUtil.roundUpTo8Multiple(29L));
+ assertEquals(512L, DataSizeRoundingUtil.roundUpTo8Multiple(512L));
+ }
+
+ @Test
+ public void testRoundDownTo8MultipleInt() {
+ assertEquals(0, DataSizeRoundingUtil.roundDownTo8Multiple(0));
+ assertEquals(16, DataSizeRoundingUtil.roundDownTo8Multiple(23));
+ assertEquals(24, DataSizeRoundingUtil.roundDownTo8Multiple(27));
+ assertEquals(128, DataSizeRoundingUtil.roundDownTo8Multiple(128));
+ }
+
+ @Test
+ public void testRoundDownTo8MultipleLong() {
+ assertEquals(0L, DataSizeRoundingUtil.roundDownTo8Multiple(0L));
+ assertEquals(40L, DataSizeRoundingUtil.roundDownTo8Multiple(45L));
+ assertEquals(32L, DataSizeRoundingUtil.roundDownTo8Multiple(39L));
+ assertEquals(512L, DataSizeRoundingUtil.roundDownTo8Multiple(512L));
+ }
+
+ @Test
+ public void testDivideBy8CeilInt() {
+ assertEquals(0, DataSizeRoundingUtil.divideBy8Ceil(0));
+ assertEquals(3, DataSizeRoundingUtil.divideBy8Ceil(23));
+ assertEquals(5, DataSizeRoundingUtil.divideBy8Ceil(35));
+ assertEquals(24, DataSizeRoundingUtil.divideBy8Ceil(192));
+ }
+
+ @Test
+ public void testDivideBy8CeilLong() {
+ assertEquals(0L, DataSizeRoundingUtil.divideBy8Ceil(0L));
+ assertEquals(5L, DataSizeRoundingUtil.divideBy8Ceil(37L));
+ assertEquals(10L, DataSizeRoundingUtil.divideBy8Ceil(73L));
+ assertEquals(25L, DataSizeRoundingUtil.divideBy8Ceil(200L));
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/util/TestElementAddressableVectorIterator.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/util/TestElementAddressableVectorIterator.java
new file mode 100644
index 000000000..419872225
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/util/TestElementAddressableVectorIterator.java
@@ -0,0 +1,134 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.util;
+
+import static junit.framework.TestCase.assertNull;
+import static org.junit.Assert.assertEquals;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.memory.util.ArrowBufPointer;
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.VarCharVector;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * Test cases for {@link ElementAddressableVectorIterator}.
+ */
+public class TestElementAddressableVectorIterator {
+
+ private final int VECTOR_LENGTH = 100;
+
+ private BufferAllocator allocator;
+
+ @Before
+ public void prepare() {
+ allocator = new RootAllocator(1024 * 1024);
+ }
+
+ @After
+ public void shutdown() {
+ allocator.close();
+ }
+
+ @Test
+ public void testIterateIntVector() {
+ try (IntVector intVector = new IntVector("", allocator)) {
+ intVector.allocateNew(VECTOR_LENGTH);
+ intVector.setValueCount(VECTOR_LENGTH);
+
+ // prepare data in sorted order
+ for (int i = 0; i < VECTOR_LENGTH; i++) {
+ if (i == 0) {
+ intVector.setNull(i);
+ } else {
+ intVector.set(i, i);
+ }
+ }
+
+ // iterate
+ ElementAddressableVectorIterator<IntVector> it = new ElementAddressableVectorIterator<>(intVector);
+ int index = 0;
+ while (it.hasNext()) {
+ ArrowBufPointer pt;
+
+ if (index % 2 == 0) {
+ // use populated pointer.
+ pt = new ArrowBufPointer();
+ it.next(pt);
+ } else {
+ // use iterator inner pointer
+ pt = it.next();
+ }
+ if (index == 0) {
+ assertNull(pt.getBuf());
+ } else {
+ assertEquals(index, pt.getBuf().getInt(pt.getOffset()));
+ }
+ index += 1;
+ }
+ }
+ }
+
+ @Test
+ public void testIterateVarCharVector() {
+ try (VarCharVector strVector = new VarCharVector("", allocator)) {
+ strVector.allocateNew(VECTOR_LENGTH * 10, VECTOR_LENGTH);
+ strVector.setValueCount(VECTOR_LENGTH);
+
+ // prepare data in sorted order
+ for (int i = 0; i < VECTOR_LENGTH; i++) {
+ if (i == 0) {
+ strVector.setNull(i);
+ } else {
+ strVector.set(i, String.valueOf(i).getBytes());
+ }
+ }
+
+ // iterate
+ ElementAddressableVectorIterator<VarCharVector> it = new ElementAddressableVectorIterator<>(strVector);
+ int index = 0;
+ while (it.hasNext()) {
+ ArrowBufPointer pt;
+
+ if (index % 2 == 0) {
+ // use populated pointer.
+ pt = new ArrowBufPointer();
+ it.next(pt);
+ } else {
+ // use iterator inner pointer
+ pt = it.next();
+ }
+
+ if (index == 0) {
+ assertNull(pt.getBuf());
+ } else {
+ String expected = String.valueOf(index);
+ byte[] actual = new byte[expected.length()];
+ assertEquals(expected.length(), pt.getLength());
+
+ pt.getBuf().getBytes(pt.getOffset(), actual);
+ assertEquals(expected, new String(actual));
+ }
+ index += 1;
+ }
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/util/TestMultiMapWithOrdinal.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/util/TestMultiMapWithOrdinal.java
new file mode 100644
index 000000000..ea829060d
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/util/TestMultiMapWithOrdinal.java
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.util;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+public class TestMultiMapWithOrdinal {
+
+ @Test
+ public void test() {
+ MultiMapWithOrdinal<String, String> map = new MultiMapWithOrdinal<>();
+
+ map.put("x", "1", false);
+ Assert.assertEquals(1, map.size());
+ map.remove("x", "1");
+ Assert.assertTrue(map.isEmpty());
+ map.put("x", "1", false);
+ map.put("x", "2", false);
+ map.put("y", "0", false);
+ Assert.assertEquals(3, map.size());
+ Assert.assertEquals(2, map.getAll("x").size());
+ Assert.assertEquals("1", map.getAll("x").stream().findFirst().get());
+ Assert.assertEquals("1", map.getByOrdinal(0));
+ Assert.assertEquals("2", map.getByOrdinal(1));
+ Assert.assertEquals("0", map.getByOrdinal(2));
+ Assert.assertTrue(map.remove("x", "1"));
+ Assert.assertFalse(map.remove("x", "1"));
+ Assert.assertEquals("0", map.getByOrdinal(0));
+ Assert.assertEquals(2, map.size());
+ map.put("x", "3", true);
+ Assert.assertEquals(1, map.getAll("x").size());
+ Assert.assertEquals("3", map.getAll("x").stream().findFirst().get());
+ map.put("z", "4", false);
+ Assert.assertEquals(3, map.size());
+ map.put("z", "5", false);
+ map.put("z", "6", false);
+ Assert.assertEquals(5, map.size());
+ map.removeAll("z");
+ Assert.assertEquals(2, map.size());
+ Assert.assertFalse(map.containsKey("z"));
+
+
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/util/TestValidator.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/util/TestValidator.java
new file mode 100644
index 000000000..2db70ca5d
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/util/TestValidator.java
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.util;
+
+import static org.apache.arrow.vector.util.Validator.equalEnough;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+import org.junit.Test;
+
+public class TestValidator {
+
+ @Test
+ public void testFloatComp() {
+ assertTrue(equalEnough(912.4140000000002F, 912.414F));
+ assertTrue(equalEnough(912.4140000000002D, 912.414D));
+ assertTrue(equalEnough(912.414F, 912.4140000000002F));
+ assertTrue(equalEnough(912.414D, 912.4140000000002D));
+ assertFalse(equalEnough(912.414D, 912.4140001D));
+ assertFalse(equalEnough(null, 912.414D));
+ assertTrue(equalEnough((Float) null, null));
+ assertTrue(equalEnough((Double) null, null));
+ assertFalse(equalEnough(912.414D, null));
+ assertFalse(equalEnough(Double.MAX_VALUE, Double.MIN_VALUE));
+ assertFalse(equalEnough(Double.MIN_VALUE, Double.MAX_VALUE));
+ assertTrue(equalEnough(Double.MAX_VALUE, Double.MAX_VALUE));
+ assertTrue(equalEnough(Double.MIN_VALUE, Double.MIN_VALUE));
+ assertTrue(equalEnough(Double.NEGATIVE_INFINITY, Double.NEGATIVE_INFINITY));
+ assertFalse(equalEnough(Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY));
+ assertTrue(equalEnough(Double.NaN, Double.NaN));
+ assertFalse(equalEnough(1.0, Double.NaN));
+ assertFalse(equalEnough(Float.MAX_VALUE, Float.MIN_VALUE));
+ assertFalse(equalEnough(Float.MIN_VALUE, Float.MAX_VALUE));
+ assertTrue(equalEnough(Float.MAX_VALUE, Float.MAX_VALUE));
+ assertTrue(equalEnough(Float.MIN_VALUE, Float.MIN_VALUE));
+ assertTrue(equalEnough(Float.NEGATIVE_INFINITY, Float.NEGATIVE_INFINITY));
+ assertFalse(equalEnough(Float.NEGATIVE_INFINITY, Float.POSITIVE_INFINITY));
+ assertTrue(equalEnough(Float.NaN, Float.NaN));
+ assertFalse(equalEnough(1.0F, Float.NaN));
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/util/TestVectorAppender.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/util/TestVectorAppender.java
new file mode 100644
index 000000000..1cd263120
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/util/TestVectorAppender.java
@@ -0,0 +1,794 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.util;
+
+import static junit.framework.TestCase.assertEquals;
+import static junit.framework.TestCase.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.BigIntVector;
+import org.apache.arrow.vector.Float4Vector;
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.LargeVarCharVector;
+import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.VarCharVector;
+import org.apache.arrow.vector.compare.Range;
+import org.apache.arrow.vector.compare.RangeEqualsVisitor;
+import org.apache.arrow.vector.compare.TypeEqualsVisitor;
+import org.apache.arrow.vector.complex.DenseUnionVector;
+import org.apache.arrow.vector.complex.FixedSizeListVector;
+import org.apache.arrow.vector.complex.LargeListVector;
+import org.apache.arrow.vector.complex.ListVector;
+import org.apache.arrow.vector.complex.StructVector;
+import org.apache.arrow.vector.complex.UnionVector;
+import org.apache.arrow.vector.holders.NullableBigIntHolder;
+import org.apache.arrow.vector.holders.NullableFloat4Holder;
+import org.apache.arrow.vector.holders.NullableIntHolder;
+import org.apache.arrow.vector.testing.ValueVectorDataPopulator;
+import org.apache.arrow.vector.types.Types;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * Test cases for {@link VectorAppender}.
+ */
+public class TestVectorAppender {
+
+ private BufferAllocator allocator;
+
+ @Before
+ public void prepare() {
+ allocator = new RootAllocator(1024 * 1024);
+ }
+
+ @After
+ public void shutdown() {
+ allocator.close();
+ }
+
+ @Test
+ public void testAppendFixedWidthVector() {
+ final int length1 = 10;
+ final int length2 = 5;
+ try (IntVector target = new IntVector("", allocator);
+ IntVector delta = new IntVector("", allocator)) {
+
+ target.allocateNew(length1);
+ delta.allocateNew(length2);
+
+ ValueVectorDataPopulator.setVector(target, 0, 1, 2, 3, 4, 5, 6, null, 8, 9);
+ ValueVectorDataPopulator.setVector(delta, null, 11, 12, 13, 14);
+
+ VectorAppender appender = new VectorAppender(target);
+ delta.accept(appender, null);
+
+ assertEquals(length1 + length2, target.getValueCount());
+
+ try (IntVector expected = new IntVector("expected", allocator)) {
+ expected.allocateNew();
+ ValueVectorDataPopulator.setVector(expected, 0, 1, 2, 3, 4, 5, 6, null, 8, 9, null, 11, 12, 13, 14);
+ assertVectorsEqual(expected, target);
+ }
+ }
+ }
+
+ @Test
+ public void testAppendEmptyFixedWidthVector() {
+ try (IntVector target = new IntVector("", allocator);
+ IntVector delta = new IntVector("", allocator)) {
+
+ ValueVectorDataPopulator.setVector(target, 0, 1, 2, 3, 4, 5, 6, null, 8, 9);
+
+ VectorAppender appender = new VectorAppender(target);
+ delta.accept(appender, null);
+
+ assertEquals(10, target.getValueCount());
+
+ try (IntVector expected = new IntVector("expected", allocator)) {
+ ValueVectorDataPopulator.setVector(expected, 0, 1, 2, 3, 4, 5, 6, null, 8, 9);
+ assertVectorsEqual(expected, target);
+ }
+ }
+ }
+
+ @Test
+ public void testAppendVariableWidthVector() {
+ final int length1 = 10;
+ final int length2 = 5;
+ try (VarCharVector target = new VarCharVector("", allocator);
+ VarCharVector delta = new VarCharVector("", allocator)) {
+
+ target.allocateNew(5, length1);
+ delta.allocateNew(5, length2);
+
+ ValueVectorDataPopulator.setVector(target, "a0", "a1", "a2", "a3", null, "a5", "a6", "a7", "a8", "a9");
+ ValueVectorDataPopulator.setVector(delta, "a10", "a11", "a12", "a13", null);
+
+ VectorAppender appender = new VectorAppender(target);
+ delta.accept(appender, null);
+
+ try (VarCharVector expected = new VarCharVector("expected", allocator)) {
+ expected.allocateNew();
+ ValueVectorDataPopulator.setVector(expected,
+ "a0", "a1", "a2", "a3", null, "a5", "a6", "a7", "a8", "a9", "a10", "a11", "a12", "a13", null);
+ assertVectorsEqual(expected, target);
+ }
+ }
+ }
+
+ @Test
+ public void testAppendEmptyVariableWidthVector() {
+ try (VarCharVector target = new VarCharVector("", allocator);
+ VarCharVector delta = new VarCharVector("", allocator)) {
+
+ ValueVectorDataPopulator.setVector(target, "a0", "a1", "a2", "a3", null, "a5", "a6", "a7", "a8", "a9");
+
+ VectorAppender appender = new VectorAppender(target);
+ delta.accept(appender, null);
+
+ try (VarCharVector expected = new VarCharVector("expected", allocator)) {
+ ValueVectorDataPopulator.setVector(expected,
+ "a0", "a1", "a2", "a3", null, "a5", "a6", "a7", "a8", "a9");
+ assertVectorsEqual(expected, target);
+ }
+ }
+ }
+
+ @Test
+ public void testAppendLargeVariableWidthVector() {
+ final int length1 = 5;
+ final int length2 = 10;
+ try (LargeVarCharVector target = new LargeVarCharVector("", allocator);
+ LargeVarCharVector delta = new LargeVarCharVector("", allocator)) {
+
+ target.allocateNew(5, length1);
+ delta.allocateNew(5, length2);
+
+ ValueVectorDataPopulator.setVector(target, "a0", null, "a2", "a3", null);
+ ValueVectorDataPopulator.setVector(delta, "a5", "a6", "a7", null, null, "a10", "a11", "a12", "a13", null);
+
+ VectorAppender appender = new VectorAppender(target);
+ delta.accept(appender, null);
+
+ try (LargeVarCharVector expected = new LargeVarCharVector("expected", allocator)) {
+ expected.allocateNew();
+ ValueVectorDataPopulator.setVector(expected,
+ "a0", null, "a2", "a3", null, "a5", "a6", "a7", null, null, "a10", "a11", "a12", "a13", null);
+ assertVectorsEqual(expected, target);
+ }
+ }
+ }
+
+ @Test
+ public void testAppendEmptyLargeVariableWidthVector() {
+ try (LargeVarCharVector target = new LargeVarCharVector("", allocator);
+ LargeVarCharVector delta = new LargeVarCharVector("", allocator)) {
+
+ ValueVectorDataPopulator.setVector(target, "a0", null, "a2", "a3", null);
+
+ VectorAppender appender = new VectorAppender(target);
+ delta.accept(appender, null);
+
+ try (LargeVarCharVector expected = new LargeVarCharVector("expected", allocator)) {
+ ValueVectorDataPopulator.setVector(expected, "a0", null, "a2", "a3", null);
+ assertVectorsEqual(expected, target);
+ }
+ }
+ }
+
+ @Test
+ public void testAppendListVector() {
+ final int length1 = 5;
+ final int length2 = 2;
+ try (ListVector target = ListVector.empty("target", allocator);
+ ListVector delta = ListVector.empty("delta", allocator)) {
+
+ target.allocateNew();
+ ValueVectorDataPopulator.setVector(target,
+ Arrays.asList(0, 1),
+ Arrays.asList(2, 3),
+ null,
+ Arrays.asList(6, 7),
+ Arrays.asList(8, 9));
+ assertEquals(length1, target.getValueCount());
+
+ delta.allocateNew();
+ ValueVectorDataPopulator.setVector(delta,
+ Arrays.asList(10, 11, 12, 13, 14),
+ Arrays.asList(15, 16, 17, 18, 19));
+ assertEquals(length2, delta.getValueCount());
+
+ VectorAppender appender = new VectorAppender(target);
+ delta.accept(appender, null);
+
+ assertEquals(7, target.getValueCount());
+
+ List<Integer> expected = Arrays.asList(0, 1);
+ assertEquals(expected, target.getObject(0));
+
+ expected = Arrays.asList(2, 3);
+ assertEquals(expected, target.getObject(1));
+
+ assertTrue(target.isNull(2));
+
+ expected = Arrays.asList(6, 7);
+ assertEquals(expected, target.getObject(3));
+
+ expected = Arrays.asList(8, 9);
+ assertEquals(expected, target.getObject(4));
+
+ expected = Arrays.asList(10, 11, 12, 13, 14);
+ assertEquals(expected, target.getObject(5));
+
+ expected = Arrays.asList(15, 16, 17, 18, 19);
+ assertEquals(expected, target.getObject(6));
+ }
+ }
+
+ @Test
+ public void testAppendEmptyListVector() {
+ try (ListVector target = ListVector.empty("target", allocator);
+ ListVector delta = ListVector.empty("delta", allocator)) {
+ // populate target with data
+ ValueVectorDataPopulator.setVector(target,
+ Arrays.asList(0, 1),
+ Arrays.asList(2, 3),
+ null,
+ Arrays.asList(6, 7));
+ assertEquals(4, target.getValueCount());
+
+ // leave delta vector empty and unallocated
+ delta.addOrGetVector(FieldType.nullable(Types.MinorType.INT.getType()));
+
+ VectorAppender appender = new VectorAppender(target);
+ delta.accept(appender, null);
+
+ // verify delta vector has original data
+ assertEquals(4, target.getValueCount());
+
+ List<Integer> expected = Arrays.asList(0, 1);
+ assertEquals(expected, target.getObject(0));
+
+ expected = Arrays.asList(2, 3);
+ assertEquals(expected, target.getObject(1));
+
+ assertTrue(target.isNull(2));
+
+ expected = Arrays.asList(6, 7);
+ assertEquals(expected, target.getObject(3));
+ }
+ }
+
+ @Test
+ public void testAppendFixedSizeListVector() {
+ try (FixedSizeListVector target = FixedSizeListVector.empty("target", 5, allocator);
+ FixedSizeListVector delta = FixedSizeListVector.empty("delta", 5, allocator)) {
+
+ target.allocateNew();
+ ValueVectorDataPopulator.setVector(target,
+ Arrays.asList(0, 1, 2, 3, 4),
+ null);
+ assertEquals(2, target.getValueCount());
+
+ delta.allocateNew();
+ ValueVectorDataPopulator.setVector(delta,
+ Arrays.asList(10, 11, 12, 13, 14),
+ Arrays.asList(15, 16, 17, 18, 19));
+ assertEquals(2, delta.getValueCount());
+
+ VectorAppender appender = new VectorAppender(target);
+ delta.accept(appender, null);
+
+ assertEquals(4, target.getValueCount());
+
+ assertEquals(Arrays.asList(0, 1, 2, 3, 4), target.getObject(0));
+ assertTrue(target.isNull(1));
+ assertEquals(Arrays.asList(10, 11, 12, 13, 14), target.getObject(2));
+ assertEquals(Arrays.asList(15, 16, 17, 18, 19), target.getObject(3));
+ }
+ }
+
+ @Test
+ public void testAppendEmptyFixedSizeListVector() {
+ try (FixedSizeListVector target = FixedSizeListVector.empty("target", 5, allocator);
+ FixedSizeListVector delta = FixedSizeListVector.empty("delta", 5, allocator)) {
+
+ ValueVectorDataPopulator.setVector(target,
+ Arrays.asList(0, 1, 2, 3, 4),
+ null);
+ assertEquals(2, target.getValueCount());
+
+ // leave delta vector empty and unallocated
+ delta.addOrGetVector(FieldType.nullable(Types.MinorType.INT.getType()));
+
+ VectorAppender appender = new VectorAppender(target);
+ delta.accept(appender, null);
+
+ assertEquals(2, target.getValueCount());
+
+ assertEquals(Arrays.asList(0, 1, 2, 3, 4), target.getObject(0));
+ assertTrue(target.isNull(1));
+ }
+ }
+
+ @Test
+ public void testAppendEmptyLargeListVector() {
+ try (LargeListVector target = LargeListVector.empty("target", allocator);
+ LargeListVector delta = LargeListVector.empty("delta", allocator)) {
+
+ ValueVectorDataPopulator.setVector(target,
+ Arrays.asList(0, 1, 2, 3, 4),
+ null);
+ assertEquals(2, target.getValueCount());
+
+ // leave delta vector empty and unallocated
+ delta.addOrGetVector(FieldType.nullable(Types.MinorType.INT.getType()));
+
+ VectorAppender appender = new VectorAppender(target);
+ delta.accept(appender, null);
+
+ assertEquals(2, target.getValueCount());
+
+ assertEquals(Arrays.asList(0, 1, 2, 3, 4), target.getObject(0));
+ assertTrue(target.isNull(1));
+ }
+ }
+
+ @Test
+ public void testAppendStructVector() {
+ final int length1 = 10;
+ final int length2 = 5;
+ try (final StructVector target = StructVector.empty("target", allocator);
+ final StructVector delta = StructVector.empty("delta", allocator)) {
+
+ IntVector targetChild1 = target.addOrGet("f0", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class);
+ VarCharVector targetChild2 = target.addOrGet("f1", FieldType.nullable(new ArrowType.Utf8()), VarCharVector.class);
+ targetChild1.allocateNew();
+ targetChild2.allocateNew();
+ ValueVectorDataPopulator.setVector(targetChild1, 0, 1, 2, 3, 4, null, 6, 7, 8, 9);
+ ValueVectorDataPopulator.setVector(targetChild2, "a0", "a1", "a2", "a3", "a4", "a5", "a6", null, "a8", "a9");
+ target.setValueCount(length1);
+
+ IntVector deltaChild1 = delta.addOrGet("f0", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class);
+ VarCharVector deltaChild2 = delta.addOrGet("f1", FieldType.nullable(new ArrowType.Utf8()), VarCharVector.class);
+ deltaChild1.allocateNew();
+ deltaChild2.allocateNew();
+ ValueVectorDataPopulator.setVector(deltaChild1, 10, 11, 12, null, 14);
+ ValueVectorDataPopulator.setVector(deltaChild2, "a10", "a11", "a12", "a13", "a14");
+ delta.setValueCount(length2);
+
+ VectorAppender appender = new VectorAppender(target);
+ delta.accept(appender, null);
+
+ assertEquals(length1 + length2, target.getValueCount());
+ IntVector child1 = (IntVector) target.getVectorById(0);
+ VarCharVector child2 = (VarCharVector) target.getVectorById(1);
+
+ try (IntVector expected1 = new IntVector("expected1", allocator);
+ VarCharVector expected2 = new VarCharVector("expected2", allocator)) {
+ expected1.allocateNew();
+ expected2.allocateNew();
+
+ ValueVectorDataPopulator.setVector(expected1, 0, 1, 2, 3, 4, null, 6, 7, 8, 9, 10, 11, 12, null, 14);
+ ValueVectorDataPopulator.setVector(expected2,
+ "a0", "a1", "a2", "a3", "a4", "a5", "a6", null, "a8", "a9", "a10", "a11", "a12", "a13", "a14");
+
+ assertVectorsEqual(expected1, target.getChild("f0"));
+ assertVectorsEqual(expected2, target.getChild("f1"));
+ }
+ }
+ }
+
+ @Test
+ public void testAppendEmptyStructVector() {
+ try (final StructVector target = StructVector.empty("target", allocator);
+ final StructVector delta = StructVector.empty("delta", allocator)) {
+
+ IntVector targetChild1 = target.addOrGet("f0", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class);
+ VarCharVector targetChild2 = target.addOrGet("f1", FieldType.nullable(new ArrowType.Utf8()), VarCharVector.class);
+ ValueVectorDataPopulator.setVector(targetChild1, 0, 1, 2, 3, 4, null, 6, 7, 8, 9);
+ ValueVectorDataPopulator.setVector(targetChild2, "a0", "a1", "a2", "a3", "a4", "a5", "a6", null, "a8", "a9");
+ target.setValueCount(10);
+
+ // leave delta vector fields empty and unallocated
+ delta.addOrGet("f0", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class);
+ delta.addOrGet("f1", FieldType.nullable(new ArrowType.Utf8()), VarCharVector.class);
+
+ VectorAppender appender = new VectorAppender(target);
+ delta.accept(appender, null);
+
+ assertEquals(10, target.getValueCount());
+
+ try (IntVector expected1 = new IntVector("expected1", allocator);
+ VarCharVector expected2 = new VarCharVector("expected2", allocator)) {
+ ValueVectorDataPopulator.setVector(expected1, 0, 1, 2, 3, 4, null, 6, 7, 8, 9);
+ ValueVectorDataPopulator.setVector(expected2,
+ "a0", "a1", "a2", "a3", "a4", "a5", "a6", null, "a8", "a9");
+
+ assertVectorsEqual(expected1, target.getChild("f0"));
+ assertVectorsEqual(expected2, target.getChild("f1"));
+ }
+ }
+ }
+
+ @Test
+ public void testAppendUnionVector() {
+ final int length1 = 10;
+ final int length2 = 5;
+
+ try (final UnionVector target = UnionVector.empty("target", allocator);
+ final UnionVector delta = UnionVector.empty("delta", allocator)) {
+
+ // alternating ints and big ints
+ target.setType(0, Types.MinorType.INT);
+ target.setType(1, Types.MinorType.BIGINT);
+ target.setType(2, Types.MinorType.INT);
+ target.setType(3, Types.MinorType.BIGINT);
+ target.setType(4, Types.MinorType.INT);
+ target.setType(5, Types.MinorType.BIGINT);
+ target.setType(6, Types.MinorType.INT);
+ target.setType(7, Types.MinorType.BIGINT);
+ target.setType(8, Types.MinorType.INT);
+ target.setType(9, Types.MinorType.BIGINT);
+ target.setType(10, Types.MinorType.INT);
+ target.setType(11, Types.MinorType.BIGINT);
+ target.setType(12, Types.MinorType.INT);
+ target.setType(13, Types.MinorType.BIGINT);
+ target.setType(14, Types.MinorType.INT);
+ target.setType(15, Types.MinorType.BIGINT);
+ target.setType(16, Types.MinorType.INT);
+ target.setType(17, Types.MinorType.BIGINT);
+ target.setType(18, Types.MinorType.INT);
+ target.setType(19, Types.MinorType.BIGINT);
+
+ IntVector targetIntVec = target.getIntVector();
+ targetIntVec.allocateNew();
+ ValueVectorDataPopulator.setVector(
+ targetIntVec,
+ 0, null, 1, null, 2, null, 3, null, 4, null, 5, null, 6, null, 7, null, 8, null, 9, null);
+ assertEquals(length1 * 2, targetIntVec.getValueCount());
+
+ BigIntVector targetBigIntVec = target.getBigIntVector();
+ targetBigIntVec.allocateNew();
+ ValueVectorDataPopulator.setVector(
+ targetBigIntVec,
+ null, 0L, null, 1L, null, 2L, null, 3L, null, 4L, null, 5L, null, 6L, null, 7L, null, 8L, null, 9L);
+ assertEquals(length1 * 2, targetBigIntVec.getValueCount());
+
+ target.setValueCount(length1 * 2);
+
+ // populate the delta vector
+ delta.setType(0, Types.MinorType.FLOAT4);
+ delta.setType(1, Types.MinorType.FLOAT4);
+ delta.setType(2, Types.MinorType.FLOAT4);
+ delta.setType(3, Types.MinorType.FLOAT4);
+ delta.setType(4, Types.MinorType.FLOAT4);
+
+ Float4Vector deltaFloatVector = delta.getFloat4Vector();
+ deltaFloatVector.allocateNew();
+ ValueVectorDataPopulator.setVector(deltaFloatVector, 10f, 11f, 12f, 13f, 14f);
+ assertEquals(length2, deltaFloatVector.getValueCount());
+ delta.setValueCount(length2);
+
+ VectorAppender appender = new VectorAppender(target);
+ delta.accept(appender, null);
+
+ assertEquals(length1 * 2 + length2, target.getValueCount());
+
+ for (int i = 0; i < length1; i++) {
+ Object intObj = target.getObject(i * 2);
+ assertTrue(intObj instanceof Integer);
+ assertEquals(i, ((Integer) intObj).intValue());
+
+ Object longObj = target.getObject(i * 2 + 1);
+ assertTrue(longObj instanceof Long);
+ assertEquals(i, ((Long) longObj).longValue());
+ }
+
+ for (int i = 0; i < length2; i++) {
+ Object floatObj = target.getObject(length1 * 2 + i);
+ assertTrue(floatObj instanceof Float);
+ assertEquals(i + length1, ((Float) floatObj).intValue());
+ }
+ }
+ }
+
+ @Test
+ public void testAppendEmptyUnionVector() {
+ final int length1 = 10;
+
+ try (final UnionVector target = UnionVector.empty("target", allocator);
+ final UnionVector delta = UnionVector.empty("delta", allocator)) {
+
+ // alternating ints and big ints
+ target.setType(0, Types.MinorType.INT);
+ target.setType(1, Types.MinorType.BIGINT);
+ target.setType(2, Types.MinorType.INT);
+ target.setType(3, Types.MinorType.BIGINT);
+ target.setType(4, Types.MinorType.INT);
+ target.setType(5, Types.MinorType.BIGINT);
+ target.setType(6, Types.MinorType.INT);
+ target.setType(7, Types.MinorType.BIGINT);
+ target.setType(8, Types.MinorType.INT);
+ target.setType(9, Types.MinorType.BIGINT);
+ target.setType(10, Types.MinorType.INT);
+ target.setType(11, Types.MinorType.BIGINT);
+ target.setType(12, Types.MinorType.INT);
+ target.setType(13, Types.MinorType.BIGINT);
+ target.setType(14, Types.MinorType.INT);
+ target.setType(15, Types.MinorType.BIGINT);
+ target.setType(16, Types.MinorType.INT);
+ target.setType(17, Types.MinorType.BIGINT);
+ target.setType(18, Types.MinorType.INT);
+ target.setType(19, Types.MinorType.BIGINT);
+
+ IntVector targetIntVec = target.getIntVector();
+ ValueVectorDataPopulator.setVector(
+ targetIntVec,
+ 0, null, 1, null, 2, null, 3, null, 4, null, 5, null, 6, null, 7, null, 8, null, 9, null);
+ assertEquals(length1 * 2, targetIntVec.getValueCount());
+
+ BigIntVector targetBigIntVec = target.getBigIntVector();
+ ValueVectorDataPopulator.setVector(
+ targetBigIntVec,
+ null, 0L, null, 1L, null, 2L, null, 3L, null, 4L, null, 5L, null, 6L, null, 7L, null, 8L, null, 9L);
+ assertEquals(length1 * 2, targetBigIntVec.getValueCount());
+
+ target.setValueCount(length1 * 2);
+
+ // initialize the delta vector but leave it empty and unallocated
+ delta.setType(0, Types.MinorType.FLOAT4);
+ delta.setType(1, Types.MinorType.FLOAT4);
+ delta.setType(2, Types.MinorType.FLOAT4);
+ delta.setType(3, Types.MinorType.FLOAT4);
+ delta.setType(4, Types.MinorType.FLOAT4);
+
+ VectorAppender appender = new VectorAppender(target);
+ delta.accept(appender, null);
+
+ assertEquals(length1 * 2, target.getValueCount());
+
+ for (int i = 0; i < length1; i++) {
+ Object intObj = target.getObject(i * 2);
+ assertTrue(intObj instanceof Integer);
+ assertEquals(i, ((Integer) intObj).intValue());
+
+ Object longObj = target.getObject(i * 2 + 1);
+ assertTrue(longObj instanceof Long);
+ assertEquals(i, ((Long) longObj).longValue());
+ }
+ }
+ }
+
+ private DenseUnionVector getTargetVector() {
+ // create a vector, and populate it with values {1, 2, null, 10L}
+
+ final NullableIntHolder intHolder = new NullableIntHolder();
+ intHolder.isSet = 1;
+ final NullableBigIntHolder longHolder = new NullableBigIntHolder();
+ longHolder.isSet = 1;
+ final NullableFloat4Holder floatHolder = new NullableFloat4Holder();
+ floatHolder.isSet = 1;
+ DenseUnionVector targetVector = new DenseUnionVector("target vector", allocator, null, null);
+
+ targetVector.allocateNew();
+
+ while (targetVector.getValueCapacity() < 4) {
+ targetVector.reAlloc();
+ }
+
+ byte intTypeId = targetVector.registerNewTypeId(Field.nullable("", Types.MinorType.INT.getType()));
+ targetVector.setTypeId(0, intTypeId);
+ intHolder.value = 1;
+ targetVector.setSafe(0, intHolder);
+ targetVector.setTypeId(1, intTypeId);
+ intHolder.value = 2;
+ targetVector.setSafe(1, intHolder);
+ byte longTypeId = targetVector.registerNewTypeId(Field.nullable("", Types.MinorType.BIGINT.getType()));
+ targetVector.setTypeId(3, longTypeId);
+ longHolder.value = 10L;
+ targetVector.setSafe(3, longHolder);
+ targetVector.setValueCount(4);
+
+ assertVectorValuesEqual(targetVector, new Object[]{1, 2, null, 10L});
+ return targetVector;
+ }
+
+ private DenseUnionVector getDeltaVector() {
+ // create a vector, and populate it with values {7, null, 8L, 9.0f}
+
+ final NullableIntHolder intHolder = new NullableIntHolder();
+ intHolder.isSet = 1;
+ final NullableBigIntHolder longHolder = new NullableBigIntHolder();
+ longHolder.isSet = 1;
+ final NullableFloat4Holder floatHolder = new NullableFloat4Holder();
+ floatHolder.isSet = 1;
+
+ DenseUnionVector deltaVector = new DenseUnionVector("target vector", allocator, null, null);
+
+ while (deltaVector.getValueCapacity() < 4) {
+ deltaVector.reAlloc();
+ }
+ byte intTypeId = deltaVector.registerNewTypeId(Field.nullable("", Types.MinorType.INT.getType()));
+ deltaVector.setTypeId(0, intTypeId);
+ intHolder.value = 7;
+ deltaVector.setSafe(0, intHolder);
+ byte longTypeId = deltaVector.registerNewTypeId(Field.nullable("", Types.MinorType.BIGINT.getType()));
+ deltaVector.setTypeId(2, longTypeId);
+ longHolder.value = 8L;
+ deltaVector.setSafe(2, longHolder);
+ byte floatTypeId = deltaVector.registerNewTypeId(Field.nullable("", Types.MinorType.FLOAT4.getType()));
+ deltaVector.setTypeId(3, floatTypeId);
+ floatHolder.value = 9.0f;
+ deltaVector.setSafe(3, floatHolder);
+
+ deltaVector.setValueCount(4);
+
+ assertVectorValuesEqual(deltaVector, new Object[]{7, null, 8L, 9.0f});
+ return deltaVector;
+ }
+
+ @Test
+ public void testAppendDenseUnionVector() {
+ try (DenseUnionVector targetVector = getTargetVector();
+ DenseUnionVector deltaVector = getDeltaVector()) {
+
+ // append
+ VectorAppender appender = new VectorAppender(targetVector);
+ deltaVector.accept(appender, null);
+ assertVectorValuesEqual(targetVector, new Object[] {1, 2, null, 10L, 7, null, 8L, 9.0f});
+ }
+
+ // test reverse append
+ try (DenseUnionVector targetVector = getTargetVector();
+ DenseUnionVector deltaVector = getDeltaVector()) {
+
+ // append
+ VectorAppender appender = new VectorAppender(deltaVector);
+ targetVector.accept(appender, null);
+ assertVectorValuesEqual(deltaVector, new Object[] {7, null, 8L, 9.0f, 1, 2, null, 10L});
+ }
+ }
+
+ private DenseUnionVector getEmptyDeltaVector() {
+ // create a vector, but leave it empty and uninitialized
+ DenseUnionVector deltaVector = new DenseUnionVector("target vector", allocator, null, null);
+
+ byte intTypeId = deltaVector.registerNewTypeId(Field.nullable("", Types.MinorType.INT.getType()));
+ deltaVector.setTypeId(0, intTypeId);
+
+ byte longTypeId = deltaVector.registerNewTypeId(Field.nullable("", Types.MinorType.BIGINT.getType()));
+ deltaVector.setTypeId(2, longTypeId);
+
+ byte floatTypeId = deltaVector.registerNewTypeId(Field.nullable("", Types.MinorType.FLOAT4.getType()));
+ deltaVector.setTypeId(3, floatTypeId);
+
+ return deltaVector;
+ }
+
+ @Test
+ public void testAppendEmptyDenseUnionVector() {
+ try (DenseUnionVector targetVector = getTargetVector();
+ DenseUnionVector deltaVector = getEmptyDeltaVector()) {
+
+ // append
+ VectorAppender appender = new VectorAppender(targetVector);
+ deltaVector.accept(appender, null);
+ assertVectorValuesEqual(targetVector, new Object[] {1, 2, null, 10L});
+ }
+ }
+
+ /**
+ * Test appending dense union vectors where the child vectors do not match.
+ */
+ @Test
+ public void testAppendDenseUnionVectorMismatch() {
+ final NullableIntHolder intHolder = new NullableIntHolder();
+ intHolder.isSet = 1;
+
+ final NullableBigIntHolder longHolder = new NullableBigIntHolder();
+ longHolder.isSet = 1;
+
+ final NullableFloat4Holder floatHolder = new NullableFloat4Holder();
+ floatHolder.isSet = 1;
+
+ try (DenseUnionVector targetVector = new DenseUnionVector("target vector" , allocator, null, null);
+ DenseUnionVector deltaVector = new DenseUnionVector("target vector" , allocator, null, null)) {
+ targetVector.allocateNew();
+ deltaVector.allocateNew();
+
+ // populate the target vector with values {1, 2L}
+ while (targetVector.getValueCapacity() < 2) {
+ targetVector.reAlloc();
+ }
+ byte intTypeId = targetVector.registerNewTypeId(Field.nullable("", Types.MinorType.INT.getType()));
+ targetVector.setTypeId(0, intTypeId);
+ intHolder.value = 1;
+ targetVector.setSafe(0, intHolder);
+ byte longTypeId = targetVector.registerNewTypeId(Field.nullable("", Types.MinorType.BIGINT.getType()));
+ targetVector.setTypeId(1, longTypeId);
+ longHolder.value = 2L;
+ targetVector.setSafe(1, longHolder);
+ targetVector.setValueCount(2);
+
+ assertVectorValuesEqual(targetVector, new Object[] {1, 2L});
+
+ // populate the delta vector with values {3, 5.0f}
+ while (deltaVector.getValueCapacity() < 2) {
+ deltaVector.reAlloc();
+ }
+ intTypeId = deltaVector.registerNewTypeId(Field.nullable("", Types.MinorType.INT.getType()));
+ deltaVector.setTypeId(0, intTypeId);
+ intHolder.value = 3;
+ deltaVector.setSafe(0, intHolder);
+ byte floatTypeId = deltaVector.registerNewTypeId(Field.nullable("", Types.MinorType.FLOAT4.getType()));
+ deltaVector.setTypeId(1, floatTypeId);
+ floatHolder.value = 5.0f;
+ deltaVector.setSafe(1, floatHolder);
+ deltaVector.setValueCount(2);
+
+ assertVectorValuesEqual(deltaVector, new Object[] {3, 5.0f});
+
+ // append
+ VectorAppender appender = new VectorAppender(targetVector);
+ assertThrows(IllegalArgumentException.class,
+ () -> deltaVector.accept(appender, null));
+ }
+ }
+
+ @Test
+ public void testAppendVectorNegative() {
+ final int vectorLength = 10;
+ try (IntVector target = new IntVector("", allocator);
+ VarCharVector delta = new VarCharVector("", allocator)) {
+
+ target.allocateNew(vectorLength);
+ delta.allocateNew(vectorLength);
+
+ VectorAppender appender = new VectorAppender(target);
+
+ assertThrows(IllegalArgumentException.class,
+ () -> delta.accept(appender, null));
+ }
+ }
+
+ private void assertVectorValuesEqual(ValueVector vector, Object[] values) {
+ assertEquals(vector.getValueCount(), values.length);
+ for (int i = 0; i < values.length; i++) {
+ assertEquals(vector.getObject(i), values[i]);
+ }
+ }
+
+ public static void assertVectorsEqual(ValueVector vector1, ValueVector vector2) {
+ assertEquals(vector1.getValueCount(), vector2.getValueCount());
+
+ TypeEqualsVisitor typeEqualsVisitor = new TypeEqualsVisitor(vector1, false, false);
+ RangeEqualsVisitor equalsVisitor =
+ new RangeEqualsVisitor(vector1, vector2, (v1, v2) -> typeEqualsVisitor.equals(vector2));
+ assertTrue(equalsVisitor.rangeEquals(new Range(0, 0, vector1.getValueCount())));
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/util/TestVectorBatchAppender.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/util/TestVectorBatchAppender.java
new file mode 100644
index 000000000..799c25c0a
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/util/TestVectorBatchAppender.java
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.util;
+
+import static junit.framework.TestCase.assertEquals;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.testing.ValueVectorDataPopulator;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * Test cases for {@link VectorBatchAppender}.
+ */
+public class TestVectorBatchAppender {
+
+ private BufferAllocator allocator;
+
+ @Before
+ public void prepare() {
+ allocator = new RootAllocator(1024 * 1024);
+ }
+
+ @After
+ public void shutdown() {
+ allocator.close();
+ }
+
+ @Test
+ public void testBatchAppendIntVector() {
+ final int length1 = 10;
+ final int length2 = 5;
+ final int length3 = 7;
+ try (IntVector target = new IntVector("", allocator);
+ IntVector delta1 = new IntVector("", allocator);
+ IntVector delta2 = new IntVector("", allocator)) {
+
+ target.allocateNew(length1);
+ delta1.allocateNew(length2);
+ delta2.allocateNew(length3);
+
+ ValueVectorDataPopulator.setVector(target, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9);
+ ValueVectorDataPopulator.setVector(delta1, 10, 11, 12, 13, 14);
+ ValueVectorDataPopulator.setVector(delta2, 15, 16, 17, 18, 19, 20, 21);
+
+ VectorBatchAppender.batchAppend(target, delta1, delta2);
+
+ assertEquals(length1 + length2 + length3, target.getValueCount());
+ for (int i = 0; i < target.getValueCount(); i++) {
+ assertEquals(i, target.get(i));
+ }
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/util/TestVectorSchemaRootAppender.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/util/TestVectorSchemaRootAppender.java
new file mode 100644
index 000000000..ab0ee3a20
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/util/TestVectorSchemaRootAppender.java
@@ -0,0 +1,161 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.util;
+
+import static junit.framework.TestCase.assertEquals;
+import static org.apache.arrow.vector.util.TestVectorAppender.assertVectorsEqual;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.BigIntVector;
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.VarCharVector;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.testing.ValueVectorDataPopulator;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * Test cases for {@link VectorSchemaRootAppender}.
+ */
+public class TestVectorSchemaRootAppender {
+
+ private BufferAllocator allocator;
+
+ @Before
+ public void prepare() {
+ allocator = new RootAllocator(1024 * 1024);
+ }
+
+ @After
+ public void shutdown() {
+ allocator.close();
+ }
+
+ @Test
+ public void testVectorScehmaRootAppend() {
+ final int length1 = 5;
+ final int length2 = 3;
+ final int length3 = 2;
+
+ try (IntVector targetChild1 = new IntVector("t1", allocator);
+ VarCharVector targetChild2 = new VarCharVector("t2", allocator);
+ BigIntVector targetChild3 = new BigIntVector("t3", allocator);
+
+ IntVector deltaChildOne1 = new IntVector("do1", allocator);
+ VarCharVector deltaChildOne2 = new VarCharVector("do2", allocator);
+ BigIntVector deltaChildOne3 = new BigIntVector("do3", allocator);
+
+ IntVector deltaChildTwo1 = new IntVector("dt1", allocator);
+ VarCharVector deltaChildTwo2 = new VarCharVector("dt2", allocator);
+ BigIntVector deltaChildTwo3 = new BigIntVector("dt3", allocator)) {
+
+ ValueVectorDataPopulator.setVector(targetChild1, 0, 1, null, 3, 4);
+ ValueVectorDataPopulator.setVector(targetChild2, "zero", "one", null, "three", "four");
+ ValueVectorDataPopulator.setVector(targetChild3, 0L, 10L, null, 30L, 40L);
+ VectorSchemaRoot root1 = VectorSchemaRoot.of(targetChild1, targetChild2, targetChild3);
+ root1.setRowCount(length1);
+
+ ValueVectorDataPopulator.setVector(deltaChildOne1, 5, 6, 7);
+ ValueVectorDataPopulator.setVector(deltaChildOne2, "five", "six", "seven");
+ ValueVectorDataPopulator.setVector(deltaChildOne3, 50L, 60L, 70L);
+ VectorSchemaRoot root2 = VectorSchemaRoot.of(deltaChildOne1, deltaChildOne2, deltaChildOne3);
+ root2.setRowCount(length2);
+
+ ValueVectorDataPopulator.setVector(deltaChildTwo1, null, 9);
+ ValueVectorDataPopulator.setVector(deltaChildTwo2, null, "nine");
+ ValueVectorDataPopulator.setVector(deltaChildTwo3, null, 90L);
+ VectorSchemaRoot root3 = VectorSchemaRoot.of(deltaChildTwo1, deltaChildTwo2, deltaChildTwo3);
+ root3.setRowCount(length3);
+
+ VectorSchemaRootAppender.append(root1, root2, root3);
+ assertEquals(length1 + length2 + length3, root1.getRowCount());
+ assertEquals(3, root1.getFieldVectors().size());
+
+ try (IntVector expected1 = new IntVector("", allocator);
+ VarCharVector expected2 = new VarCharVector("", allocator);
+ BigIntVector expected3 = new BigIntVector("", allocator)) {
+
+ ValueVectorDataPopulator.setVector(expected1, 0, 1, null, 3, 4, 5, 6, 7, null, 9);
+ ValueVectorDataPopulator.setVector(
+ expected2, "zero", "one", null, "three", "four", "five", "six", "seven", null, "nine");
+ ValueVectorDataPopulator.setVector(expected3, 0L, 10L, null, 30L, 40L, 50L, 60L, 70L, null, 90L);
+
+ assertVectorsEqual(expected1, root1.getVector(0));
+ assertVectorsEqual(expected2, root1.getVector(1));
+ assertVectorsEqual(expected3, root1.getVector(2));
+ }
+ }
+ }
+
+ @Test
+ public void testRootWithDifferentChildCounts() {
+ try (IntVector targetChild1 = new IntVector("t1", allocator);
+ VarCharVector targetChild2 = new VarCharVector("t2", allocator);
+ BigIntVector targetChild3 = new BigIntVector("t3", allocator);
+
+ IntVector deltaChild1 = new IntVector("d1", allocator);
+ VarCharVector deltaChild2 = new VarCharVector("d2", allocator)) {
+
+ ValueVectorDataPopulator.setVector(targetChild1, 0, 1, null, 3, 4);
+ ValueVectorDataPopulator.setVector(targetChild2, "zero", "one", null, "three", "four");
+ ValueVectorDataPopulator.setVector(targetChild3, 0L, 10L, null, 30L, 40L);
+ VectorSchemaRoot root1 = VectorSchemaRoot.of(targetChild1, targetChild2, targetChild3);
+ root1.setRowCount(5);
+
+ ValueVectorDataPopulator.setVector(deltaChild1, 5, 6, 7);
+ ValueVectorDataPopulator.setVector(deltaChild2, "five", "six", "seven");
+ VectorSchemaRoot root2 = VectorSchemaRoot.of(deltaChild1, deltaChild2);
+ root2.setRowCount(3);
+
+ IllegalArgumentException exp = assertThrows(IllegalArgumentException.class,
+ () -> VectorSchemaRootAppender.append(root1, root2));
+
+ assertEquals("Vector schema roots have different numbers of child vectors.", exp.getMessage());
+ }
+ }
+
+ @Test
+ public void testRootWithDifferentChildTypes() {
+ try (IntVector targetChild1 = new IntVector("t1", allocator);
+ VarCharVector targetChild2 = new VarCharVector("t2", allocator);
+
+ IntVector deltaChild1 = new IntVector("d1", allocator);
+ VarCharVector deltaChild2 = new VarCharVector("d2", allocator)) {
+
+ ValueVectorDataPopulator.setVector(targetChild1, 0, 1, null, 3, 4);
+ ValueVectorDataPopulator.setVector(targetChild2, "zero", "one", null, "three", "four");
+ VectorSchemaRoot root1 = VectorSchemaRoot.of(targetChild1, targetChild2);
+ root1.setRowCount(5);
+
+ ValueVectorDataPopulator.setVector(deltaChild1, 5, 6, 7);
+ ValueVectorDataPopulator.setVector(deltaChild2, "five", "six", "seven");
+
+ // note that the child vectors are in reverse order
+ VectorSchemaRoot root2 = VectorSchemaRoot.of(deltaChild2, deltaChild1);
+ root2.setRowCount(3);
+
+ IllegalArgumentException exp = assertThrows(IllegalArgumentException.class,
+ () -> VectorSchemaRootAppender.append(root1, root2));
+
+ assertEquals("Vector schema roots have different schemas.", exp.getMessage());
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/validate/TestValidateVector.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/validate/TestValidateVector.java
new file mode 100644
index 000000000..2354b281e
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/validate/TestValidateVector.java
@@ -0,0 +1,260 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.validate;
+
+import static org.apache.arrow.vector.testing.ValueVectorDataPopulator.setVector;
+import static org.apache.arrow.vector.util.ValueVectorUtility.validate;
+import static org.junit.Assert.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+
+import java.nio.charset.Charset;
+import java.util.Arrays;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.BigIntVector;
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.LargeVarCharVector;
+import org.apache.arrow.vector.VarCharVector;
+import org.apache.arrow.vector.complex.DenseUnionVector;
+import org.apache.arrow.vector.complex.FixedSizeListVector;
+import org.apache.arrow.vector.complex.LargeListVector;
+import org.apache.arrow.vector.complex.ListVector;
+import org.apache.arrow.vector.complex.StructVector;
+import org.apache.arrow.vector.complex.UnionVector;
+import org.apache.arrow.vector.complex.impl.NullableStructWriter;
+import org.apache.arrow.vector.holders.NullableFloat4Holder;
+import org.apache.arrow.vector.holders.NullableFloat8Holder;
+import org.apache.arrow.vector.types.Types;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestValidateVector {
+
+ private BufferAllocator allocator;
+
+ @Before
+ public void init() {
+ allocator = new RootAllocator(Long.MAX_VALUE);
+ }
+
+ private static final Charset utf8Charset = Charset.forName("UTF-8");
+ private static final byte[] STR1 = "AAAAA1".getBytes(utf8Charset);
+ private static final byte[] STR2 = "BBBBBBBBB2".getBytes(utf8Charset);
+ private static final byte[] STR3 = "CCCC3".getBytes(utf8Charset);
+
+ @After
+ public void terminate() throws Exception {
+ allocator.close();
+ }
+
+ @Test
+ public void testBaseFixedWidthVector() {
+ try (final IntVector vector = new IntVector("v", allocator)) {
+ validate(vector);
+ setVector(vector, 1, 2, 3);
+ validate(vector);
+
+ vector.getDataBuffer().capacity(0);
+ ValidateUtil.ValidateException e = assertThrows(ValidateUtil.ValidateException.class,
+ () -> validate(vector));
+ assertTrue(e.getMessage().contains("Not enough capacity for fixed width data buffer"));
+ }
+ }
+
+ @Test
+ public void testBaseVariableWidthVector() {
+ try (final VarCharVector vector = new VarCharVector("v", allocator)) {
+ validate(vector);
+ setVector(vector, STR1, STR2, STR3);
+ validate(vector);
+
+ vector.getDataBuffer().capacity(0);
+ ValidateUtil.ValidateException e = assertThrows(ValidateUtil.ValidateException.class,
+ () -> validate(vector));
+ assertTrue(e.getMessage().contains("Not enough capacity for data buffer"));
+ }
+ }
+
+ @Test
+ public void testBaseLargeVariableWidthVector() {
+ try (final LargeVarCharVector vector = new LargeVarCharVector("v", allocator)) {
+ validate(vector);
+ setVector(vector, STR1, STR2, null, STR3);
+ validate(vector);
+
+ vector.getDataBuffer().capacity(0);
+ ValidateUtil.ValidateException e = assertThrows(ValidateUtil.ValidateException.class,
+ () -> validate(vector));
+ assertTrue(e.getMessage().contains("Not enough capacity for data buffer"));
+ }
+ }
+
+ @Test
+ public void testListVector() {
+ try (final ListVector vector = ListVector.empty("v", allocator)) {
+ validate(vector);
+ setVector(vector, Arrays.asList(1, 2, 3), Arrays.asList(4, 5));
+ validate(vector);
+
+ vector.getDataVector().setValueCount(3);
+ ValidateUtil.ValidateException e = assertThrows(ValidateUtil.ValidateException.class,
+ () -> validate(vector));
+ assertTrue(e.getMessage().contains("Inner vector does not contain enough elements."));
+ }
+ }
+
+ @Test
+ public void testLargeListVector() {
+ try (final LargeListVector vector = LargeListVector.empty("v", allocator)) {
+ validate(vector);
+ setVector(vector, Arrays.asList(1, 2, 3, 4), Arrays.asList(5, 6));
+ validate(vector);
+
+ vector.getDataVector().setValueCount(4);
+ ValidateUtil.ValidateException e = assertThrows(ValidateUtil.ValidateException.class,
+ () -> validate(vector));
+ assertTrue(e.getMessage().contains("Inner vector does not contain enough elements."));
+ }
+ }
+
+ @Test
+ public void testFixedSizeListVector() {
+ try (final FixedSizeListVector vector = FixedSizeListVector.empty("v", 3, allocator)) {
+ validate(vector);
+ setVector(vector, Arrays.asList(1, 2, 3), Arrays.asList(4, 5, 6));
+ validate(vector);
+
+ vector.getDataVector().setValueCount(3);
+ ValidateUtil.ValidateException e = assertThrows(ValidateUtil.ValidateException.class,
+ () -> validate(vector));
+ assertTrue(e.getMessage().contains("Inner vector does not contain enough elements."));
+ }
+ }
+
+ @Test
+ public void testStructVectorRangeEquals() {
+ try (final StructVector vector = StructVector.empty("struct", allocator)) {
+ vector.addOrGet("f0", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class);
+ vector.addOrGet("f1", FieldType.nullable(new ArrowType.Int(64, true)), BigIntVector.class);
+
+ validate(vector);
+
+ NullableStructWriter writer = vector.getWriter();
+ writer.allocate();
+
+ writeStructVector(writer, 1, 10L);
+ writeStructVector(writer, 2, 20L);
+ writeStructVector(writer, 3, 30L);
+ writeStructVector(writer, 4, 40L);
+ writeStructVector(writer, 5, 50L);
+ writer.setValueCount(5);
+
+ vector.getChild("f0").setValueCount(2);
+ ValidateUtil.ValidateException e = assertThrows(ValidateUtil.ValidateException.class,
+ () -> validate(vector));
+ assertTrue(e.getMessage().contains("Struct vector length not equal to child vector length"));
+
+ vector.getChild("f0").setValueCount(5);
+ validate(vector);
+
+ vector.getChild("f0").getDataBuffer().capacity(0);
+ ValidateUtil.ValidateException e2 = assertThrows(ValidateUtil.ValidateException.class,
+ () -> validate(vector));
+ assertTrue(e2.getMessage().contains("Not enough capacity for fixed width data buffer"));
+ }
+ }
+
+ @Test
+ public void testUnionVector() {
+ try (final UnionVector vector = UnionVector.empty("union", allocator)) {
+ validate(vector);
+
+ final NullableFloat4Holder float4Holder = new NullableFloat4Holder();
+ float4Holder.value = 1.01f;
+ float4Holder.isSet = 1;
+
+ final NullableFloat8Holder float8Holder = new NullableFloat8Holder();
+ float8Holder.value = 2.02f;
+ float8Holder.isSet = 1;
+
+ vector.setType(0, Types.MinorType.FLOAT4);
+ vector.setSafe(0, float4Holder);
+ vector.setType(1, Types.MinorType.FLOAT8);
+ vector.setSafe(1, float8Holder);
+ vector.setValueCount(2);
+
+ validate(vector);
+
+ vector.getChildrenFromFields().get(0).setValueCount(1);
+ ValidateUtil.ValidateException e1 = assertThrows(ValidateUtil.ValidateException.class,
+ () -> validate(vector));
+ assertTrue(e1.getMessage().contains("Union vector length not equal to child vector length"));
+
+ vector.getChildrenFromFields().get(0).setValueCount(2);
+ validate(vector);
+
+ vector.getChildrenFromFields().get(0).getDataBuffer().capacity(0);
+ ValidateUtil.ValidateException e2 = assertThrows(ValidateUtil.ValidateException.class,
+ () -> validate(vector));
+ assertTrue(e2.getMessage().contains("Not enough capacity for fixed width data buffer"));
+ }
+ }
+
+ @Test
+ public void testDenseUnionVector() {
+ try (final DenseUnionVector vector = DenseUnionVector.empty("union", allocator)) {
+ validate(vector);
+
+ final NullableFloat4Holder float4Holder = new NullableFloat4Holder();
+ float4Holder.value = 1.01f;
+ float4Holder.isSet = 1;
+
+ final NullableFloat8Holder float8Holder = new NullableFloat8Holder();
+ float8Holder.value = 2.02f;
+ float8Holder.isSet = 1;
+
+ byte float4TypeId = vector.registerNewTypeId(Field.nullable("", Types.MinorType.FLOAT4.getType()));
+ byte float8TypeId = vector.registerNewTypeId(Field.nullable("", Types.MinorType.FLOAT8.getType()));
+
+ vector.setTypeId(0, float4TypeId);
+ vector.setSafe(0, float4Holder);
+ vector.setTypeId(1, float8TypeId);
+ vector.setSafe(1, float8Holder);
+ vector.setValueCount(2);
+
+ validate(vector);
+
+ vector.getChildrenFromFields().get(0).getDataBuffer().capacity(0);
+ ValidateUtil.ValidateException e = assertThrows(ValidateUtil.ValidateException.class,
+ () -> validate(vector));
+ assertTrue(e.getMessage().contains("Not enough capacity for fixed width data buffer"));
+ }
+ }
+
+ private void writeStructVector(NullableStructWriter writer, int value1, long value2) {
+ writer.start();
+ writer.integer("f0").writeInt(value1);
+ writer.bigInt("f1").writeBigInt(value2);
+ writer.end();
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/validate/TestValidateVectorFull.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/validate/TestValidateVectorFull.java
new file mode 100644
index 000000000..4241a0d9c
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/validate/TestValidateVectorFull.java
@@ -0,0 +1,234 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.validate;
+
+import static org.apache.arrow.vector.testing.ValueVectorDataPopulator.setVector;
+import static org.apache.arrow.vector.util.ValueVectorUtility.validateFull;
+import static org.junit.Assert.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+
+import java.util.Arrays;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.Float4Vector;
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.LargeVarCharVector;
+import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.VarCharVector;
+import org.apache.arrow.vector.complex.DenseUnionVector;
+import org.apache.arrow.vector.complex.LargeListVector;
+import org.apache.arrow.vector.complex.ListVector;
+import org.apache.arrow.vector.complex.StructVector;
+import org.apache.arrow.vector.complex.UnionVector;
+import org.apache.arrow.vector.holders.NullableFloat4Holder;
+import org.apache.arrow.vector.holders.NullableFloat8Holder;
+import org.apache.arrow.vector.testing.ValueVectorDataPopulator;
+import org.apache.arrow.vector.types.Types;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestValidateVectorFull {
+
+ private BufferAllocator allocator;
+
+ @Before
+ public void init() {
+ allocator = new RootAllocator(Long.MAX_VALUE);
+ }
+
+ @After
+ public void terminate() throws Exception {
+ allocator.close();
+ }
+
+ @Test
+ public void testBaseVariableWidthVector() {
+ try (final VarCharVector vector = new VarCharVector("v", allocator)) {
+ validateFull(vector);
+ setVector(vector, "aaa", "bbb", "ccc");
+ validateFull(vector);
+
+ ArrowBuf offsetBuf = vector.getOffsetBuffer();
+ offsetBuf.setInt(0, 100);
+ offsetBuf.setInt(4, 50);
+
+ ValidateUtil.ValidateException e = assertThrows(ValidateUtil.ValidateException.class,
+ () -> validateFull(vector));
+ assertTrue(e.getMessage().contains("The values in positions 0 and 1 of the offset buffer are decreasing"));
+ }
+ }
+
+ @Test
+ public void testBaseLargeVariableWidthVector() {
+ try (final LargeVarCharVector vector = new LargeVarCharVector("v", allocator)) {
+ validateFull(vector);
+ setVector(vector, "aaa", "bbb", null, "ccc");
+ validateFull(vector);
+
+ ArrowBuf offsetBuf = vector.getOffsetBuffer();
+ offsetBuf.setLong(0, 100);
+ offsetBuf.setLong(8, 50);
+
+ ValidateUtil.ValidateException e = assertThrows(ValidateUtil.ValidateException.class,
+ () -> validateFull(vector));
+ assertTrue(e.getMessage().contains("The values in positions 0 and 1 of the large offset buffer are decreasing"));
+ }
+ }
+
+ @Test
+ public void testListVector() {
+ try (final ListVector vector = ListVector.empty("v", allocator)) {
+ validateFull(vector);
+ setVector(vector, Arrays.asList(1, 2, 3), Arrays.asList(4, 5), Arrays.asList(6, 7, 8, 9));
+ validateFull(vector);
+
+ ArrowBuf offsetBuf = vector.getOffsetBuffer();
+ offsetBuf.setInt(0, 100);
+ offsetBuf.setInt(8, 50);
+
+ ValidateUtil.ValidateException e = assertThrows(ValidateUtil.ValidateException.class,
+ () -> validateFull(vector));
+ assertTrue(e.getMessage().contains("The values in positions 0 and 1 of the offset buffer are decreasing"));
+ }
+ }
+
+ @Test
+ public void testLargeListVector() {
+ try (final LargeListVector vector = LargeListVector.empty("v", allocator)) {
+ validateFull(vector);
+ setVector(vector, Arrays.asList(1, 2, 3), Arrays.asList(4, 5), Arrays.asList(6, 7, 8, 9));
+ validateFull(vector);
+
+ ArrowBuf offsetBuf = vector.getOffsetBuffer();
+ offsetBuf.setLong(0, 100);
+ offsetBuf.setLong(16, 50);
+
+ ValidateUtil.ValidateException e = assertThrows(ValidateUtil.ValidateException.class,
+ () -> validateFull(vector));
+ assertTrue(e.getMessage().contains("The values in positions 0 and 1 of the large offset buffer are decreasing"));
+ }
+ }
+
+ @Test
+ public void testStructVectorRangeEquals() {
+ try (final StructVector vector = StructVector.empty("struct", allocator)) {
+ IntVector intVector =
+ vector.addOrGet("f0", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class);
+ VarCharVector strVector =
+ vector.addOrGet("f1", FieldType.nullable(new ArrowType.Utf8()), VarCharVector.class);
+
+ validateFull(vector);
+ validateFull(intVector);
+ validateFull(strVector);
+
+ ValueVectorDataPopulator.setVector(intVector, 1, 2, 3, 4, 5);
+ ValueVectorDataPopulator.setVector(strVector, "a", "b", "c", "d", "e");
+ vector.setValueCount(5);
+
+ validateFull(vector);
+ validateFull(intVector);
+ validateFull(strVector);
+
+ ArrowBuf offsetBuf = strVector.getOffsetBuffer();
+ offsetBuf.setInt(0, 100);
+ offsetBuf.setInt(8, 50);
+
+ ValidateUtil.ValidateException e = assertThrows(ValidateUtil.ValidateException.class,
+ () -> validateFull(strVector));
+ assertTrue(e.getMessage().contains("The values in positions 0 and 1 of the offset buffer are decreasing"));
+
+ e = assertThrows(ValidateUtil.ValidateException.class,
+ () -> validateFull(vector));
+ assertTrue(e.getMessage().contains("The values in positions 0 and 1 of the offset buffer are decreasing"));
+ }
+ }
+
+ @Test
+ public void testUnionVector() {
+ try (final UnionVector vector = UnionVector.empty("union", allocator)) {
+ validateFull(vector);
+
+ final NullableFloat4Holder float4Holder = new NullableFloat4Holder();
+ float4Holder.value = 1.01f;
+ float4Holder.isSet = 1;
+
+ final NullableFloat8Holder float8Holder = new NullableFloat8Holder();
+ float8Holder.value = 2.02f;
+ float8Holder.isSet = 1;
+
+ vector.setType(0, Types.MinorType.FLOAT4);
+ vector.setSafe(0, float4Holder);
+ vector.setType(1, Types.MinorType.FLOAT8);
+ vector.setSafe(1, float8Holder);
+ vector.setValueCount(2);
+
+ validateFull(vector);
+
+ // negative type id
+ vector.getTypeBuffer().setByte(0, -1);
+
+ ValidateUtil.ValidateException e = assertThrows(ValidateUtil.ValidateException.class,
+ () -> validateFull(vector));
+ assertTrue(e.getMessage().contains("The type id at position 0 is negative"));
+ }
+ }
+
+ @Test
+ public void testDenseUnionVector() {
+ try (final DenseUnionVector vector = DenseUnionVector.empty("union", allocator)) {
+ validateFull(vector);
+
+ final NullableFloat4Holder float4Holder = new NullableFloat4Holder();
+ float4Holder.value = 1.01f;
+ float4Holder.isSet = 1;
+
+ final NullableFloat8Holder float8Holder = new NullableFloat8Holder();
+ float8Holder.value = 2.02f;
+ float8Holder.isSet = 1;
+
+ byte float4TypeId = vector.registerNewTypeId(Field.nullable("", Types.MinorType.FLOAT4.getType()));
+ byte float8TypeId = vector.registerNewTypeId(Field.nullable("", Types.MinorType.FLOAT8.getType()));
+
+ vector.setTypeId(0, float4TypeId);
+ vector.setSafe(0, float4Holder);
+ vector.setTypeId(1, float8TypeId);
+ vector.setSafe(1, float8Holder);
+ vector.setValueCount(2);
+
+ validateFull(vector);
+
+ ValueVector subVector = vector.getVectorByType(float4TypeId);
+ assertTrue(subVector instanceof Float4Vector);
+ assertEquals(1, subVector.getValueCount());
+
+ // shrink sub-vector
+ subVector.setValueCount(0);
+
+ ValidateUtil.ValidateException e = assertThrows(ValidateUtil.ValidateException.class,
+ () -> validateFull(vector));
+ assertTrue(e.getMessage().contains("Dense union vector offset exceeds sub-vector boundary"));
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/validate/TestValidateVectorSchemaRoot.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/validate/TestValidateVectorSchemaRoot.java
new file mode 100644
index 000000000..1885fb21f
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/validate/TestValidateVectorSchemaRoot.java
@@ -0,0 +1,101 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.validate;
+
+import static org.apache.arrow.vector.util.ValueVectorUtility.validate;
+import static org.apache.arrow.vector.util.ValueVectorUtility.validateFull;
+import static org.junit.Assert.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.VarCharVector;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.testing.ValueVectorDataPopulator;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestValidateVectorSchemaRoot {
+
+ private BufferAllocator allocator;
+
+ @Before
+ public void init() {
+ allocator = new RootAllocator(Long.MAX_VALUE);
+ }
+
+ @After
+ public void terminate() throws Exception {
+ allocator.close();
+ }
+
+ @Test
+ public void testValidatePositive() {
+ try (IntVector intVector = new IntVector("int vector", allocator);
+ VarCharVector strVector = new VarCharVector("var char vector", allocator)) {
+
+ VectorSchemaRoot root = VectorSchemaRoot.of(intVector, strVector);
+
+ validate(root);
+ validateFull(root);
+
+ ValueVectorDataPopulator.setVector(intVector, 1, 2, 3, 4, 5);
+ ValueVectorDataPopulator.setVector(strVector, "a", "b", "c", "d", "e");
+ root.setRowCount(5);
+
+ validate(root);
+ validateFull(root);
+ }
+ }
+
+ @Test
+ public void testValidateNegative() {
+ try (IntVector intVector = new IntVector("int vector", allocator);
+ VarCharVector strVector = new VarCharVector("var char vector", allocator)) {
+
+ VectorSchemaRoot root = VectorSchemaRoot.of(intVector, strVector);
+
+ ValueVectorDataPopulator.setVector(intVector, 1, 2, 3, 4, 5);
+ ValueVectorDataPopulator.setVector(strVector, "a", "b", "c", "d", "e");
+
+ // validate mismatching value counts
+ root.setRowCount(4);
+ intVector.setValueCount(5);
+ strVector.setValueCount(5);
+ ValidateUtil.ValidateException e = assertThrows(ValidateUtil.ValidateException.class,
+ () -> validate(root));
+ assertTrue(e.getMessage().contains("Child vector and vector schema root have different value counts"));
+ e = assertThrows(ValidateUtil.ValidateException.class,
+ () -> validateFull(root));
+ assertTrue(e.getMessage().contains("Child vector and vector schema root have different value counts"));
+
+ // valid problems with the child vector
+ root.setRowCount(5);
+ ArrowBuf offsetBuf = strVector.getOffsetBuffer();
+ offsetBuf.setInt(0, 100);
+ offsetBuf.setInt(8, 50);
+ validate(root);
+ e = assertThrows(ValidateUtil.ValidateException.class,
+ () -> validateFull(root));
+ assertTrue(e.getMessage().contains("The values in positions 0 and 1 of the offset buffer are decreasing"));
+ }
+ }
+}
diff --git a/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/validate/TestValidateVectorTypeVisitor.java b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/validate/TestValidateVectorTypeVisitor.java
new file mode 100644
index 000000000..7a0f12f7a
--- /dev/null
+++ b/src/arrow/java/vector/src/test/java/org/apache/arrow/vector/validate/TestValidateVectorTypeVisitor.java
@@ -0,0 +1,301 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector.validate;
+
+import static org.junit.jupiter.api.Assertions.assertThrows;
+
+import java.util.function.Supplier;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.BigIntVector;
+import org.apache.arrow.vector.BitVector;
+import org.apache.arrow.vector.DateDayVector;
+import org.apache.arrow.vector.DateMilliVector;
+import org.apache.arrow.vector.DecimalVector;
+import org.apache.arrow.vector.DurationVector;
+import org.apache.arrow.vector.FixedSizeBinaryVector;
+import org.apache.arrow.vector.Float4Vector;
+import org.apache.arrow.vector.Float8Vector;
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.IntervalDayVector;
+import org.apache.arrow.vector.IntervalYearVector;
+import org.apache.arrow.vector.LargeVarBinaryVector;
+import org.apache.arrow.vector.LargeVarCharVector;
+import org.apache.arrow.vector.NullVector;
+import org.apache.arrow.vector.SmallIntVector;
+import org.apache.arrow.vector.TimeMicroVector;
+import org.apache.arrow.vector.TimeMilliVector;
+import org.apache.arrow.vector.TimeNanoVector;
+import org.apache.arrow.vector.TimeSecVector;
+import org.apache.arrow.vector.TimeStampMicroTZVector;
+import org.apache.arrow.vector.TimeStampMicroVector;
+import org.apache.arrow.vector.TimeStampMilliTZVector;
+import org.apache.arrow.vector.TimeStampMilliVector;
+import org.apache.arrow.vector.TimeStampNanoTZVector;
+import org.apache.arrow.vector.TimeStampNanoVector;
+import org.apache.arrow.vector.TimeStampSecTZVector;
+import org.apache.arrow.vector.TimeStampSecVector;
+import org.apache.arrow.vector.TinyIntVector;
+import org.apache.arrow.vector.UInt1Vector;
+import org.apache.arrow.vector.UInt2Vector;
+import org.apache.arrow.vector.UInt4Vector;
+import org.apache.arrow.vector.UInt8Vector;
+import org.apache.arrow.vector.ValueVector;
+import org.apache.arrow.vector.VarBinaryVector;
+import org.apache.arrow.vector.VarCharVector;
+import org.apache.arrow.vector.complex.DenseUnionVector;
+import org.apache.arrow.vector.complex.FixedSizeListVector;
+import org.apache.arrow.vector.complex.LargeListVector;
+import org.apache.arrow.vector.complex.ListVector;
+import org.apache.arrow.vector.complex.StructVector;
+import org.apache.arrow.vector.complex.UnionVector;
+import org.apache.arrow.vector.types.TimeUnit;
+import org.apache.arrow.vector.types.Types;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * Test cases for {@link ValidateVectorTypeVisitor}.
+ */
+public class TestValidateVectorTypeVisitor {
+
+ private BufferAllocator allocator;
+
+ private ValidateVectorTypeVisitor visitor = new ValidateVectorTypeVisitor();
+
+ @Before
+ public void init() {
+ allocator = new RootAllocator(Long.MAX_VALUE);
+ }
+
+ @After
+ public void terminate() throws Exception {
+ allocator.close();
+ }
+
+ private void testPositiveCase(Supplier<ValueVector> vectorGenerator) {
+ try (ValueVector vector = vectorGenerator.get();) {
+ vector.accept(visitor, null);
+ }
+ }
+
+ private void testNegativeCase(Supplier<ValueVector> vectorGenerator) {
+ try (ValueVector vector = vectorGenerator.get()) {
+ assertThrows(ValidateUtil.ValidateException.class, () -> {
+ vector.accept(visitor, null);
+ });
+ }
+ }
+
+ @Test
+ public void testFixedWidthVectorsPositive() {
+ // integer vectors
+ testPositiveCase(() -> new TinyIntVector("vector", allocator));
+ testPositiveCase(() -> new SmallIntVector("vector", allocator));
+ testPositiveCase(() -> new IntVector("vector", allocator));
+ testPositiveCase(() -> new BigIntVector("vector", allocator));
+ testPositiveCase(() -> new UInt1Vector("vector", allocator));
+ testPositiveCase(() -> new UInt2Vector("vector", allocator));
+ testPositiveCase(() -> new UInt4Vector("vector", allocator));
+ testPositiveCase(() -> new UInt8Vector("vector", allocator));
+
+ testPositiveCase(() -> new BitVector("vector", allocator));
+ testPositiveCase(() -> new DecimalVector("vector", allocator, 30, 16));
+
+ // date vectors
+ testPositiveCase(() -> new DateDayVector("vector", allocator));
+ testPositiveCase(() -> new DateMilliVector("vector", allocator));
+
+ testPositiveCase(() -> new DurationVector(
+ "vector", FieldType.nullable(new ArrowType.Duration(TimeUnit.SECOND)), allocator));
+
+ // float vectors
+ testPositiveCase(() -> new Float4Vector("vector", allocator));
+ testPositiveCase(() -> new Float8Vector("vector", allocator));
+
+ // interval vectors
+ testPositiveCase(() -> new IntervalDayVector("vector", allocator));
+ testPositiveCase(() -> new IntervalYearVector("vector", allocator));
+
+ // time vectors
+ testPositiveCase(() -> new TimeMicroVector("vector", allocator));
+ testPositiveCase(() -> new TimeMilliVector("vector", allocator));
+ testPositiveCase(() -> new TimeMicroVector("vector", allocator));
+ testPositiveCase(() -> new TimeSecVector("vector", allocator));
+
+ // time stamp vectors
+ testPositiveCase(() -> new TimeStampMicroTZVector("vector", allocator, "cn"));
+ testPositiveCase(() -> new TimeStampMicroVector("vector", allocator));
+ testPositiveCase(() -> new TimeStampMilliTZVector("vector", allocator, "cn"));
+ testPositiveCase(() -> new TimeStampMilliVector("vector", allocator));
+ testPositiveCase(() -> new TimeStampNanoTZVector("vector", allocator, "cn"));
+ testPositiveCase(() -> new TimeStampNanoVector("vector", allocator));
+ testPositiveCase(() -> new TimeStampSecTZVector("vector", allocator, "cn"));
+ testPositiveCase(() -> new TimeStampSecVector("vector", allocator));
+
+ testPositiveCase(() -> new FixedSizeBinaryVector("vector", allocator, 5));
+ }
+
+ @Test
+ public void testFixedWidthVectorsNegative() {
+ // integer vectors
+ testNegativeCase(
+ () -> new TinyIntVector("vector", FieldType.nullable(Types.MinorType.INT.getType()), allocator));
+ testNegativeCase(
+ () -> new SmallIntVector("vector", FieldType.nullable(Types.MinorType.INT.getType()), allocator));
+ testNegativeCase(
+ () -> new BigIntVector("vector", FieldType.nullable(Types.MinorType.SMALLINT.getType()), allocator));
+ testNegativeCase(
+ () -> new BigIntVector("vector", FieldType.nullable(Types.MinorType.SMALLINT.getType()), allocator));
+ testNegativeCase(
+ () -> new UInt1Vector("vector", FieldType.nullable(Types.MinorType.SMALLINT.getType()), allocator));
+ testNegativeCase(
+ () -> new UInt2Vector("vector", FieldType.nullable(Types.MinorType.SMALLINT.getType()), allocator));
+ testNegativeCase(
+ () -> new UInt4Vector("vector", FieldType.nullable(Types.MinorType.SMALLINT.getType()), allocator));
+ testNegativeCase(
+ () -> new UInt8Vector("vector", FieldType.nullable(Types.MinorType.SMALLINT.getType()), allocator));
+
+ testNegativeCase(
+ () -> new BitVector("vector", FieldType.nullable(Types.MinorType.BIGINT.getType()), allocator));
+ testNegativeCase(
+ () -> new DecimalVector("vector", allocator, 30, -16));
+
+ // date vectors
+ testNegativeCase(
+ () -> new DateDayVector("vector", FieldType.nullable(Types.MinorType.FLOAT4.getType()), allocator));
+ testNegativeCase(
+ () -> new DateMilliVector("vector", FieldType.nullable(Types.MinorType.BIGINT.getType()), allocator));
+
+ // float pont vectors
+ testNegativeCase(
+ () -> new Float4Vector("vector", FieldType.nullable(Types.MinorType.BIGINT.getType()), allocator));
+ testNegativeCase(
+ () -> new Float8Vector("vector", FieldType.nullable(Types.MinorType.BIGINT.getType()), allocator));
+
+ // interval vectors
+ testNegativeCase(
+ () -> new IntervalDayVector("vector", FieldType.nullable(Types.MinorType.INT.getType()), allocator));
+ testNegativeCase(
+ () -> new IntervalYearVector("vector", FieldType.nullable(Types.MinorType.BIGINT.getType()), allocator));
+
+ // time vectors
+ testNegativeCase(
+ () -> new TimeMilliVector("vector", FieldType.nullable(Types.MinorType.BIGINT.getType()), allocator));
+ testNegativeCase(
+ () -> new TimeMicroVector("vector", FieldType.nullable(Types.MinorType.BIGINT.getType()), allocator));
+ testNegativeCase(
+ () -> new TimeNanoVector("vector", FieldType.nullable(Types.MinorType.BIGINT.getType()), allocator));
+ testNegativeCase(
+ () -> new TimeSecVector("vector", FieldType.nullable(Types.MinorType.BIGINT.getType()), allocator));
+
+ // time stamp vectors
+ testNegativeCase(
+ () -> new TimeStampMicroTZVector("vector", allocator, null));
+ testNegativeCase(
+ () -> new TimeStampMicroVector("vector", FieldType.nullable(Types.MinorType.BIGINT.getType()), allocator));
+ testNegativeCase(
+ () -> new TimeStampMilliTZVector("vector", allocator, null));
+ testNegativeCase(
+ () -> new TimeStampMilliVector("vector", FieldType.nullable(Types.MinorType.BIGINT.getType()), allocator));
+ testNegativeCase(
+ () -> new TimeStampNanoTZVector("vector", allocator, null));
+ testNegativeCase(
+ () -> new TimeStampNanoVector("vector", FieldType.nullable(Types.MinorType.BIGINT.getType()), allocator));
+ testNegativeCase(
+ () -> new TimeStampSecTZVector("vector", allocator, null));
+ testNegativeCase(
+ () -> new TimeStampSecVector("vector", FieldType.nullable(Types.MinorType.BIGINT.getType()), allocator));
+ }
+
+ @Test
+ public void testVariableWidthVectorsPositive() {
+ testPositiveCase(() -> new VarCharVector("vector", allocator));
+ testPositiveCase(() -> new VarBinaryVector("vector", allocator));
+ }
+
+ @Test
+ public void testVariableWidthVectorsNegative() {
+ testNegativeCase(
+ () -> new VarCharVector("vector", FieldType.nullable(Types.MinorType.INT.getType()), allocator));
+ testNegativeCase(
+ () -> new VarBinaryVector("vector", FieldType.nullable(Types.MinorType.INT.getType()), allocator));
+ }
+
+ @Test
+ public void testLargeVariableWidthVectorsPositive() {
+ testPositiveCase(() -> new LargeVarCharVector("vector", allocator));
+ testPositiveCase(() -> new LargeVarBinaryVector("vector", allocator));
+ }
+
+ @Test
+ public void testLargeVariableWidthVectorsNegative() {
+ testNegativeCase(
+ () -> new LargeVarCharVector("vector", FieldType.nullable(Types.MinorType.INT.getType()), allocator));
+ testNegativeCase(
+ () -> new LargeVarBinaryVector("vector", FieldType.nullable(Types.MinorType.INT.getType()), allocator));
+ }
+
+ @Test
+ public void testListVector() {
+ testPositiveCase(() -> ListVector.empty("vector", allocator));
+
+ testNegativeCase(
+ () -> new ListVector("vector", allocator, FieldType.nullable(Types.MinorType.INT.getType()), null));
+ }
+
+ @Test
+ public void testLargeListVector() {
+ testPositiveCase(() -> LargeListVector.empty("vector", allocator));
+
+ testNegativeCase(
+ () -> new LargeListVector("vector", allocator, FieldType.nullable(Types.MinorType.INT.getType()), null));
+ }
+
+ @Test
+ public void testFixedSizeListVector() {
+ testPositiveCase(() -> FixedSizeListVector.empty("vector", 10, allocator));
+ }
+
+ @Test
+ public void testStructVector() {
+ testPositiveCase(() -> StructVector.empty("vector", allocator));
+
+ testNegativeCase(
+ () -> new StructVector("vector", allocator, FieldType.nullable(Types.MinorType.INT.getType()), null));
+ }
+
+ @Test
+ public void testUnionVector() {
+ testPositiveCase(() -> UnionVector.empty("vector", allocator));
+ }
+
+ @Test
+ public void testDenseUnionVector() {
+ testPositiveCase(() -> DenseUnionVector.empty("vector", allocator));
+ }
+
+ @Test
+ public void testNullVector() {
+ testPositiveCase(() -> new NullVector("null vec"));
+ }
+}
diff --git a/src/arrow/java/vector/src/test/resources/logback.xml b/src/arrow/java/vector/src/test/resources/logback.xml
new file mode 100644
index 000000000..f9e449fa6
--- /dev/null
+++ b/src/arrow/java/vector/src/test/resources/logback.xml
@@ -0,0 +1,28 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor
+ license agreements. See the NOTICE file distributed with this work for additional
+ information regarding copyright ownership. The ASF licenses this file to
+ You under the Apache License, Version 2.0 (the "License"); you may not use
+ this file except in compliance with the License. You may obtain a copy of
+ the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required
+ by applicable law or agreed to in writing, software distributed under the
+ License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+ OF ANY KIND, either express or implied. See the License for the specific
+ language governing permissions and limitations under the License. -->
+
+<configuration>
+ <appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
+ <!-- encoders are assigned the type
+ ch.qos.logback.classic.encoder.PatternLayoutEncoder by default -->
+ <encoder>
+ <pattern>%d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n</pattern>
+ </encoder>
+ </appender>
+
+ <statusListener class="ch.qos.logback.core.status.NopStatusListener"/>
+ <logger name="org.apache.arrow" additivity="false">
+ <level value="info" />
+ <appender-ref ref="STDOUT" />
+ </logger>
+
+</configuration>