diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000 |
commit | e6918187568dbd01842d8d1d2c808ce16a894239 (patch) | |
tree | 64f88b554b444a49f656b6c656111a145cbbaa28 /src/arrow/java/adapter | |
parent | Initial commit. (diff) | |
download | ceph-e6918187568dbd01842d8d1d2c808ce16a894239.tar.xz ceph-e6918187568dbd01842d8d1d2c808ce16a894239.zip |
Adding upstream version 18.2.2.upstream/18.2.2
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/arrow/java/adapter')
194 files changed, 16850 insertions, 0 deletions
diff --git a/src/arrow/java/adapter/avro/pom.xml b/src/arrow/java/adapter/avro/pom.xml new file mode 100644 index 000000000..1f3fea849 --- /dev/null +++ b/src/arrow/java/adapter/avro/pom.xml @@ -0,0 +1,59 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor + license agreements. See the NOTICE file distributed with this work for additional + information regarding copyright ownership. The ASF licenses this file to + You under the Apache License, Version 2.0 (the "License"); you may not use + this file except in compliance with the License. You may obtain a copy of + the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required + by applicable law or agreed to in writing, software distributed under the + License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS + OF ANY KIND, either express or implied. See the License for the specific + language governing permissions and limitations under the License. 
--> + +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <modelVersion>4.0.0</modelVersion> + + <parent> + <groupId>org.apache.arrow</groupId> + <artifactId>arrow-java-root</artifactId> + <version>6.0.1</version> + <relativePath>../../pom.xml</relativePath> + </parent> + + <artifactId>arrow-avro</artifactId> + <name>Arrow AVRO Adapter</name> + <description>(Contrib/Experimental) A library for converting Avro data to Arrow data.</description> + <url>http://maven.apache.org</url> + + <dependencies> + + <!-- https://mvnrepository.com/artifact/org.apache.arrow/arrow-memory-core --> + <dependency> + <groupId>org.apache.arrow</groupId> + <artifactId>arrow-memory-core</artifactId> + <version>${project.version}</version> + </dependency> + + <!-- https://mvnrepository.com/artifact/org.apache.arrow/arrow-memory-netty --> + <dependency> + <groupId>org.apache.arrow</groupId> + <artifactId>arrow-memory-netty</artifactId> + <version>${project.version}</version> + <scope>runtime</scope> + </dependency> + + <!-- https://mvnrepository.com/artifact/org.apache.arrow/arrow-vector --> + <dependency> + <groupId>org.apache.arrow</groupId> + <artifactId>arrow-vector</artifactId> + <version>${project.version}</version> + </dependency> + + <dependency> + <groupId>org.apache.avro</groupId> + <artifactId>avro</artifactId> + <version>${dep.avro.version}</version> + </dependency> + </dependencies> + +</project> diff --git a/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/AvroToArrow.java b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/AvroToArrow.java new file mode 100644 index 000000000..9fb5ce291 --- /dev/null +++ b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/AvroToArrow.java @@ -0,0 +1,67 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license 
agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow; + +import java.io.IOException; + +import org.apache.arrow.util.Preconditions; +import org.apache.arrow.vector.VectorSchemaRoot; +import org.apache.avro.Schema; +import org.apache.avro.io.Decoder; + +/** + * Utility class to convert Avro objects to columnar Arrow format objects. + */ +public class AvroToArrow { + + /** + * Fetch the data from {@link Decoder} and convert it to Arrow objects. + * Only for testing purpose. + * @param schema avro schema. + * @param decoder avro decoder + * @param config configuration of the conversion. + * @return Arrow Data Objects {@link VectorSchemaRoot} + */ + static VectorSchemaRoot avroToArrow(Schema schema, Decoder decoder, AvroToArrowConfig config) + throws IOException { + Preconditions.checkNotNull(schema, "Avro schema object can not be null"); + Preconditions.checkNotNull(decoder, "Avro decoder object can not be null"); + Preconditions.checkNotNull(config, "config can not be null"); + + return AvroToArrowUtils.avroToArrowVectors(schema, decoder, config); + } + + /** + * Fetch the data from {@link Decoder} and iteratively convert it to Arrow objects. + * @param schema avro schema + * @param decoder avro decoder + * @param config configuration of the conversion. 
+ * @throws IOException on error + */ + public static AvroToArrowVectorIterator avroToArrowIterator( + Schema schema, + Decoder decoder, + AvroToArrowConfig config) throws IOException { + + Preconditions.checkNotNull(schema, "Avro schema object can not be null"); + Preconditions.checkNotNull(decoder, "Avro decoder object can not be null"); + Preconditions.checkNotNull(config, "config can not be null"); + + return AvroToArrowVectorIterator.create(decoder, schema, config); + } +} diff --git a/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/AvroToArrowConfig.java b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/AvroToArrowConfig.java new file mode 100644 index 000000000..4f59ef384 --- /dev/null +++ b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/AvroToArrowConfig.java @@ -0,0 +1,86 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow; + +import java.util.Set; + +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.util.Preconditions; +import org.apache.arrow.vector.dictionary.DictionaryProvider; + +/** + * This class configures the Avro-to-Arrow conversion process. 
+ */ +public class AvroToArrowConfig { + + private final BufferAllocator allocator; + /** + * The maximum rowCount to read each time when partially convert data. + * Default value is 1024 and -1 means read all data into one vector. + */ + private final int targetBatchSize; + + /** + * The dictionary provider used for enum type. + * If avro schema has enum type, will create dictionary and update this provider. + */ + private final DictionaryProvider.MapDictionaryProvider provider; + + /** + * The field names which to skip when reading decoder values. + */ + private final Set<String> skipFieldNames; + + /** + * Instantiate an instance. + * @param allocator The memory allocator to construct the Arrow vectors with. + * @param targetBatchSize The maximum rowCount to read each time when partially convert data. + * @param provider The dictionary provider used for enum type, adapter will update this provider. + * @param skipFieldNames Field names which to skip. + */ + AvroToArrowConfig( + BufferAllocator allocator, + int targetBatchSize, + DictionaryProvider.MapDictionaryProvider provider, + Set<String> skipFieldNames) { + + Preconditions.checkArgument(targetBatchSize == AvroToArrowVectorIterator.NO_LIMIT_BATCH_SIZE || + targetBatchSize > 0, "invalid targetBatchSize: %s", targetBatchSize); + + this.allocator = allocator; + this.targetBatchSize = targetBatchSize; + this.provider = provider; + this.skipFieldNames = skipFieldNames; + } + + public BufferAllocator getAllocator() { + return allocator; + } + + public int getTargetBatchSize() { + return targetBatchSize; + } + + public DictionaryProvider.MapDictionaryProvider getProvider() { + return provider; + } + + public Set<String> getSkipFieldNames() { + return skipFieldNames; + } +} diff --git a/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/AvroToArrowConfigBuilder.java b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/AvroToArrowConfigBuilder.java new file mode 100644 index 000000000..474c1eb5c --- 
/dev/null +++ b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/AvroToArrowConfigBuilder.java @@ -0,0 +1,74 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow; + +import java.util.HashSet; +import java.util.Set; + +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.vector.dictionary.DictionaryProvider; + +/** + * This class builds {@link AvroToArrowConfig}s. + */ +public class AvroToArrowConfigBuilder { + + private BufferAllocator allocator; + + private int targetBatchSize; + + private DictionaryProvider.MapDictionaryProvider provider; + + private Set<String> skipFieldNames; + + /** + * Default constructor for the {@link AvroToArrowConfigBuilder}. 
+ */ + public AvroToArrowConfigBuilder(BufferAllocator allocator) { + this.allocator = allocator; + this.targetBatchSize = AvroToArrowVectorIterator.DEFAULT_BATCH_SIZE; + this.provider = new DictionaryProvider.MapDictionaryProvider(); + this.skipFieldNames = new HashSet<>(); + } + + public AvroToArrowConfigBuilder setTargetBatchSize(int targetBatchSize) { + this.targetBatchSize = targetBatchSize; + return this; + } + + public AvroToArrowConfigBuilder setProvider(DictionaryProvider.MapDictionaryProvider provider) { + this.provider = provider; + return this; + } + + public AvroToArrowConfigBuilder setSkipFieldNames(Set<String> skipFieldNames) { + this.skipFieldNames = skipFieldNames; + return this; + } + + /** + * This builds the {@link AvroToArrowConfig} from the provided params. + */ + public AvroToArrowConfig build() { + return new AvroToArrowConfig( + allocator, + targetBatchSize, + provider, + skipFieldNames); + } +} diff --git a/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/AvroToArrowUtils.java b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/AvroToArrowUtils.java new file mode 100644 index 000000000..80293c8b8 --- /dev/null +++ b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/AvroToArrowUtils.java @@ -0,0 +1,805 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow; + +import static org.apache.arrow.vector.types.FloatingPointPrecision.DOUBLE; +import static org.apache.arrow.vector.types.FloatingPointPrecision.SINGLE; + +import java.io.EOFException; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; + +import org.apache.arrow.consumers.AvroArraysConsumer; +import org.apache.arrow.consumers.AvroBooleanConsumer; +import org.apache.arrow.consumers.AvroBytesConsumer; +import org.apache.arrow.consumers.AvroDoubleConsumer; +import org.apache.arrow.consumers.AvroEnumConsumer; +import org.apache.arrow.consumers.AvroFixedConsumer; +import org.apache.arrow.consumers.AvroFloatConsumer; +import org.apache.arrow.consumers.AvroIntConsumer; +import org.apache.arrow.consumers.AvroLongConsumer; +import org.apache.arrow.consumers.AvroMapConsumer; +import org.apache.arrow.consumers.AvroNullConsumer; +import org.apache.arrow.consumers.AvroStringConsumer; +import org.apache.arrow.consumers.AvroStructConsumer; +import org.apache.arrow.consumers.AvroUnionsConsumer; +import org.apache.arrow.consumers.CompositeAvroConsumer; +import org.apache.arrow.consumers.Consumer; +import org.apache.arrow.consumers.SkipConsumer; +import org.apache.arrow.consumers.SkipFunction; +import org.apache.arrow.consumers.logical.AvroDateConsumer; +import org.apache.arrow.consumers.logical.AvroDecimalConsumer; +import org.apache.arrow.consumers.logical.AvroTimeMicroConsumer; +import org.apache.arrow.consumers.logical.AvroTimeMillisConsumer; +import org.apache.arrow.consumers.logical.AvroTimestampMicrosConsumer; +import org.apache.arrow.consumers.logical.AvroTimestampMillisConsumer; +import org.apache.arrow.memory.BufferAllocator; 
+import org.apache.arrow.util.Preconditions; +import org.apache.arrow.vector.BaseIntVector; +import org.apache.arrow.vector.BigIntVector; +import org.apache.arrow.vector.BitVector; +import org.apache.arrow.vector.DateDayVector; +import org.apache.arrow.vector.DecimalVector; +import org.apache.arrow.vector.FieldVector; +import org.apache.arrow.vector.FixedSizeBinaryVector; +import org.apache.arrow.vector.Float4Vector; +import org.apache.arrow.vector.Float8Vector; +import org.apache.arrow.vector.IntVector; +import org.apache.arrow.vector.NullVector; +import org.apache.arrow.vector.TimeMicroVector; +import org.apache.arrow.vector.TimeMilliVector; +import org.apache.arrow.vector.TimeStampMicroVector; +import org.apache.arrow.vector.TimeStampMilliVector; +import org.apache.arrow.vector.VarBinaryVector; +import org.apache.arrow.vector.VarCharVector; +import org.apache.arrow.vector.VectorSchemaRoot; +import org.apache.arrow.vector.complex.ListVector; +import org.apache.arrow.vector.complex.MapVector; +import org.apache.arrow.vector.complex.StructVector; +import org.apache.arrow.vector.complex.UnionVector; +import org.apache.arrow.vector.dictionary.Dictionary; +import org.apache.arrow.vector.dictionary.DictionaryEncoder; +import org.apache.arrow.vector.dictionary.DictionaryProvider; +import org.apache.arrow.vector.types.DateUnit; +import org.apache.arrow.vector.types.TimeUnit; +import org.apache.arrow.vector.types.Types; +import org.apache.arrow.vector.types.UnionMode; +import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.types.pojo.DictionaryEncoding; +import org.apache.arrow.vector.types.pojo.Field; +import org.apache.arrow.vector.types.pojo.FieldType; +import org.apache.arrow.vector.util.JsonStringArrayList; +import org.apache.arrow.vector.util.ValueVectorUtility; +import org.apache.avro.LogicalType; +import org.apache.avro.LogicalTypes; +import org.apache.avro.Schema; +import org.apache.avro.Schema.Type; +import 
org.apache.avro.io.Decoder; + +/** + * Class that does most of the work to convert Avro data into Arrow columnar format Vector objects. + */ +public class AvroToArrowUtils { + + /** + * Creates a {@link Consumer} from the {@link Schema} + * + <p>This method currently performs following type mapping for Avro data types to corresponding Arrow data types. + * + * <ul> + * <li>STRING --> ArrowType.Utf8</li> + * <li>INT --> ArrowType.Int(32, signed)</li> + * <li>LONG --> ArrowType.Int(64, signed)</li> + * <li>FLOAT --> ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE)</li> + * <li>DOUBLE --> ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE)</li> + * <li>BOOLEAN --> ArrowType.Bool</li> + * <li>BYTES --> ArrowType.Binary</li> + * <li>ARRAY --> ArrowType.List</li> + * <li>MAP --> ArrowType.Map</li> + * <li>FIXED --> ArrowType.FixedSizeBinary</li> + * <li>RECORD --> ArrowType.Struct</li> + * <li>UNION --> ArrowType.Union</li> + * <li>ENUM--> ArrowType.Int</li> + * <li>DECIMAL --> ArrowType.Decimal</li> + * <li>Date --> ArrowType.Date(DateUnit.DAY)</li> + * <li>TimeMillis --> ArrowType.Time(TimeUnit.MILLISECOND, 32)</li> + * <li>TimeMicros --> ArrowType.Time(TimeUnit.MICROSECOND, 64)</li> + * <li>TimestampMillis --> ArrowType.Timestamp(TimeUnit.MILLISECOND, null)</li> + * <li>TimestampMicros --> ArrowType.Timestamp(TimeUnit.MICROSECOND, null)</li> + * </ul> + */ + + private static Consumer createConsumer(Schema schema, String name, AvroToArrowConfig config) { + return createConsumer(schema, name, false, config, null); + } + + private static Consumer createConsumer(Schema schema, String name, AvroToArrowConfig config, FieldVector vector) { + return createConsumer(schema, name, false, config, vector); + } + + /** + * Create a consumer with the given Avro schema. + * + * @param schema avro schema + * @param name arrow field name + * @param consumerVector vector to keep in consumer, if v == null, will create a new vector via field. 
+ * @return consumer + */ + private static Consumer createConsumer( + Schema schema, + String name, + boolean nullable, + AvroToArrowConfig config, + FieldVector consumerVector) { + + Preconditions.checkNotNull(schema, "Avro schema object can't be null"); + Preconditions.checkNotNull(config, "Config can't be null"); + + final BufferAllocator allocator = config.getAllocator(); + + final Type type = schema.getType(); + final LogicalType logicalType = schema.getLogicalType(); + + final ArrowType arrowType; + final FieldType fieldType; + final FieldVector vector; + final Consumer consumer; + + switch (type) { + case UNION: + consumer = createUnionConsumer(schema, name, config, consumerVector); + break; + case ARRAY: + consumer = createArrayConsumer(schema, name, config, consumerVector); + break; + case MAP: + consumer = createMapConsumer(schema, name, config, consumerVector); + break; + case RECORD: + consumer = createStructConsumer(schema, name, config, consumerVector); + break; + case ENUM: + consumer = createEnumConsumer(schema, name, config, consumerVector); + break; + case STRING: + arrowType = new ArrowType.Utf8(); + fieldType = new FieldType(nullable, arrowType, /*dictionary=*/null, getMetaData(schema)); + vector = createVector(consumerVector, fieldType, name, allocator); + consumer = new AvroStringConsumer((VarCharVector) vector); + break; + case FIXED: + Map<String, String> extProps = createExternalProps(schema); + if (logicalType instanceof LogicalTypes.Decimal) { + arrowType = createDecimalArrowType((LogicalTypes.Decimal) logicalType); + fieldType = new FieldType(nullable, arrowType, /*dictionary=*/null, getMetaData(schema, extProps)); + vector = createVector(consumerVector, fieldType, name, allocator); + consumer = new AvroDecimalConsumer.FixedDecimalConsumer((DecimalVector) vector, schema.getFixedSize()); + } else { + arrowType = new ArrowType.FixedSizeBinary(schema.getFixedSize()); + fieldType = new FieldType(nullable, arrowType, /*dictionary=*/null, 
getMetaData(schema, extProps)); + vector = createVector(consumerVector, fieldType, name, allocator); + consumer = new AvroFixedConsumer((FixedSizeBinaryVector) vector, schema.getFixedSize()); + } + break; + case INT: + if (logicalType instanceof LogicalTypes.Date) { + arrowType = new ArrowType.Date(DateUnit.DAY); + fieldType = new FieldType(nullable, arrowType, /*dictionary=*/null, getMetaData(schema)); + vector = createVector(consumerVector, fieldType, name, allocator); + consumer = new AvroDateConsumer((DateDayVector) vector); + } else if (logicalType instanceof LogicalTypes.TimeMillis) { + arrowType = new ArrowType.Time(TimeUnit.MILLISECOND, 32); + fieldType = new FieldType(nullable, arrowType, /*dictionary=*/null, getMetaData(schema)); + vector = createVector(consumerVector, fieldType, name, allocator); + consumer = new AvroTimeMillisConsumer((TimeMilliVector) vector); + } else { + arrowType = new ArrowType.Int(32, /*signed=*/true); + fieldType = new FieldType(nullable, arrowType, /*dictionary=*/null, getMetaData(schema)); + vector = createVector(consumerVector, fieldType, name, allocator); + consumer = new AvroIntConsumer((IntVector) vector); + } + break; + case BOOLEAN: + arrowType = new ArrowType.Bool(); + fieldType = new FieldType(nullable, arrowType, /*dictionary=*/null, getMetaData(schema)); + vector = createVector(consumerVector, fieldType, name, allocator); + consumer = new AvroBooleanConsumer((BitVector) vector); + break; + case LONG: + if (logicalType instanceof LogicalTypes.TimeMicros) { + arrowType = new ArrowType.Time(TimeUnit.MICROSECOND, 64); + fieldType = new FieldType(nullable, arrowType, /*dictionary=*/null, getMetaData(schema)); + vector = createVector(consumerVector, fieldType, name, allocator); + consumer = new AvroTimeMicroConsumer((TimeMicroVector) vector); + } else if (logicalType instanceof LogicalTypes.TimestampMillis) { + arrowType = new ArrowType.Timestamp(TimeUnit.MILLISECOND, null); + fieldType = new FieldType(nullable, arrowType, 
/*dictionary=*/null, getMetaData(schema)); + vector = createVector(consumerVector, fieldType, name, allocator); + consumer = new AvroTimestampMillisConsumer((TimeStampMilliVector) vector); + } else if (logicalType instanceof LogicalTypes.TimestampMicros) { + arrowType = new ArrowType.Timestamp(TimeUnit.MICROSECOND, null); + fieldType = new FieldType(nullable, arrowType, /*dictionary=*/null, getMetaData(schema)); + vector = createVector(consumerVector, fieldType, name, allocator); + consumer = new AvroTimestampMicrosConsumer((TimeStampMicroVector) vector); + } else { + arrowType = new ArrowType.Int(64, /*signed=*/true); + fieldType = new FieldType(nullable, arrowType, /*dictionary=*/null, getMetaData(schema)); + vector = createVector(consumerVector, fieldType, name, allocator); + consumer = new AvroLongConsumer((BigIntVector) vector); + } + break; + case FLOAT: + arrowType = new ArrowType.FloatingPoint(SINGLE); + fieldType = new FieldType(nullable, arrowType, /*dictionary=*/null, getMetaData(schema)); + vector = createVector(consumerVector, fieldType, name, allocator); + consumer = new AvroFloatConsumer((Float4Vector) vector); + break; + case DOUBLE: + arrowType = new ArrowType.FloatingPoint(DOUBLE); + fieldType = new FieldType(nullable, arrowType, /*dictionary=*/null, getMetaData(schema)); + vector = createVector(consumerVector, fieldType, name, allocator); + consumer = new AvroDoubleConsumer((Float8Vector) vector); + break; + case BYTES: + if (logicalType instanceof LogicalTypes.Decimal) { + arrowType = createDecimalArrowType((LogicalTypes.Decimal) logicalType); + fieldType = new FieldType(nullable, arrowType, /*dictionary=*/null, getMetaData(schema)); + vector = createVector(consumerVector, fieldType, name, allocator); + consumer = new AvroDecimalConsumer.BytesDecimalConsumer((DecimalVector) vector); + } else { + arrowType = new ArrowType.Binary(); + fieldType = new FieldType(nullable, arrowType, /*dictionary=*/null, getMetaData(schema)); + vector = 
createVector(consumerVector, fieldType, name, allocator); + consumer = new AvroBytesConsumer((VarBinaryVector) vector); + } + break; + case NULL: + arrowType = new ArrowType.Null(); + fieldType = new FieldType(nullable, arrowType, /*dictionary=*/null, getMetaData(schema)); + vector = fieldType.createNewSingleVector(name, allocator, /*schemaCallback=*/null); + consumer = new AvroNullConsumer((NullVector) vector); + break; + default: + // no-op, shouldn't get here + throw new UnsupportedOperationException("Can't convert avro type %s to arrow type." + type.getName()); + } + return consumer; + } + + private static ArrowType createDecimalArrowType(LogicalTypes.Decimal logicalType) { + final int scale = logicalType.getScale(); + final int precision = logicalType.getPrecision(); + Preconditions.checkArgument(precision > 0 && precision <= 38, + "Precision must be in range of 1 to 38"); + Preconditions.checkArgument(scale >= 0 && scale <= 38, + "Scale must be in range of 0 to 38."); + Preconditions.checkArgument(scale <= precision, + "Invalid decimal scale: %s (greater than precision: %s)", scale, precision); + + return new ArrowType.Decimal(precision, scale, 128); + + } + + private static Consumer createSkipConsumer(Schema schema) { + + SkipFunction skipFunction; + Type type = schema.getType(); + + switch (type) { + case UNION: + List<Consumer> unionDelegates = schema.getTypes().stream().map(s -> + createSkipConsumer(s)).collect(Collectors.toList()); + skipFunction = decoder -> unionDelegates.get(decoder.readInt()).consume(decoder); + + break; + case ARRAY: + Consumer elementDelegate = createSkipConsumer(schema.getElementType()); + skipFunction = decoder -> { + for (long i = decoder.skipArray(); i != 0; i = decoder.skipArray()) { + for (long j = 0; j < i; j++) { + elementDelegate.consume(decoder); + } + } + }; + break; + case MAP: + Consumer valueDelegate = createSkipConsumer(schema.getValueType()); + skipFunction = decoder -> { + for (long i = decoder.skipMap(); i != 0; i 
= decoder.skipMap()) { + for (long j = 0; j < i; j++) { + decoder.skipString(); // Discard key + valueDelegate.consume(decoder); + } + } + }; + break; + case RECORD: + List<Consumer> delegates = schema.getFields().stream().map(field -> + createSkipConsumer(field.schema())).collect(Collectors.toList()); + + skipFunction = decoder -> { + for (Consumer consumer : delegates) { + consumer.consume(decoder); + } + }; + + break; + case ENUM: + skipFunction = decoder -> decoder.readEnum(); + break; + case STRING: + skipFunction = decoder -> decoder.skipString(); + break; + case FIXED: + skipFunction = decoder -> decoder.skipFixed(schema.getFixedSize()); + break; + case INT: + skipFunction = decoder -> decoder.readInt(); + break; + case BOOLEAN: + skipFunction = decoder -> decoder.skipFixed(1); + break; + case LONG: + skipFunction = decoder -> decoder.readLong(); + break; + case FLOAT: + skipFunction = decoder -> decoder.readFloat(); + break; + case DOUBLE: + skipFunction = decoder -> decoder.readDouble(); + break; + case BYTES: + skipFunction = decoder -> decoder.skipBytes(); + break; + case NULL: + skipFunction = decoder -> { }; + break; + default: + // no-op, shouldn't get here + throw new UnsupportedOperationException("Invalid avro type: " + type.getName()); + } + + return new SkipConsumer(skipFunction); + } + + static CompositeAvroConsumer createCompositeConsumer( + Schema schema, AvroToArrowConfig config) { + + List<Consumer> consumers = new ArrayList<>(); + final Set<String> skipFieldNames = config.getSkipFieldNames(); + + Schema.Type type = schema.getType(); + if (type == Type.RECORD) { + for (Schema.Field field : schema.getFields()) { + if (skipFieldNames.contains(field.name())) { + consumers.add(createSkipConsumer(field.schema())); + } else { + Consumer consumer = createConsumer(field.schema(), field.name(), config); + consumers.add(consumer); + } + + } + } else { + Consumer consumer = createConsumer(schema, "", config); + consumers.add(consumer); + } + + return 
new CompositeAvroConsumer(consumers); + } + + private static FieldVector createVector(FieldVector consumerVector, FieldType fieldType, + String name, BufferAllocator allocator) { + return consumerVector != null ? consumerVector : fieldType.createNewSingleVector(name, allocator, null); + } + + private static String getDefaultFieldName(ArrowType type) { + Types.MinorType minorType = Types.getMinorTypeForArrowType(type); + return minorType.name().toLowerCase(); + } + + private static Field avroSchemaToField(Schema schema, String name, AvroToArrowConfig config) { + return avroSchemaToField(schema, name, config, null); + } + + private static Field avroSchemaToField( + Schema schema, + String name, + AvroToArrowConfig config, + Map<String, String> externalProps) { + + final Type type = schema.getType(); + final LogicalType logicalType = schema.getLogicalType(); + final List<Field> children = new ArrayList<>(); + final FieldType fieldType; + + switch (type) { + case UNION: + for (int i = 0; i < schema.getTypes().size(); i++) { + Schema childSchema = schema.getTypes().get(i); + // Union child vector should use default name + children.add(avroSchemaToField(childSchema, null, config)); + } + fieldType = createFieldType(new ArrowType.Union(UnionMode.Sparse, null), schema, externalProps); + break; + case ARRAY: + Schema elementSchema = schema.getElementType(); + children.add(avroSchemaToField(elementSchema, elementSchema.getName(), config)); + fieldType = createFieldType(new ArrowType.List(), schema, externalProps); + break; + case MAP: + // MapVector internal struct field and key field should be non-nullable + FieldType keyFieldType = new FieldType(/*nullable=*/false, new ArrowType.Utf8(), /*dictionary=*/null); + Field keyField = new Field("key", keyFieldType, /*children=*/null); + Field valueField = avroSchemaToField(schema.getValueType(), "value", config); + + FieldType structFieldType = new FieldType(false, new ArrowType.Struct(), /*dictionary=*/null); + Field structField 
= new Field("internal", structFieldType, Arrays.asList(keyField, valueField)); + children.add(structField); + fieldType = createFieldType(new ArrowType.Map(/*keySorted=*/false), schema, externalProps); + break; + case RECORD: + final Set<String> skipFieldNames = config.getSkipFieldNames(); + for (int i = 0; i < schema.getFields().size(); i++) { + final Schema.Field field = schema.getFields().get(i); + Schema childSchema = field.schema(); + String fullChildName = String.format("%s.%s", name, field.name()); + if (!skipFieldNames.contains(fullChildName)) { + final Map<String, String> extProps = new HashMap<>(); + String doc = field.doc(); + Set<String> aliases = field.aliases(); + if (doc != null) { + extProps.put("doc", doc); + } + if (aliases != null) { + extProps.put("aliases", convertAliases(aliases)); + } + children.add(avroSchemaToField(childSchema, fullChildName, config, extProps)); + } + } + fieldType = createFieldType(new ArrowType.Struct(), schema, externalProps); + break; + case ENUM: + DictionaryProvider.MapDictionaryProvider provider = config.getProvider(); + int current = provider.getDictionaryIds().size(); + int enumCount = schema.getEnumSymbols().size(); + ArrowType.Int indexType = DictionaryEncoder.getIndexType(enumCount); + + fieldType = createFieldType(indexType, schema, externalProps, + new DictionaryEncoding(current, /*ordered=*/false, /*indexType=*/indexType)); + break; + + case STRING: + fieldType = createFieldType(new ArrowType.Utf8(), schema, externalProps); + break; + case FIXED: + final ArrowType fixedArrowType; + if (logicalType instanceof LogicalTypes.Decimal) { + fixedArrowType = createDecimalArrowType((LogicalTypes.Decimal) logicalType); + } else { + fixedArrowType = new ArrowType.FixedSizeBinary(schema.getFixedSize()); + } + fieldType = createFieldType(fixedArrowType, schema, externalProps); + break; + case INT: + final ArrowType intArrowType; + if (logicalType instanceof LogicalTypes.Date) { + intArrowType = new 
ArrowType.Date(DateUnit.DAY); + } else if (logicalType instanceof LogicalTypes.TimeMillis) { + intArrowType = new ArrowType.Time(TimeUnit.MILLISECOND, 32); + } else { + intArrowType = new ArrowType.Int(32, /*signed=*/true); + } + fieldType = createFieldType(intArrowType, schema, externalProps); + break; + case BOOLEAN: + fieldType = createFieldType(new ArrowType.Bool(), schema, externalProps); + break; + case LONG: + final ArrowType longArrowType; + if (logicalType instanceof LogicalTypes.TimeMicros) { + longArrowType = new ArrowType.Time(TimeUnit.MICROSECOND, 64); + } else if (logicalType instanceof LogicalTypes.TimestampMillis) { + longArrowType = new ArrowType.Timestamp(TimeUnit.MILLISECOND, null); + } else if (logicalType instanceof LogicalTypes.TimestampMicros) { + longArrowType = new ArrowType.Timestamp(TimeUnit.MICROSECOND, null); + } else { + longArrowType = new ArrowType.Int(64, /*signed=*/true); + } + fieldType = createFieldType(longArrowType, schema, externalProps); + break; + case FLOAT: + fieldType = createFieldType(new ArrowType.FloatingPoint(SINGLE), schema, externalProps); + break; + case DOUBLE: + fieldType = createFieldType(new ArrowType.FloatingPoint(DOUBLE), schema, externalProps); + break; + case BYTES: + final ArrowType bytesArrowType; + if (logicalType instanceof LogicalTypes.Decimal) { + bytesArrowType = createDecimalArrowType((LogicalTypes.Decimal) logicalType); + } else { + bytesArrowType = new ArrowType.Binary(); + } + fieldType = createFieldType(bytesArrowType, schema, externalProps); + break; + case NULL: + fieldType = createFieldType(ArrowType.Null.INSTANCE, schema, externalProps); + break; + default: + // no-op, shouldn't get here + throw new UnsupportedOperationException(); + } + + if (name == null) { + name = getDefaultFieldName(fieldType.getType()); + } + return new Field(name, fieldType, children.size() == 0 ? 
null : children); + } + + private static Consumer createArrayConsumer(Schema schema, String name, AvroToArrowConfig config, + FieldVector consumerVector) { + + ListVector listVector; + if (consumerVector == null) { + final Field field = avroSchemaToField(schema, name, config); + listVector = (ListVector) field.createVector(config.getAllocator()); + } else { + listVector = (ListVector) consumerVector; + } + + FieldVector dataVector = listVector.getDataVector(); + + // create delegate + Schema childSchema = schema.getElementType(); + Consumer delegate = createConsumer(childSchema, childSchema.getName(), config, dataVector); + + return new AvroArraysConsumer(listVector, delegate); + } + + private static Consumer createStructConsumer(Schema schema, String name, AvroToArrowConfig config, + FieldVector consumerVector) { + + final Set<String> skipFieldNames = config.getSkipFieldNames(); + + StructVector structVector; + if (consumerVector == null) { + final Field field = avroSchemaToField(schema, name, config, createExternalProps(schema)); + structVector = (StructVector) field.createVector(config.getAllocator()); + } else { + structVector = (StructVector) consumerVector; + } + + Consumer[] delegates = new Consumer[schema.getFields().size()]; + int vectorIndex = 0; + for (int i = 0; i < schema.getFields().size(); i++) { + Schema.Field childField = schema.getFields().get(i); + Consumer delegate; + // use full name to distinguish fields have same names between parent and child fields. 
+ final String fullChildName = String.format("%s.%s", name, childField.name()); + if (skipFieldNames.contains(fullChildName)) { + delegate = createSkipConsumer(childField.schema()); + } else { + delegate = createConsumer(childField.schema(), fullChildName, config, + structVector.getChildrenFromFields().get(vectorIndex++)); + } + + delegates[i] = delegate; + } + + return new AvroStructConsumer(structVector, delegates); + + } + + private static Consumer createEnumConsumer(Schema schema, String name, AvroToArrowConfig config, + FieldVector consumerVector) { + + BaseIntVector indexVector; + if (consumerVector == null) { + final Field field = avroSchemaToField(schema, name, config, createExternalProps(schema)); + indexVector = (BaseIntVector) field.createVector(config.getAllocator()); + } else { + indexVector = (BaseIntVector) consumerVector; + } + + final int valueCount = schema.getEnumSymbols().size(); + VarCharVector dictVector = new VarCharVector(name, config.getAllocator()); + dictVector.allocateNewSafe(); + dictVector.setValueCount(valueCount); + for (int i = 0; i < valueCount; i++) { + dictVector.set(i, schema.getEnumSymbols().get(i).getBytes(StandardCharsets.UTF_8)); + } + Dictionary dictionary = + new Dictionary(dictVector, indexVector.getField().getDictionary()); + config.getProvider().put(dictionary); + + return new AvroEnumConsumer(indexVector); + + } + + private static Consumer createMapConsumer(Schema schema, String name, AvroToArrowConfig config, + FieldVector consumerVector) { + + MapVector mapVector; + if (consumerVector == null) { + final Field field = avroSchemaToField(schema, name, config); + mapVector = (MapVector) field.createVector(config.getAllocator()); + } else { + mapVector = (MapVector) consumerVector; + } + + // create delegate struct consumer + StructVector structVector = (StructVector) mapVector.getDataVector(); + + // keys in avro map are always assumed to be strings. 
+ Consumer keyConsumer = new AvroStringConsumer( + (VarCharVector) structVector.getChildrenFromFields().get(0)); + Consumer valueConsumer = createConsumer(schema.getValueType(), schema.getValueType().getName(), + config, structVector.getChildrenFromFields().get(1)); + + AvroStructConsumer internalConsumer = + new AvroStructConsumer(structVector, new Consumer[] {keyConsumer, valueConsumer}); + + return new AvroMapConsumer(mapVector, internalConsumer); + } + + private static Consumer createUnionConsumer(Schema schema, String name, AvroToArrowConfig config, + FieldVector consumerVector) { + final int size = schema.getTypes().size(); + + final boolean nullable = schema.getTypes().stream().anyMatch(t -> t.getType() == Type.NULL); + + UnionVector unionVector; + if (consumerVector == null) { + final Field field = avroSchemaToField(schema, name, config); + unionVector = (UnionVector) field.createVector(config.getAllocator()); + } else { + unionVector = (UnionVector) consumerVector; + } + + List<FieldVector> childVectors = unionVector.getChildrenFromFields(); + + Consumer[] delegates = new Consumer[size]; + Types.MinorType[] types = new Types.MinorType[size]; + + for (int i = 0; i < size; i++) { + FieldVector child = childVectors.get(i); + Schema subSchema = schema.getTypes().get(i); + Consumer delegate = createConsumer(subSchema, subSchema.getName(), nullable, config, child); + delegates[i] = delegate; + types[i] = child.getMinorType(); + } + return new AvroUnionsConsumer(unionVector, delegates, types); + } + + /** + * Read data from {@link Decoder} and generate a {@link VectorSchemaRoot}. 
+ * @param schema avro schema + * @param decoder avro decoder to read data from + */ + static VectorSchemaRoot avroToArrowVectors( + Schema schema, + Decoder decoder, + AvroToArrowConfig config) + throws IOException { + + List<FieldVector> vectors = new ArrayList<>(); + List<Consumer> consumers = new ArrayList<>(); + final Set<String> skipFieldNames = config.getSkipFieldNames(); + + Schema.Type type = schema.getType(); + if (type == Type.RECORD) { + for (Schema.Field field : schema.getFields()) { + if (skipFieldNames.contains(field.name())) { + consumers.add(createSkipConsumer(field.schema())); + } else { + Consumer consumer = createConsumer(field.schema(), field.name(), config); + consumers.add(consumer); + vectors.add(consumer.getVector()); + } + } + } else { + Consumer consumer = createConsumer(schema, "", config); + consumers.add(consumer); + vectors.add(consumer.getVector()); + } + + long validConsumerCount = consumers.stream().filter(c -> !c.skippable()).count(); + Preconditions.checkArgument(vectors.size() == validConsumerCount, + "vectors size not equals consumers size."); + + List<Field> fields = vectors.stream().map(t -> t.getField()).collect(Collectors.toList()); + + VectorSchemaRoot root = new VectorSchemaRoot(fields, vectors, 0); + + CompositeAvroConsumer compositeConsumer = new CompositeAvroConsumer(consumers); + + int valueCount = 0; + try { + while (true) { + ValueVectorUtility.ensureCapacity(root, valueCount + 1); + compositeConsumer.consume(decoder); + valueCount++; + } + } catch (EOFException eof) { + // reach the end of encoder stream. 
+ root.setRowCount(valueCount); + } catch (Exception e) { + compositeConsumer.close(); + throw new UnsupportedOperationException("Error occurs while consume process.", e); + } + + return root; + } + + private static Map<String, String> getMetaData(Schema schema) { + Map<String, String> metadata = new HashMap<>(); + schema.getObjectProps().forEach((k, v) -> metadata.put(k, v.toString())); + return metadata; + } + + private static Map<String, String> getMetaData(Schema schema, Map<String, String> externalProps) { + Map<String, String> metadata = getMetaData(schema); + if (externalProps != null) { + metadata.putAll(externalProps); + } + return metadata; + } + + /** + * Parse avro attributes and convert them to metadata. + */ + private static Map<String, String> createExternalProps(Schema schema) { + final Map<String, String> extProps = new HashMap<>(); + String doc = schema.getDoc(); + Set<String> aliases = schema.getAliases(); + if (doc != null) { + extProps.put("doc", doc); + } + if (aliases != null) { + extProps.put("aliases", convertAliases(aliases)); + } + return extProps; + } + + private static FieldType createFieldType(ArrowType arrowType, Schema schema, Map<String, String> externalProps) { + return createFieldType(arrowType, schema, externalProps, /*dictionary=*/null); + } + + private static FieldType createFieldType( + ArrowType arrowType, + Schema schema, + Map<String, String> externalProps, + DictionaryEncoding dictionary) { + + return new FieldType(/*nullable=*/false, arrowType, dictionary, + getMetaData(schema, externalProps)); + } + + private static String convertAliases(Set<String> aliases) { + JsonStringArrayList jsonList = new JsonStringArrayList(); + aliases.stream().forEach(a -> jsonList.add(a)); + return jsonList.toString(); + } +} diff --git a/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/AvroToArrowVectorIterator.java b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/AvroToArrowVectorIterator.java new file mode 100644 
index 000000000..1faa7595c --- /dev/null +++ b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/AvroToArrowVectorIterator.java @@ -0,0 +1,186 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow; + +import java.io.EOFException; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; +import java.util.stream.Collectors; + +import org.apache.arrow.consumers.CompositeAvroConsumer; +import org.apache.arrow.util.Preconditions; +import org.apache.arrow.vector.FieldVector; +import org.apache.arrow.vector.VectorSchemaRoot; +import org.apache.arrow.vector.types.pojo.Field; +import org.apache.arrow.vector.util.ValueVectorUtility; +import org.apache.avro.Schema; +import org.apache.avro.io.Decoder; + +/** + * VectorSchemaRoot iterator for partially converting avro data. 
+ */ +public class AvroToArrowVectorIterator implements Iterator<VectorSchemaRoot>, AutoCloseable { + + public static final int NO_LIMIT_BATCH_SIZE = -1; + public static final int DEFAULT_BATCH_SIZE = 1024; + + private final Decoder decoder; + private final Schema schema; + + private final AvroToArrowConfig config; + + private CompositeAvroConsumer compositeConsumer; + + private org.apache.arrow.vector.types.pojo.Schema rootSchema; + + private VectorSchemaRoot nextBatch; + + private final int targetBatchSize; + + /** + * Construct an instance. + */ + private AvroToArrowVectorIterator( + Decoder decoder, + Schema schema, + AvroToArrowConfig config) { + + this.decoder = decoder; + this.schema = schema; + this.config = config; + this.targetBatchSize = config.getTargetBatchSize(); + + } + + /** + * Create a ArrowVectorIterator to partially convert data. + */ + public static AvroToArrowVectorIterator create( + Decoder decoder, + Schema schema, + AvroToArrowConfig config) { + + AvroToArrowVectorIterator iterator = new AvroToArrowVectorIterator(decoder, schema, config); + try { + iterator.initialize(); + return iterator; + } catch (Exception e) { + iterator.close(); + throw new RuntimeException("Error occurs while creating iterator.", e); + } + } + + private void initialize() { + // create consumers + compositeConsumer = AvroToArrowUtils.createCompositeConsumer(schema, config); + List<FieldVector> vectors = new ArrayList<>(); + compositeConsumer.getConsumers().forEach(c -> vectors.add(c.getVector())); + List<Field> fields = vectors.stream().map(t -> t.getField()).collect(Collectors.toList()); + VectorSchemaRoot root = new VectorSchemaRoot(fields, vectors, 0); + rootSchema = root.getSchema(); + + load(root); + } + + private void consumeData(VectorSchemaRoot root) { + int readRowCount = 0; + try { + while ((targetBatchSize == NO_LIMIT_BATCH_SIZE || readRowCount < targetBatchSize)) { + compositeConsumer.consume(decoder); + readRowCount++; + } + + if (targetBatchSize == 
NO_LIMIT_BATCH_SIZE) { + while (true) { + ValueVectorUtility.ensureCapacity(root, readRowCount + 1); + compositeConsumer.consume(decoder); + readRowCount++; + } + } else { + while (readRowCount < targetBatchSize) { + compositeConsumer.consume(decoder); + readRowCount++; + } + } + + root.setRowCount(readRowCount); + } catch (EOFException eof) { + // reach the end of encoder stream. + root.setRowCount(readRowCount); + } catch (Exception e) { + compositeConsumer.close(); + throw new RuntimeException("Error occurs while consuming data.", e); + } + } + + // Loads the next schema root or null if no more rows are available. + private void load(VectorSchemaRoot root) { + final int targetBatchSize = config.getTargetBatchSize(); + if (targetBatchSize != NO_LIMIT_BATCH_SIZE) { + ValueVectorUtility.preAllocate(root, targetBatchSize); + } + + long validConsumerCount = compositeConsumer.getConsumers().stream().filter(c -> + !c.skippable()).count(); + Preconditions.checkArgument(root.getFieldVectors().size() == validConsumerCount, + "Schema root vectors size not equals to consumers size."); + + compositeConsumer.resetConsumerVectors(root); + + // consume data + consumeData(root); + + if (root.getRowCount() == 0) { + root.close(); + nextBatch = null; + } else { + nextBatch = root; + } + } + + @Override + public boolean hasNext() { + return nextBatch != null; + } + + /** + * Gets the next vector. The user is responsible for freeing its resources. + */ + public VectorSchemaRoot next() { + Preconditions.checkArgument(hasNext()); + VectorSchemaRoot returned = nextBatch; + try { + load(VectorSchemaRoot.create(rootSchema, config.getAllocator())); + } catch (Exception e) { + returned.close(); + throw new RuntimeException("Error occurs while getting next schema root.", e); + } + return returned; + } + + /** + * Clean up resources. 
+ */ + public void close() { + if (nextBatch != null) { + nextBatch.close(); + } + compositeConsumer.close(); + } +} diff --git a/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroArraysConsumer.java b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroArraysConsumer.java new file mode 100644 index 000000000..b9d0f84cf --- /dev/null +++ b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroArraysConsumer.java @@ -0,0 +1,74 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.consumers; + +import java.io.IOException; + +import org.apache.arrow.vector.complex.ListVector; +import org.apache.avro.io.Decoder; + +/** + * Consumer which consume array type values from avro decoder. + * Write the data to {@link ListVector}. + */ +public class AvroArraysConsumer extends BaseAvroConsumer<ListVector> { + + private final Consumer delegate; + + /** + * Instantiate a ArrayConsumer. 
+ */ + public AvroArraysConsumer(ListVector vector, Consumer delegate) { + super(vector); + this.delegate = delegate; + } + + @Override + public void consume(Decoder decoder) throws IOException { + + vector.startNewValue(currentIndex); + long totalCount = 0; + for (long count = decoder.readArrayStart(); count != 0; count = decoder.arrayNext()) { + totalCount += count; + ensureInnerVectorCapacity(totalCount); + for (int element = 0; element < count; element++) { + delegate.consume(decoder); + } + } + vector.endValue(currentIndex, (int) totalCount); + currentIndex++; + } + + @Override + public void close() throws Exception { + super.close(); + delegate.close(); + } + + @Override + public boolean resetValueVector(ListVector vector) { + this.delegate.resetValueVector(vector.getDataVector()); + return super.resetValueVector(vector); + } + + void ensureInnerVectorCapacity(long targetCapacity) { + while (vector.getDataVector().getValueCapacity() < targetCapacity) { + vector.getDataVector().reAlloc(); + } + } +} diff --git a/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroBooleanConsumer.java b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroBooleanConsumer.java new file mode 100644 index 000000000..4ca5f2445 --- /dev/null +++ b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroBooleanConsumer.java @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.consumers; + +import java.io.IOException; + +import org.apache.arrow.vector.BitVector; +import org.apache.avro.io.Decoder; + +/** + * Consumer which consume boolean type values from avro decoder. + * Write the data to {@link BitVector}. + */ +public class AvroBooleanConsumer extends BaseAvroConsumer<BitVector> { + + /** + * Instantiate a AvroBooleanConsumer. + */ + public AvroBooleanConsumer(BitVector vector) { + super(vector); + } + + @Override + public void consume(Decoder decoder) throws IOException { + vector.set(currentIndex, decoder.readBoolean() ? 1 : 0); + currentIndex++; + } +} diff --git a/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroBytesConsumer.java b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroBytesConsumer.java new file mode 100644 index 000000000..eede68ebd --- /dev/null +++ b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroBytesConsumer.java @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.consumers; + +import java.io.IOException; +import java.nio.ByteBuffer; + +import org.apache.arrow.vector.VarBinaryVector; +import org.apache.avro.io.Decoder; + +/** + * Consumer which consume bytes type values from avro decoder. + * Write the data to {@link VarBinaryVector}. + */ +public class AvroBytesConsumer extends BaseAvroConsumer<VarBinaryVector> { + + private ByteBuffer cacheBuffer; + + /** + * Instantiate a AvroBytesConsumer. + */ + public AvroBytesConsumer(VarBinaryVector vector) { + super(vector); + } + + @Override + public void consume(Decoder decoder) throws IOException { + // cacheBuffer is initialized null and create in the first consume, + // if its capacity < size to read, decoder will create a new one with new capacity. + cacheBuffer = decoder.readBytes(cacheBuffer); + vector.setSafe(currentIndex, cacheBuffer, 0, cacheBuffer.limit()); + currentIndex++; + } +} diff --git a/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroDoubleConsumer.java b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroDoubleConsumer.java new file mode 100644 index 000000000..356707a14 --- /dev/null +++ b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroDoubleConsumer.java @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.consumers; + +import java.io.IOException; + +import org.apache.arrow.vector.Float8Vector; +import org.apache.avro.io.Decoder; + +/** + * Consumer which consume double type values from avro decoder. + * Write the data to {@link Float8Vector}. + */ +public class AvroDoubleConsumer extends BaseAvroConsumer<Float8Vector> { + + /** + * Instantiate a AvroDoubleConsumer. + */ + public AvroDoubleConsumer(Float8Vector vector) { + super(vector); + } + + @Override + public void consume(Decoder decoder) throws IOException { + vector.set(currentIndex++, decoder.readDouble()); + } +} diff --git a/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroEnumConsumer.java b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroEnumConsumer.java new file mode 100644 index 000000000..2f4443b74 --- /dev/null +++ b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroEnumConsumer.java @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.consumers; + +import java.io.IOException; + +import org.apache.arrow.vector.BaseIntVector; +import org.apache.arrow.vector.IntVector; +import org.apache.avro.io.Decoder; + +/** + * Consumer which consume enum type values from avro decoder. + * Write the data to {@link IntVector}. + */ +public class AvroEnumConsumer extends BaseAvroConsumer<BaseIntVector> { + + /** + * Instantiate a AvroEnumConsumer. + */ + public AvroEnumConsumer(BaseIntVector vector) { + super(vector); + } + + @Override + public void consume(Decoder decoder) throws IOException { + vector.setWithPossibleTruncate(currentIndex++, decoder.readEnum()); + } +} diff --git a/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroFixedConsumer.java b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroFixedConsumer.java new file mode 100644 index 000000000..a065466e3 --- /dev/null +++ b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroFixedConsumer.java @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.consumers; + +import java.io.IOException; + +import org.apache.arrow.vector.FixedSizeBinaryVector; +import org.apache.avro.io.Decoder; + +/** + * Consumer which consume fixed type values from avro decoder. + * Write the data to {@link org.apache.arrow.vector.FixedSizeBinaryVector}. + */ +public class AvroFixedConsumer extends BaseAvroConsumer<FixedSizeBinaryVector> { + + private final byte[] reuseBytes; + + /** + * Instantiate a AvroFixedConsumer. + */ + public AvroFixedConsumer(FixedSizeBinaryVector vector, int size) { + super(vector); + reuseBytes = new byte[size]; + } + + @Override + public void consume(Decoder decoder) throws IOException { + decoder.readFixed(reuseBytes); + vector.setSafe(currentIndex++, reuseBytes); + } +} diff --git a/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroFloatConsumer.java b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroFloatConsumer.java new file mode 100644 index 000000000..c8de4a21a --- /dev/null +++ b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroFloatConsumer.java @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.consumers; + +import java.io.IOException; + +import org.apache.arrow.vector.Float4Vector; +import org.apache.avro.io.Decoder; + +/** + * Consumer which consume float type values from avro decoder. + * Write the data to {@link Float4Vector}. + */ +public class AvroFloatConsumer extends BaseAvroConsumer<Float4Vector> { + + /** + * Instantiate a AvroFloatConsumer. + */ + public AvroFloatConsumer(Float4Vector vector) { + super(vector); + } + + @Override + public void consume(Decoder decoder) throws IOException { + vector.set(currentIndex++, decoder.readFloat()); + } +} diff --git a/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroIntConsumer.java b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroIntConsumer.java new file mode 100644 index 000000000..bc8d4de78 --- /dev/null +++ b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroIntConsumer.java @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.consumers; + +import java.io.IOException; + +import org.apache.arrow.vector.IntVector; +import org.apache.avro.io.Decoder; + +/** + * Consumer which consume int type values from avro decoder. + * Write the data to {@link IntVector}. + */ +public class AvroIntConsumer extends BaseAvroConsumer<IntVector> { + + /** + * Instantiate a AvroIntConsumer. + */ + public AvroIntConsumer(IntVector vector) { + super(vector); + } + + @Override + public void consume(Decoder decoder) throws IOException { + vector.set(currentIndex++, decoder.readInt()); + } +} diff --git a/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroLongConsumer.java b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroLongConsumer.java new file mode 100644 index 000000000..b9016c58f --- /dev/null +++ b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroLongConsumer.java @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.consumers; + +import java.io.IOException; + +import org.apache.arrow.vector.BigIntVector; +import org.apache.avro.io.Decoder; + +/** + * Consumer which consume long type values from avro decoder. + * Write the data to {@link BigIntVector}. + */ +public class AvroLongConsumer extends BaseAvroConsumer<BigIntVector> { + + /** + * Instantiate a AvroLongConsumer. + */ + public AvroLongConsumer(BigIntVector vector) { + super(vector); + } + + @Override + public void consume(Decoder decoder) throws IOException { + vector.set(currentIndex++, decoder.readLong()); + } +} diff --git a/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroMapConsumer.java b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroMapConsumer.java new file mode 100644 index 000000000..b8e8bd585 --- /dev/null +++ b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroMapConsumer.java @@ -0,0 +1,79 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.consumers; + +import java.io.IOException; + +import org.apache.arrow.vector.FieldVector; +import org.apache.arrow.vector.complex.MapVector; +import org.apache.arrow.vector.complex.StructVector; +import org.apache.avro.io.Decoder; + +/** + * Consumer which consume map type values from avro decoder. + * Write the data to {@link MapVector}. + */ +public class AvroMapConsumer extends BaseAvroConsumer<MapVector> { + + private final Consumer delegate; + + /** + * Instantiate a AvroMapConsumer. + */ + public AvroMapConsumer(MapVector vector, Consumer delegate) { + super(vector); + this.delegate = delegate; + } + + @Override + public void consume(Decoder decoder) throws IOException { + + vector.startNewValue(currentIndex); + long totalCount = 0; + for (long count = decoder.readMapStart(); count != 0; count = decoder.mapNext()) { + totalCount += count; + ensureInnerVectorCapacity(totalCount); + for (int element = 0; element < count; element++) { + delegate.consume(decoder); + } + } + vector.endValue(currentIndex, (int) totalCount); + currentIndex++; + } + + @Override + public void close() throws Exception { + super.close(); + delegate.close(); + } + + @Override + public boolean resetValueVector(MapVector vector) { + this.delegate.resetValueVector(vector.getDataVector()); + return super.resetValueVector(vector); + } + + void ensureInnerVectorCapacity(long targetCapacity) { + StructVector innerVector = (StructVector) vector.getDataVector(); + for (FieldVector v : innerVector.getChildrenFromFields()) { + while (v.getValueCapacity() 
< targetCapacity) { + v.reAlloc(); + } + } + } +} diff --git a/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroNullConsumer.java b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroNullConsumer.java new file mode 100644 index 000000000..64768008a --- /dev/null +++ b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroNullConsumer.java @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.consumers; + +import java.io.IOException; + +import org.apache.arrow.vector.NullVector; +import org.apache.avro.io.Decoder; + +/** + * Consumer which consume null type values from avro decoder. + * Corresponding to {@link org.apache.arrow.vector.NullVector}. 
+ */ +public class AvroNullConsumer extends BaseAvroConsumer<NullVector> { + + public AvroNullConsumer(NullVector vector) { + super(vector); + } + + @Override + public void consume(Decoder decoder) throws IOException { + currentIndex++; + } +} diff --git a/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroStringConsumer.java b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroStringConsumer.java new file mode 100644 index 000000000..10fe234ac --- /dev/null +++ b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroStringConsumer.java @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.consumers; + +import java.io.IOException; +import java.nio.ByteBuffer; + +import org.apache.arrow.vector.VarCharVector; +import org.apache.avro.io.Decoder; + +/** + * Consumer which consume string type values from avro decoder. + * Write the data to {@link VarCharVector}. + */ +public class AvroStringConsumer extends BaseAvroConsumer<VarCharVector> { + + private ByteBuffer cacheBuffer; + + /** + * Instantiate a AvroStringConsumer. 
+ */ + public AvroStringConsumer(VarCharVector vector) { + super(vector); + } + + @Override + public void consume(Decoder decoder) throws IOException { + // cacheBuffer is initialized null and create in the first consume, + // if its capacity < size to read, decoder will create a new one with new capacity. + cacheBuffer = decoder.readBytes(cacheBuffer); + vector.setSafe(currentIndex++, cacheBuffer, 0, cacheBuffer.limit()); + } +} diff --git a/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroStructConsumer.java b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroStructConsumer.java new file mode 100644 index 000000000..792d01ee5 --- /dev/null +++ b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroStructConsumer.java @@ -0,0 +1,76 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.consumers; + +import java.io.IOException; + +import org.apache.arrow.util.AutoCloseables; +import org.apache.arrow.vector.FieldVector; +import org.apache.arrow.vector.complex.StructVector; +import org.apache.avro.io.Decoder; + +/** + * Consumer which consume nested record type values from avro decoder. 
+ * Write the data to {@link org.apache.arrow.vector.complex.StructVector}. + */ +public class AvroStructConsumer extends BaseAvroConsumer<StructVector> { + + private final Consumer[] delegates; + + /** + * Instantiate a AvroStructConsumer. + */ + public AvroStructConsumer(StructVector vector, Consumer[] delegates) { + super(vector); + this.delegates = delegates; + } + + @Override + public void consume(Decoder decoder) throws IOException { + + ensureInnerVectorCapacity(currentIndex + 1); + for (int i = 0; i < delegates.length; i++) { + delegates[i].consume(decoder); + } + vector.setIndexDefined(currentIndex); + currentIndex++; + + } + + @Override + public void close() throws Exception { + super.close(); + AutoCloseables.close(delegates); + } + + @Override + public boolean resetValueVector(StructVector vector) { + for (int i = 0; i < delegates.length; i++) { + delegates[i].resetValueVector(vector.getChildrenFromFields().get(i)); + } + return super.resetValueVector(vector); + } + + void ensureInnerVectorCapacity(long targetCapacity) { + for (FieldVector v : vector.getChildrenFromFields()) { + while (v.getValueCapacity() < targetCapacity) { + v.reAlloc(); + } + } + } +} diff --git a/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroUnionsConsumer.java b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroUnionsConsumer.java new file mode 100644 index 000000000..c0bb0200f --- /dev/null +++ b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/AvroUnionsConsumer.java @@ -0,0 +1,86 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.consumers; + +import java.io.IOException; + +import org.apache.arrow.util.AutoCloseables; +import org.apache.arrow.vector.ValueVector; +import org.apache.arrow.vector.complex.UnionVector; +import org.apache.arrow.vector.types.Types; +import org.apache.avro.io.Decoder; + +/** + * Consumer which consume unions type values from avro decoder. + * Write the data to {@link org.apache.arrow.vector.complex.UnionVector}. + */ +public class AvroUnionsConsumer extends BaseAvroConsumer<UnionVector> { + + private Consumer[] delegates; + private Types.MinorType[] types; + + /** + * Instantiate an AvroUnionConsumer. + */ + public AvroUnionsConsumer(UnionVector vector, Consumer[] delegates, Types.MinorType[] types) { + + super(vector); + this.delegates = delegates; + this.types = types; + } + + @Override + public void consume(Decoder decoder) throws IOException { + int fieldIndex = decoder.readInt(); + + ensureInnerVectorCapacity(currentIndex + 1, fieldIndex); + Consumer delegate = delegates[fieldIndex]; + + vector.setType(currentIndex, types[fieldIndex]); + // In UnionVector we need to set sub vector writer position before consume a value + // because in the previous iterations we might not have written to the specific union sub vector. 
+ delegate.setPosition(currentIndex); + delegate.consume(decoder); + + currentIndex++; + } + + @Override + public void close() throws Exception { + super.close(); + AutoCloseables.close(delegates); + } + + @Override + public boolean resetValueVector(UnionVector vector) { + for (int i = 0; i < delegates.length; i++) { + delegates[i].resetValueVector(vector.getChildrenFromFields().get(i)); + } + return super.resetValueVector(vector); + } + + void ensureInnerVectorCapacity(long targetCapacity, int fieldIndex) { + ValueVector fieldVector = vector.getChildrenFromFields().get(fieldIndex); + if (fieldVector.getMinorType() == Types.MinorType.NULL) { + return; + } + while (fieldVector.getValueCapacity() < targetCapacity) { + fieldVector.reAlloc(); + } + } +} diff --git a/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/BaseAvroConsumer.java b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/BaseAvroConsumer.java new file mode 100644 index 000000000..303be8e50 --- /dev/null +++ b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/BaseAvroConsumer.java @@ -0,0 +1,65 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.arrow.consumers; + +import org.apache.arrow.vector.FieldVector; + +/** + * Base class for non-skippable avro consumers. + * @param <T> vector type. + */ +public abstract class BaseAvroConsumer<T extends FieldVector> implements Consumer<T> { + + protected T vector; + protected int currentIndex; + + /** + * Constructs a base avro consumer. + * @param vector the vector to consume. + */ + public BaseAvroConsumer(T vector) { + this.vector = vector; + } + + @Override + public void addNull() { + currentIndex++; + } + + @Override + public void setPosition(int index) { + currentIndex = index; + } + + @Override + public FieldVector getVector() { + return vector; + } + + @Override + public void close() throws Exception { + vector.close(); + } + + @Override + public boolean resetValueVector(T vector) { + this.vector = vector; + this.currentIndex = 0; + return true; + } +} diff --git a/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/CompositeAvroConsumer.java b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/CompositeAvroConsumer.java new file mode 100644 index 000000000..af476d27c --- /dev/null +++ b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/CompositeAvroConsumer.java @@ -0,0 +1,73 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.consumers; + +import java.io.IOException; +import java.util.List; + +import org.apache.arrow.util.AutoCloseables; +import org.apache.arrow.vector.VectorSchemaRoot; +import org.apache.avro.io.Decoder; + +/** + * Composite consumer which hold all consumers. + * It manages the consume and cleanup process. + */ +public class CompositeAvroConsumer implements AutoCloseable { + + private final List<Consumer> consumers; + + public List<Consumer> getConsumers() { + return consumers; + } + + public CompositeAvroConsumer(List<Consumer> consumers) { + this.consumers = consumers; + } + + /** + * Consume decoder data. + */ + public void consume(Decoder decoder) throws IOException { + for (Consumer consumer : consumers) { + consumer.consume(decoder); + } + } + + /** + * Reset vector of consumers with the given {@link VectorSchemaRoot}. + */ + public void resetConsumerVectors(VectorSchemaRoot root) { + int index = 0; + for (Consumer consumer : consumers) { + if (consumer.resetValueVector(root.getFieldVectors().get(index))) { + index++; + } + } + } + + @Override + public void close() { + // clean up + try { + AutoCloseables.close(consumers); + } catch (Exception e) { + throw new RuntimeException("Error occurs in close.", e); + } + } +} diff --git a/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/Consumer.java b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/Consumer.java new file mode 100644 index 000000000..8c4ee9a96 --- /dev/null +++ b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/Consumer.java @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.consumers; + +import java.io.IOException; + +import org.apache.arrow.vector.FieldVector; +import org.apache.avro.io.Decoder; + +/** + * Interface that is used to consume values from avro decoder. + * @param <T> The vector within consumer or its delegate, used for partially consume purpose. + */ +public interface Consumer<T extends FieldVector> extends AutoCloseable { + + /** + * Consume a specific type value from avro decoder and write it to vector. + * @param decoder avro decoder to read data + * @throws IOException on error + */ + void consume(Decoder decoder) throws IOException; + + /** + * Add null value to vector by making writer position + 1. + */ + void addNull(); + + /** + * Set the position to write value into vector. + */ + void setPosition(int index); + + /** + * Get the vector within the consumer. + */ + FieldVector getVector(); + + /** + * Close this consumer when occurs exception to avoid potential leak. + */ + void close() throws Exception; + + /** + * Reset the vector within consumer for partial read purpose. + * @return true if reset is successful, false if reset is not needed. + */ + boolean resetValueVector(T vector); + + /** + * Indicates whether the consumer is type of {@link SkipConsumer}. 
+ */ + default boolean skippable() { + return false; + } + +} diff --git a/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/SkipConsumer.java b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/SkipConsumer.java new file mode 100644 index 000000000..94c5b339d --- /dev/null +++ b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/SkipConsumer.java @@ -0,0 +1,67 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.consumers; + +import java.io.IOException; + +import org.apache.arrow.vector.FieldVector; +import org.apache.avro.io.Decoder; + +/** + * Consumer which skip (throw away) data from the decoder. 
+ */ +public class SkipConsumer implements Consumer { + + private final SkipFunction skipFunction; + + public SkipConsumer(SkipFunction skipFunction) { + this.skipFunction = skipFunction; + } + + @Override + public void consume(Decoder decoder) throws IOException { + skipFunction.apply(decoder); + } + + @Override + public void addNull() { + } + + @Override + public void setPosition(int index) { + } + + @Override + public FieldVector getVector() { + return null; + } + + @Override + public void close() throws Exception { + } + + @Override + public boolean resetValueVector(FieldVector vector) { + return false; + } + + @Override + public boolean skippable() { + return true; + } +} diff --git a/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/SkipFunction.java b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/SkipFunction.java new file mode 100644 index 000000000..61938916a --- /dev/null +++ b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/SkipFunction.java @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.arrow.consumers; + +import java.io.IOException; + +import org.apache.avro.io.Decoder; + +/** + * Adapter function to skip (throw away) data from the decoder. + */ +@FunctionalInterface +public interface SkipFunction { + void apply(Decoder decoder) throws IOException; +} diff --git a/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/logical/AvroDateConsumer.java b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/logical/AvroDateConsumer.java new file mode 100644 index 000000000..3aa8970d9 --- /dev/null +++ b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/logical/AvroDateConsumer.java @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.consumers.logical; + +import java.io.IOException; + +import org.apache.arrow.consumers.BaseAvroConsumer; +import org.apache.arrow.vector.DateDayVector; +import org.apache.avro.io.Decoder; + +/** + * Consumer which consume date type values from avro decoder. + * Write the data to {@link DateDayVector}. + */ +public class AvroDateConsumer extends BaseAvroConsumer<DateDayVector> { + + /** + * Instantiate a AvroDateConsumer. 
+ */ + public AvroDateConsumer(DateDayVector vector) { + super(vector); + } + + @Override + public void consume(Decoder decoder) throws IOException { + vector.set(currentIndex++, decoder.readInt()); + } +} diff --git a/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/logical/AvroDecimalConsumer.java b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/logical/AvroDecimalConsumer.java new file mode 100644 index 000000000..24d73cf82 --- /dev/null +++ b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/logical/AvroDecimalConsumer.java @@ -0,0 +1,88 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.consumers.logical; + +import java.io.IOException; +import java.nio.ByteBuffer; + +import org.apache.arrow.consumers.BaseAvroConsumer; +import org.apache.arrow.util.Preconditions; +import org.apache.arrow.vector.DecimalVector; +import org.apache.avro.io.Decoder; + +/** + * Consumer which consume decimal type values from avro decoder. + * Write the data to {@link DecimalVector}. + */ +public abstract class AvroDecimalConsumer extends BaseAvroConsumer<DecimalVector> { + + /** + * Instantiate a AvroDecimalConsumer. 
+ */ + public AvroDecimalConsumer(DecimalVector vector) { + super(vector); + } + + /** + * Consumer for decimal logical type with original bytes type. + */ + public static class BytesDecimalConsumer extends AvroDecimalConsumer { + + private ByteBuffer cacheBuffer; + + /** + * Instantiate a BytesDecimalConsumer. + */ + public BytesDecimalConsumer(DecimalVector vector) { + super(vector); + } + + @Override + public void consume(Decoder decoder) throws IOException { + cacheBuffer = decoder.readBytes(cacheBuffer); + byte[] bytes = new byte[cacheBuffer.limit()]; + Preconditions.checkArgument(bytes.length <= 16, "Decimal bytes length should <= 16."); + cacheBuffer.get(bytes); + vector.setBigEndian(currentIndex++, bytes); + } + + } + + /** + * Consumer for decimal logical type with original fixed type. + */ + public static class FixedDecimalConsumer extends AvroDecimalConsumer { + + private byte[] reuseBytes; + + /** + * Instantiate a FixedDecimalConsumer. + */ + public FixedDecimalConsumer(DecimalVector vector, int size) { + super(vector); + Preconditions.checkArgument(size <= 16, "Decimal bytes length should <= 16."); + reuseBytes = new byte[size]; + } + + @Override + public void consume(Decoder decoder) throws IOException { + decoder.readFixed(reuseBytes); + vector.setBigEndian(currentIndex++, reuseBytes); + } + } +} diff --git a/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/logical/AvroTimeMicroConsumer.java b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/logical/AvroTimeMicroConsumer.java new file mode 100644 index 000000000..e68ba158f --- /dev/null +++ b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/logical/AvroTimeMicroConsumer.java @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.consumers.logical; + +import java.io.IOException; + +import org.apache.arrow.consumers.BaseAvroConsumer; +import org.apache.arrow.vector.TimeMicroVector; +import org.apache.avro.io.Decoder; + +/** + * Consumer which consume date time-micro values from avro decoder. + * Write the data to {@link TimeMicroVector}. + */ +public class AvroTimeMicroConsumer extends BaseAvroConsumer<TimeMicroVector> { + + /** + * Instantiate a AvroTimeMicroConsumer. + */ + public AvroTimeMicroConsumer(TimeMicroVector vector) { + super(vector); + } + + @Override + public void consume(Decoder decoder) throws IOException { + vector.set(currentIndex++, decoder.readLong()); + } +} diff --git a/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/logical/AvroTimeMillisConsumer.java b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/logical/AvroTimeMillisConsumer.java new file mode 100644 index 000000000..f76186fc3 --- /dev/null +++ b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/logical/AvroTimeMillisConsumer.java @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.consumers.logical; + +import java.io.IOException; + +import org.apache.arrow.consumers.BaseAvroConsumer; +import org.apache.arrow.vector.TimeMilliVector; +import org.apache.avro.io.Decoder; + +/** + * Consumer which consume date time-millis values from avro decoder. + * Write the data to {@link TimeMilliVector}. + */ +public class AvroTimeMillisConsumer extends BaseAvroConsumer<TimeMilliVector> { + + /** + * Instantiate a AvroTimeMilliConsumer. + */ + public AvroTimeMillisConsumer(TimeMilliVector vector) { + super(vector); + } + + @Override + public void consume(Decoder decoder) throws IOException { + vector.set(currentIndex++, decoder.readInt()); + } +} diff --git a/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/logical/AvroTimestampMicrosConsumer.java b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/logical/AvroTimestampMicrosConsumer.java new file mode 100644 index 000000000..82da0e805 --- /dev/null +++ b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/logical/AvroTimestampMicrosConsumer.java @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.consumers.logical; + +import java.io.IOException; + +import org.apache.arrow.consumers.BaseAvroConsumer; +import org.apache.arrow.vector.TimeStampMicroVector; +import org.apache.avro.io.Decoder; + +/** + * Consumer which consume date timestamp-micro values from avro decoder. + * Write the data to {@link TimeStampMicroVector}. + */ +public class AvroTimestampMicrosConsumer extends BaseAvroConsumer<TimeStampMicroVector> { + + /** + * Instantiate a AvroTimestampMicroConsumer. + */ + public AvroTimestampMicrosConsumer(TimeStampMicroVector vector) { + super(vector); + } + + @Override + public void consume(Decoder decoder) throws IOException { + vector.set(currentIndex++, decoder.readLong()); + } +} diff --git a/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/logical/AvroTimestampMillisConsumer.java b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/logical/AvroTimestampMillisConsumer.java new file mode 100644 index 000000000..159f49e14 --- /dev/null +++ b/src/arrow/java/adapter/avro/src/main/java/org/apache/arrow/consumers/logical/AvroTimestampMillisConsumer.java @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.consumers.logical; + +import java.io.IOException; + +import org.apache.arrow.consumers.BaseAvroConsumer; +import org.apache.arrow.vector.TimeStampMilliVector; +import org.apache.avro.io.Decoder; + +/** + * Consumer which consume date timestamp-millis values from avro decoder. + * Write the data to {@link TimeStampMilliVector}. + */ +public class AvroTimestampMillisConsumer extends BaseAvroConsumer<TimeStampMilliVector> { + + /** + * Instantiate a AvroTimestampMillisConsumer. + */ + public AvroTimestampMillisConsumer(TimeStampMilliVector vector) { + super(vector); + } + + @Override + public void consume(Decoder decoder) throws IOException { + vector.set(currentIndex++, decoder.readLong()); + } +} diff --git a/src/arrow/java/adapter/avro/src/test/java/org/apache/arrow/AvroLogicalTypesTest.java b/src/arrow/java/adapter/avro/src/test/java/org/apache/arrow/AvroLogicalTypesTest.java new file mode 100644 index 000000000..050a50dda --- /dev/null +++ b/src/arrow/java/adapter/avro/src/test/java/org/apache/arrow/AvroLogicalTypesTest.java @@ -0,0 +1,201 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow; + +import static junit.framework.TestCase.assertNull; +import static junit.framework.TestCase.assertTrue; +import static org.junit.jupiter.api.Assertions.assertThrows; + +import java.math.BigDecimal; +import java.nio.ByteBuffer; +import java.time.LocalDateTime; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import org.apache.arrow.vector.FieldVector; +import org.apache.arrow.vector.VectorSchemaRoot; +import org.apache.arrow.vector.util.DateUtility; +import org.apache.avro.Conversions; +import org.apache.avro.Schema; +import org.apache.avro.generic.GenericFixed; +import org.junit.Test; + +public class AvroLogicalTypesTest extends AvroTestBase { + + @Test + public void testTimestampMicros() throws Exception { + Schema schema = getSchema("logical/test_timestamp_micros.avsc"); + + List<Long> data = Arrays.asList(10000L, 20000L, 30000L, 40000L, 50000L); + List<LocalDateTime> expected = Arrays.asList( + DateUtility.getLocalDateTimeFromEpochMicro(10000), + DateUtility.getLocalDateTimeFromEpochMicro(20000), + DateUtility.getLocalDateTimeFromEpochMicro(30000), + DateUtility.getLocalDateTimeFromEpochMicro(40000), + DateUtility.getLocalDateTimeFromEpochMicro(50000) + ); + + VectorSchemaRoot root = writeAndRead(schema, data); + FieldVector vector = root.getFieldVectors().get(0); + + checkPrimitiveResult(expected, vector); + } + + 
@Test + public void testTimestampMillis() throws Exception { + Schema schema = getSchema("logical/test_timestamp_millis.avsc"); + + List<Long> data = Arrays.asList(10000L, 20000L, 30000L, 40000L, 50000L); + List<LocalDateTime> expected = Arrays.asList( + DateUtility.getLocalDateTimeFromEpochMilli(10000), + DateUtility.getLocalDateTimeFromEpochMilli(20000), + DateUtility.getLocalDateTimeFromEpochMilli(30000), + DateUtility.getLocalDateTimeFromEpochMilli(40000), + DateUtility.getLocalDateTimeFromEpochMilli(50000) + ); + + VectorSchemaRoot root = writeAndRead(schema, data); + FieldVector vector = root.getFieldVectors().get(0); + + checkPrimitiveResult(expected, vector); + } + + @Test + public void testTimeMicros() throws Exception { + Schema schema = getSchema("logical/test_time_micros.avsc"); + + List<Long> data = Arrays.asList(10000L, 20000L, 30000L, 40000L, 50000L); + + VectorSchemaRoot root = writeAndRead(schema, data); + FieldVector vector = root.getFieldVectors().get(0); + + checkPrimitiveResult(data, vector); + } + + @Test + public void testTimeMillis() throws Exception { + Schema schema = getSchema("logical/test_time_millis.avsc"); + + List<Integer> data = Arrays.asList(100, 200, 300, 400, 500); + List<LocalDateTime> expected = Arrays.asList( + DateUtility.getLocalDateTimeFromEpochMilli(100), + DateUtility.getLocalDateTimeFromEpochMilli(200), + DateUtility.getLocalDateTimeFromEpochMilli(300), + DateUtility.getLocalDateTimeFromEpochMilli(400), + DateUtility.getLocalDateTimeFromEpochMilli(500) + ); + + VectorSchemaRoot root = writeAndRead(schema, data); + FieldVector vector = root.getFieldVectors().get(0); + + checkPrimitiveResult(expected, vector); + } + + @Test + public void testDate() throws Exception { + Schema schema = getSchema("logical/test_date.avsc"); + + List<Integer> data = Arrays.asList(100, 200, 300, 400, 500); + + VectorSchemaRoot root = writeAndRead(schema, data); + FieldVector vector = root.getFieldVectors().get(0); + + checkPrimitiveResult(data, 
vector); + } + + @Test + public void testDecimalWithOriginalBytes() throws Exception { + Schema schema = getSchema("logical/test_decimal_with_original_bytes.avsc"); + List<ByteBuffer> data = new ArrayList<>(); + List<BigDecimal> expected = new ArrayList<>(); + + Conversions.DecimalConversion conversion = new Conversions.DecimalConversion(); + + for (int i = 0; i < 5; i++) { + BigDecimal value = new BigDecimal(i * i).setScale(2); + ByteBuffer buffer = conversion.toBytes(value, schema, schema.getLogicalType()); + data.add(buffer); + expected.add(value); + } + + VectorSchemaRoot root = writeAndRead(schema, data); + FieldVector vector = root.getFieldVectors().get(0); + checkPrimitiveResult(expected, vector); + + } + + @Test + public void testDecimalWithOriginalFixed() throws Exception { + Schema schema = getSchema("logical/test_decimal_with_original_fixed.avsc"); + + List<GenericFixed> data = new ArrayList<>(); + List<BigDecimal> expected = new ArrayList<>(); + + Conversions.DecimalConversion conversion = new Conversions.DecimalConversion(); + + for (int i = 0; i < 5; i++) { + BigDecimal value = new BigDecimal(i * i).setScale(2); + GenericFixed fixed = conversion.toFixed(value, schema, schema.getLogicalType()); + data.add(fixed); + expected.add(value); + } + + VectorSchemaRoot root = writeAndRead(schema, data); + FieldVector vector = root.getFieldVectors().get(0); + checkPrimitiveResult(expected, vector); + } + + @Test + public void testInvalidDecimalPrecision() throws Exception { + Schema schema = getSchema("logical/test_decimal_invalid1.avsc"); + List<ByteBuffer> data = new ArrayList<>(); + + Conversions.DecimalConversion conversion = new Conversions.DecimalConversion(); + + for (int i = 0; i < 5; i++) { + BigDecimal value = new BigDecimal(i * i).setScale(2); + ByteBuffer buffer = conversion.toBytes(value, schema, schema.getLogicalType()); + data.add(buffer); + } + + IllegalArgumentException e = assertThrows(IllegalArgumentException.class, + () -> 
writeAndRead(schema, data)); + assertTrue(e.getMessage().contains("Precision must be in range of 1 to 38")); + + } + + @Test + public void testFailedToCreateDecimalLogicalType() throws Exception { + // For decimal logical type, if avro validate schema failed, it will not create logical type, + // and the schema will be treated as its original type. + + // java.lang.IllegalArgumentException: Invalid decimal scale: -1 (must be positive) + Schema schema1 = getSchema("logical/test_decimal_invalid2.avsc"); + assertNull(schema1.getLogicalType()); + + // java.lang.IllegalArgumentException: Invalid decimal scale: 40 (greater than precision: 20) + Schema schema2 = getSchema("logical/test_decimal_invalid3.avsc"); + assertNull(schema2.getLogicalType()); + + // java.lang.IllegalArgumentException: fixed(1) cannot store 30 digits (max 2) + Schema schema3 = getSchema("logical/test_decimal_invalid4.avsc"); + assertNull(schema3.getLogicalType()); + } + +} diff --git a/src/arrow/java/adapter/avro/src/test/java/org/apache/arrow/AvroSkipFieldTest.java b/src/arrow/java/adapter/avro/src/test/java/org/apache/arrow/AvroSkipFieldTest.java new file mode 100644 index 000000000..b946dbd86 --- /dev/null +++ b/src/arrow/java/adapter/avro/src/test/java/org/apache/arrow/AvroSkipFieldTest.java @@ -0,0 +1,626 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow; + +import static org.junit.Assert.assertEquals; + +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Set; + +import org.apache.arrow.vector.VectorSchemaRoot; +import org.apache.arrow.vector.complex.StructVector; +import org.apache.arrow.vector.types.Types; +import org.apache.avro.Schema; +import org.apache.avro.generic.GenericData; +import org.apache.avro.generic.GenericRecord; +import org.junit.Test; + +public class AvroSkipFieldTest extends AvroTestBase { + + @Test + public void testSkipUnionWithOneField() throws Exception { + Set<String> skipFieldNames = new HashSet<>(); + skipFieldNames.add("f0"); + config = new AvroToArrowConfigBuilder(config.getAllocator()).setSkipFieldNames(skipFieldNames).build(); + Schema schema = getSchema("skip/test_skip_union_before.avsc"); + Schema expectedSchema = getSchema("skip/test_skip_union_one_field_expected.avsc"); + + ArrayList<GenericRecord> data = new ArrayList<>(); + ArrayList<GenericRecord> expectedData = new ArrayList<>(); + + for (int i = 0; i < 5; i++) { + GenericRecord record = new GenericData.Record(schema); + record.put(0, "test" + i); + record.put(1, i % 2 == 0 ? "test" + i : null); + record.put(2, i % 2 == 0 ? 
"test" + i : i); + record.put(3, i); + data.add(record); + + GenericRecord expectedRecord = new GenericData.Record(expectedSchema); + expectedRecord.put(0, record.get(1)); + expectedRecord.put(1, record.get(2)); + expectedRecord.put(2, record.get(3)); + expectedData.add(expectedRecord); + } + VectorSchemaRoot root = writeAndRead(schema, data); + checkRecordResult(expectedSchema, expectedData, root); + } + + @Test + public void testSkipUnionWithNullableOneField() throws Exception { + Set<String> skipFieldNames = new HashSet<>(); + skipFieldNames.add("f1"); + config = new AvroToArrowConfigBuilder(config.getAllocator()).setSkipFieldNames(skipFieldNames).build(); + Schema schema = getSchema("skip/test_skip_union_before.avsc"); + Schema expectedSchema = getSchema("skip/test_skip_union_nullable_field_expected.avsc"); + + ArrayList<GenericRecord> data = new ArrayList<>(); + ArrayList<GenericRecord> expectedData = new ArrayList<>(); + + for (int i = 0; i < 5; i++) { + GenericRecord record = new GenericData.Record(schema); + record.put(0, "test" + i); + record.put(1, i % 2 == 0 ? "test" + i : null); + record.put(2, i % 2 == 0 ? 
"test" + i : i); + record.put(3, i); + data.add(record); + + GenericRecord expectedRecord = new GenericData.Record(expectedSchema); + expectedRecord.put(0, record.get(0)); + expectedRecord.put(1, record.get(2)); + expectedRecord.put(2, record.get(3)); + expectedData.add(expectedRecord); + } + VectorSchemaRoot root = writeAndRead(schema, data); + checkRecordResult(expectedSchema, expectedData, root); + } + + @Test + public void testSkipUnionWithMultiFields() throws Exception { + Set<String> skipFieldNames = new HashSet<>(); + skipFieldNames.add("f2"); + config = new AvroToArrowConfigBuilder(config.getAllocator()).setSkipFieldNames(skipFieldNames).build(); + Schema schema = getSchema("skip/test_skip_union_before.avsc"); + Schema expectedSchema = getSchema("skip/test_skip_union_multi_fields_expected.avsc"); + + ArrayList<GenericRecord> data = new ArrayList<>(); + ArrayList<GenericRecord> expectedData = new ArrayList<>(); + + for (int i = 0; i < 5; i++) { + GenericRecord record = new GenericData.Record(schema); + record.put(0, "test" + i); + record.put(1, i % 2 == 0 ? "test" + i : null); + record.put(2, i % 2 == 0 ? 
"test" + i : i); + record.put(3, i); + data.add(record); + + GenericRecord expectedRecord = new GenericData.Record(expectedSchema); + expectedRecord.put(0, record.get(0)); + expectedRecord.put(1, record.get(1)); + expectedRecord.put(2, record.get(3)); + expectedData.add(expectedRecord); + } + VectorSchemaRoot root = writeAndRead(schema, data); + checkRecordResult(expectedSchema, expectedData, root); + } + + @Test + public void testSkipMapField() throws Exception { + Set<String> skipFieldNames = new HashSet<>(); + skipFieldNames.add("f1"); + config = new AvroToArrowConfigBuilder(config.getAllocator()).setSkipFieldNames(skipFieldNames).build(); + Schema schema = getSchema("skip/test_skip_map_before.avsc"); + Schema expectedSchema = getSchema("skip/test_skip_map_expected.avsc"); + + HashMap map = new HashMap(); + map.put("key1", "value1"); + map.put("key2", "value3"); + + ArrayList<GenericRecord> data = new ArrayList<>(); + ArrayList<GenericRecord> expectedData = new ArrayList<>(); + + for (int i = 0; i < 5; i++) { + GenericRecord record = new GenericData.Record(schema); + record.put(0, "test" + i); + record.put(1, map); + record.put(2, i % 2 == 0); + data.add(record); + + GenericRecord expectedRecord = new GenericData.Record(expectedSchema); + expectedRecord.put(0, record.get(0)); + expectedRecord.put(1, record.get(2)); + expectedData.add(expectedRecord); + } + + VectorSchemaRoot root = writeAndRead(schema, data); + checkRecordResult(expectedSchema, expectedData, root); + } + + @Test + public void testSkipArrayField() throws Exception { + Set<String> skipFieldNames = new HashSet<>(); + skipFieldNames.add("f1"); + config = new AvroToArrowConfigBuilder(config.getAllocator()).setSkipFieldNames(skipFieldNames).build(); + Schema schema = getSchema("skip/test_skip_array_before.avsc"); + Schema expectedSchema = getSchema("skip/test_skip_array_expected.avsc"); + + ArrayList<GenericRecord> data = new ArrayList<>(); + ArrayList<GenericRecord> expectedData = new ArrayList<>(); 
+ + for (int i = 0; i < 5; i++) { + GenericRecord record = new GenericData.Record(schema); + record.put(0, "test" + i); + record.put(1, Arrays.asList("test" + i, "test" + i)); + record.put(2, i % 2 == 0); + data.add(record); + + GenericRecord expectedRecord = new GenericData.Record(expectedSchema); + expectedRecord.put(0, record.get(0)); + expectedRecord.put(1, record.get(2)); + expectedData.add(expectedRecord); + } + + VectorSchemaRoot root = writeAndRead(schema, data); + checkRecordResult(expectedSchema, expectedData, root); + } + + @Test + public void testSkipMultiFields() throws Exception { + Set<String> skipFieldNames = new HashSet<>(); + skipFieldNames.add("f1"); + skipFieldNames.add("f2"); + config = new AvroToArrowConfigBuilder(config.getAllocator()).setSkipFieldNames(skipFieldNames).build(); + Schema schema = getSchema("test_record.avsc"); + Schema expectedSchema = getSchema("skip/test_skip_multi_fields_expected.avsc"); + + ArrayList<GenericRecord> data = new ArrayList<>(); + ArrayList<GenericRecord> expectedData = new ArrayList<>(); + + for (int i = 0; i < 5; i++) { + GenericRecord record = new GenericData.Record(schema); + record.put(0, "test" + i); + record.put(1, i); + record.put(2, i % 2 == 0); + data.add(record); + + GenericRecord expectedRecord = new GenericData.Record(expectedSchema); + expectedRecord.put(0, record.get(0)); + expectedData.add(expectedRecord); + } + + VectorSchemaRoot root = writeAndRead(schema, data); + checkRecordResult(expectedSchema, expectedData, root); + } + + @Test + public void testSkipStringField() throws Exception { + Set<String> skipFieldNames = new HashSet<>(); + skipFieldNames.add("f2"); + config = new AvroToArrowConfigBuilder(config.getAllocator()).setSkipFieldNames(skipFieldNames).build(); + Schema schema = getSchema("skip/test_skip_base1.avsc"); + Schema expectedSchema = getSchema("skip/test_skip_string_expected.avsc"); + + ArrayList<GenericRecord> data = new ArrayList<>(); + ArrayList<GenericRecord> expectedData = 
new ArrayList<>(); + + for (int i = 0; i < 5; i++) { + final byte[] testBytes = ("test" + i).getBytes(); + GenericRecord record = new GenericData.Record(schema); + GenericData.Fixed fixed = new GenericData.Fixed(schema.getField("f0").schema()); + fixed.bytes(testBytes); + record.put(0, fixed); + GenericData.EnumSymbol symbol = new GenericData.EnumSymbol(schema.getField("f1").schema(), "TEST" + i % 2); + record.put(1, symbol); + record.put(2, "testtest" + i); + record.put(3, ByteBuffer.wrap(testBytes)); + data.add(record); + + GenericRecord expectedRecord = new GenericData.Record(expectedSchema); + expectedRecord.put(0, testBytes); + expectedRecord.put(1, (byte) i % 2); + expectedRecord.put(2, testBytes); + expectedData.add(expectedRecord); + } + + VectorSchemaRoot root = writeAndRead(schema, data); + checkRecordResult(expectedSchema, expectedData, root); + } + + @Test + public void testSkipBytesField() throws Exception { + Set<String> skipFieldNames = new HashSet<>(); + skipFieldNames.add("f3"); + config = new AvroToArrowConfigBuilder(config.getAllocator()).setSkipFieldNames(skipFieldNames).build(); + Schema schema = getSchema("skip/test_skip_base1.avsc"); + Schema expectedSchema = getSchema("skip/test_skip_bytes_expected.avsc"); + + ArrayList<GenericRecord> data = new ArrayList<>(); + ArrayList<GenericRecord> expectedData = new ArrayList<>(); + + for (int i = 0; i < 5; i++) { + final byte[] testBytes = ("test" + i).getBytes(); + GenericRecord record = new GenericData.Record(schema); + GenericData.Fixed fixed = new GenericData.Fixed(schema.getField("f0").schema()); + fixed.bytes(testBytes); + record.put(0, fixed); + GenericData.EnumSymbol symbol = new GenericData.EnumSymbol(schema.getField("f1").schema(), "TEST" + i % 2); + record.put(1, symbol); + record.put(2, "testtest" + i); + record.put(3, ByteBuffer.wrap(testBytes)); + data.add(record); + + GenericRecord expectedRecord = new GenericData.Record(expectedSchema); + expectedRecord.put(0, testBytes); + 
expectedRecord.put(1, (byte) i % 2); + expectedRecord.put(2, record.get(2)); + expectedData.add(expectedRecord); + } + + VectorSchemaRoot root = writeAndRead(schema, data); + checkRecordResult(expectedSchema, expectedData, root); + } + + @Test + public void testSkipFixedField() throws Exception { + Set<String> skipFieldNames = new HashSet<>(); + skipFieldNames.add("f0"); + config = new AvroToArrowConfigBuilder(config.getAllocator()).setSkipFieldNames(skipFieldNames).build(); + Schema schema = getSchema("skip/test_skip_base1.avsc"); + Schema expectedSchema = getSchema("skip/test_skip_fixed_expected.avsc"); + + ArrayList<GenericRecord> data = new ArrayList<>(); + ArrayList<GenericRecord> expectedData = new ArrayList<>(); + + for (int i = 0; i < 5; i++) { + final byte[] testBytes = ("test" + i).getBytes(); + GenericRecord record = new GenericData.Record(schema); + GenericData.Fixed fixed = new GenericData.Fixed(schema.getField("f0").schema()); + fixed.bytes(testBytes); + record.put(0, fixed); + GenericData.EnumSymbol symbol = new GenericData.EnumSymbol(schema.getField("f1").schema(), "TEST" + i % 2); + record.put(1, symbol); + record.put(2, "testtest" + i); + record.put(3, ByteBuffer.wrap(testBytes)); + data.add(record); + + GenericRecord expectedRecord = new GenericData.Record(expectedSchema); + expectedRecord.put(0, (byte) i % 2); + expectedRecord.put(1, record.get(2)); + expectedRecord.put(2, record.get(3)); + expectedData.add(expectedRecord); + } + + VectorSchemaRoot root = writeAndRead(schema, data); + checkRecordResult(expectedSchema, expectedData, root); + } + + @Test + public void testSkipEnumField() throws Exception { + Set<String> skipFieldNames = new HashSet<>(); + skipFieldNames.add("f1"); + config = new AvroToArrowConfigBuilder(config.getAllocator()).setSkipFieldNames(skipFieldNames).build(); + Schema schema = getSchema("skip/test_skip_base1.avsc"); + Schema expectedSchema = getSchema("skip/test_skip_fixed_expected.avsc"); + + ArrayList<GenericRecord> 
data = new ArrayList<>(); + ArrayList<GenericRecord> expectedData = new ArrayList<>(); + + for (int i = 0; i < 5; i++) { + final byte[] testBytes = ("test" + i).getBytes(); + GenericRecord record = new GenericData.Record(schema); + GenericData.Fixed fixed = new GenericData.Fixed(schema.getField("f0").schema()); + fixed.bytes(testBytes); + record.put(0, fixed); + GenericData.EnumSymbol symbol = new GenericData.EnumSymbol(schema.getField("f1").schema(), "TEST" + i % 2); + record.put(1, symbol); + record.put(2, "testtest" + i); + record.put(3, ByteBuffer.wrap(testBytes)); + data.add(record); + + GenericRecord expectedRecord = new GenericData.Record(expectedSchema); + expectedRecord.put(0, testBytes); + expectedRecord.put(1, record.get(2)); + expectedRecord.put(2, record.get(3)); + expectedData.add(expectedRecord); + } + + VectorSchemaRoot root = writeAndRead(schema, data); + checkRecordResult(expectedSchema, expectedData, root); + } + + @Test + public void testSkipBooleanField() throws Exception { + Set<String> skipFieldNames = new HashSet<>(); + skipFieldNames.add("f0"); + config = new AvroToArrowConfigBuilder(config.getAllocator()).setSkipFieldNames(skipFieldNames).build(); + Schema schema = getSchema("skip/test_skip_base2.avsc"); + Schema expectedSchema = getSchema("skip/test_skip_boolean_expected.avsc"); + + ArrayList<GenericRecord> data = new ArrayList<>(); + ArrayList<GenericRecord> expectedData = new ArrayList<>(); + + for (int i = 0; i < 5; i++) { + GenericRecord record = new GenericData.Record(schema); + record.put(0, i % 2 == 0); + record.put(1, i); + record.put(2, (long) i); + record.put(3, (float) i); + record.put(4, (double) i); + data.add(record); + + GenericRecord expectedRecord = new GenericData.Record(expectedSchema); + expectedRecord.put(0, record.get(1)); + expectedRecord.put(1, record.get(2)); + expectedRecord.put(2, record.get(3)); + expectedRecord.put(3, record.get(4)); + + expectedData.add(expectedRecord); + } + + VectorSchemaRoot root = 
writeAndRead(schema, data); + checkRecordResult(expectedSchema, expectedData, root); + } + + @Test + public void testSkipIntField() throws Exception { + Set<String> skipFieldNames = new HashSet<>(); + skipFieldNames.add("f1"); + config = new AvroToArrowConfigBuilder(config.getAllocator()).setSkipFieldNames(skipFieldNames).build(); + Schema schema = getSchema("skip/test_skip_base2.avsc"); + Schema expectedSchema = getSchema("skip/test_skip_int_expected.avsc"); + + ArrayList<GenericRecord> data = new ArrayList<>(); + ArrayList<GenericRecord> expectedData = new ArrayList<>(); + + for (int i = 0; i < 5; i++) { + GenericRecord record = new GenericData.Record(schema); + record.put(0, i % 2 == 0); + record.put(1, i); + record.put(2, (long) i); + record.put(3, (float) i); + record.put(4, (double) i); + data.add(record); + + GenericRecord expectedRecord = new GenericData.Record(expectedSchema); + expectedRecord.put(0, record.get(0)); + expectedRecord.put(1, record.get(2)); + expectedRecord.put(2, record.get(3)); + expectedRecord.put(3, record.get(4)); + + expectedData.add(expectedRecord); + } + + VectorSchemaRoot root = writeAndRead(schema, data); + checkRecordResult(expectedSchema, expectedData, root); + } + + @Test + public void testSkipLongField() throws Exception { + Set<String> skipFieldNames = new HashSet<>(); + skipFieldNames.add("f2"); + config = new AvroToArrowConfigBuilder(config.getAllocator()).setSkipFieldNames(skipFieldNames).build(); + Schema schema = getSchema("skip/test_skip_base2.avsc"); + Schema expectedSchema = getSchema("skip/test_skip_long_expected.avsc"); + + ArrayList<GenericRecord> data = new ArrayList<>(); + ArrayList<GenericRecord> expectedData = new ArrayList<>(); + + for (int i = 0; i < 5; i++) { + GenericRecord record = new GenericData.Record(schema); + record.put(0, i % 2 == 0); + record.put(1, i); + record.put(2, (long) i); + record.put(3, (float) i); + record.put(4, (double) i); + data.add(record); + + GenericRecord expectedRecord = new 
GenericData.Record(expectedSchema); + expectedRecord.put(0, record.get(0)); + expectedRecord.put(1, record.get(1)); + expectedRecord.put(2, record.get(3)); + expectedRecord.put(3, record.get(4)); + + expectedData.add(expectedRecord); + } + + VectorSchemaRoot root = writeAndRead(schema, data); + checkRecordResult(expectedSchema, expectedData, root); + } + + @Test + public void testSkipFloatField() throws Exception { + Set<String> skipFieldNames = new HashSet<>(); + skipFieldNames.add("f3"); + config = new AvroToArrowConfigBuilder(config.getAllocator()).setSkipFieldNames(skipFieldNames).build(); + Schema schema = getSchema("skip/test_skip_base2.avsc"); + Schema expectedSchema = getSchema("skip/test_skip_float_expected.avsc"); + + ArrayList<GenericRecord> data = new ArrayList<>(); + ArrayList<GenericRecord> expectedData = new ArrayList<>(); + + for (int i = 0; i < 5; i++) { + GenericRecord record = new GenericData.Record(schema); + record.put(0, i % 2 == 0); + record.put(1, i); + record.put(2, (long) i); + record.put(3, (float) i); + record.put(4, (double) i); + data.add(record); + + GenericRecord expectedRecord = new GenericData.Record(expectedSchema); + expectedRecord.put(0, record.get(0)); + expectedRecord.put(1, record.get(1)); + expectedRecord.put(2, record.get(2)); + expectedRecord.put(3, record.get(4)); + + expectedData.add(expectedRecord); + } + + VectorSchemaRoot root = writeAndRead(schema, data); + checkRecordResult(expectedSchema, expectedData, root); + } + + @Test + public void testSkipDoubleField() throws Exception { + Set<String> skipFieldNames = new HashSet<>(); + skipFieldNames.add("f4"); + config = new AvroToArrowConfigBuilder(config.getAllocator()).setSkipFieldNames(skipFieldNames).build(); + Schema schema = getSchema("skip/test_skip_base2.avsc"); + Schema expectedSchema = getSchema("skip/test_skip_double_expected.avsc"); + + ArrayList<GenericRecord> data = new ArrayList<>(); + ArrayList<GenericRecord> expectedData = new ArrayList<>(); + + for (int i 
= 0; i < 5; i++) { + GenericRecord record = new GenericData.Record(schema); + record.put(0, i % 2 == 0); + record.put(1, i); + record.put(2, (long) i); + record.put(3, (float) i); + record.put(4, (double) i); + data.add(record); + + GenericRecord expectedRecord = new GenericData.Record(expectedSchema); + expectedRecord.put(0, record.get(0)); + expectedRecord.put(1, record.get(1)); + expectedRecord.put(2, record.get(2)); + expectedRecord.put(3, record.get(3)); + + expectedData.add(expectedRecord); + } + + VectorSchemaRoot root = writeAndRead(schema, data); + checkRecordResult(expectedSchema, expectedData, root); + } + + @Test + public void testSkipRecordField() throws Exception { + Set<String> skipFieldNames = new HashSet<>(); + skipFieldNames.add("f0"); + config = new AvroToArrowConfigBuilder(config.getAllocator()).setSkipFieldNames(skipFieldNames).build(); + Schema schema = getSchema("skip/test_skip_record_before.avsc"); + Schema nestedSchema = schema.getFields().get(0).schema(); + ArrayList<GenericRecord> data = new ArrayList<>(); + + Schema expectedSchema = getSchema("skip/test_skip_record_expected.avsc"); + ArrayList<GenericRecord> expectedData = new ArrayList<>(); + + for (int i = 0; i < 5; i++) { + GenericRecord record = new GenericData.Record(schema); + GenericRecord nestedRecord = new GenericData.Record(nestedSchema); + nestedRecord.put(0, "test" + i); + nestedRecord.put(1, i); + record.put(0, nestedRecord); + record.put(1, i); + data.add(record); + + GenericRecord expectedRecord = new GenericData.Record(expectedSchema); + expectedRecord.put(0, i); + expectedData.add(expectedRecord); + } + + VectorSchemaRoot root = writeAndRead(schema, data); + checkRecordResult(expectedSchema, expectedData, root); + } + + @Test + public void testSkipNestedFields() throws Exception { + Set<String> skipFieldNames = new HashSet<>(); + skipFieldNames.add("f0.f0"); + config = new AvroToArrowConfigBuilder(config.getAllocator()).setSkipFieldNames(skipFieldNames).build(); + Schema 
schema = getSchema("test_nested_record.avsc"); + Schema nestedSchema = schema.getFields().get(0).schema(); + ArrayList<GenericRecord> data = new ArrayList<>(); + + Schema expectedSchema = getSchema("skip/test_skip_second_level_expected.avsc"); + Schema expectedNestedSchema = expectedSchema.getFields().get(0).schema(); + ArrayList<GenericRecord> expectedData = new ArrayList<>(); + + for (int i = 0; i < 5; i++) { + GenericRecord record = new GenericData.Record(schema); + GenericRecord nestedRecord = new GenericData.Record(nestedSchema); + nestedRecord.put(0, "test" + i); + nestedRecord.put(1, i); + record.put(0, nestedRecord); + data.add(record); + + GenericRecord expectedRecord = new GenericData.Record(expectedSchema); + GenericRecord expectedNestedRecord = new GenericData.Record(expectedNestedSchema); + expectedNestedRecord.put(0, nestedRecord.get(1)); + expectedRecord.put(0, expectedNestedRecord); + expectedData.add(expectedRecord); + } + + VectorSchemaRoot root = writeAndRead(schema, data); + checkNestedRecordResult(expectedSchema, expectedData, root); + } + + @Test + public void testSkipThirdLevelField() throws Exception { + Schema firstLevelSchema = getSchema("skip/test_skip_third_level_expected.avsc"); + Schema secondLevelSchema = firstLevelSchema.getFields().get(0).schema(); + Schema thirdLevelSchema = secondLevelSchema.getFields().get(0).schema(); + + ArrayList<GenericRecord> data = new ArrayList<>(); + for (int i = 0; i < 5; i++) { + GenericRecord firstLevelRecord = new GenericData.Record(firstLevelSchema); + GenericRecord secondLevelRecord = new GenericData.Record(secondLevelSchema); + GenericRecord thirdLevelRecord = new GenericData.Record(thirdLevelSchema); + + thirdLevelRecord.put(0, i); + thirdLevelRecord.put(1, "test" + i); + thirdLevelRecord.put(2, i % 2 == 0); + + secondLevelRecord.put(0, thirdLevelRecord); + firstLevelRecord.put(0, secondLevelRecord); + data.add(firstLevelRecord); + } + + // do not skip any fields first + VectorSchemaRoot root1 = 
writeAndRead(firstLevelSchema, data); + + assertEquals(1, root1.getFieldVectors().size()); + assertEquals(Types.MinorType.STRUCT, root1.getFieldVectors().get(0).getMinorType()); + StructVector secondLevelVector = (StructVector) root1.getFieldVectors().get(0); + assertEquals(1, secondLevelVector.getChildrenFromFields().size()); + assertEquals(Types.MinorType.STRUCT, secondLevelVector.getChildrenFromFields().get(0).getMinorType()); + StructVector thirdLevelVector = (StructVector) secondLevelVector.getChildrenFromFields().get(0); + assertEquals(3, thirdLevelVector.getChildrenFromFields().size()); + + // skip third level field and validate + Set<String> skipFieldNames = new HashSet<>(); + skipFieldNames.add("f0.f0.f0"); + config = new AvroToArrowConfigBuilder(config.getAllocator()).setSkipFieldNames(skipFieldNames).build(); + VectorSchemaRoot root2 = writeAndRead(firstLevelSchema, data); + + assertEquals(1, root2.getFieldVectors().size()); + assertEquals(Types.MinorType.STRUCT, root2.getFieldVectors().get(0).getMinorType()); + StructVector secondStruct = (StructVector) root2.getFieldVectors().get(0); + assertEquals(1, secondStruct.getChildrenFromFields().size()); + assertEquals(Types.MinorType.STRUCT, secondStruct.getChildrenFromFields().get(0).getMinorType()); + StructVector thirdStruct = (StructVector) secondStruct.getChildrenFromFields().get(0); + assertEquals(2, thirdStruct.getChildrenFromFields().size()); + + assertEquals(Types.MinorType.INT, thirdStruct.getChildrenFromFields().get(0).getMinorType()); + assertEquals(Types.MinorType.BIT, thirdStruct.getChildrenFromFields().get(1).getMinorType()); + } +} diff --git a/src/arrow/java/adapter/avro/src/test/java/org/apache/arrow/AvroTestBase.java b/src/arrow/java/adapter/avro/src/test/java/org/apache/arrow/AvroTestBase.java new file mode 100644 index 000000000..a00cd7704 --- /dev/null +++ b/src/arrow/java/adapter/avro/src/test/java/org/apache/arrow/AvroTestBase.java @@ -0,0 +1,228 @@ +/* + * Licensed to the Apache 
Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.nio.ByteBuffer; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.List; + +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.vector.FieldVector; +import org.apache.arrow.vector.VectorSchemaRoot; +import org.apache.arrow.vector.complex.ListVector; +import org.apache.arrow.vector.complex.StructVector; +import org.apache.arrow.vector.util.Text; +import org.apache.avro.Schema; +import org.apache.avro.generic.GenericDatumWriter; +import org.apache.avro.generic.GenericRecord; +import org.apache.avro.io.BinaryDecoder; +import org.apache.avro.io.BinaryEncoder; +import org.apache.avro.io.DatumWriter; +import org.apache.avro.io.DecoderFactory; +import org.apache.avro.io.EncoderFactory; +import org.junit.Before; +import org.junit.ClassRule; +import org.junit.rules.TemporaryFolder; + +public class AvroTestBase { + + 
@ClassRule + public static final TemporaryFolder TMP = new TemporaryFolder(); + + protected AvroToArrowConfig config; + + @Before + public void init() { + BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE); + config = new AvroToArrowConfigBuilder(allocator).build(); + } + + protected Schema getSchema(String schemaName) throws Exception { + Path schemaPath = Paths.get(TestWriteReadAvroRecord.class.getResource("/").getPath(), + "schema", schemaName); + return new Schema.Parser().parse(schemaPath.toFile()); + } + + protected VectorSchemaRoot writeAndRead(Schema schema, List data) throws Exception { + File dataFile = TMP.newFile(); + + BinaryEncoder + encoder = new EncoderFactory().directBinaryEncoder(new FileOutputStream(dataFile), null); + DatumWriter writer = new GenericDatumWriter(schema); + BinaryDecoder + decoder = new DecoderFactory().directBinaryDecoder(new FileInputStream(dataFile), null); + + for (Object value : data) { + writer.write(value, encoder); + } + + return AvroToArrow.avroToArrow(schema, decoder, config); + } + + protected void checkArrayResult(List<List<?>> expected, ListVector vector) { + assertEquals(expected.size(), vector.getValueCount()); + for (int i = 0; i < expected.size(); i++) { + checkArrayElement(expected.get(i), vector.getObject(i)); + } + } + + protected void checkArrayElement(List expected, List actual) { + assertEquals(expected.size(), actual.size()); + for (int i = 0; i < expected.size(); i++) { + Object value1 = expected.get(i); + Object value2 = actual.get(i); + if (value1 == null) { + assertTrue(value2 == null); + continue; + } + if (value2 instanceof byte[]) { + value2 = ByteBuffer.wrap((byte[]) value2); + } else if (value2 instanceof Text) { + value2 = value2.toString(); + } + assertEquals(value1, value2); + } + } + + protected void checkPrimitiveResult(List data, FieldVector vector) { + assertEquals(data.size(), vector.getValueCount()); + for (int i = 0; i < data.size(); i++) { + Object value1 = data.get(i); + 
Object value2 = vector.getObject(i); + if (value1 == null) { + assertTrue(value2 == null); + continue; + } + if (value2 instanceof byte[]) { + value2 = ByteBuffer.wrap((byte[]) value2); + if (value1 instanceof byte[]) { + value1 = ByteBuffer.wrap((byte[]) value1); + } + } else if (value2 instanceof Text) { + value2 = value2.toString(); + } else if (value2 instanceof Byte) { + value2 = ((Byte) value2).intValue(); + } + assertEquals(value1, value2); + } + } + + protected void checkRecordResult(Schema schema, ArrayList<GenericRecord> data, VectorSchemaRoot root) { + assertEquals(data.size(), root.getRowCount()); + assertEquals(schema.getFields().size(), root.getFieldVectors().size()); + + for (int i = 0; i < schema.getFields().size(); i++) { + ArrayList fieldData = new ArrayList(); + for (GenericRecord record : data) { + fieldData.add(record.get(i)); + } + + checkPrimitiveResult(fieldData, root.getFieldVectors().get(i)); + } + + } + + protected void checkNestedRecordResult(Schema schema, List<GenericRecord> data, VectorSchemaRoot root) { + assertEquals(data.size(), root.getRowCount()); + assertTrue(schema.getFields().size() == 1); + + final Schema nestedSchema = schema.getFields().get(0).schema(); + final StructVector structVector = (StructVector) root.getFieldVectors().get(0); + + for (int i = 0; i < nestedSchema.getFields().size(); i++) { + ArrayList fieldData = new ArrayList(); + for (GenericRecord record : data) { + GenericRecord nestedRecord = (GenericRecord) record.get(0); + fieldData.add(nestedRecord.get(i)); + } + + checkPrimitiveResult(fieldData, structVector.getChildrenFromFields().get(i)); + } + + } + + + // belows are for iterator api + + protected void checkArrayResult(List<List<?>> expected, List<ListVector> vectors) { + int valueCount = vectors.stream().mapToInt(v -> v.getValueCount()).sum(); + assertEquals(expected.size(), valueCount); + + int index = 0; + for (ListVector vector : vectors) { + for (int i = 0; i < vector.getValueCount(); i++) { + 
checkArrayElement(expected.get(index++), vector.getObject(i)); + } + } + } + + protected void checkRecordResult(Schema schema, ArrayList<GenericRecord> data, List<VectorSchemaRoot> roots) { + roots.forEach(root -> { + assertEquals(schema.getFields().size(), root.getFieldVectors().size()); + }); + + for (int i = 0; i < schema.getFields().size(); i++) { + List fieldData = new ArrayList(); + List<FieldVector> vectors = new ArrayList<>(); + for (GenericRecord record : data) { + fieldData.add(record.get(i)); + } + final int columnIndex = i; + roots.forEach(root -> vectors.add(root.getFieldVectors().get(columnIndex))); + + checkPrimitiveResult(fieldData, vectors); + } + + } + + protected void checkPrimitiveResult(List data, List<FieldVector> vectors) { + int valueCount = vectors.stream().mapToInt(v -> v.getValueCount()).sum(); + assertEquals(data.size(), valueCount); + + int index = 0; + for (FieldVector vector : vectors) { + for (int i = 0; i < vector.getValueCount(); i++) { + Object value1 = data.get(index++); + Object value2 = vector.getObject(i); + if (value1 == null) { + assertNull(value2); + continue; + } + if (value2 instanceof byte[]) { + value2 = ByteBuffer.wrap((byte[]) value2); + if (value1 instanceof byte[]) { + value1 = ByteBuffer.wrap((byte[]) value1); + } + } else if (value2 instanceof Text) { + value2 = value2.toString(); + } + assertEquals(value1, value2); + } + } + } +} diff --git a/src/arrow/java/adapter/avro/src/test/java/org/apache/arrow/AvroToArrowIteratorTest.java b/src/arrow/java/adapter/avro/src/test/java/org/apache/arrow/AvroToArrowIteratorTest.java new file mode 100644 index 000000000..2b05a19f3 --- /dev/null +++ b/src/arrow/java/adapter/avro/src/test/java/org/apache/arrow/AvroToArrowIteratorTest.java @@ -0,0 +1,313 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow; + +import static org.junit.Assert.assertEquals; + +import java.io.EOFException; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.util.AutoCloseables; +import org.apache.arrow.vector.FieldVector; +import org.apache.arrow.vector.VectorSchemaRoot; +import org.apache.arrow.vector.complex.ListVector; +import org.apache.avro.Schema; +import org.apache.avro.generic.GenericData; +import org.apache.avro.generic.GenericDatumWriter; +import org.apache.avro.generic.GenericRecord; +import org.apache.avro.io.BinaryDecoder; +import org.apache.avro.io.BinaryEncoder; +import org.apache.avro.io.DatumWriter; +import org.apache.avro.io.Decoder; +import org.apache.avro.io.DecoderFactory; +import org.apache.avro.io.EncoderFactory; +import org.apache.avro.util.Utf8; +import org.junit.Test; + +public class AvroToArrowIteratorTest extends AvroTestBase { + + @Override + public void init() { + final BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE); + this.config = new AvroToArrowConfigBuilder(allocator).setTargetBatchSize(3).build(); + } + + private AvroToArrowVectorIterator 
convert(Schema schema, List data) throws Exception { + File dataFile = TMP.newFile(); + + BinaryEncoder + encoder = new EncoderFactory().directBinaryEncoder(new FileOutputStream(dataFile), null); + DatumWriter writer = new GenericDatumWriter(schema); + BinaryDecoder + decoder = new DecoderFactory().directBinaryDecoder(new FileInputStream(dataFile), null); + + for (Object value : data) { + writer.write(value, encoder); + } + + return AvroToArrow.avroToArrowIterator(schema, decoder, config); + } + + @Test + public void testStringType() throws Exception { + Schema schema = getSchema("test_primitive_string.avsc"); + List<String> data = Arrays.asList("v1", "v2", "v3", "v4", "v5"); + + List<VectorSchemaRoot> roots = new ArrayList<>(); + List<FieldVector> vectors = new ArrayList<>(); + try (AvroToArrowVectorIterator iterator = convert(schema, data)) { + while (iterator.hasNext()) { + VectorSchemaRoot root = iterator.next(); + FieldVector vector = root.getFieldVectors().get(0); + roots.add(root); + vectors.add(vector); + } + } + checkPrimitiveResult(data, vectors); + AutoCloseables.close(roots); + } + + @Test + public void testNullableStringType() throws Exception { + Schema schema = getSchema("test_nullable_string.avsc"); + + List<GenericRecord> data = new ArrayList<>(); + List<String> expected = new ArrayList<>(); + for (int i = 0; i < 5; i++) { + GenericRecord record = new GenericData.Record(schema); + String value = i % 2 == 0 ? 
"test" + i : null; + record.put(0, value); + expected.add(value); + data.add(record); + } + + List<VectorSchemaRoot> roots = new ArrayList<>(); + List<FieldVector> vectors = new ArrayList<>(); + try (AvroToArrowVectorIterator iterator = convert(schema, data);) { + while (iterator.hasNext()) { + VectorSchemaRoot root = iterator.next(); + FieldVector vector = root.getFieldVectors().get(0); + roots.add(root); + vectors.add(vector); + } + } + checkPrimitiveResult(expected, vectors); + AutoCloseables.close(roots); + + } + + @Test + public void testRecordType() throws Exception { + Schema schema = getSchema("test_record.avsc"); + ArrayList<GenericRecord> data = new ArrayList<>(); + for (int i = 0; i < 5; i++) { + GenericRecord record = new GenericData.Record(schema); + record.put(0, "test" + i); + record.put(1, i); + record.put(2, i % 2 == 0); + data.add(record); + } + + List<VectorSchemaRoot> roots = new ArrayList<>(); + try (AvroToArrowVectorIterator iterator = convert(schema, data)) { + while (iterator.hasNext()) { + roots.add(iterator.next()); + } + } + checkRecordResult(schema, data, roots); + AutoCloseables.close(roots); + + } + + @Test + public void testArrayType() throws Exception { + Schema schema = getSchema("test_array.avsc"); + List<List<?>> data = Arrays.asList( + Arrays.asList("11", "222", "999"), + Arrays.asList("12222", "2333", "1000"), + Arrays.asList("1rrr", "2ggg"), + Arrays.asList("1vvv", "2bbb"), + Arrays.asList("1fff", "2")); + + List<VectorSchemaRoot> roots = new ArrayList<>(); + List<ListVector> vectors = new ArrayList<>(); + try (AvroToArrowVectorIterator iterator = convert(schema, data)) { + while (iterator.hasNext()) { + VectorSchemaRoot root = iterator.next(); + roots.add(root); + vectors.add((ListVector) root.getFieldVectors().get(0)); + } + } + checkArrayResult(data, vectors); + AutoCloseables.close(roots); + } + + @Test + public void runLargeNumberOfRows() throws Exception { + Schema schema = getSchema("test_large_data.avsc"); + int x = 0; 
+ final int targetRows = 600000; + Decoder fakeDecoder = new FakeDecoder(targetRows); + try (AvroToArrowVectorIterator iter = AvroToArrow.avroToArrowIterator(schema, fakeDecoder, + new AvroToArrowConfigBuilder(config.getAllocator()).build())) { + while (iter.hasNext()) { + VectorSchemaRoot root = iter.next(); + x += root.getRowCount(); + root.close(); + } + } + + assertEquals(x, targetRows); + } + + /** + * Fake avro decoder to test large data. + */ + private class FakeDecoder extends Decoder { + + private int numRows; + + FakeDecoder(int numRows) { + this.numRows = numRows; + } + + // note that Decoder has no hasNext() API, assume enum is the first type in schema + // and fixed is the last type in schema and they are unique. + private void validate() throws EOFException { + if (numRows <= 0) { + throw new EOFException(); + } + } + + @Override + public void readNull() throws IOException { + } + + @Override + public boolean readBoolean() throws IOException { + return false; + } + + @Override + public int readInt() throws IOException { + return 0; + } + + @Override + public long readLong() throws IOException { + return 0; + } + + @Override + public float readFloat() throws IOException { + return 0; + } + + @Override + public double readDouble() throws IOException { + return 0; + } + + @Override + public Utf8 readString(Utf8 old) throws IOException { + return new Utf8("test123test123" + numRows); + } + + @Override + public String readString() throws IOException { + return "test123test123" + numRows; + } + + @Override + public void skipString() throws IOException { + + } + + @Override + public ByteBuffer readBytes(ByteBuffer old) throws IOException { + return ByteBuffer.allocate(0); + } + + @Override + public void skipBytes() throws IOException { + + } + + @Override + public void readFixed(byte[] bytes, int start, int length) throws IOException { + // fixed type is last column, after read value, decrease numRows + numRows--; + } + + @Override + public void 
skipFixed(int length) throws IOException { + + } + + @Override + public int readEnum() throws IOException { + // enum type is first column, validate numRows first. + validate(); + return 0; + } + + @Override + public long readArrayStart() throws IOException { + return 5; + } + + @Override + public long arrayNext() throws IOException { + return 0; + } + + @Override + public long skipArray() throws IOException { + return 0; + } + + @Override + public long readMapStart() throws IOException { + return 5; + } + + @Override + public long mapNext() throws IOException { + return 0; + } + + @Override + public long skipMap() throws IOException { + return 0; + } + + @Override + public int readIndex() throws IOException { + return 0; + } + } +} diff --git a/src/arrow/java/adapter/avro/src/test/java/org/apache/arrow/AvroToArrowTest.java b/src/arrow/java/adapter/avro/src/test/java/org/apache/arrow/AvroToArrowTest.java new file mode 100644 index 000000000..c007e1ac7 --- /dev/null +++ b/src/arrow/java/adapter/avro/src/test/java/org/apache/arrow/AvroToArrowTest.java @@ -0,0 +1,477 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.arrow; + +import static org.junit.Assert.assertEquals; + +import java.nio.ByteBuffer; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; + +import org.apache.arrow.vector.FieldVector; +import org.apache.arrow.vector.VarCharVector; +import org.apache.arrow.vector.VectorSchemaRoot; +import org.apache.arrow.vector.complex.ListVector; +import org.apache.arrow.vector.complex.MapVector; +import org.apache.arrow.vector.complex.StructVector; +import org.apache.avro.Schema; +import org.apache.avro.generic.GenericData; +import org.apache.avro.generic.GenericRecord; +import org.junit.Test; + +public class AvroToArrowTest extends AvroTestBase { + + @Test + public void testStringType() throws Exception { + Schema schema = getSchema("test_primitive_string.avsc"); + List<String> data = Arrays.asList("v1", "v2", "v3", "v4", "v5"); + + VectorSchemaRoot root = writeAndRead(schema, data); + FieldVector vector = root.getFieldVectors().get(0); + + checkPrimitiveResult(data, vector); + } + + @Test + public void testNullableStringType() throws Exception { + Schema schema = getSchema("test_nullable_string.avsc"); + + ArrayList<GenericRecord> data = new ArrayList<>(); + for (int i = 0; i < 5; i++) { + GenericRecord record = new GenericData.Record(schema); + record.put(0, i % 2 == 0 ? 
"test" + i : null); + data.add(record); + } + + VectorSchemaRoot root = writeAndRead(schema, data); + checkRecordResult(schema, data, root); + } + + @Test + public void testRecordType() throws Exception { + Schema schema = getSchema("test_record.avsc"); + ArrayList<GenericRecord> data = new ArrayList<>(); + for (int i = 0; i < 5; i++) { + GenericRecord record = new GenericData.Record(schema); + record.put(0, "test" + i); + record.put(1, i); + record.put(2, i % 2 == 0); + data.add(record); + } + + VectorSchemaRoot root = writeAndRead(schema, data); + checkRecordResult(schema, data, root); + } + + @Test + public void testFixedAttributes() throws Exception { + Schema schema = getSchema("attrs/test_fixed_attr.avsc"); + + List<GenericData.Fixed> data = new ArrayList<>(); + List<byte[]> expected = new ArrayList<>(); + for (int i = 0; i < 5; i++) { + byte[] value = ("value" + i).getBytes(StandardCharsets.UTF_8); + expected.add(value); + GenericData.Fixed fixed = new GenericData.Fixed(schema); + fixed.bytes(value); + data.add(fixed); + } + + VectorSchemaRoot root = writeAndRead(schema, data); + FieldVector vector = root.getFieldVectors().get(0); + + Map<String, String> metadata = vector.getField().getMetadata(); + assertEquals("fixed doc", metadata.get("doc")); + assertEquals("[\"alias1\",\"alias2\"]", metadata.get("aliases")); + } + + @Test + public void testEnumAttributes() throws Exception { + Schema schema = getSchema("attrs/test_enum_attrs.avsc"); + List<GenericData.EnumSymbol> data = Arrays.asList( + new GenericData.EnumSymbol(schema, "SPADES"), + new GenericData.EnumSymbol(schema, "HEARTS"), + new GenericData.EnumSymbol(schema, "DIAMONDS"), + new GenericData.EnumSymbol(schema, "CLUBS"), + new GenericData.EnumSymbol(schema, "SPADES")); + + VectorSchemaRoot root = writeAndRead(schema, data); + FieldVector vector = root.getFieldVectors().get(0); + + Map<String, String> metadata = vector.getField().getMetadata(); + assertEquals("enum doc", metadata.get("doc")); + 
assertEquals("[\"alias1\",\"alias2\"]", metadata.get("aliases")); + } + + @Test + public void testRecordAttributes() throws Exception { + Schema schema = getSchema("attrs/test_record_attrs.avsc"); + Schema nestedSchema = schema.getFields().get(0).schema(); + ArrayList<GenericRecord> data = new ArrayList<>(); + for (int i = 0; i < 5; i++) { + GenericRecord record = new GenericData.Record(schema); + GenericRecord nestedRecord = new GenericData.Record(nestedSchema); + nestedRecord.put(0, "test" + i); + nestedRecord.put(1, i); + record.put(0, nestedRecord); + + data.add(record); + } + + VectorSchemaRoot root = writeAndRead(schema, data); + + StructVector structVector = (StructVector) root.getFieldVectors().get(0); + Map<String, String> structMeta = structVector.getField().getMetadata(); + Map<String, String> childMeta1 = structVector.getChildByOrdinal(0).getField().getMetadata(); + Map<String, String> childMeta2 = structVector.getChildByOrdinal(1).getField().getMetadata(); + + assertEquals("f0 doc", structMeta.get("doc")); + assertEquals("[\"f0.a1\"]", structMeta.get("aliases")); + assertEquals("f1 doc", childMeta1.get("doc")); + assertEquals("[\"f1.a1\",\"f1.a2\"]", childMeta1.get("aliases")); + assertEquals("f2 doc", childMeta2.get("doc")); + assertEquals("[\"f2.a1\",\"f2.a2\"]", childMeta2.get("aliases")); + } + + @Test + public void testNestedRecordType() throws Exception { + Schema schema = getSchema("test_nested_record.avsc"); + Schema nestedSchema = schema.getFields().get(0).schema(); + ArrayList<GenericRecord> data = new ArrayList<>(); + for (int i = 0; i < 5; i++) { + GenericRecord record = new GenericData.Record(schema); + GenericRecord nestedRecord = new GenericData.Record(nestedSchema); + nestedRecord.put(0, "test" + i); + nestedRecord.put(1, i); + record.put(0, nestedRecord); + + data.add(record); + } + + VectorSchemaRoot root = writeAndRead(schema, data); + checkNestedRecordResult(schema, data, root); + } + + @Test + public void testEnumType() throws 
Exception { + Schema schema = getSchema("test_primitive_enum.avsc"); + List<GenericData.EnumSymbol> data = Arrays.asList( + new GenericData.EnumSymbol(schema, "SPADES"), + new GenericData.EnumSymbol(schema, "HEARTS"), + new GenericData.EnumSymbol(schema, "DIAMONDS"), + new GenericData.EnumSymbol(schema, "CLUBS"), + new GenericData.EnumSymbol(schema, "SPADES")); + + List<Integer> expectedIndices = Arrays.asList(0, 1, 2, 3, 0); + + VectorSchemaRoot root = writeAndRead(schema, data); + FieldVector vector = root.getFieldVectors().get(0); + + checkPrimitiveResult(expectedIndices, vector); + + VarCharVector dictVector = (VarCharVector) config.getProvider().lookup(0).getVector(); + assertEquals(4, dictVector.getValueCount()); + + assertEquals("SPADES", dictVector.getObject(0).toString()); + assertEquals("HEARTS", dictVector.getObject(1).toString()); + assertEquals("DIAMONDS", dictVector.getObject(2).toString()); + assertEquals("CLUBS", dictVector.getObject(3).toString()); + } + + @Test + public void testIntType() throws Exception { + Schema schema = getSchema("test_primitive_int.avsc"); + List<Integer> data = Arrays.asList(1, 2, 3, 4, 5); + + VectorSchemaRoot root = writeAndRead(schema, data); + FieldVector vector = root.getFieldVectors().get(0); + + checkPrimitiveResult(data, vector); + } + + @Test + public void testNullableIntType() throws Exception { + Schema schema = getSchema("test_nullable_int.avsc"); + + ArrayList<GenericRecord> data = new ArrayList<>(); + for (int i = 0; i < 5; i++) { + GenericRecord record = new GenericData.Record(schema); + record.put(0, i % 2 == 0 ? 
i : null); + data.add(record); + } + + VectorSchemaRoot root = writeAndRead(schema, data); + checkRecordResult(schema, data, root); + } + + @Test + public void testLongType() throws Exception { + Schema schema = getSchema("test_primitive_long.avsc"); + List<Long> data = Arrays.asList(1L, 2L, 3L, 4L, 5L); + + VectorSchemaRoot root = writeAndRead(schema, data); + FieldVector vector = root.getFieldVectors().get(0); + + checkPrimitiveResult(data, vector); + } + + @Test + public void testNullableLongType() throws Exception { + Schema schema = getSchema("test_nullable_long.avsc"); + + ArrayList<GenericRecord> data = new ArrayList<>(); + for (int i = 0; i < 5; i++) { + GenericRecord record = new GenericData.Record(schema); + record.put(0, i % 2 == 0 ? (long) i : null); + data.add(record); + } + + VectorSchemaRoot root = writeAndRead(schema, data); + checkRecordResult(schema, data, root); + } + + @Test + public void testFloatType() throws Exception { + Schema schema = getSchema("test_primitive_float.avsc"); + List<Float> data = Arrays.asList(1.1f, 2.2f, 3.3f, 4.4f, 5.5f); + + VectorSchemaRoot root = writeAndRead(schema, data); + FieldVector vector = root.getFieldVectors().get(0); + + checkPrimitiveResult(data, vector); + } + + @Test + public void testNullableFloatType() throws Exception { + Schema schema = getSchema("test_nullable_float.avsc"); + + ArrayList<GenericRecord> data = new ArrayList<>(); + for (int i = 0; i < 5; i++) { + GenericRecord record = new GenericData.Record(schema); + record.put(0, i % 2 == 0 ? 
i + 0.1f : null); + data.add(record); + } + + VectorSchemaRoot root = writeAndRead(schema, data); + checkRecordResult(schema, data, root); + } + + @Test + public void testDoubleType() throws Exception { + Schema schema = getSchema("test_primitive_double.avsc"); + List<Double> data = Arrays.asList(1.1, 2.2, 3.3, 4.4, 5.5); + + VectorSchemaRoot root = writeAndRead(schema, data); + FieldVector vector = root.getFieldVectors().get(0); + + checkPrimitiveResult(data, vector); + } + + @Test + public void testNullableDoubleType() throws Exception { + Schema schema = getSchema("test_nullable_double.avsc"); + + ArrayList<GenericRecord> data = new ArrayList<>(); + for (int i = 0; i < 5; i++) { + GenericRecord record = new GenericData.Record(schema); + record.put(0, i % 2 == 0 ? i + 0.1 : null); + data.add(record); + } + + VectorSchemaRoot root = writeAndRead(schema, data); + checkRecordResult(schema, data, root); + } + + @Test + public void testBytesType() throws Exception { + Schema schema = getSchema("test_primitive_bytes.avsc"); + List<ByteBuffer> data = Arrays.asList( + ByteBuffer.wrap("value1".getBytes(StandardCharsets.UTF_8)), + ByteBuffer.wrap("value2".getBytes(StandardCharsets.UTF_8)), + ByteBuffer.wrap("value3".getBytes(StandardCharsets.UTF_8)), + ByteBuffer.wrap("value4".getBytes(StandardCharsets.UTF_8)), + ByteBuffer.wrap("value5".getBytes(StandardCharsets.UTF_8))); + + VectorSchemaRoot root = writeAndRead(schema, data); + FieldVector vector = root.getFieldVectors().get(0); + + checkPrimitiveResult(data, vector); + } + + @Test + public void testNullableBytesType() throws Exception { + Schema schema = getSchema("test_nullable_bytes.avsc"); + + ArrayList<GenericRecord> data = new ArrayList<>(); + for (int i = 0; i < 5; i++) { + GenericRecord record = new GenericData.Record(schema); + record.put(0, i % 2 == 0 ? 
ByteBuffer.wrap(("test" + i).getBytes(StandardCharsets.UTF_8)) : null); + data.add(record); + } + + VectorSchemaRoot root = writeAndRead(schema, data); + checkRecordResult(schema, data, root); + } + + @Test + public void testBooleanType() throws Exception { + Schema schema = getSchema("test_primitive_boolean.avsc"); + List<Boolean> data = Arrays.asList(true, false, true, false, true); + + VectorSchemaRoot root = writeAndRead(schema, data); + FieldVector vector = root.getFieldVectors().get(0); + + checkPrimitiveResult(data, vector); + } + + @Test + public void testNullableBooleanType() throws Exception { + Schema schema = getSchema("test_nullable_boolean.avsc"); + + ArrayList<GenericRecord> data = new ArrayList<>(); + for (int i = 0; i < 5; i++) { + GenericRecord record = new GenericData.Record(schema); + record.put(0, i % 2 == 0 ? true : null); + data.add(record); + } + + VectorSchemaRoot root = writeAndRead(schema, data); + checkRecordResult(schema, data, root); + } + + @Test + public void testArrayType() throws Exception { + Schema schema = getSchema("test_array.avsc"); + List<List<?>> data = Arrays.asList( + Arrays.asList("11", "222", "999"), + Arrays.asList("12222", "2333", "1000"), + Arrays.asList("1rrr", "2ggg"), + Arrays.asList("1vvv", "2bbb"), + Arrays.asList("1fff", "2")); + + VectorSchemaRoot root = writeAndRead(schema, data); + FieldVector vector = root.getFieldVectors().get(0); + + checkArrayResult(data, (ListVector) vector); + } + + @Test + public void testMapType() throws Exception { + Schema schema = getSchema("test_map.avsc"); + + List keys = Arrays.asList("key1", "key2", "key3", "key4", "key5", "key6"); + List vals = Arrays.asList("val1", "val2", "val3", "val4", "val5", "val6"); + + List<LinkedHashMap> data = new ArrayList<>(); + LinkedHashMap map1 = new LinkedHashMap(); + map1.put(keys.get(0), vals.get(0)); + map1.put(keys.get(1), vals.get(1)); + data.add(map1); + + LinkedHashMap map2 = new LinkedHashMap(); + map2.put(keys.get(2), vals.get(2)); + 
map2.put(keys.get(3), vals.get(3)); + data.add(map2); + + LinkedHashMap map3 = new LinkedHashMap(); + map3.put(keys.get(4), vals.get(4)); + map3.put(keys.get(5), vals.get(5)); + data.add(map3); + + VectorSchemaRoot root = writeAndRead(schema, data); + MapVector vector = (MapVector) root.getFieldVectors().get(0); + + checkPrimitiveResult(keys, vector.getDataVector().getChildrenFromFields().get(0)); + checkPrimitiveResult(vals, vector.getDataVector().getChildrenFromFields().get(1)); + assertEquals(0, vector.getOffsetBuffer().getInt(0)); + assertEquals(2, vector.getOffsetBuffer().getInt(1 * 4)); + assertEquals(4, vector.getOffsetBuffer().getInt(2 * 4)); + assertEquals(6, vector.getOffsetBuffer().getInt(3 * 4)); + } + + @Test + public void testFixedType() throws Exception { + Schema schema = getSchema("test_fixed.avsc"); + + List<GenericData.Fixed> data = new ArrayList<>(); + List<byte[]> expected = new ArrayList<>(); + for (int i = 0; i < 5; i++) { + byte[] value = ("value" + i).getBytes(StandardCharsets.UTF_8); + expected.add(value); + GenericData.Fixed fixed = new GenericData.Fixed(schema); + fixed.bytes(value); + data.add(fixed); + } + + VectorSchemaRoot root = writeAndRead(schema, data); + FieldVector vector = root.getFieldVectors().get(0); + + checkPrimitiveResult(expected, vector); + } + + @Test + public void testUnionType() throws Exception { + Schema schema = getSchema("test_union.avsc"); + ArrayList<GenericRecord> data = new ArrayList<>(); + ArrayList<Object> expected = new ArrayList<>(); + for (int i = 0; i < 5; i++) { + GenericRecord record = new GenericData.Record(schema); + record.put(0, i % 2 == 0 ? "test" + i : i); + expected.add(i % 2 == 0 ? 
"test" + i : i); + data.add(record); + } + + VectorSchemaRoot root = writeAndRead(schema, data); + FieldVector vector = root.getFieldVectors().get(0); + + checkPrimitiveResult(expected, vector); + } + + @Test + public void testNullableUnionType() throws Exception { + Schema schema = getSchema("test_nullable_union.avsc"); + ArrayList<GenericRecord> data = new ArrayList<>(); + ArrayList<Object> expected = new ArrayList<>(); + for (int i = 0; i < 5; i++) { + GenericRecord record = new GenericData.Record(schema); + if (i % 3 == 0) { + record.put(0, "test" + i); + expected.add("test" + i); + data.add(record); + } else if (i % 3 == 1) { + record.put(0, i); + expected.add(i); + data.add(record); + } else { + record.put(0, null); + expected.add(null); + data.add(record); + } + } + + VectorSchemaRoot root = writeAndRead(schema, data); + FieldVector vector = root.getFieldVectors().get(0); + + checkPrimitiveResult(expected, vector); + } + +} diff --git a/src/arrow/java/adapter/avro/src/test/java/org/apache/arrow/TestWriteReadAvroRecord.java b/src/arrow/java/adapter/avro/src/test/java/org/apache/arrow/TestWriteReadAvroRecord.java new file mode 100644 index 000000000..bf695d193 --- /dev/null +++ b/src/arrow/java/adapter/avro/src/test/java/org/apache/arrow/TestWriteReadAvroRecord.java @@ -0,0 +1,93 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow; + +import static org.junit.Assert.assertEquals; + +import java.io.File; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.List; + +import org.apache.avro.Schema; +import org.apache.avro.file.DataFileReader; +import org.apache.avro.file.DataFileWriter; +import org.apache.avro.generic.GenericData; +import org.apache.avro.generic.GenericDatumReader; +import org.apache.avro.generic.GenericDatumWriter; +import org.apache.avro.generic.GenericRecord; +import org.apache.avro.io.DatumReader; +import org.apache.avro.io.DatumWriter; +import org.junit.ClassRule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; + + +public class TestWriteReadAvroRecord { + + @ClassRule + public static final TemporaryFolder TMP = new TemporaryFolder(); + + @Test + public void testWriteAndRead() throws Exception { + + File dataFile = TMP.newFile(); + Path schemaPath = Paths.get(TestWriteReadAvroRecord.class.getResource("/").getPath(), "schema", "test.avsc"); + Schema schema = new Schema.Parser().parse(schemaPath.toFile()); + + //write data to disk + GenericRecord user1 = new GenericData.Record(schema); + user1.put("name", "Alyssa"); + user1.put("favorite_number", 256); + + GenericRecord user2 = new GenericData.Record(schema); + user2.put("name", "Ben"); + user2.put("favorite_number", 7); + user2.put("favorite_color", "red"); + + DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<GenericRecord>(schema); + DataFileWriter<GenericRecord> dataFileWriter = new 
DataFileWriter<GenericRecord>(datumWriter); + dataFileWriter.create(schema, dataFile); + dataFileWriter.append(user1); + dataFileWriter.append(user2); + dataFileWriter.close(); + + //read data from disk + DatumReader<GenericRecord> datumReader = new GenericDatumReader<GenericRecord>(schema); + DataFileReader<GenericRecord> + dataFileReader = new DataFileReader<GenericRecord>(dataFile, datumReader); + List<GenericRecord> result = new ArrayList<>(); + while (dataFileReader.hasNext()) { + GenericRecord user = dataFileReader.next(); + result.add(user); + } + + assertEquals(2, result.size()); + GenericRecord deUser1 = result.get(0); + assertEquals("Alyssa", deUser1.get("name").toString()); + assertEquals(256, deUser1.get("favorite_number")); + assertEquals(null, deUser1.get("favorite_color")); + + GenericRecord deUser2 = result.get(1); + assertEquals("Ben", deUser2.get("name").toString()); + assertEquals(7, deUser2.get("favorite_number")); + assertEquals("red", deUser2.get("favorite_color").toString()); + } + +} diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/attrs/test_enum_attrs.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/attrs/test_enum_attrs.avsc new file mode 100644 index 000000000..afd00b8d9 --- /dev/null +++ b/src/arrow/java/adapter/avro/src/test/resources/schema/attrs/test_enum_attrs.avsc @@ -0,0 +1,24 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +{ + "type": "enum", + "symbols" : ["SPADES", "HEARTS", "DIAMONDS", "CLUBS"], + "name": "testEnum", + "doc" : "enum doc", + "aliases" : ["alias1", "alias2"] +} diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/attrs/test_fixed_attr.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/attrs/test_fixed_attr.avsc new file mode 100644 index 000000000..55e504def --- /dev/null +++ b/src/arrow/java/adapter/avro/src/test/resources/schema/attrs/test_fixed_attr.avsc @@ -0,0 +1,24 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +{ + "type": "fixed", + "size": 6, + "name": "testFixed", + "doc" : "fixed doc", + "aliases" : ["alias1", "alias2"] +} diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/attrs/test_record_attrs.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/attrs/test_record_attrs.avsc new file mode 100644 index 000000000..2e2e311a9 --- /dev/null +++ b/src/arrow/java/adapter/avro/src/test/resources/schema/attrs/test_record_attrs.avsc @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +{ + "namespace": "org.apache.arrow.avro", + "type": "record", + "name": "testAttrs", + "fields": [ + { + "name" : "f0", + "type" : { + "type" : "record", + "name" : "nestedInRecord", + "doc" : "f0 doc", + "aliases" : ["f0.a1"], + "fields": [ + {"name": "f1", "type": "string", "doc": "f1 doc", "aliases" : ["f1.a1", "f1.a2"]}, + {"name": "f2", "type": "int", "doc": "f2 doc", "aliases" : ["f2.a1", "f2.a2"]} + ] + } + } + ] +} diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/logical/test_date.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/logical/test_date.avsc new file mode 100644 index 000000000..f661e6506 --- /dev/null +++ b/src/arrow/java/adapter/avro/src/test/resources/schema/logical/test_date.avsc @@ -0,0 +1,23 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +{ + "namespace": "org.apache.arrow.avro", + "name": "test", + "type": "int", + "logicalType" : "date" +} diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/logical/test_decimal_invalid1.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/logical/test_decimal_invalid1.avsc new file mode 100644 index 000000000..18d7d63fc --- /dev/null +++ b/src/arrow/java/adapter/avro/src/test/resources/schema/logical/test_decimal_invalid1.avsc @@ -0,0 +1,25 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +{ + "namespace": "org.apache.arrow.avro", + "name": "test", + "type": "bytes", + "logicalType" : "decimal", + "precision": 39, + "scale": 2 +} diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/logical/test_decimal_invalid2.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/logical/test_decimal_invalid2.avsc new file mode 100644 index 000000000..eed7bd781 --- /dev/null +++ b/src/arrow/java/adapter/avro/src/test/resources/schema/logical/test_decimal_invalid2.avsc @@ -0,0 +1,25 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +{ + "namespace": "org.apache.arrow.avro", + "name": "test", + "type": "bytes", + "logicalType" : "decimal", + "precision": 20, + "scale": -1 +} diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/logical/test_decimal_invalid3.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/logical/test_decimal_invalid3.avsc new file mode 100644 index 000000000..1667b8aff --- /dev/null +++ b/src/arrow/java/adapter/avro/src/test/resources/schema/logical/test_decimal_invalid3.avsc @@ -0,0 +1,25 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +{ + "namespace": "org.apache.arrow.avro", + "name": "test", + "type": "bytes", + "logicalType" : "decimal", + "precision": 20, + "scale": 40 +} diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/logical/test_decimal_invalid4.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/logical/test_decimal_invalid4.avsc new file mode 100644 index 000000000..e1f710416 --- /dev/null +++ b/src/arrow/java/adapter/avro/src/test/resources/schema/logical/test_decimal_invalid4.avsc @@ -0,0 +1,26 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +{ + "namespace": "org.apache.arrow.avro", + "name": "test", + "type": "fixed", + "size" : 1, + "logicalType" : "decimal", + "precision": 30, + "scale": 2 +} diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/logical/test_decimal_with_original_bytes.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/logical/test_decimal_with_original_bytes.avsc new file mode 100644 index 000000000..944b5d85d --- /dev/null +++ b/src/arrow/java/adapter/avro/src/test/resources/schema/logical/test_decimal_with_original_bytes.avsc @@ -0,0 +1,25 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +{ + "namespace": "org.apache.arrow.avro", + "name": "test", + "type": "bytes", + "logicalType" : "decimal", + "precision": 10, + "scale": 2 +} diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/logical/test_decimal_with_original_fixed.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/logical/test_decimal_with_original_fixed.avsc new file mode 100644 index 000000000..1901f90a9 --- /dev/null +++ b/src/arrow/java/adapter/avro/src/test/resources/schema/logical/test_decimal_with_original_fixed.avsc @@ -0,0 +1,26 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +{ + "namespace": "org.apache.arrow.avro", + "name": "test", + "type": "fixed", + "size" : 10, + "logicalType" : "decimal", + "precision": 10, + "scale": 2 +} diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/logical/test_time_micros.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/logical/test_time_micros.avsc new file mode 100644 index 000000000..ee7d4e937 --- /dev/null +++ b/src/arrow/java/adapter/avro/src/test/resources/schema/logical/test_time_micros.avsc @@ -0,0 +1,23 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +{ + "namespace": "org.apache.arrow.avro", + "name": "test", + "type": "long", + "logicalType" : "time-micros" +} diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/logical/test_time_millis.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/logical/test_time_millis.avsc new file mode 100644 index 000000000..54877babc --- /dev/null +++ b/src/arrow/java/adapter/avro/src/test/resources/schema/logical/test_time_millis.avsc @@ -0,0 +1,23 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +{ + "namespace": "org.apache.arrow.avro", + "name": "test", + "type": "int", + "logicalType" : "time-millis" +} diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/logical/test_timestamp_micros.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/logical/test_timestamp_micros.avsc new file mode 100644 index 000000000..15c0bf53d --- /dev/null +++ b/src/arrow/java/adapter/avro/src/test/resources/schema/logical/test_timestamp_micros.avsc @@ -0,0 +1,23 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +{ + "namespace": "org.apache.arrow.avro", + "name": "test", + "type": "long", + "logicalType" : "timestamp-micros" +} diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/logical/test_timestamp_millis.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/logical/test_timestamp_millis.avsc new file mode 100644 index 000000000..822a2c360 --- /dev/null +++ b/src/arrow/java/adapter/avro/src/test/resources/schema/logical/test_timestamp_millis.avsc @@ -0,0 +1,23 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +{ + "namespace": "org.apache.arrow.avro", + "name": "test", + "type": "long", + "logicalType" : "timestamp-millis" +} diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_array_before.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_array_before.avsc new file mode 100644 index 000000000..e836aa768 --- /dev/null +++ b/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_array_before.avsc @@ -0,0 +1,27 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +{ + "namespace": "org.apache.arrow.avro", + "type": "record", + "name": "test", + "fields": [ + {"name": "f0", "type": "string"}, + {"name": "f1", "type": {"type" : "array", "items": "string"}}, + {"name": "f2", "type": "boolean"} + ] +} diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_array_expected.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_array_expected.avsc new file mode 100644 index 000000000..36e7fdfb0 --- /dev/null +++ b/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_array_expected.avsc @@ -0,0 +1,26 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +{ + "namespace": "org.apache.arrow.avro", + "type": "record", + "name": "test", + "fields": [ + {"name": "f0", "type": "string"}, + {"name": "f2", "type": "boolean"} + ] +} diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_base1.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_base1.avsc new file mode 100644 index 000000000..5338253f4 --- /dev/null +++ b/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_base1.avsc @@ -0,0 +1,28 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +{ + "namespace": "org.apache.arrow.avro", + "type": "record", + "name": "testRecord", + "fields": [ + {"name": "f0", "type": {"type" : "fixed", "size":5, "name" : "fix"}}, + {"name": "f1", "type": {"type" : "enum", "name" : "enum", "symbols": ["TEST0", "TEST1"]}}, + {"name": "f2", "type": "string"}, + {"name": "f3", "type": "bytes"} + ] +} diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_base2.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_base2.avsc new file mode 100644 index 000000000..50655a70e --- /dev/null +++ b/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_base2.avsc @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +{ + "namespace": "org.apache.arrow.avro", + "type": "record", + "name": "testRecord", + "fields": [ + {"name": "f0", "type": "boolean"}, + {"name": "f1", "type": "int"}, + {"name": "f2", "type": "long"}, + {"name": "f3", "type": "float"}, + {"name": "f4", "type": "double"} + ] +} diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_boolean_expected.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_boolean_expected.avsc new file mode 100644 index 000000000..9b62e3149 --- /dev/null +++ b/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_boolean_expected.avsc @@ -0,0 +1,28 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +{ + "namespace": "org.apache.arrow.avro", + "type": "record", + "name": "testRecord", + "fields": [ + {"name": "f1", "type": "int"}, + {"name": "f2", "type": "long"}, + {"name": "f3", "type": "float"}, + {"name": "f4", "type": "double"} + ] +} diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_bytes_expected.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_bytes_expected.avsc new file mode 100644 index 000000000..8a1903b34 --- /dev/null +++ b/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_bytes_expected.avsc @@ -0,0 +1,27 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +{ + "namespace": "org.apache.arrow.avro", + "type": "record", + "name": "testRecord", + "fields": [ + {"name": "f0", "type": {"type" : "fixed", "size":5, "name" : "fix"}}, + {"name": "f1", "type": {"type" : "enum", "name" : "enum", "symbols": ["TEST0", "TEST1"]}}, + {"name": "f2", "type": "string"} + ] +} diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_double_expected.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_double_expected.avsc new file mode 100644 index 000000000..6021c4454 --- /dev/null +++ b/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_double_expected.avsc @@ -0,0 +1,28 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +{ + "namespace": "org.apache.arrow.avro", + "type": "record", + "name": "testRecord", + "fields": [ + {"name": "f0", "type": "boolean"}, + {"name": "f1", "type": "int"}, + {"name": "f2", "type": "long"}, + {"name": "f3", "type": "float"} + ] +} diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_enum_expected.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_enum_expected.avsc new file mode 100644 index 000000000..f5ed86a28 --- /dev/null +++ b/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_enum_expected.avsc @@ -0,0 +1,27 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +{ + "namespace": "org.apache.arrow.avro", + "type": "record", + "name": "testRecord", + "fields": [ + {"name": "f0", "type": {"type" : "fixed", "size":5, "name" : "fix"}}, + {"name": "f2", "type": "string"}, + {"name": "f3", "type": "bytes"} + ] +} diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_fixed_expected.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_fixed_expected.avsc new file mode 100644 index 000000000..5423a7977 --- /dev/null +++ b/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_fixed_expected.avsc @@ -0,0 +1,27 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +{ + "namespace": "org.apache.arrow.avro", + "type": "record", + "name": "testRecord", + "fields": [ + {"name": "f1", "type": {"type" : "enum", "name" : "enum", "symbols": ["TEST0", "TEST1"]}}, + {"name": "f2", "type": "string"}, + {"name": "f3", "type": "bytes"} + ] +} diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_float_expected.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_float_expected.avsc new file mode 100644 index 000000000..dea106331 --- /dev/null +++ b/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_float_expected.avsc @@ -0,0 +1,28 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +{ + "namespace": "org.apache.arrow.avro", + "type": "record", + "name": "testRecord", + "fields": [ + {"name": "f0", "type": "boolean"}, + {"name": "f1", "type": "int"}, + {"name": "f2", "type": "long"}, + {"name": "f4", "type": "double"} + ] +} diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_int_expected.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_int_expected.avsc new file mode 100644 index 000000000..53d4f1025 --- /dev/null +++ b/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_int_expected.avsc @@ -0,0 +1,28 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +{ + "namespace": "org.apache.arrow.avro", + "type": "record", + "name": "testRecord", + "fields": [ + {"name": "f0", "type": "boolean"}, + {"name": "f2", "type": "long"}, + {"name": "f3", "type": "float"}, + {"name": "f4", "type": "double"} + ] +} diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_long_expected.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_long_expected.avsc new file mode 100644 index 000000000..bf16601dd --- /dev/null +++ b/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_long_expected.avsc @@ -0,0 +1,28 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +{ + "namespace": "org.apache.arrow.avro", + "type": "record", + "name": "testRecord", + "fields": [ + {"name": "f0", "type": "boolean"}, + {"name": "f1", "type": "int"}, + {"name": "f3", "type": "float"}, + {"name": "f4", "type": "double"} + ] +} diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_map_before.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_map_before.avsc new file mode 100644 index 000000000..8cbb1a1d7 --- /dev/null +++ b/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_map_before.avsc @@ -0,0 +1,27 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +{ + "namespace": "org.apache.arrow.avro", + "type": "record", + "name": "test", + "fields": [ + {"name": "f0", "type": "string"}, + {"name": "f1", "type": {"type" : "map", "values": "string"}}, + {"name": "f2", "type": "boolean"} + ] +} diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_map_expected.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_map_expected.avsc new file mode 100644 index 000000000..36e7fdfb0 --- /dev/null +++ b/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_map_expected.avsc @@ -0,0 +1,26 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +{ + "namespace": "org.apache.arrow.avro", + "type": "record", + "name": "test", + "fields": [ + {"name": "f0", "type": "string"}, + {"name": "f2", "type": "boolean"} + ] +} diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_multi_fields_expected.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_multi_fields_expected.avsc new file mode 100644 index 000000000..b5d637b1d --- /dev/null +++ b/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_multi_fields_expected.avsc @@ -0,0 +1,25 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +{ + "namespace": "org.apache.arrow.avro", + "type": "record", + "name": "testSkip", + "fields": [ + {"name": "f0", "type": "string"} + ] +} diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_record_before.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_record_before.avsc new file mode 100644 index 000000000..7aee92b92 --- /dev/null +++ b/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_record_before.avsc @@ -0,0 +1,38 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +{ + "namespace": "org.apache.arrow.avro", + "type": "record", + "name": "test", + "fields": [ + { + "name" : "f0", + "type" : { + "type" : "record", + "name" : "nestedInRecord", + "fields": [ + {"name": "f00", "type": "string"}, + {"name": "f01", "type": "int"} + ] + } + }, + { + "name" : "f1", "type" : "int" + } + ] +} diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_record_expected.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_record_expected.avsc new file mode 100644 index 000000000..3e2495203 --- /dev/null +++ b/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_record_expected.avsc @@ -0,0 +1,25 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +{ + "namespace": "org.apache.arrow.avro", + "type": "record", + "name": "test", + "fields": [ + { "name" : "f1", "type" : "int"} + ] +} diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_second_level_expected.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_second_level_expected.avsc new file mode 100644 index 000000000..f3b7f8c09 --- /dev/null +++ b/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_second_level_expected.avsc @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +{ + "namespace": "org.apache.arrow.avro", + "type": "record", + "name": "testSkipNested", + "fields": [ + { + "name" : "nested", + "type" : { + "type" : "record", + "name" : "nestedInRecord", + "fields": [ + {"name": "f1", "type": "int"} + ] + } + } + ] +} diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_single_field_expected.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_single_field_expected.avsc new file mode 100644 index 000000000..553525847 --- /dev/null +++ b/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_single_field_expected.avsc @@ -0,0 +1,26 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +{ + "namespace": "org.apache.arrow.avro", + "type": "record", + "name": "testSkip", + "fields": [ + {"name": "f0", "type": "string"}, + {"name": "f2", "type": "boolean"} + ] +} diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_string_expected.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_string_expected.avsc new file mode 100644 index 000000000..2d2c08174 --- /dev/null +++ b/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_string_expected.avsc @@ -0,0 +1,27 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +{ + "namespace": "org.apache.arrow.avro", + "type": "record", + "name": "testRecord", + "fields": [ + {"name": "f0", "type": {"type" : "fixed", "size":5, "name" : "fix"}}, + {"name": "f1", "type": {"type" : "enum", "name" : "enum", "symbols": ["TEST0", "TEST1"]}}, + {"name": "f3", "type": "bytes"} + ] +} diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_third_level_expected.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_third_level_expected.avsc new file mode 100644 index 000000000..6f42da893 --- /dev/null +++ b/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_third_level_expected.avsc @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +{ + "namespace": "org.apache.arrow.avro", + "type": "record", + "name": "firstLevel", + "fields": [ + { + "name" : "f0", + "type" : { + "type" : "record", + "name" : "secondLevel", + "fields": [ + { + "name" : "f0", + "type" : { + "type" : "record", + "name" : "thirdLevel", + "fields" : [ + {"name": "f1", "type": "int"}, + {"name": "f0", "type": "string"}, + {"name": "f2", "type": "boolean"} + ] + } + } + ] + } + } + ] +} diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_union_before.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_union_before.avsc new file mode 100644 index 000000000..fc1105911 --- /dev/null +++ b/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_union_before.avsc @@ -0,0 +1,28 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +{ + "namespace": "org.apache.arrow.avro", + "type": "record", + "name": "test", + "fields": [ + {"name": "f0", "type": ["string"]}, + {"name": "f1", "type": ["string", "null"]}, + {"name": "f2", "type": ["string", "int"]}, + {"name": "f3", "type": "int"} + ] +} diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_union_multi_fields_expected.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_union_multi_fields_expected.avsc new file mode 100644 index 000000000..308e027a2 --- /dev/null +++ b/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_union_multi_fields_expected.avsc @@ -0,0 +1,27 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +{ + "namespace": "org.apache.arrow.avro", + "type": "record", + "name": "test", + "fields": [ + {"name": "f0", "type": ["string"]}, + {"name": "f1", "type": ["string", "null"]}, + {"name": "f3", "type": "int"} + ] +} diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_union_nullable_field_expected.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_union_nullable_field_expected.avsc new file mode 100644 index 000000000..cbc83e566 --- /dev/null +++ b/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_union_nullable_field_expected.avsc @@ -0,0 +1,27 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +{ + "namespace": "org.apache.arrow.avro", + "type": "record", + "name": "test", + "fields": [ + {"name": "f0", "type": ["string"]}, + {"name": "f2", "type": ["string", "int"]}, + {"name": "f3", "type": "int"} + ] +} diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_union_one_field_expected.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_union_one_field_expected.avsc new file mode 100644 index 000000000..0f72fb432 --- /dev/null +++ b/src/arrow/java/adapter/avro/src/test/resources/schema/skip/test_skip_union_one_field_expected.avsc @@ -0,0 +1,27 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +{ + "namespace": "org.apache.arrow.avro", + "type": "record", + "name": "test", + "fields": [ + {"name": "f1", "type": ["string", "null"]}, + {"name": "f2", "type": ["string", "int"]}, + {"name": "f3", "type": ["string", "int"]} + ] +} diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/test.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/test.avsc new file mode 100644 index 000000000..92c0873de --- /dev/null +++ b/src/arrow/java/adapter/avro/src/test/resources/schema/test.avsc @@ -0,0 +1,27 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +{ + "namespace": "org.apache.arrow.avro", + "type": "record", + "name": "User", + "fields": [ + {"name": "name", "type": "string"}, + {"name": "favorite_number", "type": ["int", "null"]}, + {"name": "favorite_color", "type": ["string", "null"]} + ] +} diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/test_array.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/test_array.avsc new file mode 100644 index 000000000..5b75a4031 --- /dev/null +++ b/src/arrow/java/adapter/avro/src/test/resources/schema/test_array.avsc @@ -0,0 +1,23 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +{ + "namespace": "org.apache.arrow.avro", + "type": "array", + "items": "string", + "name": "testArray" +} diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/test_fixed.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/test_fixed.avsc new file mode 100644 index 000000000..a4d96e9ab --- /dev/null +++ b/src/arrow/java/adapter/avro/src/test/resources/schema/test_fixed.avsc @@ -0,0 +1,23 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +{ + "namespace": "org.apache.arrow.avro", + "type": "fixed", + "size": 6, + "name": "testFixed" +} diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/test_large_data.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/test_large_data.avsc new file mode 100644 index 000000000..f784ae623 --- /dev/null +++ b/src/arrow/java/adapter/avro/src/test/resources/schema/test_large_data.avsc @@ -0,0 +1,75 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +{ + "namespace": "org.apache.arrow.avro", + "type": "record", + "name": "testLargeData", + "fields": [ + { + "name": "f0", + "type": { + "name" : "f0", + "type" : "enum", + "symbols" : ["value1", "value2", "value3", "value4", "value5"] + } + }, + { + "name" : "f1", + "type" : { + "type" : "record", + "name" : "nestedRecord", + "fields": [ + {"name": "f1_0", "type": "string"}, + {"name": "f1_1", "type": "int"} + ] + } + }, + + {"name": "f2", "type": "string"}, + {"name": "f3", "type": "int"}, + {"name": "f4", "type": "boolean"}, + {"name": "f5", "type": "float"}, + {"name": "f6", "type": "double"}, + {"name": "f7", "type": "bytes"}, + {"name": "f8", "type": ["string", "int"]}, + { + "name": "f9", + "type": { + "name" : "f9", + "type" : "array", + "items" : "string" + } + }, + { + "name": "f10", + "type": { + "name" : "f10", + "type" : "map", + "values" : "string" + } + }, + { + "name": "f11", + "type": { + "type" : "fixed", + "name" : "f11", + "size" : 5 + } + } + ] +} diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/test_map.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/test_map.avsc new file mode 100644 index 000000000..0dfa3a595 --- /dev/null +++ b/src/arrow/java/adapter/avro/src/test/resources/schema/test_map.avsc @@ -0,0 +1,23 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +{ + "namespace": "org.apache.arrow.avro", + "type": "map", + "values": "string", + "name": "testMap" +} diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/test_nested_record.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/test_nested_record.avsc new file mode 100644 index 000000000..29dddfd1a --- /dev/null +++ b/src/arrow/java/adapter/avro/src/test/resources/schema/test_nested_record.avsc @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +{ + "namespace": "org.apache.arrow.avro", + "type": "record", + "name": "testNestedRecord", + "fields": [ + { + "name" : "f0", + "type" : { + "type" : "record", + "name" : "nestedInRecord", + "fields": [ + {"name": "f0", "type": "string"}, + {"name": "f1", "type": "int"} + ] + } + } + ] +} diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/test_nullable_boolean.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/test_nullable_boolean.avsc new file mode 100644 index 000000000..62af1a85d --- /dev/null +++ b/src/arrow/java/adapter/avro/src/test/resources/schema/test_nullable_boolean.avsc @@ -0,0 +1,25 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +{ + "namespace": "org.apache.arrow.avro", + "type": "record", + "name": "nullableBoolean", + "fields": [ + {"name": "f0", "type": ["null", "boolean"]} + ] +} diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/test_nullable_bytes.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/test_nullable_bytes.avsc new file mode 100644 index 000000000..002bc7ce2 --- /dev/null +++ b/src/arrow/java/adapter/avro/src/test/resources/schema/test_nullable_bytes.avsc @@ -0,0 +1,25 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +{ + "namespace": "org.apache.arrow.avro", + "type": "record", + "name": "nullableBytes", + "fields": [ + {"name": "f0", "type": ["null", "bytes"]} + ] +} diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/test_nullable_double.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/test_nullable_double.avsc new file mode 100644 index 000000000..642b7aa16 --- /dev/null +++ b/src/arrow/java/adapter/avro/src/test/resources/schema/test_nullable_double.avsc @@ -0,0 +1,25 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +{ + "namespace": "org.apache.arrow.avro", + "type": "record", + "name": "nullableDouble", + "fields": [ + {"name": "f0", "type": ["null", "double"]} + ] +} diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/test_nullable_float.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/test_nullable_float.avsc new file mode 100644 index 000000000..dff285909 --- /dev/null +++ b/src/arrow/java/adapter/avro/src/test/resources/schema/test_nullable_float.avsc @@ -0,0 +1,25 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +{ + "namespace": "org.apache.arrow.avro", + "type": "record", + "name": "nullableFloat", + "fields": [ + {"name": "f0", "type": ["null", "float"]} + ] +} diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/test_nullable_int.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/test_nullable_int.avsc new file mode 100644 index 000000000..abb2fc48a --- /dev/null +++ b/src/arrow/java/adapter/avro/src/test/resources/schema/test_nullable_int.avsc @@ -0,0 +1,25 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +{ + "namespace": "org.apache.arrow.avro", + "type": "record", + "name": "nullableInt", + "fields": [ + {"name": "f0", "type": ["null", "int"]} + ] +} diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/test_nullable_long.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/test_nullable_long.avsc new file mode 100644 index 000000000..0624d2737 --- /dev/null +++ b/src/arrow/java/adapter/avro/src/test/resources/schema/test_nullable_long.avsc @@ -0,0 +1,25 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +{ + "namespace": "org.apache.arrow.avro", + "type": "record", + "name": "nullableLong", + "fields": [ + {"name": "f0", "type": ["null", "long"]} + ] +} diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/test_nullable_string.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/test_nullable_string.avsc new file mode 100644 index 000000000..347808ce6 --- /dev/null +++ b/src/arrow/java/adapter/avro/src/test/resources/schema/test_nullable_string.avsc @@ -0,0 +1,25 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +{ + "namespace": "org.apache.arrow.avro", + "type": "record", + "name": "nullableString", + "fields": [ + {"name": "f0", "type": ["null", "string"]} + ] +} diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/test_nullable_union.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/test_nullable_union.avsc new file mode 100644 index 000000000..af94812d7 --- /dev/null +++ b/src/arrow/java/adapter/avro/src/test/resources/schema/test_nullable_union.avsc @@ -0,0 +1,25 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +{ + "namespace": "org.apache.arrow.avro", + "type": "record", + "name": "testNullableUnions", + "fields": [ + {"name": "f0", "type": ["string", "int", "null"]} + ] +} diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/test_primitive_boolean.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/test_primitive_boolean.avsc new file mode 100644 index 000000000..7652ce723 --- /dev/null +++ b/src/arrow/java/adapter/avro/src/test/resources/schema/test_primitive_boolean.avsc @@ -0,0 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +{ + "namespace": "org.apache.arrow.avro", + "type": "boolean", + "name": "TestBoolean" +} diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/test_primitive_bytes.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/test_primitive_bytes.avsc new file mode 100644 index 000000000..5102430b6 --- /dev/null +++ b/src/arrow/java/adapter/avro/src/test/resources/schema/test_primitive_bytes.avsc @@ -0,0 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +{ + "namespace": "org.apache.arrow.avro", + "type": "bytes", + "name": "TestBytes" +} diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/test_primitive_double.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/test_primitive_double.avsc new file mode 100644 index 000000000..d1ae0b605 --- /dev/null +++ b/src/arrow/java/adapter/avro/src/test/resources/schema/test_primitive_double.avsc @@ -0,0 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +{ + "namespace": "org.apache.arrow.avro", + "type": "double", + "name": "TestDouble" +} diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/test_primitive_enum.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/test_primitive_enum.avsc new file mode 100644 index 000000000..bd8df6102 --- /dev/null +++ b/src/arrow/java/adapter/avro/src/test/resources/schema/test_primitive_enum.avsc @@ -0,0 +1,23 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +{ + "namespace": "org.apache.arrow.avro", + "type": "enum", + "symbols" : ["SPADES", "HEARTS", "DIAMONDS", "CLUBS"], + "name": "testEnum" +} diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/test_primitive_float.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/test_primitive_float.avsc new file mode 100644 index 000000000..675d1090d --- /dev/null +++ b/src/arrow/java/adapter/avro/src/test/resources/schema/test_primitive_float.avsc @@ -0,0 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +{ + "namespace": "org.apache.arrow.avro", + "type": "float", + "name": "TestFloat" +} diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/test_primitive_int.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/test_primitive_int.avsc new file mode 100644 index 000000000..8fc848828 --- /dev/null +++ b/src/arrow/java/adapter/avro/src/test/resources/schema/test_primitive_int.avsc @@ -0,0 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +{ + "namespace": "org.apache.arrow.avro", + "type": "int", + "name": "TestInt" +} diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/test_primitive_long.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/test_primitive_long.avsc new file mode 100644 index 000000000..b9706107c --- /dev/null +++ b/src/arrow/java/adapter/avro/src/test/resources/schema/test_primitive_long.avsc @@ -0,0 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +{ + "namespace": "org.apache.arrow.avro", + "type": "long", + "name": "TestLong" +} diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/test_primitive_string.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/test_primitive_string.avsc new file mode 100644 index 000000000..b4a89a7f6 --- /dev/null +++ b/src/arrow/java/adapter/avro/src/test/resources/schema/test_primitive_string.avsc @@ -0,0 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +{ + "namespace": "org.apache.arrow.avro", + "type": "string", + "name": "TestString" +} diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/test_record.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/test_record.avsc new file mode 100644 index 000000000..e83cf1180 --- /dev/null +++ b/src/arrow/java/adapter/avro/src/test/resources/schema/test_record.avsc @@ -0,0 +1,27 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +{ + "namespace": "org.apache.arrow.avro", + "type": "record", + "name": "testRecord", + "fields": [ + {"name": "f0", "type": "string"}, + {"name": "f1", "type": "int"}, + {"name": "f2", "type": "boolean"} + ] +} diff --git a/src/arrow/java/adapter/avro/src/test/resources/schema/test_union.avsc b/src/arrow/java/adapter/avro/src/test/resources/schema/test_union.avsc new file mode 100644 index 000000000..f181e36e3 --- /dev/null +++ b/src/arrow/java/adapter/avro/src/test/resources/schema/test_union.avsc @@ -0,0 +1,25 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +{ + "namespace": "org.apache.arrow.avro", + "type": "record", + "name": "testUnions", + "fields": [ + {"name": "f0", "type": ["string", "int"]} + ] +} diff --git a/src/arrow/java/adapter/jdbc/pom.xml b/src/arrow/java/adapter/jdbc/pom.xml new file mode 100644 index 000000000..a0e09fc0d --- /dev/null +++ b/src/arrow/java/adapter/jdbc/pom.xml @@ -0,0 +1,96 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor + license agreements. See the NOTICE file distributed with this work for additional + information regarding copyright ownership. 
The ASF licenses this file to + You under the Apache License, Version 2.0 (the "License"); you may not use + this file except in compliance with the License. You may obtain a copy of + the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required + by applicable law or agreed to in writing, software distributed under the + License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS + OF ANY KIND, either express or implied. See the License for the specific + language governing permissions and limitations under the License. --> + +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <modelVersion>4.0.0</modelVersion> + + <parent> + <groupId>org.apache.arrow</groupId> + <artifactId>arrow-java-root</artifactId> + <version>6.0.1</version> + <relativePath>../../pom.xml</relativePath> + </parent> + + <artifactId>arrow-jdbc</artifactId> + <name>Arrow JDBC Adapter</name> + <description>(Contrib/Experimental)A library for converting JDBC data to Arrow data.</description> + <url>http://maven.apache.org</url> + + <dependencies> + + <!-- https://mvnrepository.com/artifact/org.apache.arrow/arrow-memory-core --> + <dependency> + <groupId>org.apache.arrow</groupId> + <artifactId>arrow-memory-core</artifactId> + <version>${project.version}</version> + </dependency> + + <!-- https://mvnrepository.com/artifact/org.apache.arrow/arrow-memory-netty --> + <dependency> + <groupId>org.apache.arrow</groupId> + <artifactId>arrow-memory-netty</artifactId> + <version>${project.version}</version> + <scope>runtime</scope> + </dependency> + + <!-- https://mvnrepository.com/artifact/org.apache.arrow/arrow-vector --> + <dependency> + <groupId>org.apache.arrow</groupId> + <artifactId>arrow-vector</artifactId> + <version>${project.version}</version> + <classifier>${arrow.vector.classifier}</classifier> + </dependency> + 
+ <!-- https://mvnrepository.com/artifact/com.h2database/h2 --> + <dependency> + <groupId>com.h2database</groupId> + <artifactId>h2</artifactId> + <version>1.4.196</version> + <scope>test</scope> + </dependency> + + <dependency> + <groupId>com.fasterxml.jackson.dataformat</groupId> + <artifactId>jackson-dataformat-yaml</artifactId> + <version>${dep.jackson.version}</version> + <scope>test</scope> + </dependency> + + <dependency> + <groupId>com.fasterxml.jackson.core</groupId> + <artifactId>jackson-databind</artifactId> + <version>${dep.jackson.version}</version> + <scope>test</scope> + </dependency> + + <dependency> + <groupId>com.fasterxml.jackson.core</groupId> + <artifactId>jackson-core</artifactId> + <version>${dep.jackson.version}</version> + <scope>test</scope> + </dependency> + + <dependency> + <groupId>com.fasterxml.jackson.core</groupId> + <artifactId>jackson-annotations</artifactId> + <version>${dep.jackson.version}</version> + <scope>test</scope> + </dependency> + + <dependency> + <groupId>io.netty</groupId> + <artifactId>netty-common</artifactId> + </dependency> + + </dependencies> + +</project> diff --git a/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/ArrowVectorIterator.java b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/ArrowVectorIterator.java new file mode 100644 index 000000000..0e833bcc2 --- /dev/null +++ b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/ArrowVectorIterator.java @@ -0,0 +1,186 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.adapter.jdbc; + +import static org.apache.arrow.adapter.jdbc.JdbcToArrowUtils.isColumnNullable; + +import java.sql.ResultSet; +import java.sql.ResultSetMetaData; +import java.sql.SQLException; +import java.util.Iterator; + +import org.apache.arrow.adapter.jdbc.consumer.CompositeJdbcConsumer; +import org.apache.arrow.adapter.jdbc.consumer.JdbcConsumer; +import org.apache.arrow.util.Preconditions; +import org.apache.arrow.vector.VectorSchemaRoot; +import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.types.pojo.Schema; +import org.apache.arrow.vector.util.ValueVectorUtility; + +/** + * VectorSchemaRoot iterator for partially converting JDBC data. + */ +public class ArrowVectorIterator implements Iterator<VectorSchemaRoot>, AutoCloseable { + + private final ResultSet resultSet; + private final JdbcToArrowConfig config; + + private final Schema schema; + private final ResultSetMetaData rsmd; + + private final JdbcConsumer[] consumers; + final CompositeJdbcConsumer compositeConsumer; + + // this is used only if resuing vector schema root is enabled. + private VectorSchemaRoot nextBatch; + + private final int targetBatchSize; + + /** + * Construct an instance. 
+ */ + private ArrowVectorIterator(ResultSet resultSet, JdbcToArrowConfig config) throws SQLException { + this.resultSet = resultSet; + this.config = config; + this.schema = JdbcToArrowUtils.jdbcToArrowSchema(resultSet.getMetaData(), config); + this.targetBatchSize = config.getTargetBatchSize(); + + rsmd = resultSet.getMetaData(); + consumers = new JdbcConsumer[rsmd.getColumnCount()]; + this.compositeConsumer = new CompositeJdbcConsumer(consumers); + } + + private void initialize() throws SQLException { + // create consumers + for (int i = 1; i <= consumers.length; i++) { + ArrowType arrowType = config.getJdbcToArrowTypeConverter() + .apply(new JdbcFieldInfo(resultSet.getMetaData(), i)); + consumers[i - 1] = JdbcToArrowUtils.getConsumer( + arrowType, i, isColumnNullable(resultSet, i), null, config); + } + + this.nextBatch = config.isReuseVectorSchemaRoot() ? createVectorSchemaRoot() : null; + } + + /** + * Create a ArrowVectorIterator to partially convert data. + */ + public static ArrowVectorIterator create( + ResultSet resultSet, + JdbcToArrowConfig config) + throws SQLException { + ArrowVectorIterator iterator = null; + try { + iterator = new ArrowVectorIterator(resultSet, config); + iterator.initialize(); + } catch (Throwable e) { + if (iterator != null) { + iterator.close(); + } + throw new RuntimeException("Error occurred while creating iterator.", e); + } + return iterator; + } + + private void consumeData(VectorSchemaRoot root) { + // consume data + try { + int readRowCount = 0; + if (targetBatchSize == JdbcToArrowConfig.NO_LIMIT_BATCH_SIZE) { + while (resultSet.next()) { + ValueVectorUtility.ensureCapacity(root, readRowCount + 1); + compositeConsumer.consume(resultSet); + readRowCount++; + } + } else { + while (readRowCount < targetBatchSize && resultSet.next()) { + compositeConsumer.consume(resultSet); + readRowCount++; + } + } + + root.setRowCount(readRowCount); + } catch (Throwable e) { + compositeConsumer.close(); + throw new RuntimeException("Error 
occurred while consuming data.", e); + } + } + + private VectorSchemaRoot createVectorSchemaRoot() { + VectorSchemaRoot root = null; + try { + root = VectorSchemaRoot.create(schema, config.getAllocator()); + if (config.getTargetBatchSize() != JdbcToArrowConfig.NO_LIMIT_BATCH_SIZE) { + ValueVectorUtility.preAllocate(root, config.getTargetBatchSize()); + } + } catch (Throwable e) { + if (root != null) { + root.close(); + } + throw new RuntimeException("Error occurred while creating schema root.", e); + } + return root; + } + + // Loads the next schema root or null if no more rows are available. + private void load(VectorSchemaRoot root) throws SQLException { + for (int i = 0; i < consumers.length; i++) { + consumers[i].resetValueVector(root.getVector(i)); + } + + consumeData(root); + } + + @Override + public boolean hasNext() { + try { + return !resultSet.isAfterLast(); + } catch (SQLException e) { + throw new RuntimeException(e); + } + } + + /** + * Gets the next vector. + * If {@link JdbcToArrowConfig#isReuseVectorSchemaRoot()} is false, + * the client is responsible for freeing its resources. + */ + @Override + public VectorSchemaRoot next() { + Preconditions.checkArgument(hasNext()); + try { + VectorSchemaRoot ret = config.isReuseVectorSchemaRoot() ? nextBatch : createVectorSchemaRoot(); + load(ret); + return ret; + } catch (Exception e) { + close(); + throw new RuntimeException("Error occurred while getting next schema root.", e); + } + } + + /** + * Clean up resources. 
+ */ + @Override + public void close() { + if (config.isReuseVectorSchemaRoot()) { + nextBatch.close(); + } + compositeConsumer.close(); + } +} diff --git a/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/Constants.java b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/Constants.java new file mode 100644 index 000000000..aaadacb54 --- /dev/null +++ b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/Constants.java @@ -0,0 +1,31 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.adapter.jdbc; + +/** + * String constants used for metadata returned on Vectors. 
/**
 * Keys of the metadata entries attached to Arrow vectors that were produced from JDBC data.
 * This class only holds constants and cannot be instantiated.
 */
public class Constants {

  /** Metadata key for the SQL catalog name. */
  public static final String SQL_CATALOG_NAME_KEY = "SQL_CATALOG_NAME";

  /** Metadata key for the SQL table name. */
  public static final String SQL_TABLE_NAME_KEY = "SQL_TABLE_NAME";

  /** Metadata key for the SQL column name. */
  public static final String SQL_COLUMN_NAME_KEY = "SQL_COLUMN_NAME";

  /** Metadata key for the SQL type. */
  public static final String SQL_TYPE_KEY = "SQL_TYPE";

  // Not instantiable: constant holder only.
  private Constants() {
  }
}
+ * Currently, this is: + * <ul> + * <li>The JDBC {@link java.sql.Types} type.</li> + * <li>The field's precision (used for {@link java.sql.Types#DECIMAL} and {@link java.sql.Types#NUMERIC} types)</li> + * <li>The field's scale (used for {@link java.sql.Types#DECIMAL} and {@link java.sql.Types#NUMERIC} types)</li> + * </ul> + */ +public class JdbcFieldInfo { + private final int jdbcType; + private final int precision; + private final int scale; + + /** + * Builds a <code>JdbcFieldInfo</code> using only the {@link java.sql.Types} type. Do not use this constructor + * if the field type is {@link java.sql.Types#DECIMAL} or {@link java.sql.Types#NUMERIC}; the precision and + * scale will be set to <code>0</code>. + * + * @param jdbcType The {@link java.sql.Types} type. + * @throws IllegalArgumentException if jdbcType is {@link java.sql.Types#DECIMAL} or {@link java.sql.Types#NUMERIC}. + */ + public JdbcFieldInfo(int jdbcType) { + Preconditions.checkArgument( + (jdbcType != Types.DECIMAL && jdbcType != Types.NUMERIC), + "DECIMAL and NUMERIC types require a precision and scale; please use another constructor."); + + this.jdbcType = jdbcType; + this.precision = 0; + this.scale = 0; + } + + /** + * Builds a <code>JdbcFieldInfo</code> from the {@link java.sql.Types} type, precision, and scale. + * Use this constructor for {@link java.sql.Types#DECIMAL} and {@link java.sql.Types#NUMERIC} types. + * + * @param jdbcType The {@link java.sql.Types} type. + * @param precision The field's numeric precision. + * @param scale The field's numeric scale. + */ + public JdbcFieldInfo(int jdbcType, int precision, int scale) { + this.jdbcType = jdbcType; + this.precision = precision; + this.scale = scale; + } + + /** + * Builds a <code>JdbcFieldInfo</code> from the corresponding {@link java.sql.ResultSetMetaData} column. + * + * @param rsmd The {@link java.sql.ResultSetMetaData} to get the field information from. 
+ * @param column The column to get the field information for (on a 1-based index). + * @throws SQLException If the column information cannot be retrieved. + * @throws NullPointerException if <code>rsmd</code> is <code>null</code>. + * @throws IllegalArgumentException if <code>column</code> is out of bounds. + */ + public JdbcFieldInfo(ResultSetMetaData rsmd, int column) throws SQLException { + Preconditions.checkNotNull(rsmd, "ResultSetMetaData cannot be null."); + Preconditions.checkArgument(column > 0, "ResultSetMetaData columns have indices starting at 1."); + Preconditions.checkArgument( + column <= rsmd.getColumnCount(), + "The index must be within the number of columns (1 to %s, inclusive)", rsmd.getColumnCount()); + + this.jdbcType = rsmd.getColumnType(column); + this.precision = rsmd.getPrecision(column); + this.scale = rsmd.getScale(column); + } + + /** + * The {@link java.sql.Types} type. + */ + public int getJdbcType() { + return jdbcType; + } + + /** + * The numeric precision, for {@link java.sql.Types#NUMERIC} and {@link java.sql.Types#DECIMAL} types. + */ + public int getPrecision() { + return precision; + } + + /** + * The numeric scale, for {@link java.sql.Types#NUMERIC} and {@link java.sql.Types#DECIMAL} types. + */ + public int getScale() { + return scale; + } +} diff --git a/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrow.java b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrow.java new file mode 100644 index 000000000..daee64d93 --- /dev/null +++ b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrow.java @@ -0,0 +1,103 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.adapter.jdbc; + +import java.io.IOException; +import java.sql.ResultSet; +import java.sql.SQLException; + +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.util.Preconditions; + +/** + * Utility class to convert JDBC objects to columnar Arrow format objects. + * + * <p>This utility uses following data mapping to map JDBC/SQL datatype to Arrow data types. + * + * <p>CHAR --> ArrowType.Utf8 + * NCHAR --> ArrowType.Utf8 + * VARCHAR --> ArrowType.Utf8 + * NVARCHAR --> ArrowType.Utf8 + * LONGVARCHAR --> ArrowType.Utf8 + * LONGNVARCHAR --> ArrowType.Utf8 + * NUMERIC --> ArrowType.Decimal(precision, scale) + * DECIMAL --> ArrowType.Decimal(precision, scale) + * BIT --> ArrowType.Bool + * TINYINT --> ArrowType.Int(8, signed) + * SMALLINT --> ArrowType.Int(16, signed) + * INTEGER --> ArrowType.Int(32, signed) + * BIGINT --> ArrowType.Int(64, signed) + * REAL --> ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE) + * FLOAT --> ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE) + * DOUBLE --> ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE) + * BINARY --> ArrowType.Binary + * VARBINARY --> ArrowType.Binary + * LONGVARBINARY --> ArrowType.Binary + * DATE --> ArrowType.Date(DateUnit.MILLISECOND) + * TIME --> ArrowType.Time(TimeUnit.MILLISECOND, 32) + * TIMESTAMP --> ArrowType.Timestamp(TimeUnit.MILLISECOND, timezone=null) + * CLOB 
--> ArrowType.Utf8 + * BLOB --> ArrowType.Binary + * + * @since 0.10.0 + */ +public class JdbcToArrow { + + /*----------------------------------------------------------------* + | | + | Partial Convert API | + | | + *----------------------------------------------------------------*/ + + /** + * For the given JDBC {@link ResultSet}, fetch the data from Relational DB and convert it to Arrow objects. + * Note here uses the default targetBatchSize = 1024. + * + * @param resultSet ResultSet to use to fetch the data from underlying database + * @param allocator Memory allocator + * @return Arrow Data Objects {@link ArrowVectorIterator} + * @throws SQLException on error + */ + public static ArrowVectorIterator sqlToArrowVectorIterator( + ResultSet resultSet, + BufferAllocator allocator) + throws SQLException, IOException { + Preconditions.checkNotNull(allocator, "Memory Allocator object can not be null"); + + JdbcToArrowConfig config = + new JdbcToArrowConfig(allocator, JdbcToArrowUtils.getUtcCalendar()); + return sqlToArrowVectorIterator(resultSet, config); + } + + /** + * For the given JDBC {@link ResultSet}, fetch the data from Relational DB and convert it to Arrow objects. + * Note if not specify {@link JdbcToArrowConfig#targetBatchSize}, will use default value 1024. + * @param resultSet ResultSet to use to fetch the data from underlying database + * @param config Configuration of the conversion from JDBC to Arrow. 
+ * @return Arrow Data Objects {@link ArrowVectorIterator} + * @throws SQLException on error + */ + public static ArrowVectorIterator sqlToArrowVectorIterator( + ResultSet resultSet, + JdbcToArrowConfig config) + throws SQLException, IOException { + Preconditions.checkNotNull(resultSet, "JDBC ResultSet object can not be null"); + Preconditions.checkNotNull(config, "The configuration cannot be null"); + return ArrowVectorIterator.create(resultSet, config); + } +} diff --git a/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfig.java b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfig.java new file mode 100644 index 000000000..250b0edd2 --- /dev/null +++ b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfig.java @@ -0,0 +1,296 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.arrow.adapter.jdbc; + +import static org.apache.arrow.vector.types.FloatingPointPrecision.DOUBLE; +import static org.apache.arrow.vector.types.FloatingPointPrecision.SINGLE; + +import java.sql.Types; +import java.util.Calendar; +import java.util.Map; +import java.util.function.Function; + +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.util.Preconditions; +import org.apache.arrow.vector.types.DateUnit; +import org.apache.arrow.vector.types.TimeUnit; +import org.apache.arrow.vector.types.pojo.ArrowType; + +/** + * This class configures the JDBC-to-Arrow conversion process. + * <p> + * The allocator is used to construct the {@link org.apache.arrow.vector.VectorSchemaRoot}, + * and the calendar is used to define the time zone of any + * {@link org.apache.arrow.vector.types.pojo.ArrowType.Timestamp} + * fields that are created during the conversion. Neither field may be <code>null</code>. + * </p> + * <p> + * If the <code>includeMetadata</code> flag is set, the Arrow field metadata will contain information + * from the corresponding {@link java.sql.ResultSetMetaData} that was used to create the + * {@link org.apache.arrow.vector.types.pojo.FieldType} of the corresponding + * {@link org.apache.arrow.vector.FieldVector}. + * </p> + * <p> + * If there are any {@link java.sql.Types#ARRAY} fields in the {@link java.sql.ResultSet}, the corresponding + * {@link JdbcFieldInfo} for the array's contents must be defined here. Unfortunately, the sub-type + * information cannot be retrieved from all JDBC implementations (H2 for example, returns + * {@link java.sql.Types#NULL} for the array sub-type), so it must be configured here. The column index + * or name can be used to map to a {@link JdbcFieldInfo}, and that will be used for the conversion. 
+ * </p> + */ +public final class JdbcToArrowConfig { + + private final Calendar calendar; + private final BufferAllocator allocator; + private final boolean includeMetadata; + private final boolean reuseVectorSchemaRoot; + private final Map<Integer, JdbcFieldInfo> arraySubTypesByColumnIndex; + private final Map<String, JdbcFieldInfo> arraySubTypesByColumnName; + + public static final int DEFAULT_TARGET_BATCH_SIZE = 1024; + public static final int NO_LIMIT_BATCH_SIZE = -1; + + /** + * The maximum rowCount to read each time when partially convert data. + * Default value is 1024 and -1 means disable partial read. + * default is -1 which means disable partial read. + * Note that this flag only useful for {@link JdbcToArrow#sqlToArrowVectorIterator} + * 1) if targetBatchSize != -1, it will convert full data into multiple vectors + * with valueCount no more than targetBatchSize. + * 2) if targetBatchSize == -1, it will convert full data into a single vector in {@link ArrowVectorIterator} + * </p> + */ + private final int targetBatchSize; + + private final Function<JdbcFieldInfo, ArrowType> jdbcToArrowTypeConverter; + + /** + * Constructs a new configuration from the provided allocator and calendar. The <code>allocator</code> + * is used when constructing the Arrow vectors from the ResultSet, and the calendar is used to define + * Arrow Timestamp fields, and to read time-based fields from the JDBC <code>ResultSet</code>. + * + * @param allocator The memory allocator to construct the Arrow vectors with. + * @param calendar The calendar to use when constructing Timestamp fields and reading time-based results. 
+ */ + JdbcToArrowConfig(BufferAllocator allocator, Calendar calendar) { + this(allocator, calendar, + /* include metadata */ false, + /* reuse vector schema root */ false, + /* array sub-types by column index */ null, + /* array sub-types by column name */ null, + DEFAULT_TARGET_BATCH_SIZE, null); + } + + /** + * Constructs a new configuration from the provided allocator and calendar. The <code>allocator</code> + * is used when constructing the Arrow vectors from the ResultSet, and the calendar is used to define + * Arrow Timestamp fields, and to read time-based fields from the JDBC <code>ResultSet</code>. + * + * @param allocator The memory allocator to construct the Arrow vectors with. + * @param calendar The calendar to use when constructing Timestamp fields and reading time-based results. + * @param includeMetadata Whether to include JDBC field metadata in the Arrow Schema Field metadata. + * @param reuseVectorSchemaRoot Whether to reuse the vector schema root for each data load. + * @param arraySubTypesByColumnIndex The type of the JDBC array at the column index (1-based). + * @param arraySubTypesByColumnName The type of the JDBC array at the column name. + * @param jdbcToArrowTypeConverter The function that maps JDBC field type information to arrow type. 
If set to null, + * the default mapping will be used, which is defined as: + * <ul> + * <li>CHAR --> ArrowType.Utf8</li> + * <li>NCHAR --> ArrowType.Utf8</li> + * <li>VARCHAR --> ArrowType.Utf8</li> + * <li>NVARCHAR --> ArrowType.Utf8</li> + * <li>LONGVARCHAR --> ArrowType.Utf8</li> + * <li>LONGNVARCHAR --> ArrowType.Utf8</li> + * <li>NUMERIC --> ArrowType.Decimal(precision, scale)</li> + * <li>DECIMAL --> ArrowType.Decimal(precision, scale)</li> + * <li>BIT --> ArrowType.Bool</li> + * <li>TINYINT --> ArrowType.Int(8, signed)</li> + * <li>SMALLINT --> ArrowType.Int(16, signed)</li> + * <li>INTEGER --> ArrowType.Int(32, signed)</li> + * <li>BIGINT --> ArrowType.Int(64, signed)</li> + * <li>REAL --> ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE)</li> + * <li>FLOAT --> ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE)</li> + * <li>DOUBLE --> ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE)</li> + * <li>BINARY --> ArrowType.Binary</li> + * <li>VARBINARY --> ArrowType.Binary</li> + * <li>LONGVARBINARY --> ArrowType.Binary</li> + * <li>DATE --> ArrowType.Date(DateUnit.DAY)</li> + * <li>TIME --> ArrowType.Time(TimeUnit.MILLISECOND, 32)</li> + * <li>TIMESTAMP --> ArrowType.Timestamp(TimeUnit.MILLISECOND, calendar timezone)</li> + * <li>CLOB --> ArrowType.Utf8</li> + * <li>BLOB --> ArrowType.Binary</li> + * <li>NULL --> ArrowType.Null</li> + * </ul> + */ + JdbcToArrowConfig( + BufferAllocator allocator, + Calendar calendar, + boolean includeMetadata, + boolean reuseVectorSchemaRoot, + Map<Integer, JdbcFieldInfo> arraySubTypesByColumnIndex, + Map<String, JdbcFieldInfo> arraySubTypesByColumnName, + int targetBatchSize, + Function<JdbcFieldInfo, ArrowType> jdbcToArrowTypeConverter) { + Preconditions.checkNotNull(allocator, "Memory allocator cannot be null"); + this.allocator = allocator; + this.calendar = calendar; + this.includeMetadata = includeMetadata; + this.reuseVectorSchemaRoot = reuseVectorSchemaRoot; + this.arraySubTypesByColumnIndex = 
arraySubTypesByColumnIndex; + this.arraySubTypesByColumnName = arraySubTypesByColumnName; + this.targetBatchSize = targetBatchSize; + + // set up type converter + this.jdbcToArrowTypeConverter = jdbcToArrowTypeConverter != null ? jdbcToArrowTypeConverter : + fieldInfo -> { + final String timezone; + if (calendar != null) { + timezone = calendar.getTimeZone().getID(); + } else { + timezone = null; + } + + switch (fieldInfo.getJdbcType()) { + case Types.BOOLEAN: + case Types.BIT: + return new ArrowType.Bool(); + case Types.TINYINT: + return new ArrowType.Int(8, true); + case Types.SMALLINT: + return new ArrowType.Int(16, true); + case Types.INTEGER: + return new ArrowType.Int(32, true); + case Types.BIGINT: + return new ArrowType.Int(64, true); + case Types.NUMERIC: + case Types.DECIMAL: + int precision = fieldInfo.getPrecision(); + int scale = fieldInfo.getScale(); + return new ArrowType.Decimal(precision, scale, 128); + case Types.REAL: + case Types.FLOAT: + return new ArrowType.FloatingPoint(SINGLE); + case Types.DOUBLE: + return new ArrowType.FloatingPoint(DOUBLE); + case Types.CHAR: + case Types.NCHAR: + case Types.VARCHAR: + case Types.NVARCHAR: + case Types.LONGVARCHAR: + case Types.LONGNVARCHAR: + case Types.CLOB: + return new ArrowType.Utf8(); + case Types.DATE: + return new ArrowType.Date(DateUnit.DAY); + case Types.TIME: + return new ArrowType.Time(TimeUnit.MILLISECOND, 32); + case Types.TIMESTAMP: + return new ArrowType.Timestamp(TimeUnit.MILLISECOND, timezone); + case Types.BINARY: + case Types.VARBINARY: + case Types.LONGVARBINARY: + case Types.BLOB: + return new ArrowType.Binary(); + case Types.ARRAY: + return new ArrowType.List(); + case Types.NULL: + return new ArrowType.Null(); + default: + // no-op, shouldn't get here + return null; + } + }; + } + + /** + * The calendar to use when defining Arrow Timestamp fields + * and retrieving {@link java.sql.Date}, {@link java.sql.Time}, or {@link java.sql.Timestamp} + * data types from the {@link 
java.sql.ResultSet}, or <code>null</code> if not converting. + * + * @return the calendar. + */ + public Calendar getCalendar() { + return calendar; + } + + /** + * The Arrow memory allocator. + * @return the allocator. + */ + public BufferAllocator getAllocator() { + return allocator; + } + + /** + * Whether to include JDBC ResultSet field metadata in the Arrow Schema field metadata. + * + * @return <code>true</code> to include field metadata, <code>false</code> to exclude it. + */ + public boolean shouldIncludeMetadata() { + return includeMetadata; + } + + /** + * Get the target batch size for partial read. + */ + public int getTargetBatchSize() { + return targetBatchSize; + } + + /** + * Get whether it is allowed to reuse the vector schema root. + */ + public boolean isReuseVectorSchemaRoot() { + return reuseVectorSchemaRoot; + } + + /** + * Gets the mapping between JDBC type information to Arrow type. + */ + public Function<JdbcFieldInfo, ArrowType> getJdbcToArrowTypeConverter() { + return jdbcToArrowTypeConverter; + } + + /** + * Returns the array sub-type {@link JdbcFieldInfo} defined for the provided column index. + * + * @param index The {@link java.sql.ResultSetMetaData} column index of an {@link java.sql.Types#ARRAY} type. + * @return The {@link JdbcFieldInfo} for that array's sub-type, or <code>null</code> if not defined. + */ + public JdbcFieldInfo getArraySubTypeByColumnIndex(int index) { + if (arraySubTypesByColumnIndex == null) { + return null; + } else { + return arraySubTypesByColumnIndex.get(index); + } + } + + /** + * Returns the array sub-type {@link JdbcFieldInfo} defined for the provided column name. + * + * @param name The {@link java.sql.ResultSetMetaData} column name of an {@link java.sql.Types#ARRAY} type. + * @return The {@link JdbcFieldInfo} for that array's sub-type, or <code>null</code> if not defined. 
+ */ + public JdbcFieldInfo getArraySubTypeByColumnName(String name) { + if (arraySubTypesByColumnName == null) { + return null; + } else { + return arraySubTypesByColumnName.get(name); + } + } +} diff --git a/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigBuilder.java b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigBuilder.java new file mode 100644 index 000000000..3941d978f --- /dev/null +++ b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigBuilder.java @@ -0,0 +1,201 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.adapter.jdbc; + +import static org.apache.arrow.adapter.jdbc.JdbcToArrowConfig.DEFAULT_TARGET_BATCH_SIZE; + +import java.util.Calendar; +import java.util.Map; +import java.util.function.Function; + +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.util.Preconditions; +import org.apache.arrow.vector.types.pojo.ArrowType; + +/** + * This class builds {@link JdbcToArrowConfig}s. 
+ */ +public class JdbcToArrowConfigBuilder { + private Calendar calendar; + private BufferAllocator allocator; + private boolean includeMetadata; + private boolean reuseVectorSchemaRoot; + private Map<Integer, JdbcFieldInfo> arraySubTypesByColumnIndex; + private Map<String, JdbcFieldInfo> arraySubTypesByColumnName; + + private int targetBatchSize; + private Function<JdbcFieldInfo, ArrowType> jdbcToArrowTypeConverter; + + /** + * Default constructor for the <code>JdbcToArrowConfigBuilder}</code>. + * Use the setter methods for the allocator and calendar; the allocator must be + * set. Otherwise, {@link #build()} will throw a {@link NullPointerException}. + */ + public JdbcToArrowConfigBuilder() { + this.allocator = null; + this.calendar = null; + this.includeMetadata = false; + this.reuseVectorSchemaRoot = false; + this.arraySubTypesByColumnIndex = null; + this.arraySubTypesByColumnName = null; + } + + /** + * Constructor for the <code>JdbcToArrowConfigBuilder</code>. The + * allocator is required, and a {@link NullPointerException} + * will be thrown if it is <code>null</code>. + * <p> + * The allocator is used to construct Arrow vectors from the JDBC ResultSet. + * The calendar is used to determine the time zone of {@link java.sql.Timestamp} + * fields and convert {@link java.sql.Date}, {@link java.sql.Time}, and + * {@link java.sql.Timestamp} fields to a single, common time zone when reading + * from the result set. + * </p> + * + * @param allocator The Arrow Vector memory allocator. + * @param calendar The calendar to use when constructing timestamp fields. 
+ */ + public JdbcToArrowConfigBuilder(BufferAllocator allocator, Calendar calendar) { + this(); + + Preconditions.checkNotNull(allocator, "Memory allocator cannot be null"); + + this.allocator = allocator; + this.calendar = calendar; + this.includeMetadata = false; + this.reuseVectorSchemaRoot = false; + this.targetBatchSize = DEFAULT_TARGET_BATCH_SIZE; + } + + /** + * Constructor for the <code>JdbcToArrowConfigBuilder</code>. Both the + * allocator and calendar are required. A {@link NullPointerException} + * will be thrown if either of those arguments is <code>null</code>. + * <p> + * The allocator is used to construct Arrow vectors from the JDBC ResultSet. + * The calendar is used to determine the time zone of {@link java.sql.Timestamp} + * fields and convert {@link java.sql.Date}, {@link java.sql.Time}, and + * {@link java.sql.Timestamp} fields to a single, common time zone when reading + * from the result set. + * </p> + * <p> + * The <code>includeMetadata</code> argument, if <code>true</code> will cause + * various information about each database field to be added to the Vector + * Schema's field metadata. + * </p> + * + * @param allocator The Arrow Vector memory allocator. + * @param calendar The calendar to use when constructing timestamp fields. + */ + public JdbcToArrowConfigBuilder(BufferAllocator allocator, Calendar calendar, boolean includeMetadata) { + this(allocator, calendar); + this.includeMetadata = includeMetadata; + } + + /** + * Sets the memory allocator to use when constructing the Arrow vectors from the ResultSet. + * + * @param allocator the allocator to set. + * @exception NullPointerException if <code>allocator</code> is null. 
+ */ + public JdbcToArrowConfigBuilder setAllocator(BufferAllocator allocator) { + Preconditions.checkNotNull(allocator, "Memory allocator cannot be null"); + this.allocator = allocator; + return this; + } + + /** + * Sets the {@link Calendar} to use when constructing timestamp fields in the + * Arrow schema, and reading time-based fields from the JDBC <code>ResultSet</code>. + * + * @param calendar the calendar to set. + */ + public JdbcToArrowConfigBuilder setCalendar(Calendar calendar) { + this.calendar = calendar; + return this; + } + + /** + * Sets whether to include JDBC ResultSet field metadata in the Arrow Schema field metadata. + * + * @param includeMetadata Whether to include or exclude JDBC metadata in the Arrow Schema field metadata. + * @return This instance of the <code>JdbcToArrowConfig</code>, for chaining. + */ + public JdbcToArrowConfigBuilder setIncludeMetadata(boolean includeMetadata) { + this.includeMetadata = includeMetadata; + return this; + } + + /** + * Sets the mapping of column-index-to-{@link JdbcFieldInfo} used for columns of type {@link java.sql.Types#ARRAY}. + * The column index is 1-based, to match the JDBC column index. + * + * @param map The mapping. + * @return This instance of the <code>JdbcToArrowConfig</code>, for chaining. + */ + public JdbcToArrowConfigBuilder setArraySubTypeByColumnIndexMap(Map<Integer, JdbcFieldInfo> map) { + this.arraySubTypesByColumnIndex = map; + return this; + } + + /** + * Sets the mapping of column-name-to-{@link JdbcFieldInfo} used for columns of type {@link java.sql.Types#ARRAY}. + * + * @param map The mapping. + * @return This instance of the <code>JdbcToArrowConfig</code>, for chaining. 
+ */ + public JdbcToArrowConfigBuilder setArraySubTypeByColumnNameMap(Map<String, JdbcFieldInfo> map) { + this.arraySubTypesByColumnName = map; + return this; + } + + public JdbcToArrowConfigBuilder setTargetBatchSize(int targetBatchSize) { + this.targetBatchSize = targetBatchSize; + return this; + } + + public JdbcToArrowConfigBuilder setJdbcToArrowTypeConverter( + Function<JdbcFieldInfo, ArrowType> jdbcToArrowTypeConverter) { + this.jdbcToArrowTypeConverter = jdbcToArrowTypeConverter; + return this; + } + + public JdbcToArrowConfigBuilder setReuseVectorSchemaRoot(boolean reuseVectorSchemaRoot) { + this.reuseVectorSchemaRoot = reuseVectorSchemaRoot; + return this; + } + + /** + * This builds the {@link JdbcToArrowConfig} from the provided + * {@link BufferAllocator} and {@link Calendar}. + * + * @return The built {@link JdbcToArrowConfig} + * @throws NullPointerException if either the allocator or calendar was not set. + */ + public JdbcToArrowConfig build() { + return new JdbcToArrowConfig( + allocator, + calendar, + includeMetadata, + reuseVectorSchemaRoot, + arraySubTypesByColumnIndex, + arraySubTypesByColumnName, + targetBatchSize, + jdbcToArrowTypeConverter); + } +} diff --git a/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowUtils.java b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowUtils.java new file mode 100644 index 000000000..e05f21d48 --- /dev/null +++ b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowUtils.java @@ -0,0 +1,341 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.adapter.jdbc; + +import java.io.IOException; +import java.sql.Date; +import java.sql.ResultSet; +import java.sql.ResultSetMetaData; +import java.sql.SQLException; +import java.sql.Time; +import java.sql.Timestamp; +import java.util.ArrayList; +import java.util.Calendar; +import java.util.HashMap; +import java.util.List; +import java.util.Locale; +import java.util.Map; +import java.util.TimeZone; + +import org.apache.arrow.adapter.jdbc.consumer.ArrayConsumer; +import org.apache.arrow.adapter.jdbc.consumer.BigIntConsumer; +import org.apache.arrow.adapter.jdbc.consumer.BinaryConsumer; +import org.apache.arrow.adapter.jdbc.consumer.BitConsumer; +import org.apache.arrow.adapter.jdbc.consumer.CompositeJdbcConsumer; +import org.apache.arrow.adapter.jdbc.consumer.DateConsumer; +import org.apache.arrow.adapter.jdbc.consumer.DecimalConsumer; +import org.apache.arrow.adapter.jdbc.consumer.DoubleConsumer; +import org.apache.arrow.adapter.jdbc.consumer.FloatConsumer; +import org.apache.arrow.adapter.jdbc.consumer.IntConsumer; +import org.apache.arrow.adapter.jdbc.consumer.JdbcConsumer; +import org.apache.arrow.adapter.jdbc.consumer.NullConsumer; +import org.apache.arrow.adapter.jdbc.consumer.SmallIntConsumer; +import org.apache.arrow.adapter.jdbc.consumer.TimeConsumer; +import org.apache.arrow.adapter.jdbc.consumer.TimestampConsumer; +import org.apache.arrow.adapter.jdbc.consumer.TimestampTZConsumer; +import org.apache.arrow.adapter.jdbc.consumer.TinyIntConsumer; +import org.apache.arrow.adapter.jdbc.consumer.VarCharConsumer; 
+import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.util.Preconditions; +import org.apache.arrow.vector.BigIntVector; +import org.apache.arrow.vector.BitVector; +import org.apache.arrow.vector.DateDayVector; +import org.apache.arrow.vector.DecimalVector; +import org.apache.arrow.vector.FieldVector; +import org.apache.arrow.vector.Float4Vector; +import org.apache.arrow.vector.Float8Vector; +import org.apache.arrow.vector.IntVector; +import org.apache.arrow.vector.NullVector; +import org.apache.arrow.vector.SmallIntVector; +import org.apache.arrow.vector.TimeMilliVector; +import org.apache.arrow.vector.TimeStampMilliTZVector; +import org.apache.arrow.vector.TimeStampMilliVector; +import org.apache.arrow.vector.TinyIntVector; +import org.apache.arrow.vector.VarBinaryVector; +import org.apache.arrow.vector.VarCharVector; +import org.apache.arrow.vector.VectorSchemaRoot; +import org.apache.arrow.vector.complex.ListVector; +import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.types.pojo.Field; +import org.apache.arrow.vector.types.pojo.FieldType; +import org.apache.arrow.vector.types.pojo.Schema; +import org.apache.arrow.vector.util.ValueVectorUtility; + +/** + * Class that does most of the work to convert JDBC ResultSet data into Arrow columnar format Vector objects. + * + * @since 0.10.0 + */ +public class JdbcToArrowUtils { + + private static final int JDBC_ARRAY_VALUE_COLUMN = 2; + + /** + * Returns the instance of a {java.util.Calendar} with the UTC time zone and root locale. + */ + public static Calendar getUtcCalendar() { + return Calendar.getInstance(TimeZone.getTimeZone("UTC"), Locale.ROOT); + } + + /** + * Create Arrow {@link Schema} object for the given JDBC {@link ResultSetMetaData}. + * + * @param rsmd The ResultSetMetaData containing the results, to read the JDBC metadata from. + * @param calendar The calendar to use the time zone field of, to construct Timestamp fields from. 
+ * @return {@link Schema} + * @throws SQLException on error + */ + public static Schema jdbcToArrowSchema(ResultSetMetaData rsmd, Calendar calendar) throws SQLException { + Preconditions.checkNotNull(calendar, "Calendar object can't be null"); + + return jdbcToArrowSchema(rsmd, new JdbcToArrowConfig(new RootAllocator(0), calendar)); + } + + /** + * Create Arrow {@link Schema} object for the given JDBC {@link java.sql.ResultSetMetaData}. + * + * <p> + * If {@link JdbcToArrowConfig#shouldIncludeMetadata()} returns <code>true</code>, the following fields + * will be added to the {@link FieldType#getMetadata()}: + * <ul> + * <li>{@link Constants#SQL_CATALOG_NAME_KEY} representing {@link ResultSetMetaData#getCatalogName(int)}</li> + * <li>{@link Constants#SQL_TABLE_NAME_KEY} representing {@link ResultSetMetaData#getTableName(int)}</li> + * <li>{@link Constants#SQL_COLUMN_NAME_KEY} representing {@link ResultSetMetaData#getColumnLabel(int)}</li> + * <li>{@link Constants#SQL_TYPE_KEY} representing {@link ResultSetMetaData#getColumnTypeName(int)}</li> + * </ul> + * </p> + * <p> + * If any columns are of type {@link java.sql.Types#ARRAY}, the configuration object will be used to look up + * the array sub-type field. The {@link JdbcToArrowConfig#getArraySubTypeByColumnIndex(int)} method will be + * checked first, followed by the {@link JdbcToArrowConfig#getArraySubTypeByColumnName(String)} method. + * </p> + * + * @param rsmd The ResultSetMetaData containing the results, to read the JDBC metadata from. + * @param config The configuration to use when constructing the schema. + * @return {@link Schema} + * @throws SQLException on error + * @throws IllegalArgumentException if <code>rsmd</code> contains an {@link java.sql.Types#ARRAY} but the + * <code>config</code> does not have a sub-type definition for it. 
+ */ + public static Schema jdbcToArrowSchema(ResultSetMetaData rsmd, JdbcToArrowConfig config) throws SQLException { + Preconditions.checkNotNull(rsmd, "JDBC ResultSetMetaData object can't be null"); + Preconditions.checkNotNull(config, "The configuration object must not be null"); + + List<Field> fields = new ArrayList<>(); + int columnCount = rsmd.getColumnCount(); + for (int i = 1; i <= columnCount; i++) { + final String columnName = rsmd.getColumnLabel(i); + + final Map<String, String> metadata; + if (config.shouldIncludeMetadata()) { + metadata = new HashMap<>(); + metadata.put(Constants.SQL_CATALOG_NAME_KEY, rsmd.getCatalogName(i)); + metadata.put(Constants.SQL_TABLE_NAME_KEY, rsmd.getTableName(i)); + metadata.put(Constants.SQL_COLUMN_NAME_KEY, columnName); + metadata.put(Constants.SQL_TYPE_KEY, rsmd.getColumnTypeName(i)); + + } else { + metadata = null; + } + + final ArrowType arrowType = config.getJdbcToArrowTypeConverter().apply(new JdbcFieldInfo(rsmd, i)); + if (arrowType != null) { + final FieldType fieldType = new FieldType( + isColumnNullable(rsmd, i), arrowType, /* dictionary encoding */ null, metadata); + + List<Field> children = null; + if (arrowType.getTypeID() == ArrowType.List.TYPE_TYPE) { + final JdbcFieldInfo arrayFieldInfo = getJdbcFieldInfoForArraySubType(rsmd, i, config); + if (arrayFieldInfo == null) { + throw new IllegalArgumentException("Configuration does not provide a mapping for array column " + i); + } + children = new ArrayList<Field>(); + final ArrowType childType = config.getJdbcToArrowTypeConverter().apply(arrayFieldInfo); + children.add(new Field("child", FieldType.nullable(childType), null)); + } + + fields.add(new Field(columnName, fieldType, children)); + } + } + + return new Schema(fields, null); + } + + /* Uses the configuration to determine what the array sub-type JdbcFieldInfo is. + * If no sub-type can be found, returns null. 
+ */ + private static JdbcFieldInfo getJdbcFieldInfoForArraySubType( + ResultSetMetaData rsmd, + int arrayColumn, + JdbcToArrowConfig config) + throws SQLException { + + Preconditions.checkNotNull(rsmd, "ResultSet MetaData object cannot be null"); + Preconditions.checkNotNull(config, "Configuration must not be null"); + Preconditions.checkArgument( + arrayColumn > 0, + "ResultSetMetaData columns start with 1; column cannot be less than 1"); + Preconditions.checkArgument( + arrayColumn <= rsmd.getColumnCount(), + "Column number cannot be more than the number of columns"); + + JdbcFieldInfo fieldInfo = config.getArraySubTypeByColumnIndex(arrayColumn); + if (fieldInfo == null) { + fieldInfo = config.getArraySubTypeByColumnName(rsmd.getColumnLabel(arrayColumn)); + } + return fieldInfo; + } + + /** + * Iterate the given JDBC {@link ResultSet} object to fetch the data and transpose it to populate + * the given Arrow Vector objects. + * + * @param rs ResultSet to use to fetch the data from underlying database + * @param root Arrow {@link VectorSchemaRoot} object to populate + * @param calendar The calendar to use when reading {@link Date}, {@link Time}, or {@link Timestamp} + * data types from the {@link ResultSet}, or <code>null</code> if not converting. 
+ * @throws SQLException on error + */ + public static void jdbcToArrowVectors(ResultSet rs, VectorSchemaRoot root, Calendar calendar) + throws SQLException, IOException { + + Preconditions.checkNotNull(calendar, "Calendar object can't be null"); + + jdbcToArrowVectors(rs, root, new JdbcToArrowConfig(new RootAllocator(0), calendar)); + } + + static boolean isColumnNullable(ResultSet resultSet, int index) throws SQLException { + return isColumnNullable(resultSet.getMetaData(), index); + } + + static boolean isColumnNullable(ResultSetMetaData resultSetMetadata, int index) throws SQLException { + int nullableValue = resultSetMetadata.isNullable(index); + return nullableValue == ResultSetMetaData.columnNullable || + nullableValue == ResultSetMetaData.columnNullableUnknown; + } + + /** + * Iterate the given JDBC {@link ResultSet} object to fetch the data and transpose it to populate + * the given Arrow Vector objects. + * + * @param rs ResultSet to use to fetch the data from underlying database + * @param root Arrow {@link VectorSchemaRoot} object to populate + * @param config The configuration to use when reading the data. + * @throws SQLException on error + */ + public static void jdbcToArrowVectors(ResultSet rs, VectorSchemaRoot root, JdbcToArrowConfig config) + throws SQLException, IOException { + + ResultSetMetaData rsmd = rs.getMetaData(); + int columnCount = rsmd.getColumnCount(); + + JdbcConsumer[] consumers = new JdbcConsumer[columnCount]; + for (int i = 1; i <= columnCount; i++) { + FieldVector vector = root.getVector(rsmd.getColumnLabel(i)); + consumers[i - 1] = getConsumer(vector.getField().getType(), i, isColumnNullable(rs, i), vector, config); + } + + CompositeJdbcConsumer compositeConsumer = null; + // Only clean resources when occurs error, + // vectors within consumers are useful and users are responsible for its close. 
+ try { + compositeConsumer = new CompositeJdbcConsumer(consumers); + int readRowCount = 0; + if (config.getTargetBatchSize() == JdbcToArrowConfig.NO_LIMIT_BATCH_SIZE) { + while (rs.next()) { + ValueVectorUtility.ensureCapacity(root, readRowCount + 1); + compositeConsumer.consume(rs); + readRowCount++; + } + } else { + while (rs.next() && readRowCount < config.getTargetBatchSize()) { + compositeConsumer.consume(rs); + readRowCount++; + } + } + + root.setRowCount(readRowCount); + } catch (Exception e) { + // error occurs and clean up resources. + if (compositeConsumer != null) { + compositeConsumer.close(); + } + throw e; + } + } + + static JdbcConsumer getConsumer(ArrowType arrowType, int columnIndex, boolean nullable, + FieldVector vector, JdbcToArrowConfig config) { + final Calendar calendar = config.getCalendar(); + + switch (arrowType.getTypeID()) { + case Bool: + return BitConsumer.createConsumer((BitVector) vector, columnIndex, nullable); + case Int: + switch (((ArrowType.Int) arrowType).getBitWidth()) { + case 8: + return TinyIntConsumer.createConsumer((TinyIntVector) vector, columnIndex, nullable); + case 16: + return SmallIntConsumer.createConsumer((SmallIntVector) vector, columnIndex, nullable); + case 32: + return IntConsumer.createConsumer((IntVector) vector, columnIndex, nullable); + case 64: + return BigIntConsumer.createConsumer((BigIntVector) vector, columnIndex, nullable); + default: + return null; + } + case Decimal: + return DecimalConsumer.createConsumer((DecimalVector) vector, columnIndex, nullable); + case FloatingPoint: + switch (((ArrowType.FloatingPoint) arrowType).getPrecision()) { + case SINGLE: + return FloatConsumer.createConsumer((Float4Vector) vector, columnIndex, nullable); + case DOUBLE: + return DoubleConsumer.createConsumer((Float8Vector) vector, columnIndex, nullable); + default: + return null; + } + case Utf8: + case LargeUtf8: + return VarCharConsumer.createConsumer((VarCharVector) vector, columnIndex, nullable); + case Binary: 
+ case LargeBinary: + return BinaryConsumer.createConsumer((VarBinaryVector) vector, columnIndex, nullable); + case Date: + return DateConsumer.createConsumer((DateDayVector) vector, columnIndex, nullable, calendar); + case Time: + return TimeConsumer.createConsumer((TimeMilliVector) vector, columnIndex, nullable, calendar); + case Timestamp: + if (config.getCalendar() == null) { + return TimestampConsumer.createConsumer((TimeStampMilliVector) vector, columnIndex, nullable); + } else { + return TimestampTZConsumer.createConsumer((TimeStampMilliTZVector) vector, columnIndex, nullable, calendar); + } + case List: + FieldVector childVector = ((ListVector) vector).getDataVector(); + JdbcConsumer delegate = getConsumer(childVector.getField().getType(), JDBC_ARRAY_VALUE_COLUMN, + childVector.getField().isNullable(), childVector, config); + return ArrayConsumer.createConsumer((ListVector) vector, delegate, columnIndex, nullable); + case Null: + return new NullConsumer((NullVector) vector); + default: + // no-op, shouldn't get here + throw new UnsupportedOperationException(); + } + } +} diff --git a/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/ArrayConsumer.java b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/ArrayConsumer.java new file mode 100644 index 000000000..ed12f818a --- /dev/null +++ b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/ArrayConsumer.java @@ -0,0 +1,134 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.adapter.jdbc.consumer; + +import java.io.IOException; +import java.sql.Array; +import java.sql.ResultSet; +import java.sql.SQLException; + +import org.apache.arrow.vector.ValueVector; +import org.apache.arrow.vector.complex.ListVector; + +/** + * Consumer which consume array type values from {@link ResultSet}. + * Write the data to {@link org.apache.arrow.vector.complex.ListVector}. + */ +public abstract class ArrayConsumer extends BaseConsumer<ListVector> { + + /** + * Creates a consumer for {@link ListVector}. + */ + public static ArrayConsumer createConsumer( + ListVector vector, JdbcConsumer delegate, int index, boolean nullable) { + if (nullable) { + return new ArrayConsumer.NullableArrayConsumer(vector, delegate, index); + } else { + return new ArrayConsumer.NonNullableArrayConsumer(vector, delegate, index); + } + } + + protected final JdbcConsumer delegate; + + private final ValueVector innerVector; + + protected int innerVectorIndex = 0; + + /** + * Instantiate a ArrayConsumer. + */ + public ArrayConsumer(ListVector vector, JdbcConsumer delegate, int index) { + super(vector, index); + this.delegate = delegate; + this.innerVector = vector.getDataVector(); + } + + @Override + public void close() throws Exception { + this.vector.close(); + this.delegate.close(); + } + + void ensureInnerVectorCapacity(int targetCapacity) { + while (innerVector.getValueCapacity() < targetCapacity) { + innerVector.reAlloc(); + } + } + + /** + * Nullable consumer for {@link ListVector}. 
+ */ + static class NullableArrayConsumer extends ArrayConsumer { + + /** + * Instantiate a nullable array consumer. + */ + public NullableArrayConsumer(ListVector vector, JdbcConsumer delegate, int index) { + super(vector, delegate, index); + } + + @Override + public void consume(ResultSet resultSet) throws SQLException, IOException { + final Array array = resultSet.getArray(columnIndexInResultSet); + if (!resultSet.wasNull()) { + vector.startNewValue(currentIndex); + int count = 0; + try (ResultSet rs = array.getResultSet()) { + while (rs.next()) { + ensureInnerVectorCapacity(innerVectorIndex + count + 1); + delegate.consume(rs); + count++; + } + } + vector.endValue(currentIndex, count); + innerVectorIndex += count; + } + currentIndex++; + } + } + + /** + * Non-nullable consumer for {@link ListVector}. + */ + static class NonNullableArrayConsumer extends ArrayConsumer { + + /** + * Instantiate a nullable array consumer. + */ + public NonNullableArrayConsumer(ListVector vector, JdbcConsumer delegate, int index) { + super(vector, delegate, index); + } + + @Override + public void consume(ResultSet resultSet) throws SQLException, IOException { + final Array array = resultSet.getArray(columnIndexInResultSet); + vector.startNewValue(currentIndex); + int count = 0; + try (ResultSet rs = array.getResultSet()) { + while (rs.next()) { + ensureInnerVectorCapacity(innerVectorIndex + count + 1); + delegate.consume(rs); + count++; + } + } + vector.endValue(currentIndex, count); + innerVectorIndex += count; + currentIndex++; + } + } +} diff --git a/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BaseConsumer.java b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BaseConsumer.java new file mode 100644 index 000000000..2db128d3e --- /dev/null +++ b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BaseConsumer.java @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation 
(ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.adapter.jdbc.consumer; + +import org.apache.arrow.vector.ValueVector; + +/** + * Base class for all consumers. + * @param <V> vector type. + */ +public abstract class BaseConsumer<V extends ValueVector> implements JdbcConsumer<V> { + + protected V vector; + + protected final int columnIndexInResultSet; + + protected int currentIndex; + + /** + * Constructs a new consumer. + * @param vector the underlying vector for the consumer. + * @param index the column id for the consumer. 
+ */ + public BaseConsumer(V vector, int index) { + this.vector = vector; + this.columnIndexInResultSet = index; + } + + @Override + public void close() throws Exception { + this.vector.close(); + } + + @Override + public void resetValueVector(V vector) { + this.vector = vector; + this.currentIndex = 0; + } +} diff --git a/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BigIntConsumer.java b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BigIntConsumer.java new file mode 100644 index 000000000..19c8efa91 --- /dev/null +++ b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BigIntConsumer.java @@ -0,0 +1,87 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.adapter.jdbc.consumer; + +import java.sql.ResultSet; +import java.sql.SQLException; + +import org.apache.arrow.vector.BigIntVector; + +/** + * Consumer which consume bigint type values from {@link ResultSet}. + * Write the data to {@link org.apache.arrow.vector.BigIntVector}. + */ +public class BigIntConsumer { + + /** + * Creates a consumer for {@link BigIntVector}. 
+ */ + public static JdbcConsumer<BigIntVector> createConsumer(BigIntVector vector, int index, boolean nullable) { + if (nullable) { + return new NullableBigIntConsumer(vector, index); + } else { + return new NonNullableBigIntConsumer(vector, index); + } + } + + /** + * Nullable consumer for big int. + */ + static class NullableBigIntConsumer extends BaseConsumer<BigIntVector> { + + /** + * Instantiate a BigIntConsumer. + */ + public NullableBigIntConsumer(BigIntVector vector, int index) { + super(vector, index); + } + + @Override + public void consume(ResultSet resultSet) throws SQLException { + long value = resultSet.getLong(columnIndexInResultSet); + if (!resultSet.wasNull()) { + // for fixed width vectors, we have allocated enough memory proactively, + // so there is no need to call the setSafe method here. + vector.set(currentIndex, value); + } + currentIndex++; + } + } + + /** + * Non-nullable consumer for big int. + */ + static class NonNullableBigIntConsumer extends BaseConsumer<BigIntVector> { + + /** + * Instantiate a BigIntConsumer. + */ + public NonNullableBigIntConsumer(BigIntVector vector, int index) { + super(vector, index); + } + + @Override + public void consume(ResultSet resultSet) throws SQLException { + long value = resultSet.getLong(columnIndexInResultSet); + // for fixed width vectors, we have allocated enough memory proactively, + // so there is no need to call the setSafe method here. 
+ vector.set(currentIndex, value); + currentIndex++; + } + } +} diff --git a/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BinaryConsumer.java b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BinaryConsumer.java new file mode 100644 index 000000000..8c5f61169 --- /dev/null +++ b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BinaryConsumer.java @@ -0,0 +1,135 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.adapter.jdbc.consumer; + +import java.io.IOException; +import java.io.InputStream; +import java.sql.ResultSet; +import java.sql.SQLException; + +import org.apache.arrow.memory.ArrowBuf; +import org.apache.arrow.vector.BitVectorHelper; +import org.apache.arrow.vector.VarBinaryVector; + +/** + * Consumer which consume binary type values from {@link ResultSet}. + * Write the data to {@link org.apache.arrow.vector.VarBinaryVector}. + */ +public abstract class BinaryConsumer extends BaseConsumer<VarBinaryVector> { + + /** + * Creates a consumer for {@link VarBinaryVector}. 
+ */ + public static BinaryConsumer createConsumer(VarBinaryVector vector, int index, boolean nullable) { + if (nullable) { + return new NullableBinaryConsumer(vector, index); + } else { + return new NonNullableBinaryConsumer(vector, index); + } + } + + private final byte[] reuseBytes = new byte[1024]; + + /** + * Instantiate a BinaryConsumer. + */ + public BinaryConsumer(VarBinaryVector vector, int index) { + super(vector, index); + if (vector != null) { + vector.allocateNewSafe(); + } + } + + /** + * consume a InputStream. + */ + public void consume(InputStream is) throws IOException { + if (is != null) { + while (currentIndex >= vector.getValueCapacity()) { + vector.reallocValidityAndOffsetBuffers(); + } + final int startOffset = vector.getStartOffset(currentIndex); + final ArrowBuf offsetBuffer = vector.getOffsetBuffer(); + int dataLength = 0; + int read; + while ((read = is.read(reuseBytes)) != -1) { + while (vector.getDataBuffer().capacity() < (startOffset + dataLength + read)) { + vector.reallocDataBuffer(); + } + vector.getDataBuffer().setBytes(startOffset + dataLength, reuseBytes, 0, read); + dataLength += read; + } + offsetBuffer.setInt((currentIndex + 1) * VarBinaryVector.OFFSET_WIDTH, startOffset + dataLength); + BitVectorHelper.setBit(vector.getValidityBuffer(), currentIndex); + vector.setLastSet(currentIndex); + } + } + + public void moveWriterPosition() { + currentIndex++; + } + + @Override + public void resetValueVector(VarBinaryVector vector) { + this.vector = vector; + this.vector.allocateNewSafe(); + this.currentIndex = 0; + } + + /** + * Consumer for nullable binary data. + */ + static class NullableBinaryConsumer extends BinaryConsumer { + + /** + * Instantiate a BinaryConsumer. 
+ */ + public NullableBinaryConsumer(VarBinaryVector vector, int index) { + super(vector, index); + } + + @Override + public void consume(ResultSet resultSet) throws SQLException, IOException { + InputStream is = resultSet.getBinaryStream(columnIndexInResultSet); + if (!resultSet.wasNull()) { + consume(is); + } + moveWriterPosition(); + } + } + + /** + * Consumer for non-nullable binary data. + */ + static class NonNullableBinaryConsumer extends BinaryConsumer { + + /** + * Instantiate a BinaryConsumer. + */ + public NonNullableBinaryConsumer(VarBinaryVector vector, int index) { + super(vector, index); + } + + @Override + public void consume(ResultSet resultSet) throws SQLException, IOException { + InputStream is = resultSet.getBinaryStream(columnIndexInResultSet); + consume(is); + moveWriterPosition(); + } + } +} diff --git a/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BitConsumer.java b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BitConsumer.java new file mode 100644 index 000000000..d2d94d0a4 --- /dev/null +++ b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BitConsumer.java @@ -0,0 +1,87 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.adapter.jdbc.consumer; + +import java.sql.ResultSet; +import java.sql.SQLException; + +import org.apache.arrow.vector.BitVector; + +/** + * Consumer which consume bit type values from {@link ResultSet}. + * Write the data to {@link BitVector}. + */ +public class BitConsumer { + + /** + * Creates a consumer for {@link BitVector}. + */ + public static JdbcConsumer<BitVector> createConsumer(BitVector vector, int index, boolean nullable) { + if (nullable) { + return new NullableBitConsumer(vector, index); + } else { + return new NonNullableBitConsumer(vector, index); + } + } + + /** + * Nullable consumer for {@link BitVector}. + */ + static class NullableBitConsumer extends BaseConsumer<BitVector> { + + /** + * Instantiate a BitConsumer. + */ + public NullableBitConsumer(BitVector vector, int index) { + super(vector, index); + } + + @Override + public void consume(ResultSet resultSet) throws SQLException { + boolean value = resultSet.getBoolean(columnIndexInResultSet); + if (!resultSet.wasNull()) { + // for fixed width vectors, we have allocated enough memory proactively, + // so there is no need to call the setSafe method here. + vector.set(currentIndex, value ? 1 : 0); + } + currentIndex++; + } + } + + /** + * Non-nullable consumer for {@link BitVector}. + */ + static class NonNullableBitConsumer extends BaseConsumer<BitVector> { + + /** + * Instantiate a BitConsumer. + */ + public NonNullableBitConsumer(BitVector vector, int index) { + super(vector, index); + } + + @Override + public void consume(ResultSet resultSet) throws SQLException { + boolean value = resultSet.getBoolean(columnIndexInResultSet); + // for fixed width vectors, we have allocated enough memory proactively, + // so there is no need to call the setSafe method here. + vector.set(currentIndex, value ? 
1 : 0); + currentIndex++; + } + } +} diff --git a/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BlobConsumer.java b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BlobConsumer.java new file mode 100644 index 000000000..e57ecdf91 --- /dev/null +++ b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BlobConsumer.java @@ -0,0 +1,72 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.adapter.jdbc.consumer; + +import java.io.IOException; +import java.sql.Blob; +import java.sql.ResultSet; +import java.sql.SQLException; + +import org.apache.arrow.vector.VarBinaryVector; + +/** + * Consumer which consume blob type values from {@link ResultSet}. + * Write the data to {@link VarBinaryVector}. + */ +public class BlobConsumer extends BaseConsumer<VarBinaryVector> { + + private BinaryConsumer delegate; + + private final boolean nullable; + + /** + * Creates a consumer for {@link VarBinaryVector}. + */ + public static BlobConsumer createConsumer( + BinaryConsumer delegate, int index, boolean nullable) { + return new BlobConsumer(delegate, index, nullable); + } + + /** + * Instantiate a BlobConsumer. 
+ */ + public BlobConsumer(BinaryConsumer delegate, int index, boolean nullable) { + super(null, index); + this.delegate = delegate; + this.nullable = nullable; + } + + @Override + public void consume(ResultSet resultSet) throws SQLException, IOException { + Blob blob = resultSet.getBlob(columnIndexInResultSet); + if (blob != null) { + delegate.consume(blob.getBinaryStream()); + } + delegate.moveWriterPosition(); + } + + @Override + public void close() throws Exception { + delegate.close(); + } + + @Override + public void resetValueVector(VarBinaryVector vector) { + delegate = BinaryConsumer.createConsumer(vector, columnIndexInResultSet, nullable); + } +} diff --git a/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/ClobConsumer.java b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/ClobConsumer.java new file mode 100644 index 000000000..4694f1c7e --- /dev/null +++ b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/ClobConsumer.java @@ -0,0 +1,161 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.arrow.adapter.jdbc.consumer; + +import java.nio.charset.StandardCharsets; +import java.sql.Clob; +import java.sql.ResultSet; +import java.sql.SQLException; + +import org.apache.arrow.memory.ArrowBuf; +import org.apache.arrow.vector.BitVectorHelper; +import org.apache.arrow.vector.VarCharVector; + +import io.netty.util.internal.PlatformDependent; + +/** + * Consumer which consume clob type values from {@link ResultSet}. + * Write the data to {@link org.apache.arrow.vector.VarCharVector}. + */ +public abstract class ClobConsumer extends BaseConsumer<VarCharVector> { + + /** + * Creates a consumer for {@link VarCharVector}. + */ + public static ClobConsumer createConsumer(VarCharVector vector, int index, boolean nullable) { + if (nullable) { + return new NullableClobConsumer(vector, index); + } else { + return new NonNullableClobConsumer(vector, index); + } + } + + private static final int BUFFER_SIZE = 256; + + /** + * Instantiate a ClobConsumer. + */ + public ClobConsumer(VarCharVector vector, int index) { + super(vector, index); + if (vector != null) { + vector.allocateNewSafe(); + } + } + + @Override + public void resetValueVector(VarCharVector vector) { + this.vector = vector; + this.vector.allocateNewSafe(); + this.currentIndex = 0; + } + + /** + * Nullable consumer for clob data. + */ + static class NullableClobConsumer extends ClobConsumer { + + /** + * Instantiate a ClobConsumer. + */ + public NullableClobConsumer(VarCharVector vector, int index) { + super(vector, index); + } + + @Override + public void consume(ResultSet resultSet) throws SQLException { + Clob clob = resultSet.getClob(columnIndexInResultSet); + if (!resultSet.wasNull()) { + if (clob != null) { + long length = clob.length(); + + int read = 1; + int readSize = length < BUFFER_SIZE ? 
(int) length : BUFFER_SIZE; + int totalBytes = 0; + + ArrowBuf dataBuffer = vector.getDataBuffer(); + ArrowBuf offsetBuffer = vector.getOffsetBuffer(); + int startIndex = offsetBuffer.getInt(currentIndex * 4); + while (read <= length) { + String str = clob.getSubString(read, readSize); + byte[] bytes = str.getBytes(StandardCharsets.UTF_8); + + while ((dataBuffer.writerIndex() + bytes.length) > dataBuffer.capacity()) { + vector.reallocDataBuffer(); + } + PlatformDependent.copyMemory(bytes, 0, + dataBuffer.memoryAddress() + startIndex + totalBytes, bytes.length); + + totalBytes += bytes.length; + read += readSize; + } + offsetBuffer.setInt((currentIndex + 1) * 4, startIndex + totalBytes); + BitVectorHelper.setBit(vector.getValidityBuffer(), currentIndex); + vector.setLastSet(currentIndex); + } + } + currentIndex++; + } + } + + /** + * Non-nullable consumer for clob data. + */ + static class NonNullableClobConsumer extends ClobConsumer { + + /** + * Instantiate a ClobConsumer. + */ + public NonNullableClobConsumer(VarCharVector vector, int index) { + super(vector, index); + } + + @Override + public void consume(ResultSet resultSet) throws SQLException { + Clob clob = resultSet.getClob(columnIndexInResultSet); + if (clob != null) { + long length = clob.length(); + + int read = 1; + int readSize = length < BUFFER_SIZE ? 
(int) length : BUFFER_SIZE; + int totalBytes = 0; + + ArrowBuf dataBuffer = vector.getDataBuffer(); + ArrowBuf offsetBuffer = vector.getOffsetBuffer(); + int startIndex = offsetBuffer.getInt(currentIndex * 4); + while (read <= length) { + String str = clob.getSubString(read, readSize); + byte[] bytes = str.getBytes(StandardCharsets.UTF_8); + + while ((dataBuffer.writerIndex() + bytes.length) > dataBuffer.capacity()) { + vector.reallocDataBuffer(); + } + PlatformDependent.copyMemory(bytes, 0, + dataBuffer.memoryAddress() + startIndex + totalBytes, bytes.length); + + totalBytes += bytes.length; + read += readSize; + } + offsetBuffer.setInt((currentIndex + 1) * 4, startIndex + totalBytes); + BitVectorHelper.setBit(vector.getValidityBuffer(), currentIndex); + vector.setLastSet(currentIndex); + } + + currentIndex++; + } + } +} diff --git a/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/CompositeJdbcConsumer.java b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/CompositeJdbcConsumer.java new file mode 100644 index 000000000..99cca71b1 --- /dev/null +++ b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/CompositeJdbcConsumer.java @@ -0,0 +1,76 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.adapter.jdbc.consumer; + +import java.io.IOException; +import java.sql.ResultSet; +import java.sql.SQLException; + +import org.apache.arrow.util.AutoCloseables; +import org.apache.arrow.vector.ValueVector; +import org.apache.arrow.vector.VectorSchemaRoot; + +/** + * Composite consumer which hold all consumers. + * It manages the consume and cleanup process. + */ +public class CompositeJdbcConsumer implements JdbcConsumer { + + private final JdbcConsumer[] consumers; + + /** + * Construct an instance. + */ + public CompositeJdbcConsumer(JdbcConsumer[] consumers) { + this.consumers = consumers; + } + + @Override + public void consume(ResultSet rs) throws SQLException, IOException { + for (int i = 0; i < consumers.length; i++) { + consumers[i].consume(rs); + } + } + + @Override + public void close() { + + try { + // clean up + AutoCloseables.close(consumers); + } catch (Exception e) { + throw new RuntimeException("Error occurred while releasing resources.", e); + } + + } + + @Override + public void resetValueVector(ValueVector vector) { + + } + + /** + * Reset inner consumers through vectors in the vector schema root. 
+ */ + public void resetVectorSchemaRoot(VectorSchemaRoot root) { + assert root.getFieldVectors().size() == consumers.length; + for (int i = 0; i < consumers.length; i++) { + consumers[i].resetValueVector(root.getVector(i)); + } + } +} diff --git a/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/DateConsumer.java b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/DateConsumer.java new file mode 100644 index 000000000..b9b83dacc --- /dev/null +++ b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/DateConsumer.java @@ -0,0 +1,116 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.adapter.jdbc.consumer; + +import java.sql.Date; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.util.Calendar; +import java.util.concurrent.TimeUnit; + +import org.apache.arrow.vector.DateDayVector; +import org.apache.arrow.vector.DateMilliVector; + +/** + * Consumer which consumes date type values from {@link ResultSet}. + * Writes the data to {@link org.apache.arrow.vector.DateDayVector}. + */ +public class DateConsumer { + + /** + * Creates a consumer for {@link DateDayVector} (not {@link DateMilliVector}).
+ */ + public static JdbcConsumer<DateDayVector> createConsumer( + DateDayVector vector, int index, boolean nullable, Calendar calendar) { + if (nullable) { + return new NullableDateConsumer(vector, index, calendar); + } else { + return new NonNullableDateConsumer(vector, index, calendar); + } + } + + /** + * Nullable consumer for date. + */ + static class NullableDateConsumer extends BaseConsumer<DateDayVector> { + + protected final Calendar calendar; + + /** + * Instantiate a DateConsumer. + */ + public NullableDateConsumer(DateDayVector vector, int index) { + this(vector, index, /* calendar */null); + } + + /** + * Instantiate a DateConsumer. + */ + public NullableDateConsumer(DateDayVector vector, int index, Calendar calendar) { + super(vector, index); + this.calendar = calendar; + } + + @Override + public void consume(ResultSet resultSet) throws SQLException { + Date date = calendar == null ? resultSet.getDate(columnIndexInResultSet) : + resultSet.getDate(columnIndexInResultSet, calendar); + if (!resultSet.wasNull()) { + // for fixed width vectors, we have allocated enough memory proactively, + // so there is no need to call the setSafe method here. + vector.set(currentIndex, Math.toIntExact(TimeUnit.MILLISECONDS.toDays(date.getTime()))); + } + currentIndex++; + } + } + + /** + * Non-nullable consumer for date. + */ + static class NonNullableDateConsumer extends BaseConsumer<DateDayVector> { + + protected final Calendar calendar; + + /** + * Instantiate a DateConsumer. + */ + public NonNullableDateConsumer(DateDayVector vector, int index) { + this(vector, index, /* calendar */null); + } + + /** + * Instantiate a DateConsumer. + */ + public NonNullableDateConsumer(DateDayVector vector, int index, Calendar calendar) { + super(vector, index); + this.calendar = calendar; + } + + @Override + public void consume(ResultSet resultSet) throws SQLException { + Date date = calendar == null ? 
resultSet.getDate(columnIndexInResultSet) : + resultSet.getDate(columnIndexInResultSet, calendar); + // for fixed width vectors, we have allocated enough memory proactively, + // so there is no need to call the setSafe method here. + vector.set(currentIndex, Math.toIntExact(TimeUnit.MILLISECONDS.toDays(date.getTime()))); + currentIndex++; + } + } +} + + diff --git a/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/DecimalConsumer.java b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/DecimalConsumer.java new file mode 100644 index 000000000..4498fdecc --- /dev/null +++ b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/DecimalConsumer.java @@ -0,0 +1,88 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.adapter.jdbc.consumer; + +import java.math.BigDecimal; +import java.sql.ResultSet; +import java.sql.SQLException; + +import org.apache.arrow.vector.DecimalVector; + +/** + * Consumer which consume decimal type values from {@link ResultSet}. + * Write the data to {@link org.apache.arrow.vector.DecimalVector}. 
+ */ +public class DecimalConsumer { + + /** + * Creates a consumer for {@link DecimalVector}. + */ + public static JdbcConsumer<DecimalVector> createConsumer(DecimalVector vector, int index, boolean nullable) { + if (nullable) { + return new NullableDecimalConsumer(vector, index); + } else { + return new NonNullableDecimalConsumer(vector, index); + } + } + + /** + * Consumer for nullable decimal. + */ + static class NullableDecimalConsumer extends BaseConsumer<DecimalVector> { + + /** + * Instantiate a DecimalConsumer. + */ + public NullableDecimalConsumer(DecimalVector vector, int index) { + super(vector, index); + } + + @Override + public void consume(ResultSet resultSet) throws SQLException { + BigDecimal value = resultSet.getBigDecimal(columnIndexInResultSet); + if (!resultSet.wasNull()) { + // for fixed width vectors, we have allocated enough memory proactively, + // so there is no need to call the setSafe method here. + vector.set(currentIndex, value); + } + currentIndex++; + } + } + + /** + * Consumer for non-nullable decimal. + */ + static class NonNullableDecimalConsumer extends BaseConsumer<DecimalVector> { + + /** + * Instantiate a DecimalConsumer. + */ + public NonNullableDecimalConsumer(DecimalVector vector, int index) { + super(vector, index); + } + + @Override + public void consume(ResultSet resultSet) throws SQLException { + BigDecimal value = resultSet.getBigDecimal(columnIndexInResultSet); + // for fixed width vectors, we have allocated enough memory proactively, + // so there is no need to call the setSafe method here. 
+ vector.set(currentIndex, value); + currentIndex++; + } + } +} diff --git a/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/DoubleConsumer.java b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/DoubleConsumer.java new file mode 100644 index 000000000..e3db95d15 --- /dev/null +++ b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/DoubleConsumer.java @@ -0,0 +1,87 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.adapter.jdbc.consumer; + +import java.sql.ResultSet; +import java.sql.SQLException; + +import org.apache.arrow.vector.Float8Vector; + +/** + * Consumer which consume double type values from {@link ResultSet}. + * Write the data to {@link org.apache.arrow.vector.Float8Vector}. + */ +public class DoubleConsumer { + + /** + * Creates a consumer for {@link Float8Vector}. + */ + public static JdbcConsumer<Float8Vector> createConsumer(Float8Vector vector, int index, boolean nullable) { + if (nullable) { + return new NullableDoubleConsumer(vector, index); + } else { + return new NonNullableDoubleConsumer(vector, index); + } + } + + /** + * Nullable double consumer. 
+ */ + static class NullableDoubleConsumer extends BaseConsumer<Float8Vector> { + + /** + * Instantiate a DoubleConsumer. + */ + public NullableDoubleConsumer(Float8Vector vector, int index) { + super(vector, index); + } + + @Override + public void consume(ResultSet resultSet) throws SQLException { + double value = resultSet.getDouble(columnIndexInResultSet); + if (!resultSet.wasNull()) { + // for fixed width vectors, we have allocated enough memory proactively, + // so there is no need to call the setSafe method here. + vector.set(currentIndex, value); + } + currentIndex++; + } + } + + /** + * Non-nullable double consumer. + */ + static class NonNullableDoubleConsumer extends BaseConsumer<Float8Vector> { + + /** + * Instantiate a DoubleConsumer. + */ + public NonNullableDoubleConsumer(Float8Vector vector, int index) { + super(vector, index); + } + + @Override + public void consume(ResultSet resultSet) throws SQLException { + double value = resultSet.getDouble(columnIndexInResultSet); + // for fixed width vectors, we have allocated enough memory proactively, + // so there is no need to call the setSafe method here. + vector.set(currentIndex, value); + currentIndex++; + } + } +} diff --git a/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/FloatConsumer.java b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/FloatConsumer.java new file mode 100644 index 000000000..830348fe9 --- /dev/null +++ b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/FloatConsumer.java @@ -0,0 +1,87 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.adapter.jdbc.consumer; + +import java.sql.ResultSet; +import java.sql.SQLException; + +import org.apache.arrow.vector.Float4Vector; + +/** + * Consumer which consume float type values from {@link ResultSet}. + * Write the data to {@link org.apache.arrow.vector.Float4Vector}. + */ +public class FloatConsumer { + + /** + * Creates a consumer for {@link Float4Vector}. + */ + public static JdbcConsumer<Float4Vector> createConsumer(Float4Vector vector, int index, boolean nullable) { + if (nullable) { + return new NullableFloatConsumer(vector, index); + } else { + return new NonNullableFloatConsumer(vector, index); + } + } + + /** + * Nullable float consumer. + */ + static class NullableFloatConsumer extends BaseConsumer<Float4Vector> { + + /** + * Instantiate a FloatConsumer. + */ + public NullableFloatConsumer(Float4Vector vector, int index) { + super(vector, index); + } + + @Override + public void consume(ResultSet resultSet) throws SQLException { + float value = resultSet.getFloat(columnIndexInResultSet); + if (!resultSet.wasNull()) { + // for fixed width vectors, we have allocated enough memory proactively, + // so there is no need to call the setSafe method here. + vector.set(currentIndex, value); + } + currentIndex++; + } + } + + /** + * Non-nullable float consumer. + */ + static class NonNullableFloatConsumer extends BaseConsumer<Float4Vector> { + + /** + * Instantiate a FloatConsumer. 
+ */ + public NonNullableFloatConsumer(Float4Vector vector, int index) { + super(vector, index); + } + + @Override + public void consume(ResultSet resultSet) throws SQLException { + float value = resultSet.getFloat(columnIndexInResultSet); + // for fixed width vectors, we have allocated enough memory proactively, + // so there is no need to call the setSafe method here. + vector.set(currentIndex, value); + currentIndex++; + } + } +} diff --git a/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/IntConsumer.java b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/IntConsumer.java new file mode 100644 index 000000000..4e537d682 --- /dev/null +++ b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/IntConsumer.java @@ -0,0 +1,87 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.adapter.jdbc.consumer; + +import java.sql.ResultSet; +import java.sql.SQLException; + +import org.apache.arrow.vector.IntVector; + +/** + * Consumer which consume int type values from {@link ResultSet}. + * Write the data to {@link org.apache.arrow.vector.IntVector}. 
+ */ +public class IntConsumer { + + /** + * Creates a consumer for {@link IntVector}. + */ + public static JdbcConsumer<IntVector> createConsumer(IntVector vector, int index, boolean nullable) { + if (nullable) { + return new NullableIntConsumer(vector, index); + } else { + return new NonNullableIntConsumer(vector, index); + } + } + + /** + * Nullable consumer for int. + */ + static class NullableIntConsumer extends BaseConsumer<IntVector> { + + /** + * Instantiate an IntConsumer. + */ + public NullableIntConsumer(IntVector vector, int index) { + super(vector, index); + } + + @Override + public void consume(ResultSet resultSet) throws SQLException { + int value = resultSet.getInt(columnIndexInResultSet); + if (!resultSet.wasNull()) { + // for fixed width vectors, we have allocated enough memory proactively, + // so there is no need to call the setSafe method here. + vector.set(currentIndex, value); + } + currentIndex++; + } + } + + /** + * Non-nullable consumer for int. + */ + static class NonNullableIntConsumer extends BaseConsumer<IntVector> { + + /** + * Instantiate an IntConsumer. + */ + public NonNullableIntConsumer(IntVector vector, int index) { + super(vector, index); + } + + @Override + public void consume(ResultSet resultSet) throws SQLException { + int value = resultSet.getInt(columnIndexInResultSet); + // for fixed width vectors, we have allocated enough memory proactively, + // so there is no need to call the setSafe method here.
+ vector.set(currentIndex, value); + currentIndex++; + } + } +} diff --git a/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/JdbcConsumer.java b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/JdbcConsumer.java new file mode 100644 index 000000000..480dfe3a1 --- /dev/null +++ b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/JdbcConsumer.java @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.adapter.jdbc.consumer; + +import java.io.IOException; +import java.sql.ResultSet; +import java.sql.SQLException; + +import org.apache.arrow.vector.ValueVector; + +/** + * An abstraction that is used to consume values from {@link ResultSet}. + * @param <T> The vector type within the consumer or its delegate, used for partial consumption. + */ +public interface JdbcConsumer<T extends ValueVector> extends AutoCloseable { + + /** + * Consume a value of a specific type from {@link ResultSet} and write it to the vector. + */ + void consume(ResultSet resultSet) throws SQLException, IOException; + + /** + * Close this consumer and do cleanup work, such as clearing a reused ArrowBuf.
+ */ + void close() throws Exception; + + /** + * Reset the vector within the consumer, to support partial reads. + */ + void resetValueVector(T vector); +} diff --git a/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/NullConsumer.java b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/NullConsumer.java new file mode 100644 index 000000000..a79a029f4 --- /dev/null +++ b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/NullConsumer.java @@ -0,0 +1,38 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.adapter.jdbc.consumer; + +import java.sql.ResultSet; +import java.sql.SQLException; + +import org.apache.arrow.vector.NullVector; + +/** + * Consumer which consumes null type values from ResultSet. + * Corresponds to {@link org.apache.arrow.vector.NullVector}.
+ */ +public class NullConsumer extends BaseConsumer<NullVector> { + + public NullConsumer(NullVector vector) { + super(vector, 0); + } + + @Override + public void consume(ResultSet resultSet) throws SQLException { + } +} diff --git a/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/SmallIntConsumer.java b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/SmallIntConsumer.java new file mode 100644 index 000000000..2edb3605b --- /dev/null +++ b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/SmallIntConsumer.java @@ -0,0 +1,87 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.adapter.jdbc.consumer; + +import java.sql.ResultSet; +import java.sql.SQLException; + +import org.apache.arrow.vector.SmallIntVector; + +/** + * Consumer which consume smallInt type values from {@link ResultSet}. + * Write the data to {@link org.apache.arrow.vector.SmallIntVector}. + */ +public class SmallIntConsumer { + + /** + * Creates a consumer for {@link SmallIntVector}. 
+ */ + public static BaseConsumer<SmallIntVector> createConsumer(SmallIntVector vector, int index, boolean nullable) { + if (nullable) { + return new NullableSmallIntConsumer(vector, index); + } else { + return new NonNullableSmallIntConsumer(vector, index); + } + } + + /** + * Nullable consumer for small int. + */ + static class NullableSmallIntConsumer extends BaseConsumer<SmallIntVector> { + + /** + * Instantiate a SmallIntConsumer. + */ + public NullableSmallIntConsumer(SmallIntVector vector, int index) { + super(vector, index); + } + + @Override + public void consume(ResultSet resultSet) throws SQLException { + short value = resultSet.getShort(columnIndexInResultSet); + if (!resultSet.wasNull()) { + // for fixed width vectors, we have allocated enough memory proactively, + // so there is no need to call the setSafe method here. + vector.set(currentIndex, value); + } + currentIndex++; + } + } + + /** + * Non-nullable consumer for small int. + */ + static class NonNullableSmallIntConsumer extends BaseConsumer<SmallIntVector> { + + /** + * Instantiate a SmallIntConsumer. + */ + public NonNullableSmallIntConsumer(SmallIntVector vector, int index) { + super(vector, index); + } + + @Override + public void consume(ResultSet resultSet) throws SQLException { + short value = resultSet.getShort(columnIndexInResultSet); + // for fixed width vectors, we have allocated enough memory proactively, + // so there is no need to call the setSafe method here. 
+ vector.set(currentIndex, value); + currentIndex++; + } + } +} diff --git a/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/TimeConsumer.java b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/TimeConsumer.java new file mode 100644 index 000000000..4fa15ad79 --- /dev/null +++ b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/TimeConsumer.java @@ -0,0 +1,112 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.adapter.jdbc.consumer; + +import java.sql.ResultSet; +import java.sql.SQLException; +import java.sql.Time; +import java.util.Calendar; + +import org.apache.arrow.vector.TimeMilliVector; + +/** + * Consumer which consume time type values from {@link ResultSet}. + * Write the data to {@link org.apache.arrow.vector.TimeMilliVector}. + */ +public abstract class TimeConsumer { + + /** + * Creates a consumer for {@link TimeMilliVector}. 
+ */ + public static JdbcConsumer<TimeMilliVector> createConsumer( + TimeMilliVector vector, int index, boolean nullable, Calendar calendar) { + if (nullable) { + return new NullableTimeConsumer(vector, index, calendar); + } else { + return new NonNullableTimeConsumer(vector, index, calendar); + } + } + + /** + * Nullable consumer for {@link TimeMilliVector}. + */ + static class NullableTimeConsumer extends BaseConsumer<TimeMilliVector> { + + protected final Calendar calendar; + + /** + * Instantiate a TimeConsumer. + */ + public NullableTimeConsumer(TimeMilliVector vector, int index) { + this(vector, index, /* calendar */null); + } + + /** + * Instantiate a TimeConsumer. + */ + public NullableTimeConsumer(TimeMilliVector vector, int index, Calendar calendar) { + super(vector, index); + this.calendar = calendar; + } + + @Override + public void consume(ResultSet resultSet) throws SQLException { + Time time = calendar == null ? resultSet.getTime(columnIndexInResultSet) : + resultSet.getTime(columnIndexInResultSet, calendar); + if (!resultSet.wasNull()) { + // for fixed width vectors, we have allocated enough memory proactively, + // so there is no need to call the setSafe method here. + vector.set(currentIndex, (int) time.getTime()); + } + currentIndex++; + } + } + + /** + * Non-nullable consumer for {@link TimeMilliVector}. + */ + static class NonNullableTimeConsumer extends BaseConsumer<TimeMilliVector> { + + protected final Calendar calendar; + + /** + * Instantiate a TimeConsumer. + */ + public NonNullableTimeConsumer(TimeMilliVector vector, int index) { + this(vector, index, /* calendar */null); + } + + /** + * Instantiate a TimeConsumer. + */ + public NonNullableTimeConsumer(TimeMilliVector vector, int index, Calendar calendar) { + super(vector, index); + this.calendar = calendar; + } + + @Override + public void consume(ResultSet resultSet) throws SQLException { + Time time = calendar == null ? 
resultSet.getTime(columnIndexInResultSet) : + resultSet.getTime(columnIndexInResultSet, calendar); + // for fixed width vectors, we have allocated enough memory proactively, + // so there is no need to call the setSafe method here. + vector.set(currentIndex, (int) time.getTime()); + currentIndex++; + } + } +} diff --git a/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/TimestampConsumer.java b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/TimestampConsumer.java new file mode 100644 index 000000000..3351e7e78 --- /dev/null +++ b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/TimestampConsumer.java @@ -0,0 +1,89 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.adapter.jdbc.consumer; + +import java.sql.ResultSet; +import java.sql.SQLException; +import java.sql.Timestamp; + +import org.apache.arrow.vector.TimeStampMilliVector; + +/** + * Consumer which consume timestamp type values from {@link ResultSet}. + * Write the data to {@link TimeStampMilliVector}. + */ +public abstract class TimestampConsumer { + + /** + * Creates a consumer for {@link TimeStampMilliVector}. 
+ */ + public static JdbcConsumer<TimeStampMilliVector> createConsumer( + TimeStampMilliVector vector, int index, boolean nullable) { + if (nullable) { + return new NullableTimestampConsumer(vector, index); + } else { + return new NonNullableTimestampConsumer(vector, index); + } + } + + /** + * Nullable consumer for timestamp. + */ + static class NullableTimestampConsumer extends BaseConsumer<TimeStampMilliVector> { + + /** + * Instantiate a TimestampConsumer. + */ + public NullableTimestampConsumer(TimeStampMilliVector vector, int index) { + super(vector, index); + } + + @Override + public void consume(ResultSet resultSet) throws SQLException { + Timestamp timestamp = resultSet.getTimestamp(columnIndexInResultSet); + if (!resultSet.wasNull()) { + // for fixed width vectors, we have allocated enough memory proactively, + // so there is no need to call the setSafe method here. + vector.set(currentIndex, timestamp.getTime()); + } + currentIndex++; + } + } + + /** + * Non-nullable consumer for timestamp. + */ + static class NonNullableTimestampConsumer extends BaseConsumer<TimeStampMilliVector> { + + /** + * Instantiate a TimestampConsumer. + */ + public NonNullableTimestampConsumer(TimeStampMilliVector vector, int index) { + super(vector, index); + } + + @Override + public void consume(ResultSet resultSet) throws SQLException { + Timestamp timestamp = resultSet.getTimestamp(columnIndexInResultSet); + // for fixed width vectors, we have allocated enough memory proactively, + // so there is no need to call the setSafe method here. 
+ vector.set(currentIndex, timestamp.getTime()); + currentIndex++; + } + } +} diff --git a/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/TimestampTZConsumer.java b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/TimestampTZConsumer.java new file mode 100644 index 000000000..f08671f0b --- /dev/null +++ b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/TimestampTZConsumer.java @@ -0,0 +1,97 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.adapter.jdbc.consumer; + +import java.sql.ResultSet; +import java.sql.SQLException; +import java.sql.Timestamp; +import java.util.Calendar; + +import org.apache.arrow.util.Preconditions; +import org.apache.arrow.vector.TimeStampMilliTZVector; + +/** + * Consumer which consume timestamp (with time zone) type values from {@link ResultSet}. + * Write the data to {@link TimeStampMilliTZVector}. + */ +public class TimestampTZConsumer { + /** + * Creates a consumer for {@link TimeStampMilliTZVector}. 
+ */ + public static JdbcConsumer<TimeStampMilliTZVector> createConsumer( + TimeStampMilliTZVector vector, int index, boolean nullable, Calendar calendar) { + Preconditions.checkArgument(calendar != null, "Calendar cannot be null"); + if (nullable) { + return new TimestampTZConsumer.NullableTimestampTZConsumer(vector, index, calendar); + } else { + return new TimestampTZConsumer.NonNullableTimestampConsumer(vector, index, calendar); + } + } + + /** + * Nullable consumer for timestamp (with time zone). + */ + static class NullableTimestampTZConsumer extends BaseConsumer<TimeStampMilliTZVector> { + + protected final Calendar calendar; + + /** + * Instantiate a TimestampConsumer. + */ + public NullableTimestampTZConsumer(TimeStampMilliTZVector vector, int index, Calendar calendar) { + super(vector, index); + this.calendar = calendar; + } + + @Override + public void consume(ResultSet resultSet) throws SQLException { + Timestamp timestamp = resultSet.getTimestamp(columnIndexInResultSet, calendar); + if (!resultSet.wasNull()) { + // for fixed width vectors, we have allocated enough memory proactively, + // so there is no need to call the setSafe method here. + vector.set(currentIndex, timestamp.getTime()); + } + currentIndex++; + } + } + + /** + * Non-nullable consumer for timestamp (with time zone). + */ + static class NonNullableTimestampConsumer extends BaseConsumer<TimeStampMilliTZVector> { + + protected final Calendar calendar; + + /** + * Instantiate a TimestampConsumer. + */ + public NonNullableTimestampConsumer(TimeStampMilliTZVector vector, int index, Calendar calendar) { + super(vector, index); + this.calendar = calendar; + } + + @Override + public void consume(ResultSet resultSet) throws SQLException { + Timestamp timestamp = resultSet.getTimestamp(columnIndexInResultSet, calendar); + // for fixed width vectors, we have allocated enough memory proactively, + // so there is no need to call the setSafe method here. 
+ vector.set(currentIndex, timestamp.getTime()); + currentIndex++; + } + } +} diff --git a/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/TinyIntConsumer.java b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/TinyIntConsumer.java new file mode 100644 index 000000000..40cf087a5 --- /dev/null +++ b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/TinyIntConsumer.java @@ -0,0 +1,87 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.adapter.jdbc.consumer; + +import java.sql.ResultSet; +import java.sql.SQLException; + +import org.apache.arrow.vector.TinyIntVector; + +/** + * Consumer which consume tinyInt type values from {@link ResultSet}. + * Write the data to {@link org.apache.arrow.vector.TinyIntVector}. + */ +public abstract class TinyIntConsumer { + + /** + * Creates a consumer for {@link TinyIntVector}. 
+ */ + public static JdbcConsumer<TinyIntVector> createConsumer(TinyIntVector vector, int index, boolean nullable) { + if (nullable) { + return new NullableTinyIntConsumer(vector, index); + } else { + return new NonNullableTinyIntConsumer(vector, index); + } + } + + /** + * Nullable consumer for tiny int. + */ + static class NullableTinyIntConsumer extends BaseConsumer<TinyIntVector> { + + /** + * Instantiate a TinyIntConsumer. + */ + public NullableTinyIntConsumer(TinyIntVector vector, int index) { + super(vector, index); + } + + @Override + public void consume(ResultSet resultSet) throws SQLException { + byte value = resultSet.getByte(columnIndexInResultSet); + if (!resultSet.wasNull()) { + // for fixed width vectors, we have allocated enough memory proactively, + // so there is no need to call the setSafe method here. + vector.set(currentIndex, value); + } + currentIndex++; + } + } + + /** + * Non-nullable consumer for tiny int. + */ + static class NonNullableTinyIntConsumer extends BaseConsumer<TinyIntVector> { + + /** + * Instantiate a TinyIntConsumer. + */ + public NonNullableTinyIntConsumer(TinyIntVector vector, int index) { + super(vector, index); + } + + @Override + public void consume(ResultSet resultSet) throws SQLException { + byte value = resultSet.getByte(columnIndexInResultSet); + // for fixed width vectors, we have allocated enough memory proactively, + // so there is no need to call the setSafe method here. 
+ vector.set(currentIndex, value); + currentIndex++; + } + } +} diff --git a/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/VarCharConsumer.java b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/VarCharConsumer.java new file mode 100644 index 000000000..05333715b --- /dev/null +++ b/src/arrow/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/VarCharConsumer.java @@ -0,0 +1,86 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.adapter.jdbc.consumer; + +import java.nio.charset.StandardCharsets; +import java.sql.ResultSet; +import java.sql.SQLException; + +import org.apache.arrow.vector.VarCharVector; + +/** + * Consumer which consume varchar type values from {@link ResultSet}. + * Write the data to {@link org.apache.arrow.vector.VarCharVector}. + */ +public abstract class VarCharConsumer { + + /** + * Creates a consumer for {@link VarCharVector}. 
+ */ + public static JdbcConsumer<VarCharVector> createConsumer(VarCharVector vector, int index, boolean nullable) { + if (nullable) { + return new NullableVarCharConsumer(vector, index); + } else { + return new NonNullableVarCharConsumer(vector, index); + } + } + + /** + * Nullable consumer for var char. + */ + static class NullableVarCharConsumer extends BaseConsumer<VarCharVector> { + + /** + * Instantiate a VarCharConsumer. + */ + public NullableVarCharConsumer(VarCharVector vector, int index) { + super(vector, index); + } + + @Override + public void consume(ResultSet resultSet) throws SQLException { + String value = resultSet.getString(columnIndexInResultSet); + if (!resultSet.wasNull()) { + byte[] bytes = value.getBytes(StandardCharsets.UTF_8); + vector.setSafe(currentIndex, bytes); + } + currentIndex++; + } + } + + /** + * Non-nullable consumer for var char. + */ + static class NonNullableVarCharConsumer extends BaseConsumer<VarCharVector> { + + /** + * Instantiate a VarCharConsumer. + */ + public NonNullableVarCharConsumer(VarCharVector vector, int index) { + super(vector, index); + } + + @Override + public void consume(ResultSet resultSet) throws SQLException { + String value = resultSet.getString(columnIndexInResultSet); + byte[] bytes = value.getBytes(StandardCharsets.UTF_8); + vector.setSafe(currentIndex, bytes); + currentIndex++; + } + } +} diff --git a/src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/AbstractJdbcToArrowTest.java b/src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/AbstractJdbcToArrowTest.java new file mode 100644 index 000000000..c3cd4574b --- /dev/null +++ b/src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/AbstractJdbcToArrowTest.java @@ -0,0 +1,322 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.adapter.jdbc; + +import java.io.IOException; +import java.sql.Connection; +import java.sql.DriverManager; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.sql.Statement; +import java.util.Calendar; + +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.util.Preconditions; +import org.apache.arrow.vector.VectorSchemaRoot; +import org.apache.arrow.vector.util.ValueVectorUtility; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.dataformat.yaml.YAMLFactory; + +/** + * Class to abstract out some common test functionality for testing JDBC to Arrow. 
+ */ +public abstract class AbstractJdbcToArrowTest { + + protected static final String BIGINT = "BIGINT_FIELD5"; + protected static final String BINARY = "BINARY_FIELD12"; + protected static final String BIT = "BIT_FIELD17"; + protected static final String BLOB = "BLOB_FIELD14"; + protected static final String BOOL = "BOOL_FIELD2"; + protected static final String CHAR = "CHAR_FIELD16"; + protected static final String CLOB = "CLOB_FIELD15"; + protected static final String DATE = "DATE_FIELD10"; + protected static final String DECIMAL = "DECIMAL_FIELD6"; + protected static final String DOUBLE = "DOUBLE_FIELD7"; + protected static final String INT = "INT_FIELD1"; + protected static final String REAL = "REAL_FIELD8"; + protected static final String SMALLINT = "SMALLINT_FIELD4"; + protected static final String TIME = "TIME_FIELD9"; + protected static final String TIMESTAMP = "TIMESTAMP_FIELD11"; + protected static final String TINYINT = "TINYINT_FIELD3"; + protected static final String VARCHAR = "VARCHAR_FIELD13"; + protected static final String NULL = "NULL_FIELD18"; + + protected Connection conn = null; + protected Table table; + protected boolean reuseVectorSchemaRoot; + + /** + * This method creates Table object after reading YAML file. + * + * @param ymlFilePath path to file + * @return Table object + * @throws IOException on error + */ + protected static Table getTable(String ymlFilePath, @SuppressWarnings("rawtypes") Class clss) throws IOException { + return new ObjectMapper(new YAMLFactory()).readValue( + clss.getClassLoader().getResourceAsStream(ymlFilePath), Table.class); + } + + + /** + * This method creates Connection object and DB table and also populate data into table for test. 
+ * + * @throws SQLException on error + * @throws ClassNotFoundException on error + */ + @Before + public void setUp() throws SQLException, ClassNotFoundException { + String url = "jdbc:h2:mem:JdbcToArrowTest"; + String driver = "org.h2.Driver"; + Class.forName(driver); + conn = DriverManager.getConnection(url); + try (Statement stmt = conn.createStatement();) { + stmt.executeUpdate(table.getCreate()); + for (String insert : table.getData()) { + stmt.executeUpdate(insert); + } + } + } + + /** + * Clean up method to close connection after test completes. + * + * @throws SQLException on error + */ + @After + public void destroy() throws SQLException { + if (conn != null) { + conn.close(); + conn = null; + } + } + + /** + * Prepares test data and returns collection of Table object for each test iteration. + * + * @param testFiles files for test + * @param clss Class type + * @return Collection of Table objects + * @throws SQLException on error + * @throws ClassNotFoundException on error + * @throws IOException on error + */ + public static Object[][] prepareTestData(String[] testFiles, @SuppressWarnings("rawtypes") Class clss) + throws SQLException, ClassNotFoundException, IOException { + Object[][] tableArr = new Object[testFiles.length][]; + int i = 0; + for (String testFile : testFiles) { + tableArr[i++] = new Object[]{getTable(testFile, clss)}; + } + return tableArr; + } + + /** + * Abstract method to implement test Functionality to test JdbcToArrow methods. + * + * @throws SQLException on error + * @throws IOException on error + */ + @Test + public abstract void testJdbcToArrowValues() throws SQLException, IOException; + + /** + * Abstract method to implement logic to assert test various datatype values. + * + * @param root VectorSchemaRoot for test + */ + public abstract void testDataSets(VectorSchemaRoot root); + + /** + * For the given SQL query, execute and fetch the data from Relational DB and convert it to Arrow objects. 
+ * This method uses the default Calendar instance with default TimeZone and Locale as returned by the JVM. + * If you wish to use specific TimeZone or Locale for any Date, Time and Timestamp datasets, you may want use + * overloaded API that taken Calendar object instance. + * + * This method is for test only. + * + * @param connection Database connection to be used. This method will not close the passed connection object. Since + * the caller has passed the connection object it's the responsibility of the caller to close or + * return the connection to the pool. + * @param query The DB Query to fetch the data. + * @param allocator Memory allocator + * @return Arrow Data Objects {@link VectorSchemaRoot} + * @throws SQLException Propagate any SQL Exceptions to the caller after closing any resources opened such as + * ResultSet and Statement objects. + */ + public static VectorSchemaRoot sqlToArrow(Connection connection, String query, BufferAllocator allocator) + throws SQLException, IOException { + Preconditions.checkNotNull(allocator, "Memory allocator object can not be null"); + + JdbcToArrowConfig config = + new JdbcToArrowConfig(allocator, JdbcToArrowUtils.getUtcCalendar()); + return sqlToArrow(connection, query, config); + } + + /** + * For the given SQL query, execute and fetch the data from Relational DB and convert it to Arrow objects. + * + * This method is for test only. + * + * @param connection Database connection to be used. This method will not close the passed connection object. Since + * the caller has passed the connection object it's the responsibility of the caller to close or + * return the connection to the pool. + * @param query The DB Query to fetch the data. + * @param allocator Memory allocator + * @param calendar Calendar object to use to handle Date, Time and Timestamp datasets. 
+ * @return Arrow Data Objects {@link VectorSchemaRoot} + * @throws SQLException Propagate any SQL Exceptions to the caller after closing any resources opened such as + * ResultSet and Statement objects. + */ + public static VectorSchemaRoot sqlToArrow( + Connection connection, + String query, + BufferAllocator allocator, + Calendar calendar) throws SQLException, IOException { + + Preconditions.checkNotNull(allocator, "Memory allocator object can not be null"); + Preconditions.checkNotNull(calendar, "Calendar object can not be null"); + + return sqlToArrow(connection, query, new JdbcToArrowConfig(allocator, calendar)); + } + + /** + * For the given SQL query, execute and fetch the data from Relational DB and convert it to Arrow objects. + * + * This method is for test only. + * + * @param connection Database connection to be used. This method will not close the passed connection object. + * Since the caller has passed the connection object it's the responsibility of the caller + * to close or return the connection to the pool. + * @param query The DB Query to fetch the data. + * @param config Configuration + * @return Arrow Data Objects {@link VectorSchemaRoot} + * @throws SQLException Propagate any SQL Exceptions to the caller after closing any resources opened such as + * ResultSet and Statement objects. + */ + public static VectorSchemaRoot sqlToArrow(Connection connection, String query, JdbcToArrowConfig config) + throws SQLException, IOException { + Preconditions.checkNotNull(connection, "JDBC connection object can not be null"); + Preconditions.checkArgument(query != null && query.length() > 0, "SQL query can not be null or empty"); + + try (Statement stmt = connection.createStatement()) { + return sqlToArrow(stmt.executeQuery(query), config); + } + } + + /** + * For the given JDBC {@link ResultSet}, fetch the data from Relational DB and convert it to Arrow objects. This + * method uses the default RootAllocator and Calendar object. 
+ * + * This method is for test only. + * + * @param resultSet ResultSet to use to fetch the data from underlying database + * @return Arrow Data Objects {@link VectorSchemaRoot} + * @throws SQLException on error + */ + public static VectorSchemaRoot sqlToArrow(ResultSet resultSet) throws SQLException, IOException { + Preconditions.checkNotNull(resultSet, "JDBC ResultSet object can not be null"); + + return sqlToArrow(resultSet, JdbcToArrowUtils.getUtcCalendar()); + } + + /** + * For the given JDBC {@link ResultSet}, fetch the data from Relational DB and convert it to Arrow objects. + * + * This method is for test only. + * + * @param resultSet ResultSet to use to fetch the data from underlying database + * @param allocator Memory allocator + * @return Arrow Data Objects {@link VectorSchemaRoot} + * @throws SQLException on error + */ + public static VectorSchemaRoot sqlToArrow(ResultSet resultSet, BufferAllocator allocator) + throws SQLException, IOException { + Preconditions.checkNotNull(allocator, "Memory Allocator object can not be null"); + + JdbcToArrowConfig config = + new JdbcToArrowConfig(allocator, JdbcToArrowUtils.getUtcCalendar()); + return sqlToArrow(resultSet, config); + } + + /** + * For the given JDBC {@link ResultSet}, fetch the data from Relational DB and convert it to Arrow objects. + * + * This method is for test only. + * + * @param resultSet ResultSet to use to fetch the data from underlying database + * @param calendar Calendar instance to use for Date, Time and Timestamp datasets, or <code>null</code> if none. 
+ * @return Arrow Data Objects {@link VectorSchemaRoot} + * @throws SQLException on error + */ + public static VectorSchemaRoot sqlToArrow(ResultSet resultSet, Calendar calendar) throws SQLException, IOException { + Preconditions.checkNotNull(resultSet, "JDBC ResultSet object can not be null"); + return sqlToArrow(resultSet, new JdbcToArrowConfig(new RootAllocator(Integer.MAX_VALUE), calendar)); + } + + /** + * For the given JDBC {@link ResultSet}, fetch the data from Relational DB and convert it to Arrow objects. + * + * This method is for test only. + * + * @param resultSet ResultSet to use to fetch the data from underlying database + * @param allocator Memory allocator to use. + * @param calendar Calendar instance to use for Date, Time and Timestamp datasets, or <code>null</code> if none. + * @return Arrow Data Objects {@link VectorSchemaRoot} + * @throws SQLException on error + */ + public static VectorSchemaRoot sqlToArrow( + ResultSet resultSet, + BufferAllocator allocator, + Calendar calendar) + throws SQLException, IOException { + Preconditions.checkNotNull(allocator, "Memory Allocator object can not be null"); + + return sqlToArrow(resultSet, new JdbcToArrowConfig(allocator, calendar)); + } + + /** + * For the given JDBC {@link ResultSet}, fetch the data from Relational DB and convert it to Arrow objects. + * + * This method is for test only. + * + * @param resultSet ResultSet to use to fetch the data from underlying database + * @param config Configuration of the conversion from JDBC to Arrow. 
+ * @return Arrow Data Objects {@link VectorSchemaRoot} + * @throws SQLException on error + */ + public static VectorSchemaRoot sqlToArrow(ResultSet resultSet, JdbcToArrowConfig config) + throws SQLException, IOException { + Preconditions.checkNotNull(resultSet, "JDBC ResultSet object can not be null"); + Preconditions.checkNotNull(config, "The configuration cannot be null"); + + VectorSchemaRoot root = VectorSchemaRoot.create( + JdbcToArrowUtils.jdbcToArrowSchema(resultSet.getMetaData(), config), config.getAllocator()); + if (config.getTargetBatchSize() != JdbcToArrowConfig.NO_LIMIT_BATCH_SIZE) { + ValueVectorUtility.preAllocate(root, config.getTargetBatchSize()); + } + JdbcToArrowUtils.jdbcToArrowVectors(resultSet, root, config); + return root; + } + +} diff --git a/src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcFieldInfoTest.java b/src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcFieldInfoTest.java new file mode 100644 index 000000000..b1a8b8f22 --- /dev/null +++ b/src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcFieldInfoTest.java @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.arrow.adapter.jdbc; + +import static org.junit.Assert.*; + +import java.sql.Types; + +import org.junit.Test; + +public class JdbcFieldInfoTest { + + @Test + public void testCreateJdbcFieldInfoWithJdbcType() { + JdbcFieldInfo fieldInfo = new JdbcFieldInfo(Types.BLOB); + + assertEquals(Types.BLOB, fieldInfo.getJdbcType()); + assertEquals(0, fieldInfo.getPrecision()); + assertEquals(0, fieldInfo.getScale()); + } + + @Test + public void testCreateJdbcFieldInfoWithJdbcTypePrecisionAndScale() { + JdbcFieldInfo fieldInfo = new JdbcFieldInfo(Types.BLOB, 1, 2); + + assertEquals(Types.BLOB, fieldInfo.getJdbcType()); + assertEquals(1, fieldInfo.getPrecision()); + assertEquals(2, fieldInfo.getScale()); + } +} diff --git a/src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigTest.java b/src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigTest.java new file mode 100644 index 000000000..68a681b05 --- /dev/null +++ b/src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigTest.java @@ -0,0 +1,161 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.arrow.adapter.jdbc; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; + +import java.sql.Types; +import java.util.Calendar; +import java.util.HashMap; +import java.util.Locale; +import java.util.TimeZone; + +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.RootAllocator; +import org.junit.Test; + +/** Tests null handling and getter round-trips for JdbcToArrowConfig and JdbcToArrowConfigBuilder. */ public class JdbcToArrowConfigTest { + + /* Shared by every test in this class; nothing in this class closes it. */ private static final BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE); + private static final Calendar calendar = Calendar.getInstance(TimeZone.getTimeZone("UTC"), Locale.ROOT); + + /** Both arguments null: the config constructor is expected to throw NullPointerException. */ @Test(expected = NullPointerException.class) + public void testConfigNullArguments() { + new JdbcToArrowConfig(null, null); + } + + /** Both arguments null: the builder constructor is expected to throw NullPointerException. */ @Test(expected = NullPointerException.class) + public void testBuilderNullArguments() { + new JdbcToArrowConfigBuilder(null, null); + } + + /** A null calendar is accepted by the config constructor and read back as null. */ @Test + public void testConfigNullCalendar() { + JdbcToArrowConfig config = new JdbcToArrowConfig(allocator, null); + assertNull(config.getCalendar()); + } + + /** A null calendar is accepted by the builder and survives build(). */ @Test + public void testBuilderNullCalendar() { + JdbcToArrowConfigBuilder builder = new JdbcToArrowConfigBuilder(allocator, null); + JdbcToArrowConfig config = builder.build(); + assertNull(config.getCalendar()); + } + + /** Only the allocator is null: the config constructor is still expected to throw. */ @Test(expected = NullPointerException.class) + public void testConfigNullAllocator() { + new JdbcToArrowConfig(null, calendar); + } + + /** Only the allocator is null: the builder constructor is still expected to throw. */ @Test(expected = NullPointerException.class) + public void testBuilderNullAllocator() { + new JdbcToArrowConfigBuilder(null, calendar); + } + + /** setAllocator(null) on a valid builder is expected to throw NullPointerException. */ @Test(expected = NullPointerException.class) + public void testSetNullAllocator() { + JdbcToArrowConfigBuilder builder = new JdbcToArrowConfigBuilder(allocator, calendar); + builder.setAllocator(null); + } + + /** setCalendar(null) on a valid builder is allowed and yields a config with a null calendar. */ @Test + public void testSetNullCalendar() { + JdbcToArrowConfigBuilder builder = 
new JdbcToArrowConfigBuilder(allocator, calendar); + JdbcToArrowConfig config = builder.setCalendar(null).build(); + assertNull(config.getCalendar()); + } + + /** The built config must return the very same allocator and calendar instances (identity, not equality), both initially and after the builder setters replace them. NOTE(review): newAllocator is never closed here. */ @Test + public void testConfig() { + JdbcToArrowConfigBuilder builder = new JdbcToArrowConfigBuilder(allocator, calendar); + JdbcToArrowConfig config = builder.build(); + + assertTrue(allocator == config.getAllocator()); + assertTrue(calendar == config.getCalendar()); + + Calendar newCalendar = Calendar.getInstance(); + BufferAllocator newAllocator = new RootAllocator(Integer.SIZE); + + builder.setAllocator(newAllocator).setCalendar(newCalendar); + config = builder.build(); + + assertTrue(newAllocator == config.getAllocator()); + assertTrue(newCalendar == config.getCalendar()); + } + + /** Checks the include-metadata flag through the three-argument builder constructor, the builder setter, and the eight-argument config constructor (which also carries the reuse-vector-schema-root flag). */ @Test + public void testIncludeMetadata() { + JdbcToArrowConfigBuilder builder = new JdbcToArrowConfigBuilder(allocator, calendar, false); + + JdbcToArrowConfig config = builder.build(); + assertFalse(config.shouldIncludeMetadata()); + + builder.setIncludeMetadata(true); + config = builder.build(); + assertTrue(config.shouldIncludeMetadata()); + + config = new JdbcToArrowConfigBuilder(allocator, calendar, true).build(); + assertTrue(config.shouldIncludeMetadata()); + + config = new JdbcToArrowConfig(allocator, calendar, /* include metadata */ true, + /* reuse vector schema root */ true, null, null, JdbcToArrowConfig.NO_LIMIT_BATCH_SIZE, null); + assertTrue(config.shouldIncludeMetadata()); + assertTrue(config.isReuseVectorSchemaRoot()); + + config = new JdbcToArrowConfig(allocator, calendar, /* include metadata */ false, + /* reuse vector schema root */ false, null, null, JdbcToArrowConfig.NO_LIMIT_BATCH_SIZE, null); + assertFalse(config.shouldIncludeMetadata()); + assertFalse(config.isReuseVectorSchemaRoot()); + } + + /** Checks array sub-type lookup by column index and by column name, before any mapping is set, after mappings for other columns are set, and after the maps are mutated post-build. */ @Test + public void testArraySubTypes() { + JdbcToArrowConfigBuilder builder = new JdbcToArrowConfigBuilder(allocator, calendar, false); + JdbcToArrowConfig config = builder.build(); + + final int columnIndex = 1; 
+ final String columnName = "COLUMN"; + + assertNull(config.getArraySubTypeByColumnIndex(columnIndex)); + assertNull(config.getArraySubTypeByColumnName(columnName)); + + final HashMap<Integer, JdbcFieldInfo> indexMapping = new HashMap<Integer, JdbcFieldInfo>(); + indexMapping.put(2, new JdbcFieldInfo(Types.BIGINT)); + + final HashMap<String, JdbcFieldInfo> fieldMapping = new HashMap<String, JdbcFieldInfo>(); + fieldMapping.put("NEW_COLUMN", new JdbcFieldInfo(Types.BINARY)); + + builder.setArraySubTypeByColumnIndexMap(indexMapping); + builder.setArraySubTypeByColumnNameMap(fieldMapping); + config = builder.build(); + + assertNull(config.getArraySubTypeByColumnIndex(columnIndex)); + assertNull(config.getArraySubTypeByColumnName(columnName)); + + /* The maps are mutated after build() and config is not rebuilt, so the assertions below pass only if the config reads through to these same map instances rather than a copy. */ indexMapping.put(columnIndex, new JdbcFieldInfo(Types.BIT)); + fieldMapping.put(columnName, new JdbcFieldInfo(Types.BLOB)); + + assertNotNull(config.getArraySubTypeByColumnIndex(columnIndex)); + assertEquals(Types.BIT, config.getArraySubTypeByColumnIndex(columnIndex).getJdbcType()); + assertEquals(Types.BLOB, config.getArraySubTypeByColumnName(columnName).getJdbcType()); + } +} diff --git a/src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcToArrowTestHelper.java b/src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcToArrowTestHelper.java new file mode 100644 index 000000000..9fdb32d80 --- /dev/null +++ b/src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcToArrowTestHelper.java @@ -0,0 +1,381 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.adapter.jdbc; + +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; + +import java.math.BigDecimal; +import java.nio.charset.Charset; +import java.sql.ResultSetMetaData; +import java.sql.SQLException; +import java.util.List; +import java.util.Map; + +import org.apache.arrow.vector.BaseValueVector; +import org.apache.arrow.vector.BigIntVector; +import org.apache.arrow.vector.BitVector; +import org.apache.arrow.vector.DateDayVector; +import org.apache.arrow.vector.DecimalVector; +import org.apache.arrow.vector.Float4Vector; +import org.apache.arrow.vector.Float8Vector; +import org.apache.arrow.vector.IntVector; +import org.apache.arrow.vector.NullVector; +import org.apache.arrow.vector.SmallIntVector; +import org.apache.arrow.vector.TimeMilliVector; +import org.apache.arrow.vector.TimeStampVector; +import org.apache.arrow.vector.TinyIntVector; +import org.apache.arrow.vector.VarBinaryVector; +import org.apache.arrow.vector.VarCharVector; +import org.apache.arrow.vector.VectorSchemaRoot; +import org.apache.arrow.vector.types.pojo.Field; +import org.apache.arrow.vector.types.pojo.Schema; + +/** + * This is a Helper class which has functionalities to read and assert the values from the given FieldVector object. 
+ */ +public class JdbcToArrowTestHelper { + + /** Asserts the vector holds rowCount values; slot j must be null where values[j] is null, otherwise equal to values[j]. */ public static void assertIntVectorValues(IntVector intVector, int rowCount, Integer[] values) { + assertEquals(rowCount, intVector.getValueCount()); + + for (int j = 0; j < intVector.getValueCount(); j++) { + if (values[j] == null) { + assertTrue(intVector.isNull(j)); + } else { + assertEquals(values[j].intValue(), intVector.get(j)); + } + } + } + + /** Null-aware element-wise check of a BitVector against booleans; a stored bit of 1 is read as true. */ public static void assertBooleanVectorValues(BitVector bitVector, int rowCount, Boolean[] values) { + assertEquals(rowCount, bitVector.getValueCount()); + + for (int j = 0; j < bitVector.getValueCount(); j++) { + if (values[j] == null) { + assertTrue(bitVector.isNull(j)); + } else { + assertEquals(values[j].booleanValue(), bitVector.get(j) == 1); + } + } + } + + /** Null-aware element-wise check of a BitVector against the raw stored bit given as an Integer. */ public static void assertBitVectorValues(BitVector bitVector, int rowCount, Integer[] values) { + assertEquals(rowCount, bitVector.getValueCount()); + + for (int j = 0; j < bitVector.getValueCount(); j++) { + if (values[j] == null) { + assertTrue(bitVector.isNull(j)); + } else { + assertEquals(values[j].intValue(), bitVector.get(j)); + } + } + } + + /** Null-aware element-wise check of a TinyIntVector after verifying the value count. */ public static void assertTinyIntVectorValues(TinyIntVector tinyIntVector, int rowCount, Integer[] values) { + assertEquals(rowCount, tinyIntVector.getValueCount()); + + for (int j = 0; j < tinyIntVector.getValueCount(); j++) { + if (values[j] == null) { + assertTrue(tinyIntVector.isNull(j)); + } else { + assertEquals(values[j].intValue(), tinyIntVector.get(j)); + } + } + } + + /** Null-aware element-wise check of a SmallIntVector after verifying the value count. */ public static void assertSmallIntVectorValues(SmallIntVector smallIntVector, int rowCount, Integer[] values) { + assertEquals(rowCount, smallIntVector.getValueCount()); + + for (int j = 0; j < smallIntVector.getValueCount(); j++) { + if (values[j] == null) { + assertTrue(smallIntVector.isNull(j)); + } else { + assertEquals(values[j].intValue(), smallIntVector.get(j)); + } + } + } + + /** Null-aware element-wise check of a BigIntVector after verifying the value count. */ public static void assertBigIntVectorValues(BigIntVector bigIntVector, int rowCount, Long[] values) { + 
assertEquals(rowCount, bigIntVector.getValueCount()); + + for (int j = 0; j < bigIntVector.getValueCount(); j++) { + if (values[j] == null) { + assertTrue(bigIntVector.isNull(j)); + } else { + assertEquals(values[j].longValue(), bigIntVector.get(j)); + } + } + } + + /** Null-aware check of a DecimalVector. Both sides are compared through doubleValue() with zero tolerance, so the check is only as precise as the conversion to double. */ public static void assertDecimalVectorValues(DecimalVector decimalVector, int rowCount, BigDecimal[] values) { + assertEquals(rowCount, decimalVector.getValueCount()); + + for (int j = 0; j < decimalVector.getValueCount(); j++) { + if (values[j] == null) { + assertTrue(decimalVector.isNull(j)); + } else { + assertEquals(values[j].doubleValue(), decimalVector.getObject(j).doubleValue(), 0); + } + } + } + + /** Null-aware check of a Float8Vector with an absolute tolerance of 0.01. */ public static void assertFloat8VectorValues(Float8Vector float8Vector, int rowCount, Double[] values) { + assertEquals(rowCount, float8Vector.getValueCount()); + + for (int j = 0; j < float8Vector.getValueCount(); j++) { + if (values[j] == null) { + assertTrue(float8Vector.isNull(j)); + } else { + assertEquals(values[j], float8Vector.get(j), 0.01); + } + } + } + + /** Null-aware check of a Float4Vector with an absolute tolerance of 0.01. */ public static void assertFloat4VectorValues(Float4Vector float4Vector, int rowCount, Float[] values) { + assertEquals(rowCount, float4Vector.getValueCount()); + + for (int j = 0; j < float4Vector.getValueCount(); j++) { + if (values[j] == null) { + assertTrue(float4Vector.isNull(j)); + } else { + assertEquals(values[j], float4Vector.get(j), 0.01); + } + } + } + + /** Null-aware element-wise check of a TimeMilliVector against Long values. */ public static void assertTimeVectorValues(TimeMilliVector timeMilliVector, int rowCount, Long[] values) { + assertEquals(rowCount, timeMilliVector.getValueCount()); + + for (int j = 0; j < timeMilliVector.getValueCount(); j++) { + if (values[j] == null) { + assertTrue(timeMilliVector.isNull(j)); + } else { + assertEquals(values[j].longValue(), timeMilliVector.get(j)); + } + } + } + + /** Null-aware element-wise check of a DateDayVector; expected Integer values are widened with longValue() before comparison. */ public static void assertDateVectorValues(DateDayVector dateDayVector, int rowCount, Integer[] values) { + assertEquals(rowCount, dateDayVector.getValueCount()); + + for (int j = 0; j < 
dateDayVector.getValueCount(); j++) { + if (values[j] == null) { + assertTrue(dateDayVector.isNull(j)); + } else { + assertEquals(values[j].longValue(), dateDayVector.get(j)); + } + } + } + + /** Null-aware element-wise check of a TimeStampVector against Long values. */ public static void assertTimeStampVectorValues(TimeStampVector timeStampVector, int rowCount, Long[] values) { + assertEquals(rowCount, timeStampVector.getValueCount()); + + for (int j = 0; j < timeStampVector.getValueCount(); j++) { + if (values[j] == null) { + assertTrue(timeStampVector.isNull(j)); + } else { + assertEquals(values[j].longValue(), timeStampVector.get(j)); + } + } + } + + /** Null-aware check of a VarBinaryVector; non-null slots are compared byte for byte. */ public static void assertVarBinaryVectorValues(VarBinaryVector varBinaryVector, int rowCount, byte[][] values) { + assertEquals(rowCount, varBinaryVector.getValueCount()); + + for (int j = 0; j < varBinaryVector.getValueCount(); j++) { + if (values[j] == null) { + assertTrue(varBinaryVector.isNull(j)); + } else { + assertArrayEquals(values[j], varBinaryVector.get(j)); + } + } + } + + /** Null-aware check of a VarCharVector; non-null slots are compared as raw bytes, so the expected values must already be in the encoding the vector holds. */ public static void assertVarcharVectorValues(VarCharVector varCharVector, int rowCount, byte[][] values) { + assertEquals(rowCount, varCharVector.getValueCount()); + + for (int j = 0; j < varCharVector.getValueCount(); j++) { + if (values[j] == null) { + assertTrue(varCharVector.isNull(j)); + } else { + assertArrayEquals(values[j], varCharVector.get(j)); + } + } + } + + /** Only the value count of the NullVector is checked. */ public static void assertNullVectorValues(NullVector vector, int rowCount) { + assertEquals(rowCount, vector.getValueCount()); + } + + /** Asserts the vector holds rowCount values and that every slot is null. */ public static void assertNullValues(BaseValueVector vector, int rowCount) { + assertEquals(rowCount, vector.getValueCount()); + + for (int j = 0; j < vector.getValueCount(); j++) { + assertTrue(vector.isNull(j)); + } + } + + /** Asserts that every field of the schema carries a non-null but empty metadata map. */ public static void assertFieldMetadataIsEmpty(VectorSchemaRoot schema) { + assertNotNull(schema); + assertNotNull(schema.getSchema()); + assertNotNull(schema.getSchema().getFields()); + + for (Field field : schema.getSchema().getFields()) { + assertNotNull(field.getMetadata()); + 
assertEquals(0, field.getMetadata().size()); + } + } + + /** Asserts that the schema has one field per result set column and that each field carries exactly four metadata entries: catalog name, table name, column label and column type name. */ public static void assertFieldMetadataMatchesResultSetMetadata(ResultSetMetaData rsmd, Schema schema) + throws SQLException { + assertNotNull(schema); + assertNotNull(schema.getFields()); + assertNotNull(rsmd); + + List<Field> fields = schema.getFields(); + + assertEquals(rsmd.getColumnCount(), fields.size()); + + // Vector columns are created in the same order as ResultSet columns. + for (int i = 1; i <= rsmd.getColumnCount(); ++i) { + Map<String, String> metadata = fields.get(i - 1).getMetadata(); + + assertNotNull(metadata); + assertEquals(4, metadata.size()); + + assertEquals(rsmd.getCatalogName(i), metadata.get(Constants.SQL_CATALOG_NAME_KEY)); + assertEquals(rsmd.getTableName(i), metadata.get(Constants.SQL_TABLE_NAME_KEY)); + assertEquals(rsmd.getColumnLabel(i), metadata.get(Constants.SQL_COLUMN_NAME_KEY)); + assertEquals(rsmd.getColumnTypeName(i), metadata.get(Constants.SQL_TYPE_KEY)); + } + } + + /** Decodes consecutive pairs of hex digits into bytes. NOTE(review): an odd-length input reaches charAt(i + 1) past the end of the string, and a non-hex character makes Character.digit return -1 and silently corrupts the byte; callers presumably pass well-formed even-length hex, confirm. */ public static byte[] hexStringToByteArray(String s) { + int len = s.length(); + byte[] data = new byte[len / 2]; + for (int i = 0; i < len; i += 2) { + data[i / 2] = (byte) ((Character.digit(s.charAt(i), 16) << 4) + + Character.digit(s.charAt(i + 1), 16)); + } + return data; + } + + /** Looks up the entry for dataType through getValues and parses each comma-separated token as an int; a token that trims to the text null becomes a null element. NOTE(review): the null test trims but Integer.parseInt receives the untrimmed token, so padded numbers would throw NumberFormatException. */ public static Integer[] getIntValues(String[] values, String dataType) { + String[] dataArr = getValues(values, dataType); + Integer[] valueArr = new Integer[dataArr.length]; + int i = 0; + for (String data : dataArr) { + valueArr[i++] = "null".equals(data.trim()) ? null : Integer.parseInt(data); + } + return valueArr; + } + + /** Same lookup as the other typed getters; a token that trims to the text null becomes null, otherwise the element is true only when the trimmed token is exactly 1. */ public static Boolean[] getBooleanValues(String[] values, String dataType) { + String[] dataArr = getValues(values, dataType); + Boolean[] valueArr = new Boolean[dataArr.length]; + int i = 0; + for (String data : dataArr) { + valueArr[i++] = "null".equals(data.trim()) ? 
null : data.trim().equals("1"); + } + return valueArr; + } + + /** Parses the tokens for dataType as BigDecimal, mapping the text null to a null element; the untrimmed token is passed to the BigDecimal constructor. */ public static BigDecimal[] getDecimalValues(String[] values, String dataType) { + String[] dataArr = getValues(values, dataType); + BigDecimal[] valueArr = new BigDecimal[dataArr.length]; + int i = 0; + for (String data : dataArr) { + valueArr[i++] = "null".equals(data.trim()) ? null : new BigDecimal(data); + } + return valueArr; + } + + /** Parses the tokens for dataType with Double.parseDouble, mapping the text null to a null element. */ public static Double[] getDoubleValues(String[] values, String dataType) { + String[] dataArr = getValues(values, dataType); + Double[] valueArr = new Double[dataArr.length]; + int i = 0; + for (String data : dataArr) { + valueArr[i++] = "null".equals(data.trim()) ? null : Double.parseDouble(data); + } + return valueArr; + } + + /** Parses the tokens for dataType with Float.parseFloat, mapping the text null to a null element. */ public static Float[] getFloatValues(String[] values, String dataType) { + String[] dataArr = getValues(values, dataType); + Float[] valueArr = new Float[dataArr.length]; + int i = 0; + for (String data : dataArr) { + valueArr[i++] = "null".equals(data.trim()) ? null : Float.parseFloat(data); + } + return valueArr; + } + + /** Parses the tokens for dataType with Long.parseLong, mapping the text null to a null element. NOTE(review): as with getIntValues, the parse receives the untrimmed token. */ public static Long[] getLongValues(String[] values, String dataType) { + String[] dataArr = getValues(values, dataType); + Long[] valueArr = new Long[dataArr.length]; + int i = 0; + for (String data : dataArr) { + valueArr[i++] = "null".equals(data.trim()) ? null : Long.parseLong(data); + } + return valueArr; + } + + /** Converts the trimmed tokens for dataType to bytes, mapping the text null to a null element. NOTE(review): getBytes() without a charset uses the platform default, so results can differ between machines; getCharArrayWithCharSet is the explicit variant. */ public static byte[][] getCharArray(String[] values, String dataType) { + String[] dataArr = getValues(values, dataType); + byte[][] valueArr = new byte[dataArr.length][]; + int i = 0; + for (String data : dataArr) { + valueArr[i++] = "null".equals(data.trim()) ? null : data.trim().getBytes(); + } + return valueArr; + } + + /** Converts the trimmed tokens for dataType to bytes in the given charset, mapping the text null to a null element. */ public static byte[][] getCharArrayWithCharSet(String[] values, String dataType, Charset charSet) { + String[] dataArr = getValues(values, dataType); + byte[][] valueArr = new byte[dataArr.length][]; + int i = 0; + for (String data : dataArr) { + valueArr[i++] = "null".equals(data.trim()) ? 
null : data.trim().getBytes(charSet); + } + return valueArr; + } + + /** Hex-decodes the trimmed tokens for dataType through hexStringToByteArray, mapping the text null to a null element. */ public static byte[][] getBinaryValues(String[] values, String dataType) { + String[] dataArr = getValues(values, dataType); + byte[][] valueArr = new byte[dataArr.length][]; + int i = 0; + for (String data : dataArr) { + valueArr[i++] = "null".equals(data.trim()) ? null : hexStringToByteArray(data.trim()); + } + return valueArr; + } + + /** Returns the comma-separated tokens from the first entry of values that starts with dataType, taking the text between the first and second equals sign. When no entry matches, the empty string is split, which yields a single empty token rather than an empty array. */ public static String[] getValues(String[] values, String dataType) { + String value = ""; + for (String val : values) { + if (val.startsWith(dataType)) { + value = val.split("=")[1]; + break; + } + } + return value.split(","); + } +} diff --git a/src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/Table.java b/src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/Table.java new file mode 100644 index 000000000..213716266 --- /dev/null +++ b/src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/Table.java @@ -0,0 +1,234 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.arrow.adapter.jdbc; + +import java.math.BigDecimal; +import java.nio.charset.StandardCharsets; + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; + +/** + * POJO to handle the YAML data from the test YAML file. + */ +@JsonIgnoreProperties(ignoreUnknown = true) +public class Table { + private String name; + private String type; + private String vector; + private String timezone; + private String create; + private String[] data; + private String query; + private String drop; + private String[] values; + private String[] vectors; + private int rowCount; + + public Table() { + } + + public String getName() { + return name; + } + + public void setName(String name) { + this.name = name; + } + + public String getType() { + return type; + } + + public void setType(String type) { + this.type = type; + } + + public String getVector() { + return vector; + } + + public void setVector(String vector) { + this.vector = vector; + } + + public String[] getValues() { + return values; + } + + public void setValues(String[] values) { + this.values = values; + } + + /** Parses every entry of values with Long.parseLong. There is no handling for a null values array or for null or non-numeric entries; those surface as exceptions. */ public Long[] getLongValues() { + Long[] arr = new Long[values.length]; + int i = 0; + for (String str : values) { + arr[i++] = Long.parseLong(str); + } + return arr; + } + + /** Parses every entry of values with Integer.parseInt; no null handling. */ public Integer[] getIntValues() { + Integer[] arr = new Integer[values.length]; + int i = 0; + for (String str : values) { + arr[i++] = Integer.parseInt(str); + } + return arr; + } + + /** Converts every entry of values with Boolean.parseBoolean, which yields true only for the text true (ignoring case) and false for anything else, including 1. */ public Boolean[] getBoolValues() { + Boolean[] arr = new Boolean[values.length]; + int i = 0; + for (String str : values) { + arr[i++] = Boolean.parseBoolean(str); + } + return arr; + } + + /** Builds a BigDecimal from every entry of values using the String constructor; no null handling. */ public BigDecimal[] getBigDecimalValues() { + BigDecimal[] arr = new BigDecimal[values.length]; + int i = 0; + for (String str : values) { + arr[i++] = new BigDecimal(str); + } + return arr; + } + + /** Parses every entry of values with Double.parseDouble; no null handling. */ public Double[] getDoubleValues() { + Double[] arr = new Double[values.length]; + int i = 0; + for (String str : values) { + arr[i++] = 
Double.parseDouble(str); + } + return arr; + } + + /** Parses every entry of values with Float.parseFloat; no null handling. */ public Float[] getFloatValues() { + Float[] arr = new Float[values.length]; + int i = 0; + for (String str : values) { + arr[i++] = Float.parseFloat(str); + } + return arr; + } + + /** Treats every entry of values as a hex string and decodes it to bytes. */ public byte[][] getBinaryValues() { + return getHexToByteArray(values); + } + + /** Returns the UTF-8 bytes of every entry of values. */ public byte[][] getVarCharValues() { + return getByteArray(values); + } + + /** Blob values use the same hex decoding as getBinaryValues. */ public byte[][] getBlobValues() { + return getBinaryValues(); + } + + /** Returns the UTF-8 bytes of every entry of values. */ public byte[][] getClobValues() { + return getByteArray(values); + } + + /** Returns the UTF-8 bytes of every entry of values. */ public byte[][] getCharValues() { + return getByteArray(values); + } + + public String getCreate() { + return create; + } + + public void setCreate(String create) { + this.create = create; + } + + public String[] getData() { + return data; + } + + public void setData(String[] data) { + this.data = data; + } + + public String getQuery() { + return query; + } + + public void setQuery(String query) { + this.query = query; + } + + public String getDrop() { + return drop; + } + + public void setDrop(String drop) { + this.drop = drop; + } + + public String getTimezone() { + return timezone; + } + + public void setTimezone(String timezone) { + this.timezone = timezone; + } + + public String[] getVectors() { + return vectors; + } + + public void setVectors(String[] vectors) { + this.vectors = vectors; + } + + public int getRowCount() { + return rowCount; + } + + public void setRowCount(int rowCount) { + this.rowCount = rowCount; + } + + /** Encodes each string as UTF-8 bytes; a null element would throw NullPointerException. */ static byte[][] getByteArray(String[] data) { + byte[][] byteArr = new byte[data.length][]; + + for (int i = 0; i < data.length; i++) { + byteArr[i] = data[i].getBytes(StandardCharsets.UTF_8); + } + return byteArr; + } + + /** Hex-decodes each string through hexStringToByteArray. */ static byte[][] getHexToByteArray(String[] data) { + byte[][] byteArr = new byte[data.length][]; + + for (int i = 0; i < data.length; i++) { + byteArr[i] = hexStringToByteArray(data[i]); + } + return byteArr; + } + + /** Decodes consecutive pairs of hex digits into bytes. NOTE(review): an odd-length input reaches charAt(i + 1) past the end of the string and a non-hex character makes Character.digit return -1; this body duplicates JdbcToArrowTestHelper.hexStringToByteArray. */ static byte[] hexStringToByteArray(String s) { + int len = s.length(); + byte[] data 
= new byte[len / 2]; + for (int i = 0; i < len; i += 2) { + data[i / 2] = (byte) ((Character.digit(s.charAt(i), 16) << 4) + + Character.digit(s.charAt(i + 1), 16)); + } + return data; + } +} diff --git a/src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/consumer/AbstractConsumerTest.java b/src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/consumer/AbstractConsumerTest.java new file mode 100644 index 000000000..96bac4221 --- /dev/null +++ b/src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/consumer/AbstractConsumerTest.java @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.arrow.adapter.jdbc.consumer; + +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.RootAllocator; +import org.junit.After; +import org.junit.Before; + +/** Base class for consumer tests: gives each test method its own allocator, created before the test and closed after it. */ public abstract class AbstractConsumerTest { + + /* Re-created in setUp for every test method; subclasses allocate their vectors from it. */ protected BufferAllocator allocator; + + /** Creates a fresh RootAllocator constructed with Long.MAX_VALUE. */ @Before + public void setUp() { + allocator = new RootAllocator(Long.MAX_VALUE); + } + + /** Closes the per-test allocator. NOTE(review): presumably close() fails when buffers are still outstanding, which would surface leaks in the test; confirm against the allocator documentation. */ @After + public void tearDown() { + allocator.close(); + } + +} diff --git a/src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/consumer/BinaryConsumerTest.java b/src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/consumer/BinaryConsumerTest.java new file mode 100644 index 000000000..a368023d4 --- /dev/null +++ b/src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/consumer/BinaryConsumerTest.java @@ -0,0 +1,117 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.arrow.adapter.jdbc.consumer; + +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import java.io.ByteArrayInputStream; +import java.io.IOException; + +import org.apache.arrow.vector.BaseValueVector; +import org.apache.arrow.vector.VarBinaryVector; +import org.junit.Test; + +/** Feeds byte arrays of various sizes and counts through BinaryConsumer and checks that the target VarBinaryVector ends up with the same contents. */ public class BinaryConsumerTest extends AbstractConsumerTest { + + private static final int INITIAL_VALUE_ALLOCATION = BaseValueVector.INITIAL_VALUE_ALLOCATION; + private static final int DEFAULT_RECORD_BYTE_COUNT = 8; + + /** Callback that drives a BinaryConsumer; it exists so the lambda passed to assertConsume may throw IOException. */ interface InputStreamConsumer { + void consume(BinaryConsumer consumer) throws IOException; + } + + /** Creates a VarBinaryVector and a consumer for it, lets dataConsumer write through the consumer, then asserts that the last set index is expect.length - 1 and that every slot is null or byte-equal to the matching entry of expect. The vector is closed by try-with-resources. */ protected void assertConsume(boolean nullable, InputStreamConsumer dataConsumer, byte[][] expect) throws IOException { + try (final VarBinaryVector vector = new VarBinaryVector("binary", allocator)) { + BinaryConsumer consumer = BinaryConsumer.createConsumer(vector, 0, nullable); + dataConsumer.consume(consumer); + assertEquals(expect.length - 1, vector.getLastSet()); + for (int i = 0; i < expect.length; i++) { + byte[] value = expect[i]; + if (value == null) { + assertTrue(vector.isNull(i)); + } else { + assertArrayEquals(expect[i], vector.get(i)); + } + } + } + } + + /** Builds a deterministic byte pattern of the given length. The cast to byte keeps only the low 8 bits of i % 1024, so the pattern effectively repeats every 256 bytes. */ private byte[] createBytes(int length) { + byte[] bytes = new byte[length]; + for (int i = 0; i < length; i++) { + bytes[i] = (byte) (i % 1024); + } + return bytes; + } + + + /** Helper overload (not annotated as a test): wraps each value in a ByteArrayInputStream, consumes it and advances the writer position, then verifies the vector through assertConsume. A null entry in values would fail in the ByteArrayInputStream constructor. */ public void testConsumeInputStream(byte[][] values, boolean nullable) throws IOException { + assertConsume(nullable, binaryConsumer -> { + for (byte[] value : values) { + binaryConsumer.consume(new ByteArrayInputStream(value)); + binaryConsumer.moveWriterPosition(); + } + }, values); + } + + /** Runs the helper with single and multiple records whose sizes range from DEFAULT_RECORD_BYTE_COUNT up to ten times INITIAL_VALUE_ALLOCATION * DEFAULT_RECORD_BYTE_COUNT, and finally with twice INITIAL_VALUE_ALLOCATION small records; every case passes nullable = false. The sizes are presumably chosen to force buffer reallocation; confirm against BinaryConsumer. */ @Test + public void testConsumeInputStream() throws IOException { + testConsumeInputStream(new byte[][]{ + createBytes(DEFAULT_RECORD_BYTE_COUNT) + }, false); + + testConsumeInputStream(new byte[][]{ + 
createBytes(DEFAULT_RECORD_BYTE_COUNT), + createBytes(DEFAULT_RECORD_BYTE_COUNT) + }, false); + + testConsumeInputStream(new byte[][]{ + createBytes(DEFAULT_RECORD_BYTE_COUNT * 2), + createBytes(DEFAULT_RECORD_BYTE_COUNT), + createBytes(DEFAULT_RECORD_BYTE_COUNT) + }, false); + + testConsumeInputStream(new byte[][]{ + createBytes(INITIAL_VALUE_ALLOCATION * DEFAULT_RECORD_BYTE_COUNT) + }, false); + + testConsumeInputStream(new byte[][]{ + createBytes(INITIAL_VALUE_ALLOCATION * DEFAULT_RECORD_BYTE_COUNT * 10), + }, false); + + testConsumeInputStream(new byte[][]{ + createBytes(INITIAL_VALUE_ALLOCATION * DEFAULT_RECORD_BYTE_COUNT), + createBytes(INITIAL_VALUE_ALLOCATION * DEFAULT_RECORD_BYTE_COUNT) + }, false); + + testConsumeInputStream(new byte[][]{ + createBytes(INITIAL_VALUE_ALLOCATION * DEFAULT_RECORD_BYTE_COUNT), + createBytes(DEFAULT_RECORD_BYTE_COUNT), + createBytes(INITIAL_VALUE_ALLOCATION * DEFAULT_RECORD_BYTE_COUNT) + }, false); + + byte[][] testRecords = new byte[INITIAL_VALUE_ALLOCATION * 2][]; + for (int i = 0; i < testRecords.length; i++) { + testRecords[i] = createBytes(DEFAULT_RECORD_BYTE_COUNT); + } + testConsumeInputStream(testRecords, false); + } + +} diff --git a/src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcAliasToArrowTest.java b/src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcAliasToArrowTest.java new file mode 100644 index 000000000..a6e6b22fc --- /dev/null +++ b/src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcAliasToArrowTest.java @@ -0,0 +1,140 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.adapter.jdbc.h2; + +import static org.apache.arrow.adapter.jdbc.AbstractJdbcToArrowTest.sqlToArrow; +import static org.junit.Assert.assertEquals; + +import java.sql.Connection; +import java.sql.DriverManager; +import java.sql.PreparedStatement; +import java.sql.ResultSet; +import java.sql.ResultSetMetaData; +import java.sql.SQLException; +import java.sql.Statement; +import java.util.List; + +import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.vector.VectorSchemaRoot; +import org.apache.arrow.vector.types.pojo.Field; +import org.apache.arrow.vector.types.pojo.Schema; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +/** Tests that column aliases in a query against an in-memory H2 table end up as the field names of the converted Arrow schema. */ public class JdbcAliasToArrowTest { + private Connection conn = null; + + private static final String CREATE_STATEMENT = + "CREATE TABLE example_table (id INTEGER);"; + private static final String INSERT_STATEMENT = + "INSERT INTO example_table (id) VALUES (?);"; + private static final String QUERY = "SELECT id as a, id as b FROM example_table;"; + private static final String DROP_STATEMENT = "DROP TABLE example_table;"; + private static final String ORIGINAL_COLUMN_NAME = "ID"; + private static final String COLUMN_A = "A"; + private static final String COLUMN_B = "B"; + + /** Loads the H2 driver, opens the in-memory database and creates the example table before each test. */ @Before + public void setUp() throws Exception { + String url = "jdbc:h2:mem:JdbcAliasToArrowTest"; + String driver = "org.h2.Driver"; + Class.forName(driver); + conn = DriverManager.getConnection(url); + try (Statement stmt = conn.createStatement()) { + stmt.executeUpdate(CREATE_STATEMENT); + 
} + } + + /** + * Test h2 database query with alias for column name and column label. + * To verify that reading a field alias from an H2 database works as expected. + * If this test fails, something is either wrong with the setup, + * or the H2 SQL behavior changed. + * NOTE(review): the Statement created inline for executeQuery is never closed explicitly; only the ResultSet is. + */ + @Test + public void testReadH2Alias() throws Exception { + // insert rows + int rowCount = 4; + insertRows(rowCount); + + try (ResultSet resultSet = conn.createStatement().executeQuery(QUERY)) { + ResultSetMetaData rsmd = resultSet.getMetaData(); + assertEquals(2, rsmd.getColumnCount()); + + // check column name and column label + assertEquals(ORIGINAL_COLUMN_NAME, rsmd.getColumnName(1)); + assertEquals(COLUMN_A, rsmd.getColumnLabel(1)); + assertEquals(ORIGINAL_COLUMN_NAME, rsmd.getColumnName(2)); + assertEquals(COLUMN_B, rsmd.getColumnLabel(2)); + + int rowNum = 0; + + while (resultSet.next()) { + assertEquals(rowNum, resultSet.getInt(COLUMN_A)); + assertEquals(rowNum, resultSet.getInt(COLUMN_B)); + ++rowNum; + } + + assertEquals(rowCount, rowNum); + } + } + + /** + * Test that converting JDBC query results with aliases to Arrow works as expected. + * Arrow result schema name should be field alias name. 
+ * NOTE(review): neither the VectorSchemaRoot nor the RootAllocator created here is closed, and the two field-name assertions pass (actual, expected) rather than (expected, actual), which only affects the failure message. + */ + @Test + public void testJdbcAliasToArrow() throws Exception { + int rowCount = 4; + insertRows(rowCount); + + try (ResultSet resultSet = conn.createStatement().executeQuery(QUERY)) { + final VectorSchemaRoot vector = + sqlToArrow(resultSet, new RootAllocator(Integer.MAX_VALUE)); + + assertEquals(rowCount, vector.getRowCount()); + Schema vectorSchema = vector.getSchema(); + List<Field> vectorFields = vectorSchema.getFields(); + assertEquals(vectorFields.get(0).getName(), COLUMN_A); + assertEquals(vectorFields.get(1).getName(), COLUMN_B); + } + } + + /** Drops the example table and always closes the connection afterwards. NOTE(review): if setUp failed before conn was assigned, conn.createStatement() throws NullPointerException before the null check in the finally block is reached. */ @After + public void tearDown() throws SQLException { + try (Statement stmt = conn.createStatement()) { + stmt.executeUpdate(DROP_STATEMENT); + } finally { + if (conn != null) { + conn.close(); + conn = null; + } + } + } + + /** Inserts numRows rows whose id values run from 0 to numRows - 1, reusing one PreparedStatement that is closed by try-with-resources. */ private void insertRows(int numRows) throws SQLException { + // Insert [numRows] Rows + try (PreparedStatement stmt = conn.prepareStatement(INSERT_STATEMENT)) { + for (int i = 0; i < numRows; ++i) { + stmt.setInt(1, i); + stmt.executeUpdate(); + } + } + } +} diff --git a/src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowArrayTest.java b/src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowArrayTest.java new file mode 100644 index 000000000..b7dc1ee58 --- /dev/null +++ b/src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowArrayTest.java @@ -0,0 +1,375 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.adapter.jdbc.h2; + +import static org.apache.arrow.adapter.jdbc.AbstractJdbcToArrowTest.sqlToArrow; +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; + +import java.sql.Array; +import java.sql.Connection; +import java.sql.DriverManager; +import java.sql.PreparedStatement; +import java.sql.ResultSet; +import java.sql.ResultSetMetaData; +import java.sql.SQLException; +import java.sql.Statement; +import java.sql.Types; +import java.util.HashMap; +import java.util.Map; + +import org.apache.arrow.adapter.jdbc.JdbcFieldInfo; +import org.apache.arrow.adapter.jdbc.JdbcToArrowConfig; +import org.apache.arrow.adapter.jdbc.JdbcToArrowConfigBuilder; +import org.apache.arrow.adapter.jdbc.JdbcToArrowUtils; +import org.apache.arrow.memory.ArrowBuf; +import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.vector.Float4Vector; +import org.apache.arrow.vector.IntVector; +import org.apache.arrow.vector.VarCharVector; +import org.apache.arrow.vector.VectorSchemaRoot; +import org.apache.arrow.vector.complex.ListVector; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +public class JdbcToArrowArrayTest { + private Connection conn = null; + + private static final String CREATE_STATEMENT = + "CREATE TABLE array_table (id INTEGER, int_array ARRAY, float_array ARRAY, string_array ARRAY);"; + private static final String INSERT_STATEMENT = + "INSERT INTO array_table (id, int_array, float_array, string_array) VALUES 
(?, ?, ?, ?);"; + private static final String QUERY = "SELECT int_array, float_array, string_array FROM array_table ORDER BY id;"; + private static final String DROP_STATEMENT = "DROP TABLE array_table;"; + + private static Map<String, JdbcFieldInfo> arrayFieldMapping; + + private static final String INT_ARRAY_FIELD_NAME = "INT_ARRAY"; + private static final String FLOAT_ARRAY_FIELD_NAME = "FLOAT_ARRAY"; + private static final String STRING_ARRAY_FIELD_NAME = "STRING_ARRAY"; + + @Before + public void setUp() throws Exception { + String url = "jdbc:h2:mem:JdbcToArrowTest"; + String driver = "org.h2.Driver"; + Class.forName(driver); + conn = DriverManager.getConnection(url); + try (Statement stmt = conn.createStatement()) { + stmt.executeUpdate(CREATE_STATEMENT); + } + + arrayFieldMapping = new HashMap<String, JdbcFieldInfo>(); + arrayFieldMapping.put(INT_ARRAY_FIELD_NAME, new JdbcFieldInfo(Types.INTEGER)); + arrayFieldMapping.put(FLOAT_ARRAY_FIELD_NAME, new JdbcFieldInfo(Types.REAL)); + arrayFieldMapping.put(STRING_ARRAY_FIELD_NAME, new JdbcFieldInfo(Types.VARCHAR)); + } + + // This test verifies reading an array field from an H2 database + // works as expected. If this test fails, something is either wrong + // with the setup, or the H2 SQL behavior changed. 
+ @Test + public void testReadH2Array() throws Exception { + int rowCount = 4; + + Integer[][] intArrays = generateIntegerArrayField(rowCount); + Float[][] floatArrays = generateFloatArrayField(rowCount); + String[][] strArrays = generateStringArrayField(rowCount); + + insertRows(rowCount, intArrays, floatArrays, strArrays); + + try (ResultSet resultSet = conn.createStatement().executeQuery(QUERY)) { + ResultSetMetaData rsmd = resultSet.getMetaData(); + assertEquals(3, rsmd.getColumnCount()); + + for (int i = 1; i <= rsmd.getColumnCount(); ++i) { + assertEquals(Types.ARRAY, rsmd.getColumnType(i)); + } + + int rowNum = 0; + + while (resultSet.next()) { + Array intArray = resultSet.getArray(INT_ARRAY_FIELD_NAME); + assertFalse(resultSet.wasNull()); + try (ResultSet rs = intArray.getResultSet()) { + int arrayIndex = 0; + while (rs.next()) { + assertEquals(intArrays[rowNum][arrayIndex].intValue(), rs.getInt(2)); + ++arrayIndex; + } + assertEquals(intArrays[rowNum].length, arrayIndex); + } + + Array floatArray = resultSet.getArray(FLOAT_ARRAY_FIELD_NAME); + assertFalse(resultSet.wasNull()); + try (ResultSet rs = floatArray.getResultSet()) { + int arrayIndex = 0; + while (rs.next()) { + assertEquals(floatArrays[rowNum][arrayIndex].floatValue(), rs.getFloat(2), 0.001); + ++arrayIndex; + } + assertEquals(floatArrays[rowNum].length, arrayIndex); + } + + Array strArray = resultSet.getArray(STRING_ARRAY_FIELD_NAME); + assertFalse(resultSet.wasNull()); + try (ResultSet rs = strArray.getResultSet()) { + int arrayIndex = 0; + while (rs.next()) { + assertEquals(strArrays[rowNum][arrayIndex], rs.getString(2)); + ++arrayIndex; + } + assertEquals(strArrays[rowNum].length, arrayIndex); + } + + ++rowNum; + } + + assertEquals(rowCount, rowNum); + } + } + + @Test + public void testJdbcToArrow() throws Exception { + int rowCount = 4; + + Integer[][] intArrays = generateIntegerArrayField(rowCount); + Float[][] floatArrays = generateFloatArrayField(rowCount); + String[][] strArrays = 
generateStringArrayField(rowCount); + + insertRows(rowCount, intArrays, floatArrays, strArrays); + + final JdbcToArrowConfigBuilder builder = + new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), JdbcToArrowUtils.getUtcCalendar(), false); + builder.setArraySubTypeByColumnNameMap(arrayFieldMapping); + + final JdbcToArrowConfig config = builder.build(); + + try (ResultSet resultSet = conn.createStatement().executeQuery(QUERY)) { + final VectorSchemaRoot vector = sqlToArrow(resultSet, config); + + assertEquals(rowCount, vector.getRowCount()); + + assertIntegerVectorEquals((ListVector) vector.getVector(INT_ARRAY_FIELD_NAME), rowCount, intArrays); + assertFloatVectorEquals((ListVector) vector.getVector(FLOAT_ARRAY_FIELD_NAME), rowCount, floatArrays); + assertStringVectorEquals((ListVector) vector.getVector(STRING_ARRAY_FIELD_NAME), rowCount, strArrays); + } + } + + @Test + public void testJdbcToArrowWithNulls() throws Exception { + int rowCount = 4; + + Integer[][] intArrays = { + null, + {0}, + {1}, + {}, + }; + + Float[][] floatArrays = { + { 2.0f }, + null, + { 3.0f }, + {}, + }; + + String[][] stringArrays = { + {"4"}, + null, + {"5"}, + {}, + }; + + insertRows(rowCount, intArrays, floatArrays, stringArrays); + + final JdbcToArrowConfigBuilder builder = + new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), JdbcToArrowUtils.getUtcCalendar(), false); + builder.setArraySubTypeByColumnNameMap(arrayFieldMapping); + + final JdbcToArrowConfig config = builder.build(); + + try (ResultSet resultSet = conn.createStatement().executeQuery(QUERY)) { + final VectorSchemaRoot vector = sqlToArrow(resultSet, config); + + assertEquals(rowCount, vector.getRowCount()); + + assertIntegerVectorEquals((ListVector) vector.getVector(INT_ARRAY_FIELD_NAME), rowCount, intArrays); + assertFloatVectorEquals((ListVector) vector.getVector(FLOAT_ARRAY_FIELD_NAME), rowCount, floatArrays); + assertStringVectorEquals((ListVector) vector.getVector(STRING_ARRAY_FIELD_NAME), 
rowCount, stringArrays); + } + } + + private void assertIntegerVectorEquals(ListVector listVector, int rowCount, Integer[][] expectedValues) { + IntVector vector = (IntVector) listVector.getDataVector(); + ArrowBuf offsetBuffer = listVector.getOffsetBuffer(); + + int prevOffset = 0; + for (int row = 0; row < rowCount; ++row) { + int offset = offsetBuffer.getInt((row + 1) * ListVector.OFFSET_WIDTH); + + if (expectedValues[row] == null) { + assertEquals(0, listVector.isSet(row)); + assertEquals(0, offset - prevOffset); + continue; + } + + assertEquals(1, listVector.isSet(row)); + assertEquals(expectedValues[row].length, offset - prevOffset); + + for (int i = prevOffset; i < offset; ++i) { + assertEquals(expectedValues[row][i - prevOffset].intValue(), vector.get(i)); + } + + prevOffset = offset; + } + } + + private void assertFloatVectorEquals(ListVector listVector, int rowCount, Float[][] expectedValues) { + Float4Vector vector = (Float4Vector) listVector.getDataVector(); + ArrowBuf offsetBuffer = listVector.getOffsetBuffer(); + + int prevOffset = 0; + for (int row = 0; row < rowCount; ++row) { + int offset = offsetBuffer.getInt((row + 1) * ListVector.OFFSET_WIDTH); + + if (expectedValues[row] == null) { + assertEquals(0, listVector.isSet(row)); + assertEquals(0, offset - prevOffset); + continue; + } + + assertEquals(1, listVector.isSet(row)); + assertEquals(expectedValues[row].length, offset - prevOffset); + + for (int i = prevOffset; i < offset; ++i) { + assertEquals(expectedValues[row][i - prevOffset].floatValue(), vector.get(i), 0); + } + + prevOffset = offset; + } + } + + private void assertStringVectorEquals(ListVector listVector, int rowCount, String[][] expectedValues) { + VarCharVector vector = (VarCharVector) listVector.getDataVector(); + ArrowBuf offsetBuffer = listVector.getOffsetBuffer(); + + int prevOffset = 0; + for (int row = 0; row < rowCount; ++row) { + int offset = offsetBuffer.getInt((row + 1) * ListVector.OFFSET_WIDTH); + + if 
(expectedValues[row] == null) { + assertEquals(0, listVector.isSet(row)); + assertEquals(0, offset - prevOffset); + continue; + } + + assertEquals(1, listVector.isSet(row)); + assertEquals(expectedValues[row].length, offset - prevOffset); + for (int i = prevOffset; i < offset; ++i) { + assertArrayEquals(expectedValues[row][i - prevOffset].getBytes(), vector.get(i)); + } + + prevOffset = offset; + } + } + + @After + public void tearDown() throws SQLException { + try (Statement stmt = conn.createStatement()) { + stmt.executeUpdate(DROP_STATEMENT); + } finally { + if (conn != null) { + conn.close(); + conn = null; + } + } + } + + private Integer[][] generateIntegerArrayField(int numRows) { + Integer[][] result = new Integer[numRows][]; + + for (int i = 0; i < numRows; ++i) { + int val = i * 4; + result[i] = new Integer[]{val, val + 1, val + 2, val + 3}; + } + + return result; + } + + private Float[][] generateFloatArrayField(int numRows) { + Float[][] result = new Float[numRows][]; + + for (int i = 0; i < numRows; ++i) { + int val = i * 4; + result[i] = new Float[]{(float) val, (float) val + 1, (float) val + 2, (float) val + 3}; + } + + return result; + } + + private String[][] generateStringArrayField(int numRows) { + String[][] result = new String[numRows][]; + + for (int i = 0; i < numRows; ++i) { + int val = i * 4; + result[i] = new String[]{ + String.valueOf(val), + String.valueOf(val + 1), + String.valueOf(val + 2), + String.valueOf(val + 3) }; + } + + return result; + } + + private void insertRows( + int numRows, + Integer[][] integerArrays, + Float[][] floatArrays, + String[][] strArrays) + throws SQLException { + + // Insert 4 Rows + try (PreparedStatement stmt = conn.prepareStatement(INSERT_STATEMENT)) { + + for (int i = 0; i < numRows; ++i) { + Integer[] integerArray = integerArrays[i]; + Float[] floatArray = floatArrays[i]; + String[] strArray = strArrays[i]; + + Array intArray = conn.createArrayOf("INT", integerArray); + Array realArray = 
conn.createArrayOf("REAL", floatArray); + Array varcharArray = conn.createArrayOf("VARCHAR", strArray); + + // Insert Arrays of 4 Values in Each Row + stmt.setInt(1, i); + stmt.setArray(2, intArray); + stmt.setArray(3, realArray); + stmt.setArray(4, varcharArray); + + stmt.executeUpdate(); + + intArray.free(); + realArray.free(); + varcharArray.free(); + } + } + } +} diff --git a/src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowCharSetTest.java b/src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowCharSetTest.java new file mode 100644 index 000000000..b548c9169 --- /dev/null +++ b/src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowCharSetTest.java @@ -0,0 +1,155 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.arrow.adapter.jdbc.h2; + +import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertVarcharVectorValues; +import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.getCharArrayWithCharSet; + +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.sql.DriverManager; +import java.sql.ResultSetMetaData; +import java.sql.SQLException; +import java.sql.Statement; +import java.util.Arrays; +import java.util.Calendar; +import java.util.Collection; + +import org.apache.arrow.adapter.jdbc.AbstractJdbcToArrowTest; +import org.apache.arrow.adapter.jdbc.JdbcToArrowConfig; +import org.apache.arrow.adapter.jdbc.JdbcToArrowConfigBuilder; +import org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper; +import org.apache.arrow.adapter.jdbc.JdbcToArrowUtils; +import org.apache.arrow.adapter.jdbc.Table; +import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.vector.VarCharVector; +import org.apache.arrow.vector.VectorSchemaRoot; +import org.apache.arrow.vector.types.pojo.Schema; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.junit.runners.Parameterized.Parameters; + +/** + * JUnit Test Class which contains methods to test JDBC to Arrow data conversion functionality with UTF-8 Charset, + * including the multi-byte CJK characters for H2 database. + */ +@RunWith(Parameterized.class) +public class JdbcToArrowCharSetTest extends AbstractJdbcToArrowTest { + + private static final String[] testFiles = { + "h2/test1_charset_h2.yml", + "h2/test1_charset_ch_h2.yml", + "h2/test1_charset_jp_h2.yml", + "h2/test1_charset_kr_h2.yml" + }; + + /** + * Constructor which populates the table object for each test iteration. 
+ * + * @param table Table oject + */ + public JdbcToArrowCharSetTest(Table table) { + this.table = table; + } + + /** + * This method creates Connection object and DB table and also populate data into table for test. + * + * @throws SQLException on error + * @throws ClassNotFoundException on error + */ + @Before + public void setUp() throws SQLException, ClassNotFoundException { + String url = "jdbc:h2:mem:JdbcToArrowTest?characterEncoding=UTF-8"; + String driver = "org.h2.Driver"; + Class.forName(driver); + conn = DriverManager.getConnection(url); + try (Statement stmt = conn.createStatement();) { + stmt.executeUpdate(table.getCreate()); + for (String insert : table.getData()) { + stmt.executeUpdate(insert); + } + } + } + + /** + * Get the test data as a collection of Table objects for each test iteration. + * + * @return Collection of Table objects + * @throws SQLException on error + * @throws ClassNotFoundException on error + * @throws IOException on error + */ + @Parameters + public static Collection<Object[]> getTestData() throws SQLException, ClassNotFoundException, IOException { + return Arrays.asList(prepareTestData(testFiles, JdbcToArrowCharSetTest.class)); + } + + /** + * Test Method to test JdbcToArrow Functionality for various H2 DB based datatypes with UTF-8 Charset, including + * the multi-byte CJK characters. 
+ */ + @Test + public void testJdbcToArrowValues() throws SQLException, IOException { + testDataSets(sqlToArrow(conn, table.getQuery(), new RootAllocator(Integer.MAX_VALUE), + Calendar.getInstance())); + testDataSets(sqlToArrow(conn, table.getQuery(), new RootAllocator(Integer.MAX_VALUE))); + testDataSets(sqlToArrow(conn.createStatement().executeQuery(table.getQuery()), + new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance())); + testDataSets(sqlToArrow(conn.createStatement().executeQuery(table.getQuery()))); + testDataSets(sqlToArrow(conn.createStatement().executeQuery(table.getQuery()), + new RootAllocator(Integer.MAX_VALUE))); + testDataSets(sqlToArrow(conn.createStatement().executeQuery(table.getQuery()), + Calendar.getInstance())); + testDataSets(sqlToArrow( + conn.createStatement().executeQuery(table.getQuery()), + new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()).build())); + testDataSets(sqlToArrow( + conn, + table.getQuery(), + new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()).build())); + } + + @Test + public void testJdbcSchemaMetadata() throws SQLException { + JdbcToArrowConfig config = new JdbcToArrowConfigBuilder(new RootAllocator(0), Calendar.getInstance(), true).build(); + ResultSetMetaData rsmd = conn.createStatement().executeQuery(table.getQuery()).getMetaData(); + Schema schema = JdbcToArrowUtils.jdbcToArrowSchema(rsmd, config); + JdbcToArrowTestHelper.assertFieldMetadataMatchesResultSetMetadata(rsmd, schema); + } + + /** + * This method calls the assert methods for various DataSets. 
+ * + * @param root VectorSchemaRoot for test + */ + public void testDataSets(VectorSchemaRoot root) { + JdbcToArrowTestHelper.assertFieldMetadataIsEmpty(root); + + assertVarcharVectorValues((VarCharVector) root.getVector(CLOB), table.getRowCount(), + getCharArrayWithCharSet(table.getValues(), CLOB, StandardCharsets.UTF_8)); + + assertVarcharVectorValues((VarCharVector) root.getVector(VARCHAR), table.getRowCount(), + getCharArrayWithCharSet(table.getValues(), VARCHAR, StandardCharsets.UTF_8)); + + assertVarcharVectorValues((VarCharVector) root.getVector(CHAR), table.getRowCount(), + getCharArrayWithCharSet(table.getValues(), CHAR, StandardCharsets.UTF_8)); + } +} diff --git a/src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowDataTypesTest.java b/src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowDataTypesTest.java new file mode 100644 index 000000000..40db5c235 --- /dev/null +++ b/src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowDataTypesTest.java @@ -0,0 +1,248 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.arrow.adapter.jdbc.h2; + +import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertBigIntVectorValues; +import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertBitVectorValues; +import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertBooleanVectorValues; +import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertDateVectorValues; +import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertDecimalVectorValues; +import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertFloat4VectorValues; +import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertFloat8VectorValues; +import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertIntVectorValues; +import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertNullVectorValues; +import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertSmallIntVectorValues; +import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertTimeStampVectorValues; +import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertTimeVectorValues; +import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertTinyIntVectorValues; +import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertVarBinaryVectorValues; +import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertVarcharVectorValues; + +import java.io.IOException; +import java.sql.ResultSetMetaData; +import java.sql.SQLException; +import java.util.Arrays; +import java.util.Calendar; +import java.util.Collection; + +import org.apache.arrow.adapter.jdbc.AbstractJdbcToArrowTest; +import org.apache.arrow.adapter.jdbc.JdbcToArrowConfig; +import org.apache.arrow.adapter.jdbc.JdbcToArrowConfigBuilder; +import org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper; +import org.apache.arrow.adapter.jdbc.JdbcToArrowUtils; +import org.apache.arrow.adapter.jdbc.Table; +import 
org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.vector.BigIntVector; +import org.apache.arrow.vector.BitVector; +import org.apache.arrow.vector.DateDayVector; +import org.apache.arrow.vector.DecimalVector; +import org.apache.arrow.vector.Float4Vector; +import org.apache.arrow.vector.Float8Vector; +import org.apache.arrow.vector.IntVector; +import org.apache.arrow.vector.NullVector; +import org.apache.arrow.vector.SmallIntVector; +import org.apache.arrow.vector.TimeMilliVector; +import org.apache.arrow.vector.TimeStampVector; +import org.apache.arrow.vector.TinyIntVector; +import org.apache.arrow.vector.VarBinaryVector; +import org.apache.arrow.vector.VarCharVector; +import org.apache.arrow.vector.VectorSchemaRoot; +import org.apache.arrow.vector.types.pojo.Schema; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.junit.runners.Parameterized.Parameters; + +/** + * JUnit Test Class which contains methods to test JDBC to Arrow data conversion functionality with various data types + * for H2 database using multiple test data files. 
+ */ +@RunWith(Parameterized.class) +public class JdbcToArrowDataTypesTest extends AbstractJdbcToArrowTest { + + private static final String BIGINT = "big_int"; + private static final String BINARY = "binary"; + private static final String BIT = "bit"; + private static final String BLOB = "blob"; + private static final String BOOL = "bool"; + private static final String CHAR = "char"; + private static final String CLOB = "clob"; + private static final String DATE = "date"; + private static final String DECIMAL = "decimal"; + private static final String DOUBLE = "double"; + private static final String INT = "int"; + private static final String REAL = "real"; + private static final String SMALLINT = "small_int"; + private static final String TIME = "time"; + private static final String TIMESTAMP = "timestamp"; + private static final String TINYINT = "tiny_int"; + private static final String VARCHAR = "varchar"; + private static final String NULL = "null"; + + private static final String[] testFiles = { + "h2/test1_bigint_h2.yml", + "h2/test1_binary_h2.yml", + "h2/test1_bit_h2.yml", + "h2/test1_blob_h2.yml", + "h2/test1_bool_h2.yml", + "h2/test1_char_h2.yml", + "h2/test1_clob_h2.yml", + "h2/test1_date_h2.yml", + "h2/test1_decimal_h2.yml", + "h2/test1_double_h2.yml", + "h2/test1_int_h2.yml", + "h2/test1_real_h2.yml", + "h2/test1_smallint_h2.yml", + "h2/test1_time_h2.yml", + "h2/test1_timestamp_h2.yml", + "h2/test1_tinyint_h2.yml", + "h2/test1_varchar_h2.yml", + "h2/test1_null_h2.yml" + }; + + /** + * Constructor which populates the table object for each test iteration. + * + * @param table Table object + */ + public JdbcToArrowDataTypesTest(Table table) { + this.table = table; + } + + /** + * Get the test data as a collection of Table objects for each test iteration. 
+ * + * @return Collection of Table objects + * @throws SQLException on error + * @throws ClassNotFoundException on error + * @throws IOException on error + */ + @Parameters + public static Collection<Object[]> getTestData() throws SQLException, ClassNotFoundException, IOException { + return Arrays.asList(prepareTestData(testFiles, JdbcToArrowDataTypesTest.class)); + } + + /** + * Test Method to test JdbcToArrow Functionality for various H2 DB based datatypes. + */ + @Test + public void testJdbcToArrowValues() throws SQLException, IOException { + testDataSets(sqlToArrow(conn, table.getQuery(), new RootAllocator(Integer.MAX_VALUE), + Calendar.getInstance())); + testDataSets(sqlToArrow(conn, table.getQuery(), new RootAllocator(Integer.MAX_VALUE))); + testDataSets(sqlToArrow(conn.createStatement().executeQuery(table.getQuery()), + new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance())); + testDataSets(sqlToArrow(conn.createStatement().executeQuery(table.getQuery()))); + testDataSets(sqlToArrow(conn.createStatement().executeQuery(table.getQuery()), + new RootAllocator(Integer.MAX_VALUE))); + testDataSets(sqlToArrow(conn.createStatement().executeQuery(table.getQuery()), Calendar.getInstance())); + testDataSets(sqlToArrow( + conn.createStatement().executeQuery(table.getQuery()), + new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()).build())); + testDataSets(sqlToArrow( + conn, + table.getQuery(), + new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()).build())); + } + + @Test + public void testJdbcSchemaMetadata() throws SQLException { + JdbcToArrowConfig config = new JdbcToArrowConfigBuilder(new RootAllocator(0), Calendar.getInstance(), true).build(); + ResultSetMetaData rsmd = conn.createStatement().executeQuery(table.getQuery()).getMetaData(); + Schema schema = JdbcToArrowUtils.jdbcToArrowSchema(rsmd, config); + JdbcToArrowTestHelper.assertFieldMetadataMatchesResultSetMetadata(rsmd, 
schema); + } + + /** + * This method calls the assert methods for various DataSets. + * + * @param root VectorSchemaRoot for test + */ + public void testDataSets(VectorSchemaRoot root) { + JdbcToArrowTestHelper.assertFieldMetadataIsEmpty(root); + + switch (table.getType()) { + case BIGINT: + assertBigIntVectorValues((BigIntVector) root.getVector(table.getVector()), table.getValues().length, + table.getLongValues()); + break; + case BINARY: + case BLOB: + assertVarBinaryVectorValues((VarBinaryVector) root.getVector(table.getVector()), table.getValues().length, + table.getBinaryValues()); + break; + case BIT: + assertBitVectorValues((BitVector) root.getVector(table.getVector()), table.getValues().length, + table.getIntValues()); + break; + case BOOL: + assertBooleanVectorValues((BitVector) root.getVector(table.getVector()), table.getValues().length, + table.getBoolValues()); + break; + case CHAR: + case VARCHAR: + case CLOB: + assertVarcharVectorValues((VarCharVector) root.getVector(table.getVector()), table.getValues().length, + table.getCharValues()); + break; + case DATE: + assertDateVectorValues((DateDayVector) root.getVector(table.getVector()), table.getValues().length, + table.getIntValues()); + break; + case TIME: + assertTimeVectorValues((TimeMilliVector) root.getVector(table.getVector()), table.getValues().length, + table.getLongValues()); + break; + case TIMESTAMP: + assertTimeStampVectorValues((TimeStampVector) root.getVector(table.getVector()), table.getValues().length, + table.getLongValues()); + break; + case DECIMAL: + assertDecimalVectorValues((DecimalVector) root.getVector(table.getVector()), table.getValues().length, + table.getBigDecimalValues()); + break; + case DOUBLE: + assertFloat8VectorValues((Float8Vector) root.getVector(table.getVector()), table.getValues().length, + table.getDoubleValues()); + break; + case INT: + assertIntVectorValues((IntVector) root.getVector(table.getVector()), table.getValues().length, + table.getIntValues()); + break; 
+ case SMALLINT: + assertSmallIntVectorValues((SmallIntVector) root.getVector(table.getVector()), table.getValues().length, + table.getIntValues()); + break; + case TINYINT: + assertTinyIntVectorValues((TinyIntVector) root.getVector(table.getVector()), table.getValues().length, + table.getIntValues()); + break; + case REAL: + assertFloat4VectorValues((Float4Vector) root.getVector(table.getVector()), table.getValues().length, + table.getFloatValues()); + break; + case NULL: + assertNullVectorValues((NullVector) root.getVector(table.getVector()), table.getRowCount()); + break; + default: + // do nothing + break; + } + } +} + diff --git a/src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowNullTest.java b/src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowNullTest.java new file mode 100644 index 000000000..71cc70056 --- /dev/null +++ b/src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowNullTest.java @@ -0,0 +1,277 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.arrow.adapter.jdbc.h2; + +import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertBigIntVectorValues; +import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertBitVectorValues; +import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertBooleanVectorValues; +import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertDateVectorValues; +import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertDecimalVectorValues; +import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertFloat4VectorValues; +import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertFloat8VectorValues; +import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertIntVectorValues; +import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertNullValues; +import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertSmallIntVectorValues; +import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertTimeStampVectorValues; +import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertTimeVectorValues; +import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertTinyIntVectorValues; +import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertVarBinaryVectorValues; +import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertVarcharVectorValues; +import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.getBinaryValues; +import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.getBooleanValues; +import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.getCharArray; +import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.getDecimalValues; +import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.getDoubleValues; +import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.getFloatValues; +import static 
org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.getIntValues; +import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.getLongValues; + +import java.io.IOException; +import java.sql.ResultSetMetaData; +import java.sql.SQLException; +import java.util.Arrays; +import java.util.Calendar; +import java.util.Collection; + +import org.apache.arrow.adapter.jdbc.AbstractJdbcToArrowTest; +import org.apache.arrow.adapter.jdbc.JdbcToArrowConfig; +import org.apache.arrow.adapter.jdbc.JdbcToArrowConfigBuilder; +import org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper; +import org.apache.arrow.adapter.jdbc.JdbcToArrowUtils; +import org.apache.arrow.adapter.jdbc.Table; +import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.vector.BigIntVector; +import org.apache.arrow.vector.BitVector; +import org.apache.arrow.vector.DateDayVector; +import org.apache.arrow.vector.DecimalVector; +import org.apache.arrow.vector.Float4Vector; +import org.apache.arrow.vector.Float8Vector; +import org.apache.arrow.vector.IntVector; +import org.apache.arrow.vector.SmallIntVector; +import org.apache.arrow.vector.TimeMilliVector; +import org.apache.arrow.vector.TimeStampVector; +import org.apache.arrow.vector.TinyIntVector; +import org.apache.arrow.vector.VarBinaryVector; +import org.apache.arrow.vector.VarCharVector; +import org.apache.arrow.vector.VectorSchemaRoot; +import org.apache.arrow.vector.types.pojo.Schema; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.junit.runners.Parameterized.Parameters; + +/** + * JUnit Test Class which contains methods to test JDBC to Arrow data conversion functionality with null values for + * H2 database. 
+ */ +@RunWith(Parameterized.class) +public class JdbcToArrowNullTest extends AbstractJdbcToArrowTest { + + private static final String NULL = "null"; + private static final String SELECTED_NULL_ROW = "selected_null_row"; + private static final String SELECTED_NULL_COLUMN = "selected_null_column"; + + private static final String[] testFiles = { + "h2/test1_all_datatypes_null_h2.yml", + "h2/test1_selected_datatypes_null_h2.yml", + "h2/test1_all_datatypes_selected_null_rows_h2.yml" + }; + + /** + * Constructor which populates the table object for each test iteration. + * + * @param table Table object + */ + public JdbcToArrowNullTest(Table table) { + this.table = table; + } + + /** + * Get the test data as a collection of Table objects for each test iteration. + * + * @return Collection of Table objects + * @throws SQLException on error + * @throws ClassNotFoundException on error + * @throws IOException on error + */ + @Parameters + public static Collection<Object[]> getTestData() throws SQLException, ClassNotFoundException, IOException { + return Arrays.asList(prepareTestData(testFiles, JdbcToArrowNullTest.class)); + } + + /** + * Test Method to test JdbcToArrow Functionality for various H2 DB based datatypes with null values. 
+ */ + @Test + public void testJdbcToArrowValues() throws SQLException, IOException { + testDataSets(sqlToArrow(conn, table.getQuery(), new RootAllocator(Integer.MAX_VALUE), + Calendar.getInstance())); + testDataSets(sqlToArrow(conn, table.getQuery(), new RootAllocator(Integer.MAX_VALUE))); + testDataSets(sqlToArrow(conn.createStatement().executeQuery(table.getQuery()), + new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance())); + testDataSets(sqlToArrow(conn.createStatement().executeQuery(table.getQuery()))); + testDataSets(sqlToArrow(conn.createStatement().executeQuery(table.getQuery()), + new RootAllocator(Integer.MAX_VALUE))); + testDataSets(sqlToArrow(conn.createStatement().executeQuery(table.getQuery()), Calendar.getInstance())); + testDataSets(sqlToArrow( + conn.createStatement().executeQuery(table.getQuery()), + new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()).build())); + testDataSets(sqlToArrow( + conn, + table.getQuery(), + new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()).build())); + } + + @Test + public void testJdbcSchemaMetadata() throws SQLException { + JdbcToArrowConfig config = new JdbcToArrowConfigBuilder(new RootAllocator(0), Calendar.getInstance(), true).build(); + ResultSetMetaData rsmd = conn.createStatement().executeQuery(table.getQuery()).getMetaData(); + Schema schema = JdbcToArrowUtils.jdbcToArrowSchema(rsmd, config); + JdbcToArrowTestHelper.assertFieldMetadataMatchesResultSetMetadata(rsmd, schema); + } + + /** + * This method calls the assert methods for various DataSets. 
+ * + * @param root VectorSchemaRoot for test + */ + public void testDataSets(VectorSchemaRoot root) { + JdbcToArrowTestHelper.assertFieldMetadataIsEmpty(root); + + switch (table.getType()) { + case NULL: + sqlToArrowTestNullValues(table.getVectors(), root, table.getRowCount()); + break; + case SELECTED_NULL_COLUMN: + sqlToArrowTestSelectedNullColumnsValues(table.getVectors(), root, table.getRowCount()); + break; + case SELECTED_NULL_ROW: + testAllVectorValues(root); + break; + default: + // do nothing + break; + } + } + + private void testAllVectorValues(VectorSchemaRoot root) { + JdbcToArrowTestHelper.assertFieldMetadataIsEmpty(root); + + assertBigIntVectorValues((BigIntVector) root.getVector(BIGINT), table.getRowCount(), + getLongValues(table.getValues(), BIGINT)); + + assertTinyIntVectorValues((TinyIntVector) root.getVector(TINYINT), table.getRowCount(), + getIntValues(table.getValues(), TINYINT)); + + assertSmallIntVectorValues((SmallIntVector) root.getVector(SMALLINT), table.getRowCount(), + getIntValues(table.getValues(), SMALLINT)); + + assertVarBinaryVectorValues((VarBinaryVector) root.getVector(BINARY), table.getRowCount(), + getBinaryValues(table.getValues(), BINARY)); + + assertVarBinaryVectorValues((VarBinaryVector) root.getVector(BLOB), table.getRowCount(), + getBinaryValues(table.getValues(), BLOB)); + + assertVarcharVectorValues((VarCharVector) root.getVector(CLOB), table.getRowCount(), + getCharArray(table.getValues(), CLOB)); + + assertVarcharVectorValues((VarCharVector) root.getVector(VARCHAR), table.getRowCount(), + getCharArray(table.getValues(), VARCHAR)); + + assertVarcharVectorValues((VarCharVector) root.getVector(CHAR), table.getRowCount(), + getCharArray(table.getValues(), CHAR)); + + assertIntVectorValues((IntVector) root.getVector(INT), table.getRowCount(), + getIntValues(table.getValues(), INT)); + + assertBitVectorValues((BitVector) root.getVector(BIT), table.getRowCount(), + getIntValues(table.getValues(), BIT)); + + 
assertBooleanVectorValues((BitVector) root.getVector(BOOL), table.getRowCount(), + getBooleanValues(table.getValues(), BOOL)); + + assertDateVectorValues((DateDayVector) root.getVector(DATE), table.getRowCount(), + getIntValues(table.getValues(), DATE)); + + assertTimeVectorValues((TimeMilliVector) root.getVector(TIME), table.getRowCount(), + getLongValues(table.getValues(), TIME)); + + assertTimeStampVectorValues((TimeStampVector) root.getVector(TIMESTAMP), table.getRowCount(), + getLongValues(table.getValues(), TIMESTAMP)); + + assertDecimalVectorValues((DecimalVector) root.getVector(DECIMAL), table.getRowCount(), + getDecimalValues(table.getValues(), DECIMAL)); + + assertFloat8VectorValues((Float8Vector) root.getVector(DOUBLE), table.getRowCount(), + getDoubleValues(table.getValues(), DOUBLE)); + + assertFloat4VectorValues((Float4Vector) root.getVector(REAL), table.getRowCount(), + getFloatValues(table.getValues(), REAL)); + } + + /** + * This method assert tests null values in vectors for all the datatypes. 
+ * + * @param vectors Vectors to test + * @param root VectorSchemaRoot for test + * @param rowCount number of rows + */ + public void sqlToArrowTestNullValues(String[] vectors, VectorSchemaRoot root, int rowCount) { + assertNullValues((IntVector) root.getVector(vectors[0]), rowCount); + assertNullValues((BitVector) root.getVector(vectors[1]), rowCount); + assertNullValues((TinyIntVector) root.getVector(vectors[2]), rowCount); + assertNullValues((SmallIntVector) root.getVector(vectors[3]), rowCount); + assertNullValues((BigIntVector) root.getVector(vectors[4]), rowCount); + assertNullValues((DecimalVector) root.getVector(vectors[5]), rowCount); + assertNullValues((Float8Vector) root.getVector(vectors[6]), rowCount); + assertNullValues((Float4Vector) root.getVector(vectors[7]), rowCount); + assertNullValues((TimeMilliVector) root.getVector(vectors[8]), rowCount); + assertNullValues((DateDayVector) root.getVector(vectors[9]), rowCount); + assertNullValues((TimeStampVector) root.getVector(vectors[10]), rowCount); + assertNullValues((VarBinaryVector) root.getVector(vectors[11]), rowCount); + assertNullValues((VarCharVector) root.getVector(vectors[12]), rowCount); + assertNullValues((VarBinaryVector) root.getVector(vectors[13]), rowCount); + assertNullValues((VarCharVector) root.getVector(vectors[14]), rowCount); + assertNullValues((VarCharVector) root.getVector(vectors[15]), rowCount); + assertNullValues((BitVector) root.getVector(vectors[16]), rowCount); + } + + /** + * This method assert tests null values in vectors for some selected datatypes. 
+ * + * @param vectors Vectors to test + * @param root VectorSchemaRoot for test + * @param rowCount number of rows + */ + public void sqlToArrowTestSelectedNullColumnsValues(String[] vectors, VectorSchemaRoot root, int rowCount) { + assertNullValues((BigIntVector) root.getVector(vectors[0]), rowCount); + assertNullValues((DecimalVector) root.getVector(vectors[1]), rowCount); + assertNullValues((Float8Vector) root.getVector(vectors[2]), rowCount); + assertNullValues((Float4Vector) root.getVector(vectors[3]), rowCount); + assertNullValues((TimeMilliVector) root.getVector(vectors[4]), rowCount); + assertNullValues((DateDayVector) root.getVector(vectors[5]), rowCount); + assertNullValues((TimeStampVector) root.getVector(vectors[6]), rowCount); + assertNullValues((VarBinaryVector) root.getVector(vectors[7]), rowCount); + assertNullValues((VarCharVector) root.getVector(vectors[8]), rowCount); + assertNullValues((VarBinaryVector) root.getVector(vectors[9]), rowCount); + assertNullValues((VarCharVector) root.getVector(vectors[10]), rowCount); + assertNullValues((VarCharVector) root.getVector(vectors[11]), rowCount); + assertNullValues((BitVector) root.getVector(vectors[12]), rowCount); + } + +} diff --git a/src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowOptionalColumnsTest.java b/src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowOptionalColumnsTest.java new file mode 100644 index 000000000..84960dc88 --- /dev/null +++ b/src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowOptionalColumnsTest.java @@ -0,0 +1,90 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.adapter.jdbc.h2; + +import static junit.framework.TestCase.assertTrue; +import static org.junit.Assert.assertFalse; + +import java.io.IOException; +import java.sql.SQLException; +import java.util.Arrays; +import java.util.Collection; + +import org.apache.arrow.adapter.jdbc.AbstractJdbcToArrowTest; +import org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper; +import org.apache.arrow.adapter.jdbc.Table; +import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.vector.VectorSchemaRoot; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +/** + * JUnit Test Class which contains methods to test JDBC to Arrow data conversion functionality for + * (non-)optional columns, in particular with regard to the ensuing VectorSchemaRoot's schema. + */ +@RunWith(Parameterized.class) +public class JdbcToArrowOptionalColumnsTest extends AbstractJdbcToArrowTest { + private static final String[] testFiles = { + "h2/test1_null_and_notnull.yml" + }; + + /** + * Constructor which populates the table object for each test iteration. + * + * @param table Table object + */ + public JdbcToArrowOptionalColumnsTest(Table table) { + this.table = table; + } + + /** + * Get the test data as a collection of Table objects for each test iteration. 
+ * + * @return Collection of Table objects + * @throws SQLException on error + * @throws ClassNotFoundException on error + * @throws IOException on error + */ + @Parameterized.Parameters + public static Collection<Object[]> getTestData() throws SQLException, ClassNotFoundException, IOException { + return Arrays.asList(prepareTestData(testFiles, JdbcToArrowOptionalColumnsTest.class)); + } + + /** + * Test Method to test JdbcToArrow Functionality for dealing with nullable and non-nullable columns. + */ + @Test + public void testJdbcToArrowValues() throws SQLException, IOException { + testDataSets(sqlToArrow(conn, table.getQuery(), new RootAllocator(Integer.MAX_VALUE))); + } + + /** + * This method calls the assert methods for various DataSets. We verify that a SQL `NULL` column becomes + * nullable in the VectorSchemaRoot, and that a SQL `NOT NULL` column becomes non-nullable. + * + * @param root VectorSchemaRoot for test + */ + public void testDataSets(VectorSchemaRoot root) { + JdbcToArrowTestHelper.assertFieldMetadataIsEmpty(root); + + assertTrue(root.getSchema().getFields().get(0).isNullable()); + assertFalse(root.getSchema().getFields().get(1).isNullable()); + } + +} diff --git a/src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowTest.java b/src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowTest.java new file mode 100644 index 000000000..ca1c0c00b --- /dev/null +++ b/src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowTest.java @@ -0,0 +1,1350 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.adapter.jdbc.h2; + +import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertBigIntVectorValues; +import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertBitVectorValues; +import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertBooleanVectorValues; +import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertDateVectorValues; +import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertDecimalVectorValues; +import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertFloat4VectorValues; +import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertFloat8VectorValues; +import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertIntVectorValues; +import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertNullVectorValues; +import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertSmallIntVectorValues; +import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertTimeStampVectorValues; +import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertTimeVectorValues; +import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertTinyIntVectorValues; +import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertVarBinaryVectorValues; +import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertVarcharVectorValues; +import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.getBinaryValues; +import static 
org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.getBooleanValues; +import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.getCharArray; +import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.getDecimalValues; +import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.getDoubleValues; +import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.getFloatValues; +import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.getIntValues; +import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.getLongValues; +import static org.junit.Assert.assertEquals; + +import java.io.IOException; +import java.io.InputStream; +import java.io.Reader; +import java.math.BigDecimal; +import java.net.URL; +import java.sql.Array; +import java.sql.Blob; +import java.sql.Clob; +import java.sql.Date; +import java.sql.NClob; +import java.sql.Ref; +import java.sql.ResultSet; +import java.sql.ResultSetMetaData; +import java.sql.RowId; +import java.sql.SQLException; +import java.sql.SQLWarning; +import java.sql.SQLXML; +import java.sql.Statement; +import java.sql.Time; +import java.sql.Timestamp; +import java.sql.Types; +import java.util.Arrays; +import java.util.Calendar; +import java.util.Collection; +import java.util.Map; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +import org.apache.arrow.adapter.jdbc.AbstractJdbcToArrowTest; +import org.apache.arrow.adapter.jdbc.ArrowVectorIterator; +import org.apache.arrow.adapter.jdbc.JdbcToArrow; +import org.apache.arrow.adapter.jdbc.JdbcToArrowConfig; +import org.apache.arrow.adapter.jdbc.JdbcToArrowConfigBuilder; +import org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper; +import org.apache.arrow.adapter.jdbc.JdbcToArrowUtils; +import org.apache.arrow.adapter.jdbc.Table; +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.vector.BigIntVector; +import org.apache.arrow.vector.BitVector; +import 
org.apache.arrow.vector.DateDayVector; +import org.apache.arrow.vector.DecimalVector; +import org.apache.arrow.vector.Float4Vector; +import org.apache.arrow.vector.Float8Vector; +import org.apache.arrow.vector.IntVector; +import org.apache.arrow.vector.NullVector; +import org.apache.arrow.vector.SmallIntVector; +import org.apache.arrow.vector.TimeMilliVector; +import org.apache.arrow.vector.TimeStampVector; +import org.apache.arrow.vector.TinyIntVector; +import org.apache.arrow.vector.VarBinaryVector; +import org.apache.arrow.vector.VarCharVector; +import org.apache.arrow.vector.VectorSchemaRoot; +import org.apache.arrow.vector.types.pojo.Schema; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +/** + * JUnit Test Class which contains methods to test JDBC to Arrow data conversion functionality with various data types + * for H2 database using single test data file. + */ +@RunWith(Parameterized.class) +public class JdbcToArrowTest extends AbstractJdbcToArrowTest { + + private static final String[] testFiles = {"h2/test1_all_datatypes_h2.yml"}; + + /** + * Constructor which populates the table object for each test iteration. + * + * @param table Table object + * @param reuseVectorSchemaRoot A flag indicating if we should reuse vector schema roots. + */ + public JdbcToArrowTest(Table table, boolean reuseVectorSchemaRoot) { + this.table = table; + this.reuseVectorSchemaRoot = reuseVectorSchemaRoot; + } + + /** + * Get the test data as a collection of Table objects for each test iteration. 
+ * + * @return Collection of Table objects + * @throws SQLException on error + * @throws ClassNotFoundException on error + * @throws IOException on error + */ + @Parameterized.Parameters(name = "table = {0}, reuse batch = {1}") + public static Collection<Object[]> getTestData() throws SQLException, ClassNotFoundException, IOException { + return Arrays.stream(prepareTestData(testFiles, JdbcToArrowTest.class)).flatMap(row -> + Stream.of(new Object[] {row[0], true}, new Object[] {row[0], false})).collect(Collectors.toList()); + } + + /** + * Test Method to test JdbcToArrow Functionality for various H2 DB based datatypes with only one test data file. + */ + @Test + public void testJdbcToArrowValues() throws SQLException, IOException { + testDataSets(sqlToArrow(conn, table.getQuery(), new RootAllocator(Integer.MAX_VALUE), + Calendar.getInstance())); + testDataSets(sqlToArrow(conn, table.getQuery(), new RootAllocator(Integer.MAX_VALUE))); + testDataSets(sqlToArrow(conn.createStatement().executeQuery(table.getQuery()), + new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance())); + testDataSets(sqlToArrow(conn.createStatement().executeQuery(table.getQuery()))); + testDataSets(sqlToArrow(conn.createStatement().executeQuery(table.getQuery()), + new RootAllocator(Integer.MAX_VALUE))); + testDataSets(sqlToArrow(conn.createStatement().executeQuery(table.getQuery()), + Calendar.getInstance())); + testDataSets(sqlToArrow( + conn.createStatement().executeQuery(table.getQuery()), + new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()).build())); + testDataSets(sqlToArrow( + conn, + table.getQuery(), + new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()).build())); + } + + @Test + public void testJdbcSchemaMetadata() throws SQLException { + JdbcToArrowConfig config = new JdbcToArrowConfigBuilder(new RootAllocator(0), Calendar.getInstance(), true) + 
.setReuseVectorSchemaRoot(reuseVectorSchemaRoot).build(); + ResultSetMetaData rsmd = conn.createStatement().executeQuery(table.getQuery()).getMetaData(); + Schema schema = JdbcToArrowUtils.jdbcToArrowSchema(rsmd, config); + JdbcToArrowTestHelper.assertFieldMetadataMatchesResultSetMetadata(rsmd, schema); + } + + /** + * This method calls the assert methods for various DataSets. + * + * @param root VectorSchemaRoot for test + */ + public void testDataSets(VectorSchemaRoot root) { + JdbcToArrowTestHelper.assertFieldMetadataIsEmpty(root); + + assertBigIntVectorValues((BigIntVector) root.getVector(BIGINT), table.getRowCount(), + getLongValues(table.getValues(), BIGINT)); + + assertTinyIntVectorValues((TinyIntVector) root.getVector(TINYINT), table.getRowCount(), + getIntValues(table.getValues(), TINYINT)); + + assertSmallIntVectorValues((SmallIntVector) root.getVector(SMALLINT), table.getRowCount(), + getIntValues(table.getValues(), SMALLINT)); + + assertVarBinaryVectorValues((VarBinaryVector) root.getVector(BINARY), table.getRowCount(), + getBinaryValues(table.getValues(), BINARY)); + + assertVarBinaryVectorValues((VarBinaryVector) root.getVector(BLOB), table.getRowCount(), + getBinaryValues(table.getValues(), BLOB)); + + assertVarcharVectorValues((VarCharVector) root.getVector(CLOB), table.getRowCount(), + getCharArray(table.getValues(), CLOB)); + + assertVarcharVectorValues((VarCharVector) root.getVector(VARCHAR), table.getRowCount(), + getCharArray(table.getValues(), VARCHAR)); + + assertVarcharVectorValues((VarCharVector) root.getVector(CHAR), table.getRowCount(), + getCharArray(table.getValues(), CHAR)); + + assertIntVectorValues((IntVector) root.getVector(INT), table.getRowCount(), + getIntValues(table.getValues(), INT)); + + assertBitVectorValues((BitVector) root.getVector(BIT), table.getRowCount(), + getIntValues(table.getValues(), BIT)); + + assertBooleanVectorValues((BitVector) root.getVector(BOOL), table.getRowCount(), + getBooleanValues(table.getValues(), 
BOOL)); + + assertDateVectorValues((DateDayVector) root.getVector(DATE), table.getRowCount(), + getIntValues(table.getValues(), DATE)); + + assertTimeVectorValues((TimeMilliVector) root.getVector(TIME), table.getRowCount(), + getLongValues(table.getValues(), TIME)); + + assertTimeStampVectorValues((TimeStampVector) root.getVector(TIMESTAMP), table.getRowCount(), + getLongValues(table.getValues(), TIMESTAMP)); + + assertDecimalVectorValues((DecimalVector) root.getVector(DECIMAL), table.getRowCount(), + getDecimalValues(table.getValues(), DECIMAL)); + + assertFloat8VectorValues((Float8Vector) root.getVector(DOUBLE), table.getRowCount(), + getDoubleValues(table.getValues(), DOUBLE)); + + assertFloat4VectorValues((Float4Vector) root.getVector(REAL), table.getRowCount(), + getFloatValues(table.getValues(), REAL)); + + assertNullVectorValues((NullVector) root.getVector(NULL), table.getRowCount()); + } + + @Test + public void runLargeNumberOfRows() throws IOException, SQLException { + BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE); + int x = 0; + final int targetRows = 600000; + ResultSet rs = new FakeResultSet(targetRows); + JdbcToArrowConfig config = new JdbcToArrowConfigBuilder( + allocator, JdbcToArrowUtils.getUtcCalendar(), /* include metadata */ false) + .setReuseVectorSchemaRoot(reuseVectorSchemaRoot).build(); + + try (ArrowVectorIterator iter = JdbcToArrow.sqlToArrowVectorIterator(rs, config)) { + while (iter.hasNext()) { + VectorSchemaRoot root = iter.next(); + x += root.getRowCount(); + if (!reuseVectorSchemaRoot) { + root.close(); + } + } + } finally { + allocator.close(); + } + + assertEquals(x, targetRows); + } + + private class FakeResultSet implements ResultSet { + + public int numRows; + + FakeResultSet(int numRows) { + this.numRows = numRows; + } + + @Override + public boolean next() throws SQLException { + numRows--; + return numRows >= 0; + } + + @Override + public void close() throws SQLException { + + } + + @Override + public boolean 
wasNull() throws SQLException { + return false; + } + + @Override + public String getString(int columnIndex) throws SQLException { + return "test123test123" + numRows; + } + + @Override + public boolean getBoolean(int columnIndex) throws SQLException { + return false; + } + + @Override + public byte getByte(int columnIndex) throws SQLException { + return 0; + } + + @Override + public short getShort(int columnIndex) throws SQLException { + return 0; + } + + @Override + public int getInt(int columnIndex) throws SQLException { + return 0; + } + + @Override + public long getLong(int columnIndex) throws SQLException { + return 0; + } + + @Override + public float getFloat(int columnIndex) throws SQLException { + return 0; + } + + @Override + public double getDouble(int columnIndex) throws SQLException { + return 0; + } + + @Override + public BigDecimal getBigDecimal(int columnIndex, int scale) throws SQLException { + return new BigDecimal(5); + } + + @Override + public byte[] getBytes(int columnIndex) throws SQLException { + return new byte[0]; + } + + @Override + public Date getDate(int columnIndex) throws SQLException { + return null; + } + + @Override + public Time getTime(int columnIndex) throws SQLException { + return null; + } + + @Override + public Timestamp getTimestamp(int columnIndex) throws SQLException { + return null; + } + + @Override + public InputStream getAsciiStream(int columnIndex) throws SQLException { + return null; + } + + @Override + public InputStream getUnicodeStream(int columnIndex) throws SQLException { + return null; + } + + @Override + public InputStream getBinaryStream(int columnIndex) throws SQLException { + return null; + } + + @Override + public String getString(String columnLabel) throws SQLException { + throw new UnsupportedOperationException("get column by label not supported"); + } + + @Override + public boolean getBoolean(String columnLabel) throws SQLException { + return false; + } + + @Override + public byte getByte(String 
columnLabel) throws SQLException { + return 0; + } + + @Override + public short getShort(String columnLabel) throws SQLException { + return 0; + } + + @Override + public int getInt(String columnLabel) throws SQLException { + return 0; + } + + @Override + public long getLong(String columnLabel) throws SQLException { + return 0; + } + + @Override + public float getFloat(String columnLabel) throws SQLException { + return 0; + } + + @Override + public double getDouble(String columnLabel) throws SQLException { + return 0; + } + + @Override + public BigDecimal getBigDecimal(String columnLabel, int scale) throws SQLException { + return null; + } + + @Override + public byte[] getBytes(String columnLabel) throws SQLException { + return new byte[0]; + } + + @Override + public Date getDate(String columnLabel) throws SQLException { + return null; + } + + @Override + public Time getTime(String columnLabel) throws SQLException { + return null; + } + + @Override + public Timestamp getTimestamp(String columnLabel) throws SQLException { + return null; + } + + @Override + public InputStream getAsciiStream(String columnLabel) throws SQLException { + return null; + } + + @Override + public InputStream getUnicodeStream(String columnLabel) throws SQLException { + return null; + } + + @Override + public InputStream getBinaryStream(String columnLabel) throws SQLException { + return null; + } + + @Override + public SQLWarning getWarnings() throws SQLException { + return null; + } + + @Override + public void clearWarnings() throws SQLException { + + } + + @Override + public String getCursorName() throws SQLException { + return null; + } + + @Override + public ResultSetMetaData getMetaData() throws SQLException { + return new ResultSetMetaData() { + @Override + public int getColumnCount() throws SQLException { + return 5; + } + + @Override + public boolean isAutoIncrement(int column) throws SQLException { + return false; + } + + @Override + public boolean isCaseSensitive(int column) throws 
SQLException { + return false; + } + + @Override + public boolean isSearchable(int column) throws SQLException { + return false; + } + + @Override + public boolean isCurrency(int column) throws SQLException { + return false; + } + + @Override + public int isNullable(int column) throws SQLException { + return 0; + } + + @Override + public boolean isSigned(int column) throws SQLException { + return false; + } + + @Override + public int getColumnDisplaySize(int column) throws SQLException { + return 0; + } + + @Override + public String getColumnLabel(int column) throws SQLException { + return getColumnName(column); + } + + @Override + public String getColumnName(int column) throws SQLException { + return "col_" + column; + } + + @Override + public String getSchemaName(int column) throws SQLException { + return null; + } + + @Override + public int getPrecision(int column) throws SQLException { + return 0; + } + + @Override + public int getScale(int column) throws SQLException { + return 0; + } + + @Override + public String getTableName(int column) throws SQLException { + return null; + } + + @Override + public String getCatalogName(int column) throws SQLException { + return null; + } + + @Override + public int getColumnType(int column) throws SQLException { + switch (column) { + case 1: + return Types.VARCHAR; + case 2: + return Types.INTEGER; + case 3: + return Types.BIGINT; + case 4: + return Types.VARCHAR; + case 5: + return Types.VARCHAR; + default: + throw new IllegalArgumentException("not supported"); + } + + } + + @Override + public String getColumnTypeName(int column) throws SQLException { + return null; + } + + @Override + public boolean isReadOnly(int column) throws SQLException { + return false; + } + + @Override + public boolean isWritable(int column) throws SQLException { + return false; + } + + @Override + public boolean isDefinitelyWritable(int column) throws SQLException { + return false; + } + + @Override + public String getColumnClassName(int column) 
throws SQLException { + return null; + } + + @Override + public <T> T unwrap(Class<T> iface) throws SQLException { + return null; + } + + @Override + public boolean isWrapperFor(Class<?> iface) throws SQLException { + return false; + } + }; + } + + @Override + public Object getObject(int columnIndex) throws SQLException { + return null; + } + + @Override + public Object getObject(String columnLabel) throws SQLException { + return null; + } + + @Override + public int findColumn(String columnLabel) throws SQLException { + return 0; + } + + @Override + public Reader getCharacterStream(int columnIndex) throws SQLException { + return null; + } + + @Override + public Reader getCharacterStream(String columnLabel) throws SQLException { + return null; + } + + @Override + public BigDecimal getBigDecimal(int columnIndex) throws SQLException { + return null; + } + + @Override + public BigDecimal getBigDecimal(String columnLabel) throws SQLException { + return null; + } + + @Override + public boolean isBeforeFirst() throws SQLException { + return false; + } + + @Override + public boolean isAfterLast() throws SQLException { + return numRows < 0; + } + + @Override + public boolean isFirst() throws SQLException { + return false; + } + + @Override + public boolean isLast() throws SQLException { + return false; + } + + @Override + public void beforeFirst() throws SQLException { + + } + + @Override + public void afterLast() throws SQLException { + + } + + @Override + public boolean first() throws SQLException { + return false; + } + + @Override + public boolean last() throws SQLException { + return false; + } + + @Override + public int getRow() throws SQLException { + return 0; + } + + @Override + public boolean absolute(int row) throws SQLException { + return false; + } + + @Override + public boolean relative(int rows) throws SQLException { + return false; + } + + @Override + public boolean previous() throws SQLException { + return false; + } + + @Override + public void 
setFetchDirection(int direction) throws SQLException { + + } + + @Override + public int getFetchDirection() throws SQLException { + return 0; + } + + @Override + public void setFetchSize(int rows) throws SQLException { + + } + + @Override + public int getFetchSize() throws SQLException { + return 0; + } + + @Override + public int getType() throws SQLException { + return 0; + } + + @Override + public int getConcurrency() throws SQLException { + return 0; + } + + @Override + public boolean rowUpdated() throws SQLException { + return false; + } + + @Override + public boolean rowInserted() throws SQLException { + return false; + } + + @Override + public boolean rowDeleted() throws SQLException { + return false; + } + + @Override + public void updateNull(int columnIndex) throws SQLException { + + } + + @Override + public void updateBoolean(int columnIndex, boolean x) throws SQLException { + + } + + @Override + public void updateByte(int columnIndex, byte x) throws SQLException { + + } + + @Override + public void updateShort(int columnIndex, short x) throws SQLException { + + } + + @Override + public void updateInt(int columnIndex, int x) throws SQLException { + + } + + @Override + public void updateLong(int columnIndex, long x) throws SQLException { + + } + + @Override + public void updateFloat(int columnIndex, float x) throws SQLException { + + } + + @Override + public void updateDouble(int columnIndex, double x) throws SQLException { + + } + + @Override + public void updateBigDecimal(int columnIndex, BigDecimal x) throws SQLException { + + } + + @Override + public void updateString(int columnIndex, String x) throws SQLException { + + } + + @Override + public void updateBytes(int columnIndex, byte[] x) throws SQLException { + + } + + @Override + public void updateDate(int columnIndex, Date x) throws SQLException { + + } + + @Override + public void updateTime(int columnIndex, Time x) throws SQLException { + + } + + @Override + public void updateTimestamp(int 
columnIndex, Timestamp x) throws SQLException { + + } + + @Override + public void updateAsciiStream(int columnIndex, InputStream x, int length) throws SQLException { + + } + + @Override + public void updateBinaryStream(int columnIndex, InputStream x, int length) throws SQLException { + + } + + @Override + public void updateCharacterStream(int columnIndex, Reader x, int length) throws SQLException { + + } + + @Override + public void updateObject(int columnIndex, Object x, int scaleOrLength) throws SQLException { + + } + + @Override + public void updateObject(int columnIndex, Object x) throws SQLException { + + } + + @Override + public void updateNull(String columnLabel) throws SQLException { + + } + + @Override + public void updateBoolean(String columnLabel, boolean x) throws SQLException { + + } + + @Override + public void updateByte(String columnLabel, byte x) throws SQLException { + + } + + @Override + public void updateShort(String columnLabel, short x) throws SQLException { + + } + + @Override + public void updateInt(String columnLabel, int x) throws SQLException { + + } + + @Override + public void updateLong(String columnLabel, long x) throws SQLException { + + } + + @Override + public void updateFloat(String columnLabel, float x) throws SQLException { + + } + + @Override + public void updateDouble(String columnLabel, double x) throws SQLException { + + } + + @Override + public void updateBigDecimal(String columnLabel, BigDecimal x) throws SQLException { + + } + + @Override + public void updateString(String columnLabel, String x) throws SQLException { + + } + + @Override + public void updateBytes(String columnLabel, byte[] x) throws SQLException { + + } + + @Override + public void updateDate(String columnLabel, Date x) throws SQLException { + + } + + @Override + public void updateTime(String columnLabel, Time x) throws SQLException { + + } + + @Override + public void updateTimestamp(String columnLabel, Timestamp x) throws SQLException { + + } + + @Override + 
public void updateAsciiStream(String columnLabel, InputStream x, int length) throws SQLException { + + } + + @Override + public void updateBinaryStream(String columnLabel, InputStream x, int length) throws SQLException { + + } + + @Override + public void updateCharacterStream(String columnLabel, Reader reader, int length) throws SQLException { + + } + + @Override + public void updateObject(String columnLabel, Object x, int scaleOrLength) throws SQLException { + + } + + @Override + public void updateObject(String columnLabel, Object x) throws SQLException { + + } + + @Override + public void insertRow() throws SQLException { + + } + + @Override + public void updateRow() throws SQLException { + + } + + @Override + public void deleteRow() throws SQLException { + + } + + @Override + public void refreshRow() throws SQLException { + + } + + @Override + public void cancelRowUpdates() throws SQLException { + + } + + @Override + public void moveToInsertRow() throws SQLException { + + } + + @Override + public void moveToCurrentRow() throws SQLException { + + } + + @Override + public Statement getStatement() throws SQLException { + return null; + } + + @Override + public Object getObject(int columnIndex, Map<String, Class<?>> map) throws SQLException { + return null; + } + + @Override + public Ref getRef(int columnIndex) throws SQLException { + return null; + } + + @Override + public Blob getBlob(int columnIndex) throws SQLException { + return null; + } + + @Override + public Clob getClob(int columnIndex) throws SQLException { + return null; + } + + @Override + public Array getArray(int columnIndex) throws SQLException { + return null; + } + + @Override + public Object getObject(String columnLabel, Map<String, Class<?>> map) throws SQLException { + return null; + } + + @Override + public Ref getRef(String columnLabel) throws SQLException { + return null; + } + + @Override + public Blob getBlob(String columnLabel) throws SQLException { + return null; + } + + @Override + public 
Clob getClob(String columnLabel) throws SQLException { + return null; + } + + @Override + public Array getArray(String columnLabel) throws SQLException { + return null; + } + + @Override + public Date getDate(int columnIndex, Calendar cal) throws SQLException { + return null; + } + + @Override + public Date getDate(String columnLabel, Calendar cal) throws SQLException { + return null; + } + + @Override + public Time getTime(int columnIndex, Calendar cal) throws SQLException { + return null; + } + + @Override + public Time getTime(String columnLabel, Calendar cal) throws SQLException { + return null; + } + + @Override + public Timestamp getTimestamp(int columnIndex, Calendar cal) throws SQLException { + return null; + } + + @Override + public Timestamp getTimestamp(String columnLabel, Calendar cal) throws SQLException { + return null; + } + + @Override + public URL getURL(int columnIndex) throws SQLException { + return null; + } + + @Override + public URL getURL(String columnLabel) throws SQLException { + return null; + } + + @Override + public void updateRef(int columnIndex, Ref x) throws SQLException { + + } + + @Override + public void updateRef(String columnLabel, Ref x) throws SQLException { + + } + + @Override + public void updateBlob(int columnIndex, Blob x) throws SQLException { + + } + + @Override + public void updateBlob(String columnLabel, Blob x) throws SQLException { + + } + + @Override + public void updateClob(int columnIndex, Clob x) throws SQLException { + + } + + @Override + public void updateClob(String columnLabel, Clob x) throws SQLException { + + } + + @Override + public void updateArray(int columnIndex, Array x) throws SQLException { + + } + + @Override + public void updateArray(String columnLabel, Array x) throws SQLException { + + } + + @Override + public RowId getRowId(int columnIndex) throws SQLException { + return null; + } + + @Override + public RowId getRowId(String columnLabel) throws SQLException { + return null; + } + + @Override + 
public void updateRowId(int columnIndex, RowId x) throws SQLException { + + } + + @Override + public void updateRowId(String columnLabel, RowId x) throws SQLException { + + } + + @Override + public int getHoldability() throws SQLException { + return 0; + } + + @Override + public boolean isClosed() throws SQLException { + return false; + } + + @Override + public void updateNString(int columnIndex, String nString) throws SQLException { + + } + + @Override + public void updateNString(String columnLabel, String nString) throws SQLException { + + } + + @Override + public void updateNClob(int columnIndex, NClob nClob) throws SQLException { + + } + + @Override + public void updateNClob(String columnLabel, NClob nClob) throws SQLException { + + } + + @Override + public NClob getNClob(int columnIndex) throws SQLException { + return null; + } + + @Override + public NClob getNClob(String columnLabel) throws SQLException { + return null; + } + + @Override + public SQLXML getSQLXML(int columnIndex) throws SQLException { + return null; + } + + @Override + public SQLXML getSQLXML(String columnLabel) throws SQLException { + return null; + } + + @Override + public void updateSQLXML(int columnIndex, SQLXML xmlObject) throws SQLException { + + } + + @Override + public void updateSQLXML(String columnLabel, SQLXML xmlObject) throws SQLException { + + } + + @Override + public String getNString(int columnIndex) throws SQLException { + return null; + } + + @Override + public String getNString(String columnLabel) throws SQLException { + return null; + } + + @Override + public Reader getNCharacterStream(int columnIndex) throws SQLException { + return null; + } + + @Override + public Reader getNCharacterStream(String columnLabel) throws SQLException { + return null; + } + + @Override + public void updateNCharacterStream(int columnIndex, Reader x, long length) throws SQLException { + + } + + @Override + public void updateNCharacterStream(String columnLabel, Reader reader, long length) throws 
SQLException { + + } + + @Override + public void updateAsciiStream(int columnIndex, InputStream x, long length) throws SQLException { + + } + + @Override + public void updateBinaryStream(int columnIndex, InputStream x, long length) throws SQLException { + + } + + @Override + public void updateCharacterStream(int columnIndex, Reader x, long length) throws SQLException { + + } + + @Override + public void updateAsciiStream(String columnLabel, InputStream x, long length) throws SQLException { + + } + + @Override + public void updateBinaryStream(String columnLabel, InputStream x, long length) throws SQLException { + + } + + @Override + public void updateCharacterStream(String columnLabel, Reader reader, long length) throws SQLException { + + } + + @Override + public void updateBlob(int columnIndex, InputStream inputStream, long length) throws SQLException { + + } + + @Override + public void updateBlob(String columnLabel, InputStream inputStream, long length) throws SQLException { + + } + + @Override + public void updateClob(int columnIndex, Reader reader, long length) throws SQLException { + + } + + @Override + public void updateClob(String columnLabel, Reader reader, long length) throws SQLException { + + } + + @Override + public void updateNClob(int columnIndex, Reader reader, long length) throws SQLException { + + } + + @Override + public void updateNClob(String columnLabel, Reader reader, long length) throws SQLException { + + } + + @Override + public void updateNCharacterStream(int columnIndex, Reader x) throws SQLException { + + } + + @Override + public void updateNCharacterStream(String columnLabel, Reader reader) throws SQLException { + + } + + @Override + public void updateAsciiStream(int columnIndex, InputStream x) throws SQLException { + + } + + @Override + public void updateBinaryStream(int columnIndex, InputStream x) throws SQLException { + + } + + @Override + public void updateCharacterStream(int columnIndex, Reader x) throws SQLException { + + } + + 
@Override + public void updateAsciiStream(String columnLabel, InputStream x) throws SQLException { + + } + + @Override + public void updateBinaryStream(String columnLabel, InputStream x) throws SQLException { + + } + + @Override + public void updateCharacterStream(String columnLabel, Reader reader) throws SQLException { + + } + + @Override + public void updateBlob(int columnIndex, InputStream inputStream) throws SQLException { + + } + + @Override + public void updateBlob(String columnLabel, InputStream inputStream) throws SQLException { + + } + + @Override + public void updateClob(int columnIndex, Reader reader) throws SQLException { + + } + + @Override + public void updateClob(String columnLabel, Reader reader) throws SQLException { + + } + + @Override + public void updateNClob(int columnIndex, Reader reader) throws SQLException { + + } + + @Override + public void updateNClob(String columnLabel, Reader reader) throws SQLException { + + } + + @Override + public <T> T getObject(int columnIndex, Class<T> type) throws SQLException { + return null; + } + + @Override + public <T> T getObject(String columnLabel, Class<T> type) throws SQLException { + return null; + } + + @Override + public <T> T unwrap(Class<T> iface) throws SQLException { + return null; + } + + @Override + public boolean isWrapperFor(Class<?> iface) throws SQLException { + return false; + } + } +} diff --git a/src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowTimeZoneTest.java b/src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowTimeZoneTest.java new file mode 100644 index 000000000..f5ddbdb9b --- /dev/null +++ b/src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowTimeZoneTest.java @@ -0,0 +1,168 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.adapter.jdbc.h2; + +import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertDateVectorValues; +import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertTimeStampVectorValues; +import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertTimeVectorValues; + +import java.io.IOException; +import java.sql.ResultSetMetaData; +import java.sql.SQLException; +import java.util.Arrays; +import java.util.Calendar; +import java.util.Collection; +import java.util.TimeZone; + +import org.apache.arrow.adapter.jdbc.AbstractJdbcToArrowTest; +import org.apache.arrow.adapter.jdbc.JdbcToArrowConfig; +import org.apache.arrow.adapter.jdbc.JdbcToArrowConfigBuilder; +import org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper; +import org.apache.arrow.adapter.jdbc.JdbcToArrowUtils; +import org.apache.arrow.adapter.jdbc.Table; +import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.vector.DateDayVector; +import org.apache.arrow.vector.TimeMilliVector; +import org.apache.arrow.vector.TimeStampVector; +import org.apache.arrow.vector.VectorSchemaRoot; +import org.apache.arrow.vector.types.pojo.Schema; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import 
org.junit.runners.Parameterized.Parameters; + +/** + * JUnit Test Class which contains methods to test JDBC to Arrow data conversion functionality with TimeZone based Date, + * Time and Timestamp datatypes for H2 database. + */ + +@RunWith(Parameterized.class) +public class JdbcToArrowTimeZoneTest extends AbstractJdbcToArrowTest { + + private static final String EST_DATE = "est_date"; + private static final String EST_TIME = "est_time"; + private static final String EST_TIMESTAMP = "est_timestamp"; + private static final String GMT_DATE = "gmt_date"; + private static final String GMT_TIME = "gmt_time"; + private static final String GMT_TIMESTAMP = "gmt_timestamp"; + private static final String PST_DATE = "pst_date"; + private static final String PST_TIME = "pst_time"; + private static final String PST_TIMESTAMP = "pst_timestamp"; + + private static final String[] testFiles = { + "h2/test1_est_date_h2.yml", + "h2/test1_est_time_h2.yml", + "h2/test1_est_timestamp_h2.yml", + "h2/test1_gmt_date_h2.yml", + "h2/test1_gmt_time_h2.yml", + "h2/test1_gmt_timestamp_h2.yml", + "h2/test1_pst_date_h2.yml", + "h2/test1_pst_time_h2.yml", + "h2/test1_pst_timestamp_h2.yml" + }; + + /** + * Constructor which populates the table object for each test iteration. + * + * @param table Table object + */ + public JdbcToArrowTimeZoneTest(Table table) { + this.table = table; + } + + /** + * Get the test data as a collection of Table objects for each test iteration. + * + * @return Collection of Table objects + * @throws SQLException on error + * @throws ClassNotFoundException on error + * @throws IOException on error + */ + @Parameters + public static Collection<Object[]> getTestData() throws SQLException, ClassNotFoundException, IOException { + return Arrays.asList(prepareTestData(testFiles, JdbcToArrowTimeZoneTest.class)); + } + + /** + * Test Method to test JdbcToArrow Functionality for various H2 DB based datatypes with TimeZone based Date, + * Time and Timestamp datatype. 
+ */ + @Test + public void testJdbcToArrowValues() throws SQLException, IOException { + testDataSets(sqlToArrow(conn, table.getQuery(), new RootAllocator(Integer.MAX_VALUE), + Calendar.getInstance(TimeZone.getTimeZone(table.getTimezone())))); + testDataSets(sqlToArrow(conn.createStatement().executeQuery(table.getQuery()), + new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance(TimeZone.getTimeZone(table.getTimezone())))); + testDataSets(sqlToArrow(conn.createStatement().executeQuery(table.getQuery()), + Calendar.getInstance(TimeZone.getTimeZone(table.getTimezone())))); + testDataSets(sqlToArrow( + conn.createStatement().executeQuery(table.getQuery()), + new JdbcToArrowConfigBuilder( + new RootAllocator(Integer.MAX_VALUE), + Calendar.getInstance(TimeZone.getTimeZone(table.getTimezone()))).build())); + testDataSets(sqlToArrow( + conn, + table.getQuery(), + new JdbcToArrowConfigBuilder( + new RootAllocator(Integer.MAX_VALUE), + Calendar.getInstance(TimeZone.getTimeZone(table.getTimezone()))).build())); + } + + @Test + public void testJdbcSchemaMetadata() throws SQLException { + Calendar calendar = Calendar.getInstance(TimeZone.getTimeZone(table.getTimezone())); + JdbcToArrowConfig config = new JdbcToArrowConfigBuilder(new RootAllocator(0), calendar, true).build(); + ResultSetMetaData rsmd = conn.createStatement().executeQuery(table.getQuery()).getMetaData(); + Schema schema = JdbcToArrowUtils.jdbcToArrowSchema(rsmd, config); + JdbcToArrowTestHelper.assertFieldMetadataMatchesResultSetMetadata(rsmd, schema); + } + + /** + * This method calls the assert methods for various DataSets. 
+ * + * @param root VectorSchemaRoot for test + */ + public void testDataSets(VectorSchemaRoot root) { + JdbcToArrowTestHelper.assertFieldMetadataIsEmpty(root); + + switch (table.getType()) { + case EST_DATE: + case GMT_DATE: + case PST_DATE: + assertDateVectorValues((DateDayVector) root.getVector(table.getVector()), table.getValues().length, + table.getIntValues()); + break; + case EST_TIME: + case GMT_TIME: + case PST_TIME: + assertTimeVectorValues((TimeMilliVector) root.getVector(table.getVector()), table.getValues().length, + table.getLongValues()); + break; + case EST_TIMESTAMP: + case GMT_TIMESTAMP: + case PST_TIMESTAMP: + assertTimeStampVectorValues((TimeStampVector) root.getVector(table.getVector()), table.getValues().length, + table.getLongValues()); + break; + default: + // do nothing + break; + } + } + +} diff --git a/src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowVectorIteratorTest.java b/src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowVectorIteratorTest.java new file mode 100644 index 000000000..6206ddcf7 --- /dev/null +++ b/src/arrow/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowVectorIteratorTest.java @@ -0,0 +1,467 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.adapter.jdbc.h2; + +import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.getBinaryValues; +import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.getBooleanValues; +import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.getCharArray; +import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.getDecimalValues; +import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.getDoubleValues; +import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.getFloatValues; +import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.getIntValues; +import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.getLongValues; +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.io.IOException; +import java.math.BigDecimal; +import java.sql.SQLException; +import java.sql.Types; +import java.util.ArrayList; +import java.util.Calendar; +import java.util.List; + +import org.apache.arrow.adapter.jdbc.ArrowVectorIterator; +import org.apache.arrow.adapter.jdbc.JdbcToArrow; +import org.apache.arrow.adapter.jdbc.JdbcToArrowConfig; +import org.apache.arrow.adapter.jdbc.JdbcToArrowConfigBuilder; +import org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper; +import org.apache.arrow.adapter.jdbc.Table; +import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.vector.BigIntVector; +import org.apache.arrow.vector.BitVector; +import org.apache.arrow.vector.DateDayVector; +import org.apache.arrow.vector.DecimalVector; +import org.apache.arrow.vector.FieldVector; +import org.apache.arrow.vector.Float4Vector; +import 
org.apache.arrow.vector.Float8Vector; +import org.apache.arrow.vector.IntVector; +import org.apache.arrow.vector.SmallIntVector; +import org.apache.arrow.vector.TimeMilliVector; +import org.apache.arrow.vector.TimeStampMilliTZVector; +import org.apache.arrow.vector.TimeStampMilliVector; +import org.apache.arrow.vector.TimeStampVector; +import org.apache.arrow.vector.TinyIntVector; +import org.apache.arrow.vector.ValueVector; +import org.apache.arrow.vector.VarBinaryVector; +import org.apache.arrow.vector.VarCharVector; +import org.apache.arrow.vector.VectorSchemaRoot; +import org.apache.arrow.vector.types.FloatingPointPrecision; +import org.apache.arrow.vector.types.pojo.ArrowType; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +@RunWith(Parameterized.class) +public class JdbcToArrowVectorIteratorTest extends JdbcToArrowTest { + + /** + * Constructor which populates the table object for each test iteration. + * + * @param table Table object + * @param reuseVectorSchemaRoot A flag indicating if we should reuse vector schema roots. 
+ */ + public JdbcToArrowVectorIteratorTest(Table table, boolean reuseVectorSchemaRoot) { + super(table, reuseVectorSchemaRoot); + } + + @Test + @Override + public void testJdbcToArrowValues() throws SQLException, IOException { + + JdbcToArrowConfig config = new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), + Calendar.getInstance()).setTargetBatchSize(3).build(); + + ArrowVectorIterator iterator = + JdbcToArrow.sqlToArrowVectorIterator(conn.createStatement().executeQuery(table.getQuery()), config); + + validate(iterator); + } + + @Test + public void testVectorSchemaRootReuse() throws SQLException, IOException { + + JdbcToArrowConfig config = new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), + Calendar.getInstance()).setTargetBatchSize(3).setReuseVectorSchemaRoot(reuseVectorSchemaRoot).build(); + + ArrowVectorIterator iterator = + JdbcToArrow.sqlToArrowVectorIterator(conn.createStatement().executeQuery(table.getQuery()), config); + + int batchCount = 0; + VectorSchemaRoot prev = null; + while (iterator.hasNext()) { + VectorSchemaRoot cur = iterator.next(); + assertNotNull(cur); + + if (prev != null) { + // skip the first iteration + + if (reuseVectorSchemaRoot) { + // when reuse is enabled, different iterations are based on the same vector schema root. + assertTrue(prev == cur); + } else { + // when reuse is enabled, a new vector schema root is created in each iteration. 
+ assertFalse(prev == cur); + cur.close(); + } + } + + prev = cur; + batchCount += 1; + } + + // make sure we have at least two batches, so the above test paths are actually covered + assertTrue(batchCount > 1); + } + + @Test + public void testJdbcToArrowValuesNoLimit() throws SQLException, IOException { + + JdbcToArrowConfig config = new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), + Calendar.getInstance()).setTargetBatchSize(JdbcToArrowConfig.NO_LIMIT_BATCH_SIZE).build(); + + ArrowVectorIterator iterator = + JdbcToArrow.sqlToArrowVectorIterator(conn.createStatement().executeQuery(table.getQuery()), config); + + validate(iterator); + } + + @Test + public void testTimeStampConsumer() throws SQLException, IOException { + final String sql = "select timestamp_field11 from table1"; + + // first experiment, with calendar and time zone. + JdbcToArrowConfig config = new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), + Calendar.getInstance()).setTargetBatchSize(3).setReuseVectorSchemaRoot(reuseVectorSchemaRoot).build(); + assertNotNull(config.getCalendar()); + + try (ArrowVectorIterator iterator = + JdbcToArrow.sqlToArrowVectorIterator(conn.createStatement().executeQuery(sql), config)) { + VectorSchemaRoot root = iterator.next(); + assertEquals(1, root.getFieldVectors().size()); + + // vector with time zone info. + assertTrue(root.getVector(0) instanceof TimeStampMilliTZVector); + } + + // second experiment, without calendar and time zone. + config = new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), + null).setTargetBatchSize(3).setReuseVectorSchemaRoot(reuseVectorSchemaRoot).build(); + assertNull(config.getCalendar()); + + try (ArrowVectorIterator iterator = + JdbcToArrow.sqlToArrowVectorIterator(conn.createStatement().executeQuery(sql), config)) { + VectorSchemaRoot root = iterator.next(); + assertEquals(1, root.getFieldVectors().size()); + + // vector without time zone info. 
+ assertTrue(root.getVector(0) instanceof TimeStampMilliVector); + } + } + + private void validate(ArrowVectorIterator iterator) throws SQLException, IOException { + + List<BigIntVector> bigIntVectors = new ArrayList<>(); + List<TinyIntVector> tinyIntVectors = new ArrayList<>(); + List<IntVector> intVectors = new ArrayList<>(); + List<SmallIntVector> smallIntVectors = new ArrayList<>(); + List<VarBinaryVector> vectorsForBinary = new ArrayList<>(); + List<VarBinaryVector> vectorsForBlob = new ArrayList<>(); + List<VarCharVector> vectorsForClob = new ArrayList<>(); + List<VarCharVector> vectorsForVarChar = new ArrayList<>(); + List<VarCharVector> vectorsForChar = new ArrayList<>(); + List<BitVector> vectorsForBit = new ArrayList<>(); + List<BitVector> vectorsForBool = new ArrayList<>(); + List<DateDayVector> dateDayVectors = new ArrayList<>(); + List<TimeMilliVector> timeMilliVectors = new ArrayList<>(); + List<TimeStampVector> timeStampVectors = new ArrayList<>(); + List<DecimalVector> decimalVectors = new ArrayList<>(); + List<Float4Vector> float4Vectors = new ArrayList<>(); + List<Float8Vector> float8Vectors = new ArrayList<>(); + + List<VectorSchemaRoot> roots = new ArrayList<>(); + while (iterator.hasNext()) { + VectorSchemaRoot root = iterator.next(); + roots.add(root); + + JdbcToArrowTestHelper.assertFieldMetadataIsEmpty(root); + + bigIntVectors.add((BigIntVector) root.getVector(BIGINT)); + tinyIntVectors.add((TinyIntVector) root.getVector(TINYINT)); + intVectors.add((IntVector) root.getVector(INT)); + smallIntVectors.add((SmallIntVector) root.getVector(SMALLINT)); + vectorsForBinary.add((VarBinaryVector) root.getVector(BINARY)); + vectorsForBlob.add((VarBinaryVector) root.getVector(BLOB)); + vectorsForClob.add((VarCharVector) root.getVector(CLOB)); + vectorsForVarChar.add((VarCharVector) root.getVector(VARCHAR)); + vectorsForChar.add((VarCharVector) root.getVector(CHAR)); + vectorsForBit.add((BitVector) root.getVector(BIT)); + vectorsForBool.add((BitVector) 
root.getVector(BOOL)); + dateDayVectors.add((DateDayVector) root.getVector(DATE)); + timeMilliVectors.add((TimeMilliVector) root.getVector(TIME)); + timeStampVectors.add((TimeStampVector) root.getVector(TIMESTAMP)); + decimalVectors.add((DecimalVector) root.getVector(DECIMAL)); + float4Vectors.add((Float4Vector) root.getVector(REAL)); + float8Vectors.add((Float8Vector) root.getVector(DOUBLE)); + + } + assertBigIntVectorValues(bigIntVectors, table.getRowCount(), getLongValues(table.getValues(), BIGINT)); + assertTinyIntVectorValues(tinyIntVectors, table.getRowCount(), getIntValues(table.getValues(), TINYINT)); + assertIntVectorValues(intVectors, table.getRowCount(), getIntValues(table.getValues(), INT)); + assertSmallIntVectorValues(smallIntVectors, table.getRowCount(), getIntValues(table.getValues(), SMALLINT)); + assertBinaryVectorValues(vectorsForBinary, table.getRowCount(), getBinaryValues(table.getValues(), BINARY)); + assertBinaryVectorValues(vectorsForBlob, table.getRowCount(), getBinaryValues(table.getValues(), BLOB)); + assertVarCharVectorValues(vectorsForClob, table.getRowCount(), getCharArray(table.getValues(), CLOB)); + assertVarCharVectorValues(vectorsForVarChar, table.getRowCount(), getCharArray(table.getValues(), VARCHAR)); + assertVarCharVectorValues(vectorsForChar, table.getRowCount(), getCharArray(table.getValues(), CHAR)); + assertBitVectorValues(vectorsForBit, table.getRowCount(), getIntValues(table.getValues(), BIT)); + assertBooleanVectorValues(vectorsForBool, table.getRowCount(), getBooleanValues(table.getValues(), BOOL)); + assertDateDayVectorValues(dateDayVectors, table.getRowCount(), getLongValues(table.getValues(), DATE)); + assertTimeMilliVectorValues(timeMilliVectors, table.getRowCount(), getLongValues(table.getValues(), TIME)); + assertTimeStampVectorValues(timeStampVectors, table.getRowCount(), getLongValues(table.getValues(), TIMESTAMP)); + assertDecimalVectorValues(decimalVectors, table.getRowCount(), 
getDecimalValues(table.getValues(), DECIMAL)); + assertFloat4VectorValues(float4Vectors, table.getRowCount(), getFloatValues(table.getValues(), REAL)); + assertFloat8VectorValues(float8Vectors, table.getRowCount(), getDoubleValues(table.getValues(), DOUBLE)); + + roots.forEach(root -> root.close()); + } + + private void assertFloat8VectorValues(List<Float8Vector> vectors, int rowCount, Double[] values) { + int valueCount = vectors.stream().mapToInt(ValueVector::getValueCount).sum(); + assertEquals(rowCount, valueCount); + + int index = 0; + for (Float8Vector vector : vectors) { + for (int i = 0; i < vector.getValueCount(); i++) { + assertEquals(values[index++].doubleValue(), vector.get(i), 0.01); + } + } + } + + private void assertFloat4VectorValues(List<Float4Vector> vectors, int rowCount, Float[] values) { + int valueCount = vectors.stream().mapToInt(ValueVector::getValueCount).sum(); + assertEquals(rowCount, valueCount); + + int index = 0; + for (Float4Vector vector : vectors) { + for (int i = 0; i < vector.getValueCount(); i++) { + assertEquals(values[index++].floatValue(), vector.get(i), 0.01); + } + } + } + + private void assertDecimalVectorValues(List<DecimalVector> vectors, int rowCount, BigDecimal[] values) { + int valueCount = vectors.stream().mapToInt(ValueVector::getValueCount).sum(); + assertEquals(rowCount, valueCount); + + int index = 0; + for (DecimalVector vector : vectors) { + for (int i = 0; i < vector.getValueCount(); i++) { + assertNotNull(vector.getObject(i)); + assertEquals(values[index++].doubleValue(), vector.getObject(i).doubleValue(), 0); + } + } + } + + private void assertTimeStampVectorValues(List<TimeStampVector> vectors, int rowCount, Long[] values) { + int valueCount = vectors.stream().mapToInt(ValueVector::getValueCount).sum(); + assertEquals(rowCount, valueCount); + + int index = 0; + for (TimeStampVector vector : vectors) { + for (int i = 0; i < vector.getValueCount(); i++) { + assertEquals(values[index++].longValue(), 
vector.get(i)); + } + } + } + + private void assertTimeMilliVectorValues(List<TimeMilliVector> vectors, int rowCount, Long[] values) { + int valueCount = vectors.stream().mapToInt(ValueVector::getValueCount).sum(); + assertEquals(rowCount, valueCount); + + int index = 0; + for (TimeMilliVector vector : vectors) { + for (int i = 0; i < vector.getValueCount(); i++) { + assertEquals(values[index++].longValue(), vector.get(i)); + } + } + } + + private void assertDateDayVectorValues(List<DateDayVector> vectors, int rowCount, Long[] values) { + int valueCount = vectors.stream().mapToInt(ValueVector::getValueCount).sum(); + assertEquals(rowCount, valueCount); + + int index = 0; + for (DateDayVector vector : vectors) { + for (int i = 0; i < vector.getValueCount(); i++) { + assertEquals(values[index++].longValue(), vector.get(i)); + } + } + } + + private void assertBitVectorValues(List<BitVector> vectors, int rowCount, Integer[] values) { + int valueCount = vectors.stream().mapToInt(ValueVector::getValueCount).sum(); + assertEquals(rowCount, valueCount); + + int index = 0; + for (BitVector vector : vectors) { + for (int i = 0; i < vector.getValueCount(); i++) { + assertEquals(values[index++].intValue(), vector.get(i)); + } + } + } + + private void assertBooleanVectorValues(List<BitVector> vectors, int rowCount, Boolean[] values) { + int valueCount = vectors.stream().mapToInt(ValueVector::getValueCount).sum(); + assertEquals(rowCount, valueCount); + + int index = 0; + for (BitVector vector : vectors) { + for (int i = 0; i < vector.getValueCount(); i++) { + assertEquals(values[index++], vector.get(i) == 1); + } + } + } + + private void assertVarCharVectorValues(List<VarCharVector> vectors, int rowCount, byte[][] values) { + int valueCount = vectors.stream().mapToInt(ValueVector::getValueCount).sum(); + assertEquals(rowCount, valueCount); + + int index = 0; + for (VarCharVector vector : vectors) { + for (int i = 0; i < vector.getValueCount(); i++) { + 
assertArrayEquals(values[index++], vector.get(i)); + } + } + } + + private void assertBinaryVectorValues(List<VarBinaryVector> vectors, int rowCount, byte[][] values) { + int valueCount = vectors.stream().mapToInt(ValueVector::getValueCount).sum(); + assertEquals(rowCount, valueCount); + + int index = 0; + for (VarBinaryVector vector : vectors) { + for (int i = 0; i < vector.getValueCount(); i++) { + assertArrayEquals(values[index++], vector.get(i)); + } + } + } + + private void assertSmallIntVectorValues(List<SmallIntVector> vectors, int rowCount, Integer[] values) { + int valueCount = vectors.stream().mapToInt(ValueVector::getValueCount).sum(); + assertEquals(rowCount, valueCount); + + int index = 0; + for (SmallIntVector vector : vectors) { + for (int i = 0; i < vector.getValueCount(); i++) { + assertEquals(values[index++].intValue(), vector.get(i)); + } + } + } + + private void assertTinyIntVectorValues(List<TinyIntVector> vectors, int rowCount, Integer[] values) { + int valueCount = vectors.stream().mapToInt(ValueVector::getValueCount).sum(); + assertEquals(rowCount, valueCount); + + int index = 0; + for (TinyIntVector vector : vectors) { + for (int i = 0; i < vector.getValueCount(); i++) { + assertEquals(values[index++].intValue(), vector.get(i)); + } + } + } + + private void assertBigIntVectorValues(List<BigIntVector> vectors, int rowCount, Long[] values) { + int valueCount = vectors.stream().mapToInt(ValueVector::getValueCount).sum(); + assertEquals(rowCount, valueCount); + + int index = 0; + for (BigIntVector vector : vectors) { + for (int i = 0; i < vector.getValueCount(); i++) { + assertEquals(values[index++].longValue(), vector.get(i)); + } + } + } + + private void assertIntVectorValues(List<IntVector> vectors, int rowCount, Integer[] values) { + int valueCount = vectors.stream().mapToInt(ValueVector::getValueCount).sum(); + assertEquals(rowCount, valueCount); + + int index = 0; + for (IntVector vector : vectors) { + for (int i = 0; i < 
vector.getValueCount(); i++) { + assertEquals(values[index++].intValue(), vector.get(i)); + } + } + } + + /** + * Runs a simple query, and encapsulates the result into a field vector. + */ + private FieldVector getQueryResult(JdbcToArrowConfig config) throws SQLException, IOException { + ArrowVectorIterator iterator = JdbcToArrow.sqlToArrowVectorIterator( + conn.createStatement().executeQuery("select real_field8 from table1"), config); + + VectorSchemaRoot root = iterator.next(); + + // only one vector, since there is one column in the select statement. + assertEquals(1, root.getFieldVectors().size()); + FieldVector result = root.getVector(0); + + // make sure some data is actually read + assertTrue(result.getValueCount() > 0); + + return result; + } + + @Test + public void testJdbcToArrowCustomTypeConversion() throws SQLException, IOException { + JdbcToArrowConfigBuilder builder = new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), + Calendar.getInstance()).setTargetBatchSize(JdbcToArrowConfig.NO_LIMIT_BATCH_SIZE) + .setReuseVectorSchemaRoot(reuseVectorSchemaRoot); + + // first experiment, using default type converter + JdbcToArrowConfig config = builder.build(); + + try (FieldVector vector = getQueryResult(config)) { + // the default converter translates real to float4 + assertTrue(vector instanceof Float4Vector); + } + + // second experiment, using customized type converter + builder.setJdbcToArrowTypeConverter(fieldInfo -> { + switch (fieldInfo.getJdbcType()) { + case Types.REAL: + // this is different from the default type converter + return new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE); + default: + return null; + } + }); + config = builder.build(); + + try (FieldVector vector = getQueryResult(config)) { + // the customized converter translates real to float8 + assertTrue(vector instanceof Float8Vector); + } + } +} diff --git a/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_all_datatypes_h2.yml 
b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_all_datatypes_h2.yml new file mode 100644 index 000000000..9baae643a --- /dev/null +++ b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_all_datatypes_h2.yml @@ -0,0 +1,121 @@ +#Licensed to the Apache Software Foundation (ASF) under one or more contributor +#license agreements. See the NOTICE file distributed with this work for additional +#information regarding copyright ownership. The ASF licenses this file to +#You under the Apache License, Version 2.0 (the "License"); you may not use +#this file except in compliance with the License. You may obtain a copy of +#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required +#by applicable law or agreed to in writing, software distributed under the +#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS +#OF ANY KIND, either express or implied. See the License for the specific +#language governing permissions and limitations under the License. 
+ +name: 'table1' + +create: 'CREATE TABLE table1 (int_field1 INT, bool_field2 BOOLEAN, tinyint_field3 TINYINT, smallint_field4 SMALLINT, bigint_field5 BIGINT, + decimal_field6 DECIMAL(20,2), double_field7 DOUBLE, real_field8 REAL, time_field9 TIME, date_field10 DATE, timestamp_field11 TIMESTAMP, + binary_field12 BINARY(100), varchar_field13 VARCHAR(256), blob_field14 BLOB, clob_field15 CLOB, char_field16 CHAR(16), bit_field17 BIT, + null_field18 NULL);' + +data: + - 'INSERT INTO table1 VALUES (101, 1, 45, 12000, 92233720, 17345667789.23, 56478356785.345, 56478356785.345, PARSEDATETIME(''12:45:35 GMT'', ''HH:mm:ss z''), + PARSEDATETIME(''2018-02-12 GMT'', ''yyyy-MM-dd z''), PARSEDATETIME(''2018-02-12 12:45:35 GMT'', ''yyyy-MM-dd HH:mm:ss z''), + ''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'', ''some text that needs to be converted to varchar'', + ''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'', ''some text that needs to be converted to clob'', ''some char text'', 1, null);' + + - 'INSERT INTO table1 VALUES (101, 1, 45, 12000, 92233720, 17345667789.23, 56478356785.345, 56478356785.345, PARSEDATETIME(''12:45:35 GMT'', ''HH:mm:ss z''), + PARSEDATETIME(''2018-02-12 GMT'', ''yyyy-MM-dd z''), PARSEDATETIME(''2018-02-12 12:45:35 GMT'', ''yyyy-MM-dd HH:mm:ss z''), + ''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'', ''some text that needs to be converted to varchar'', + ''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'', ''some text that needs to be converted to clob'', ''some char text'', 1, null);' + + - 'INSERT INTO table1 VALUES (101, 1, 45, 12000, 92233720, 17345667789.23, 56478356785.345, 56478356785.345, PARSEDATETIME(''12:45:35 GMT'', ''HH:mm:ss z''), + PARSEDATETIME(''2018-02-12 GMT'', ''yyyy-MM-dd z''), PARSEDATETIME(''2018-02-12 12:45:35 GMT'', ''yyyy-MM-dd HH:mm:ss z''), + 
''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'', ''some text that needs to be converted to varchar'', + ''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'', ''some text that needs to be converted to clob'', ''some char text'', 1, null);' + + - 'INSERT INTO table1 VALUES (101, 1, 45, 12000, 92233720, 17345667789.23, 56478356785.345, 56478356785.345, PARSEDATETIME(''12:45:35 GMT'', ''HH:mm:ss z''), + PARSEDATETIME(''2018-02-12 GMT'', ''yyyy-MM-dd z''), PARSEDATETIME(''2018-02-12 12:45:35 GMT'', ''yyyy-MM-dd HH:mm:ss z''), + ''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'', ''some text that needs to be converted to varchar'', + ''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'', ''some text that needs to be converted to clob'', ''some char text'', 1, null);' + + - 'INSERT INTO table1 VALUES (101, 1, 45, 12000, 92233720, 17345667789.23, 56478356785.345, 56478356785.345, PARSEDATETIME(''12:45:35 GMT'', ''HH:mm:ss z''), + PARSEDATETIME(''2018-02-12 GMT'', ''yyyy-MM-dd z''), PARSEDATETIME(''2018-02-12 12:45:35 GMT'', ''yyyy-MM-dd HH:mm:ss z''), + ''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'', ''some text that needs to be converted to varchar'', + ''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'', ''some text that needs to be converted to clob'', ''some char text'', 1, null);' + + - 'INSERT INTO table1 VALUES (101, 1, 45, 12000, 92233720, 17345667789.23, 56478356785.345, 56478356785.345, PARSEDATETIME(''12:45:35 GMT'', ''HH:mm:ss z''), + PARSEDATETIME(''2018-02-12 GMT'', ''yyyy-MM-dd z''), PARSEDATETIME(''2018-02-12 12:45:35 GMT'', ''yyyy-MM-dd HH:mm:ss z''), + ''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'', ''some text that needs to be 
converted to varchar'', + ''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'', ''some text that needs to be converted to clob'', ''some char text'', 1, null);' + + - 'INSERT INTO table1 VALUES (101, 1, 45, 12000, 92233720, 17345667789.23, 56478356785.345, 56478356785.345, PARSEDATETIME(''12:45:35 GMT'', ''HH:mm:ss z''), + PARSEDATETIME(''2018-02-12 GMT'', ''yyyy-MM-dd z''), PARSEDATETIME(''2018-02-12 12:45:35 GMT'', ''yyyy-MM-dd HH:mm:ss z''), + ''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'', ''some text that needs to be converted to varchar'', + ''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'', ''some text that needs to be converted to clob'', ''some char text'', 1, null);' + + - 'INSERT INTO table1 VALUES (101, 1, 45, 12000, 92233720, 17345667789.23, 56478356785.345, 56478356785.345, PARSEDATETIME(''12:45:35 GMT'', ''HH:mm:ss z''), + PARSEDATETIME(''2018-02-12 GMT'', ''yyyy-MM-dd z''), PARSEDATETIME(''2018-02-12 12:45:35 GMT'', ''yyyy-MM-dd HH:mm:ss z''), + ''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'', ''some text that needs to be converted to varchar'', + ''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'', ''some text that needs to be converted to clob'', ''some char text'', 1, null);' + + - 'INSERT INTO table1 VALUES (101, 1, 45, 12000, 92233720, 17345667789.23, 56478356785.345, 56478356785.345, PARSEDATETIME(''12:45:35 GMT'', ''HH:mm:ss z''), + PARSEDATETIME(''2018-02-12 GMT'', ''yyyy-MM-dd z''), PARSEDATETIME(''2018-02-12 12:45:35 GMT'', ''yyyy-MM-dd HH:mm:ss z''), + ''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'', ''some text that needs to be converted to varchar'', + ''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'', ''some 
text that needs to be converted to clob'', ''some char text'', 1, null);' + + - 'INSERT INTO table1 VALUES (101, 1, 45, 12000, 92233720, 17345667789.23, 56478356785.345, 56478356785.345, PARSEDATETIME(''12:45:35 GMT'', ''HH:mm:ss z''), + PARSEDATETIME(''2018-02-12 GMT'', ''yyyy-MM-dd z''), PARSEDATETIME(''2018-02-12 12:45:35 GMT'', ''yyyy-MM-dd HH:mm:ss z''), + ''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'', ''some text that needs to be converted to varchar'', + ''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'', ''some text that needs to be converted to clob'', ''some char text'', 1, null);' + +query: 'select int_field1, bool_field2, tinyint_field3, smallint_field4, bigint_field5, decimal_field6, double_field7, real_field8, + time_field9, date_field10, timestamp_field11, binary_field12, varchar_field13, blob_field14, clob_field15, char_field16, bit_field17, null_field18 from table1' + +drop: 'DROP table table1;' + +rowCount: '10' + +values: + - 'INT_FIELD1=101,101,101,101,101,101,101,101,101,101' + - 'BOOL_FIELD2=1,1,1,1,1,1,1,1,1,1' + - 'BIT_FIELD17=1,1,1,1,1,1,1,1,1,1' + - 'TINYINT_FIELD3=45,45,45,45,45,45,45,45,45,45' + - 'SMALLINT_FIELD4=12000,12000,12000,12000,12000,12000,12000,12000,12000,12000' + - 'BIGINT_FIELD5=92233720,92233720,92233720,92233720,92233720,92233720,92233720,92233720,92233720,92233720' + - 'REAL_FIELD8=56478356785.345f,56478356785.345f,56478356785.345f,56478356785.345f,56478356785.345f,56478356785.345f,56478356785.345f,56478356785.345f,56478356785.345f,56478356785.345f' + - 'DECIMAL_FIELD6=17345667789.23,17345667789.23,17345667789.23,17345667789.23,17345667789.23,17345667789.23,17345667789.23,17345667789.23,17345667789.23,17345667789.23' + - 'DOUBLE_FIELD7=56478356785.345,56478356785.345,56478356785.345,56478356785.345,56478356785.345,56478356785.345,56478356785.345,56478356785.345,56478356785.345,56478356785.345' + - 
'TIME_FIELD9=45935000,45935000,45935000,45935000,45935000,45935000,45935000,45935000,45935000,45935000' + - 'DATE_FIELD10=17574,17574,17574,17574,17574,17574,17574,17574,17574,17574' + - 'TIMESTAMP_FIELD11=1518439535000,1518439535000,1518439535000,1518439535000,1518439535000,1518439535000,1518439535000,1518439535000,1518439535000,1518439535000' + - 'CHAR_FIELD16=some char text,some char text,some char text,some char text,some char text, + some char text,some char text,some char text,some char text,some char text' + - 'VARCHAR_FIELD13=some text that needs to be converted to varchar,some text that needs to be converted to varchar, + some text that needs to be converted to varchar,some text that needs to be converted to varchar, + some text that needs to be converted to varchar,some text that needs to be converted to varchar, + some text that needs to be converted to varchar,some text that needs to be converted to varchar, + some text that needs to be converted to varchar,some text that needs to be converted to varchar' + - 'BINARY_FIELD12=736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279, + 736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279, + 736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279, + 736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279, + 736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279, + 736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279, + 736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279, + 736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279, + 736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279, + 
736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279' + - 'BLOB_FIELD14=736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279, + 736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279, + 736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279, + 736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279, + 736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279, + 736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279, + 736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279, + 736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279, + 736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279, + 736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279' + - 'CLOB_FIELD15=some text that needs to be converted to clob,some text that needs to be converted to clob, + some text that needs to be converted to clob,some text that needs to be converted to clob, + some text that needs to be converted to clob,some text that needs to be converted to clob, + some text that needs to be converted to clob,some text that needs to be converted to clob, + some text that needs to be converted to clob,some text that needs to be converted to clob' diff --git a/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_all_datatypes_null_h2.yml b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_all_datatypes_null_h2.yml new file mode 100644 index 000000000..977879df4 --- /dev/null +++ b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_all_datatypes_null_h2.yml @@ -0,0 +1,51 @@ +#Licensed to the Apache Software Foundation (ASF) under one or more contributor +#license agreements. 
See the NOTICE file distributed with this work for additional +#information regarding copyright ownership. The ASF licenses this file to +#You under the Apache License, Version 2.0 (the "License"); you may not use +#this file except in compliance with the License. You may obtain a copy of +#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required +#by applicable law or agreed to in writing, software distributed under the +#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS +#OF ANY KIND, either express or implied. See the License for the specific +#language governing permissions and limitations under the License. + +name: 'table1' + +type: 'null' + +vectors: + - 'INT_FIELD1' + - 'BOOL_FIELD2' + - 'TINYINT_FIELD3' + - 'SMALLINT_FIELD4' + - 'BIGINT_FIELD5' + - 'DECIMAL_FIELD6' + - 'DOUBLE_FIELD7' + - 'REAL_FIELD8' + - 'TIME_FIELD9' + - 'DATE_FIELD10' + - 'TIMESTAMP_FIELD11' + - 'BINARY_FIELD12' + - 'VARCHAR_FIELD13' + - 'BLOB_FIELD14' + - 'CLOB_FIELD15' + - 'CHAR_FIELD16' + - 'BIT_FIELD17' + +rowCount: '5' + +create: 'CREATE TABLE table1 (int_field1 INT, bool_field2 BOOLEAN, tinyint_field3 TINYINT, smallint_field4 SMALLINT, bigint_field5 BIGINT, + decimal_field6 DECIMAL(20,2), double_field7 DOUBLE, real_field8 REAL, time_field9 TIME, date_field10 DATE, timestamp_field11 TIMESTAMP, + binary_field12 BINARY(100), varchar_field13 VARCHAR(256), blob_field14 BLOB, clob_field15 CLOB, char_field16 CHAR(16), bit_field17 BIT);' + +data: + - 'INSERT INTO table1 VALUES (null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null);' + - 'INSERT INTO table1 VALUES (null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null);' + - 'INSERT INTO table1 VALUES (null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null);' + - 'INSERT INTO table1 VALUES (null, null, null, null, null, null, null, null, null, null, null, 
null, null, null, null, null, null);' + - 'INSERT INTO table1 VALUES (null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null);' + +query: 'select int_field1, bool_field2, tinyint_field3, smallint_field4, bigint_field5, decimal_field6, double_field7, real_field8, + time_field9, date_field10, timestamp_field11, binary_field12, varchar_field13, blob_field14, clob_field15, char_field16, bit_field17 from table1' + +drop: 'DROP table table1;'
\ No newline at end of file diff --git a/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_all_datatypes_selected_null_rows_h2.yml b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_all_datatypes_selected_null_rows_h2.yml new file mode 100644 index 000000000..4be8ab86e --- /dev/null +++ b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_all_datatypes_selected_null_rows_h2.yml @@ -0,0 +1,83 @@ +#Licensed to the Apache Software Foundation (ASF) under one or more contributor +#license agreements. See the NOTICE file distributed with this work for additional +#information regarding copyright ownership. The ASF licenses this file to +#You under the Apache License, Version 2.0 (the "License"); you may not use +#this file except in compliance with the License. You may obtain a copy of +#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required +#by applicable law or agreed to in writing, software distributed under the +#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS +#OF ANY KIND, either express or implied. See the License for the specific +#language governing permissions and limitations under the License. 
+ +name: 'table1' + +type: 'selected_null_row' + +vectors: + - 'INT_FIELD1' + - 'BOOL_FIELD2' + - 'TINYINT_FIELD3' + - 'SMALLINT_FIELD4' + - 'BIGINT_FIELD5' + - 'DECIMAL_FIELD6' + - 'DOUBLE_FIELD7' + - 'REAL_FIELD8' + - 'TIME_FIELD9' + - 'DATE_FIELD10' + - 'TIMESTAMP_FIELD11' + - 'BINARY_FIELD12' + - 'VARCHAR_FIELD13' + - 'BLOB_FIELD14' + - 'CLOB_FIELD15' + - 'CHAR_FIELD16' + - 'BIT_FIELD17' + +create: 'CREATE TABLE table1 (int_field1 INT, bool_field2 BOOLEAN, tinyint_field3 TINYINT, smallint_field4 SMALLINT, bigint_field5 BIGINT, + decimal_field6 DECIMAL(20,2), double_field7 DOUBLE, real_field8 REAL, time_field9 TIME, date_field10 DATE, timestamp_field11 TIMESTAMP, + binary_field12 BINARY(100), varchar_field13 VARCHAR(256), blob_field14 BLOB, clob_field15 CLOB, char_field16 CHAR(16), bit_field17 BIT);' + +data: + - 'INSERT INTO table1 VALUES (null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null);' + + - 'INSERT INTO table1 VALUES (101, 1, 45, 12000, 92233720, 17345667789.23, 56478356785.345, 56478356785.345, PARSEDATETIME(''12:45:35 GMT'', ''HH:mm:ss z''), + PARSEDATETIME(''2018-02-12 GMT'', ''yyyy-MM-dd z''), PARSEDATETIME(''2018-02-12 12:45:35 GMT'', ''yyyy-MM-dd HH:mm:ss z''), + ''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'', ''some text that needs to be converted to varchar'', + ''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'', ''some text that needs to be converted to clob'', ''some char text'', 1);' + + - 'INSERT INTO table1 VALUES (null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null);' + + - 'INSERT INTO table1 VALUES (101, 1, 45, 12000, 92233720, 17345667789.23, 56478356785.345, 56478356785.345, PARSEDATETIME(''12:45:35 GMT'', ''HH:mm:ss z''), + PARSEDATETIME(''2018-02-12 GMT'', ''yyyy-MM-dd z''), PARSEDATETIME(''2018-02-12 12:45:35 GMT'', ''yyyy-MM-dd HH:mm:ss 
z''), + ''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'', ''some text that needs to be converted to varchar'', + ''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'', ''some text that needs to be converted to clob'', ''some char text'', 1);' + + - 'INSERT INTO table1 VALUES (null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null);' + +query: 'select int_field1, bool_field2, tinyint_field3, smallint_field4, bigint_field5, decimal_field6, double_field7, real_field8, + time_field9, date_field10, timestamp_field11, binary_field12, varchar_field13, blob_field14, clob_field15, char_field16, bit_field17 from table1' + +drop: 'DROP table table1;' + +rowCount: '5' + +values: + - 'INT_FIELD1=null,101,null,101,null' + - 'BOOL_FIELD2=null,1,null,1,null' + - 'BIT_FIELD17=null,1,null,1,null' + - 'TINYINT_FIELD3=null,45,null,45,null' + - 'SMALLINT_FIELD4=null,12000,null,12000,null' + - 'BIGINT_FIELD5=null,92233720,null,92233720,null' + - 'REAL_FIELD8=null,56478356785.345f,null,56478356785.345f,null' + - 'DECIMAL_FIELD6=null,17345667789.23,null,17345667789.23,null' + - 'DOUBLE_FIELD7=null,56478356785.345,null,56478356785.345,null' + - 'TIME_FIELD9=null,45935000,null,45935000,null' + - 'DATE_FIELD10=null,17574,null,17574,null' + - 'TIMESTAMP_FIELD11=null,1518439535000,null,1518439535000,null' + - 'CHAR_FIELD16=null,some char text,null,some char text,null' + - 'VARCHAR_FIELD13=null,some text that needs to be converted to varchar,null, + some text that needs to be converted to varchar,null' + - 'BINARY_FIELD12=null,736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279, + null,736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279,null' + - 'BLOB_FIELD14=null,736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279, + 
null,736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279,null' + - 'CLOB_FIELD15=null,some text that needs to be converted to clob,null,some text that needs to be converted to clob,null' diff --git a/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_bigint_h2.yml b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_bigint_h2.yml new file mode 100644 index 000000000..066bececf --- /dev/null +++ b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_bigint_h2.yml @@ -0,0 +1,46 @@ +#Licensed to the Apache Software Foundation (ASF) under one or more contributor +#license agreements. See the NOTICE file distributed with this work for additional +#information regarding copyright ownership. The ASF licenses this file to +#You under the Apache License, Version 2.0 (the "License"); you may not use +#this file except in compliance with the License. You may obtain a copy of +#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required +#by applicable law or agreed to in writing, software distributed under the +#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS +#OF ANY KIND, either express or implied. See the License for the specific +#language governing permissions and limitations under the License. 
+ +name: 'table1' + +type: 'big_int' + +vector: 'BIGINT_FIELD5' + +create: 'CREATE TABLE table1 (bigint_field5 BIGINT);' + +data: + - 'INSERT INTO table1 VALUES (92233720);' + - 'INSERT INTO table1 VALUES (92233720);' + - 'INSERT INTO table1 VALUES (92233720);' + - 'INSERT INTO table1 VALUES (92233720);' + - 'INSERT INTO table1 VALUES (92233720);' + - 'INSERT INTO table1 VALUES (92233720);' + - 'INSERT INTO table1 VALUES (92233720);' + - 'INSERT INTO table1 VALUES (92233720);' + - 'INSERT INTO table1 VALUES (92233720);' + - 'INSERT INTO table1 VALUES (92233720);' + +query: 'select bigint_field5 from table1;' + +drop: 'DROP table table1;' + +values: + - '92233720' + - '92233720' + - '92233720' + - '92233720' + - '92233720' + - '92233720' + - '92233720' + - '92233720' + - '92233720' + - '92233720' diff --git a/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_binary_h2.yml b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_binary_h2.yml new file mode 100644 index 000000000..ce3e4f127 --- /dev/null +++ b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_binary_h2.yml @@ -0,0 +1,46 @@ +#Licensed to the Apache Software Foundation (ASF) under one or more contributor +#license agreements. See the NOTICE file distributed with this work for additional +#information regarding copyright ownership. The ASF licenses this file to +#You under the Apache License, Version 2.0 (the "License"); you may not use +#this file except in compliance with the License. You may obtain a copy of +#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required +#by applicable law or agreed to in writing, software distributed under the +#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS +#OF ANY KIND, either express or implied. See the License for the specific +#language governing permissions and limitations under the License. 
+ +name: 'table1' + +type: 'binary' + +vector: 'BINARY_FIELD12' + +create: 'CREATE TABLE table1 (binary_field12 BINARY(100));' + +data: + - 'INSERT INTO table1 VALUES (''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'');' + - 'INSERT INTO table1 VALUES (''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'');' + - 'INSERT INTO table1 VALUES (''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'');' + - 'INSERT INTO table1 VALUES (''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'');' + - 'INSERT INTO table1 VALUES (''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'');' + - 'INSERT INTO table1 VALUES (''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'');' + - 'INSERT INTO table1 VALUES (''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'');' + - 'INSERT INTO table1 VALUES (''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'');' + - 'INSERT INTO table1 VALUES (''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'');' + - 'INSERT INTO table1 VALUES (''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'');' + +query: 'select binary_field12 from table1;' + +drop: 'DROP table table1;' + +values: + - '736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279' + - '736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279' + - '736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279' + - '736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279' + - 
'736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279' + - '736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279' + - '736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279' + - '736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279' + - '736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279' + - '736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279' diff --git a/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_bit_h2.yml b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_bit_h2.yml new file mode 100644 index 000000000..aeb7a20e0 --- /dev/null +++ b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_bit_h2.yml @@ -0,0 +1,46 @@ +#Licensed to the Apache Software Foundation (ASF) under one or more contributor +#license agreements. See the NOTICE file distributed with this work for additional +#information regarding copyright ownership. The ASF licenses this file to +#You under the Apache License, Version 2.0 (the "License"); you may not use +#this file except in compliance with the License. You may obtain a copy of +#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required +#by applicable law or agreed to in writing, software distributed under the +#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS +#OF ANY KIND, either express or implied. See the License for the specific +#language governing permissions and limitations under the License. 
+ +name: 'table1' + +type: 'bit' + +vector: 'BIT_FIELD17' + +create: 'CREATE TABLE table1 (bit_field17 BIT);' + +data: + - 'INSERT INTO table1 VALUES (1);' + - 'INSERT INTO table1 VALUES (1);' + - 'INSERT INTO table1 VALUES (1);' + - 'INSERT INTO table1 VALUES (1);' + - 'INSERT INTO table1 VALUES (1);' + - 'INSERT INTO table1 VALUES (1);' + - 'INSERT INTO table1 VALUES (1);' + - 'INSERT INTO table1 VALUES (1);' + - 'INSERT INTO table1 VALUES (1);' + - 'INSERT INTO table1 VALUES (1);' + +query: 'select bit_field17 from table1;' + +drop: 'DROP table table1;' + +values: + - '1' + - '1' + - '1' + - '1' + - '1' + - '1' + - '1' + - '1' + - '1' + - '1' diff --git a/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_blob_h2.yml b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_blob_h2.yml new file mode 100644 index 000000000..b4cd2ca80 --- /dev/null +++ b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_blob_h2.yml @@ -0,0 +1,46 @@ +#Licensed to the Apache Software Foundation (ASF) under one or more contributor +#license agreements. See the NOTICE file distributed with this work for additional +#information regarding copyright ownership. The ASF licenses this file to +#You under the Apache License, Version 2.0 (the "License"); you may not use +#this file except in compliance with the License. You may obtain a copy of +#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required +#by applicable law or agreed to in writing, software distributed under the +#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS +#OF ANY KIND, either express or implied. See the License for the specific +#language governing permissions and limitations under the License. 
+ +name: 'table1' + +type: 'blob' + +vector: 'BLOB_FIELD14' + +create: 'CREATE TABLE table1 (blob_field14 BLOB);' + +data: + - 'INSERT INTO table1 VALUES (''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'');' + - 'INSERT INTO table1 VALUES (''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'');' + - 'INSERT INTO table1 VALUES (''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'');' + - 'INSERT INTO table1 VALUES (''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'');' + - 'INSERT INTO table1 VALUES (''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'');' + - 'INSERT INTO table1 VALUES (''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'');' + - 'INSERT INTO table1 VALUES (''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'');' + - 'INSERT INTO table1 VALUES (''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'');' + - 'INSERT INTO table1 VALUES (''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'');' + - 'INSERT INTO table1 VALUES (''736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279'');' + +query: 'select blob_field14 from table1;' + +drop: 'DROP table table1;' + +values: + - '736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279' + - '736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279' + - '736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279' + - '736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279' + - 
'736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279' + - '736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279' + - '736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279' + - '736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279' + - '736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279' + - '736f6d6520746578742074686174206e6565647320746f20626520636f6e76657274656420746f2062696e617279' diff --git a/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_bool_h2.yml b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_bool_h2.yml new file mode 100644 index 000000000..8219a55ec --- /dev/null +++ b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_bool_h2.yml @@ -0,0 +1,46 @@ +#Licensed to the Apache Software Foundation (ASF) under one or more contributor +#license agreements. See the NOTICE file distributed with this work for additional +#information regarding copyright ownership. The ASF licenses this file to +#You under the Apache License, Version 2.0 (the "License"); you may not use +#this file except in compliance with the License. You may obtain a copy of +#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required +#by applicable law or agreed to in writing, software distributed under the +#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS +#OF ANY KIND, either express or implied. See the License for the specific +#language governing permissions and limitations under the License. 
+ +name: 'table1' + +type: 'bool' + +vector: 'BOOL_FIELD2' + +create: 'CREATE TABLE table1 (bool_field2 BOOLEAN);' + +data: + - 'INSERT INTO table1 VALUES (1);' + - 'INSERT INTO table1 VALUES (1);' + - 'INSERT INTO table1 VALUES (1);' + - 'INSERT INTO table1 VALUES (1);' + - 'INSERT INTO table1 VALUES (1);' + - 'INSERT INTO table1 VALUES (1);' + - 'INSERT INTO table1 VALUES (1);' + - 'INSERT INTO table1 VALUES (1);' + - 'INSERT INTO table1 VALUES (1);' + - 'INSERT INTO table1 VALUES (1);' + +query: 'select bool_field2 from table1;' + +drop: 'DROP table table1;' + +values: + - 'true' + - 'true' + - 'true' + - 'true' + - 'true' + - 'true' + - 'true' + - 'true' + - 'true' + - 'true' diff --git a/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_char_h2.yml b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_char_h2.yml new file mode 100644 index 000000000..6e2cb185c --- /dev/null +++ b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_char_h2.yml @@ -0,0 +1,46 @@ +#Licensed to the Apache Software Foundation (ASF) under one or more contributor +#license agreements. See the NOTICE file distributed with this work for additional +#information regarding copyright ownership. The ASF licenses this file to +#You under the Apache License, Version 2.0 (the "License"); you may not use +#this file except in compliance with the License. You may obtain a copy of +#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required +#by applicable law or agreed to in writing, software distributed under the +#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS +#OF ANY KIND, either express or implied. See the License for the specific +#language governing permissions and limitations under the License. 
+ +name: 'table1' + +type: 'char' + +vector: 'CHAR_FIELD16' + +create: 'CREATE TABLE table1 (char_field16 CHAR(16));' + +data: + - 'INSERT INTO table1 VALUES (''some char text'');' + - 'INSERT INTO table1 VALUES (''some char text'');' + - 'INSERT INTO table1 VALUES (''some char text'');' + - 'INSERT INTO table1 VALUES (''some char text'');' + - 'INSERT INTO table1 VALUES (''some char text'');' + - 'INSERT INTO table1 VALUES (''some char text'');' + - 'INSERT INTO table1 VALUES (''some char text'');' + - 'INSERT INTO table1 VALUES (''some char text'');' + - 'INSERT INTO table1 VALUES (''some char text'');' + - 'INSERT INTO table1 VALUES (''some char text'');' + +query: 'select char_field16 from table1;' + +drop: 'DROP table table1;' + +values: + - 'some char text' + - 'some char text' + - 'some char text' + - 'some char text' + - 'some char text' + - 'some char text' + - 'some char text' + - 'some char text' + - 'some char text' + - 'some char text' diff --git a/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_charset_ch_h2.yml b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_charset_ch_h2.yml new file mode 100644 index 000000000..1b6d7d503 --- /dev/null +++ b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_charset_ch_h2.yml @@ -0,0 +1,43 @@ +#Licensed to the Apache Software Foundation (ASF) under one or more contributor +#license agreements. See the NOTICE file distributed with this work for additional +#information regarding copyright ownership. The ASF licenses this file to +#You under the Apache License, Version 2.0 (the "License"); you may not use +#this file except in compliance with the License. You may obtain a copy of +#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required +#by applicable law or agreed to in writing, software distributed under the +#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS +#OF ANY KIND, either express or implied. 
See the License for the specific +#language governing permissions and limitations under the License. + +name: 'table1' + +type: 'charset' + +vectors: + - 'VARCHAR_FIELD13' + - 'CHAR_FIELD16' + - 'CLOB_FIELD15' + +rowCount: '5' + +charSet: 'GBK' + +create: 'CREATE TABLE table1 (int_field1 INT, varchar_field13 VARCHAR(256), clob_field15 CLOB, char_field16 CHAR(128));' + +data: + - 'INSERT INTO table1 VALUES (101,''一些帶有char編碼的文本需要轉換為varchar'', ''一些带有char编码的文本需要转换为clob'', ''一些char编码的字符文本'');' + - 'INSERT INTO table1 VALUES (101,''一些帶有char編碼的文本需要轉換為varchar'', ''一些带有char编码的文本需要转换为clob'', ''一些char编码的字符文本'');' + - 'INSERT INTO table1 VALUES (101,''一些帶有char編碼的文本需要轉換為varchar'', ''一些带有char编码的文本需要转换为clob'', ''一些char编码的字符文本'');' + - 'INSERT INTO table1 VALUES (101,''一些帶有char編碼的文本需要轉換為varchar'', ''一些带有char编码的文本需要转换为clob'', ''一些char编码的字符文本'');' + - 'INSERT INTO table1 VALUES (101,''一些帶有char編碼的文本需要轉換為varchar'', ''一些带有char编码的文本需要转换为clob'', ''一些char编码的字符文本'');' + +query: 'select varchar_field13, clob_field15, char_field16 from table1' + +drop: 'DROP table table1;' + +values: + - 'VARCHAR_FIELD13=一些帶有char編碼的文本需要轉換為varchar,一些帶有char編碼的文本需要轉換為varchar,一些帶有char編碼的文本需要轉換為varchar, + 一些帶有char編碼的文本需要轉換為varchar,一些帶有char編碼的文本需要轉換為varchar' + - 'CLOB_FIELD15=一些带有char编码的文本需要转换为clob,一些带有char编码的文本需要转换为clob,一些带有char编码的文本需要转换为clob, + 一些带有char编码的文本需要转换为clob,一些带有char编码的文本需要转换为clob' + - 'CHAR_FIELD16=一些char编码的字符文本,一些char编码的字符文本,一些char编码的字符文本,一些char编码的字符文本,一些char编码的字符文本'
\ No newline at end of file diff --git a/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_charset_h2.yml b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_charset_h2.yml new file mode 100644 index 000000000..75734a221 --- /dev/null +++ b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_charset_h2.yml @@ -0,0 +1,53 @@ +#Licensed to the Apache Software Foundation (ASF) under one or more contributor +#license agreements. See the NOTICE file distributed with this work for additional +#information regarding copyright ownership. The ASF licenses this file to +#You under the Apache License, Version 2.0 (the "License"); you may not use +#this file except in compliance with the License. You may obtain a copy of +#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required +#by applicable law or agreed to in writing, software distributed under the +#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS +#OF ANY KIND, either express or implied. See the License for the specific +#language governing permissions and limitations under the License. 
+ +name: 'table1' + +type: 'charset' + +vectors: + - 'VARCHAR_FIELD13' + - 'CHAR_FIELD16' + - 'CLOB_FIELD15' + +rowCount: '10' + +create: 'CREATE TABLE table1 (int_field1 INT, varchar_field13 VARCHAR(256), clob_field15 CLOB, char_field16 CHAR(128));' + +data: + - 'INSERT INTO table1 VALUES (101,''some text with char encoding that needs to be converted to varchar'', ''some text with char encoding that needs to be converted to clob'', ''some char text with char encoding'');' + - 'INSERT INTO table1 VALUES (101,''some text with char encoding that needs to be converted to varchar'', ''some text with char encoding that needs to be converted to clob'', ''some char text with char encoding'');' + - 'INSERT INTO table1 VALUES (101,''some text with char encoding that needs to be converted to varchar'', ''some text with char encoding that needs to be converted to clob'', ''some char text with char encoding'');' + - 'INSERT INTO table1 VALUES (101,''some text with char encoding that needs to be converted to varchar'', ''some text with char encoding that needs to be converted to clob'', ''some char text with char encoding'');' + - 'INSERT INTO table1 VALUES (101,''some text with char encoding that needs to be converted to varchar'', ''some text with char encoding that needs to be converted to clob'', ''some char text with char encoding'');' + - 'INSERT INTO table1 VALUES (101,''some text with char encoding that needs to be converted to varchar'', ''some text with char encoding that needs to be converted to clob'', ''some char text with char encoding'');' + - 'INSERT INTO table1 VALUES (101,''some text with char encoding that needs to be converted to varchar'', ''some text with char encoding that needs to be converted to clob'', ''some char text with char encoding'');' + - 'INSERT INTO table1 VALUES (101,''some text with char encoding that needs to be converted to varchar'', ''some text with char encoding that needs to be converted to clob'', ''some char text with char 
encoding'');' + - 'INSERT INTO table1 VALUES (101,''some text with char encoding that needs to be converted to varchar'', ''some text with char encoding that needs to be converted to clob'', ''some char text with char encoding'');' + - 'INSERT INTO table1 VALUES (101,''some text with char encoding that needs to be converted to varchar'', ''some text with char encoding that needs to be converted to clob'', ''some char text with char encoding'');' + +query: 'select varchar_field13, clob_field15, char_field16 from table1' + +drop: 'DROP table table1;' + +values: + - 'VARCHAR_FIELD13=some text with char encoding that needs to be converted to varchar,some text with char encoding that needs to be converted to varchar, + some text with char encoding that needs to be converted to varchar,some text with char encoding that needs to be converted to varchar, + some text with char encoding that needs to be converted to varchar,some text with char encoding that needs to be converted to varchar, + some text with char encoding that needs to be converted to varchar,some text with char encoding that needs to be converted to varchar, + some text with char encoding that needs to be converted to varchar,some text with char encoding that needs to be converted to varchar' + - 'CLOB_FIELD15=some text with char encoding that needs to be converted to clob,some text with char encoding that needs to be converted to clob, + some text with char encoding that needs to be converted to clob,some text with char encoding that needs to be converted to clob, + some text with char encoding that needs to be converted to clob,some text with char encoding that needs to be converted to clob, + some text with char encoding that needs to be converted to clob,some text with char encoding that needs to be converted to clob, + some text with char encoding that needs to be converted to clob,some text with char encoding that needs to be converted to clob' + - 'CHAR_FIELD16=some char text with char encoding,some 
char text with char encoding,some char text with char encoding,some char text with char encoding,some char text with char encoding, + some char text with char encoding,some char text with char encoding,some char text with char encoding,some char text with char encoding,some char text with char encoding'
\ No newline at end of file diff --git a/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_charset_jp_h2.yml b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_charset_jp_h2.yml new file mode 100644 index 000000000..10c33f443 --- /dev/null +++ b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_charset_jp_h2.yml @@ -0,0 +1,43 @@ +#Licensed to the Apache Software Foundation (ASF) under one or more contributor +#license agreements. See the NOTICE file distributed with this work for additional +#information regarding copyright ownership. The ASF licenses this file to +#You under the Apache License, Version 2.0 (the "License"); you may not use +#this file except in compliance with the License. You may obtain a copy of +#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required +#by applicable law or agreed to in writing, software distributed under the +#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS +#OF ANY KIND, either express or implied. See the License for the specific +#language governing permissions and limitations under the License. 
+ +name: 'table1' + +type: 'charset' + +vectors: + - 'VARCHAR_FIELD13' + - 'CHAR_FIELD16' + - 'CLOB_FIELD15' + +rowCount: '5' + +charSet: 'SJIS' + +create: 'CREATE TABLE table1 (int_field1 INT, varchar_field13 VARCHAR(256), clob_field15 CLOB, char_field16 CHAR(128));' + +data: + - 'INSERT INTO table1 VALUES (101,''varcharに変換する必要があるcharエンコーディングのテキスト'', ''charエンコーディングのあるテキストをclobに変換する必要がある'', ''charエンコーディングのあるcharテキスト'');' + - 'INSERT INTO table1 VALUES (101,''varcharに変換する必要があるcharエンコーディングのテキスト'', ''charエンコーディングのあるテキストをclobに変換する必要がある'', ''charエンコーディングのあるcharテキスト'');' + - 'INSERT INTO table1 VALUES (101,''varcharに変換する必要があるcharエンコーディングのテキスト'', ''charエンコーディングのあるテキストをclobに変換する必要がある'', ''charエンコーディングのあるcharテキスト'');' + - 'INSERT INTO table1 VALUES (101,''varcharに変換する必要があるcharエンコーディングのテキスト'', ''charエンコーディングのあるテキストをclobに変換する必要がある'', ''charエンコーディングのあるcharテキスト'');' + - 'INSERT INTO table1 VALUES (101,''varcharに変換する必要があるcharエンコーディングのテキスト'', ''charエンコーディングのあるテキストをclobに変換する必要がある'', ''charエンコーディングのあるcharテキスト'');' + +query: 'select varchar_field13, clob_field15, char_field16 from table1' + +drop: 'DROP table table1;' + +values: + - 'VARCHAR_FIELD13=varcharに変換する必要があるcharエンコーディングのテキスト,varcharに変換する必要があるcharエンコーディングのテキスト,varcharに変換する必要があるcharエンコーディングのテキスト, + varcharに変換する必要があるcharエンコーディングのテキスト,varcharに変換する必要があるcharエンコーディングのテキスト' + - 'CLOB_FIELD15=charエンコーディングのあるテキストをclobに変換する必要がある,charエンコーディングのあるテキストをclobに変換する必要がある,charエンコーディングのあるテキストをclobに変換する必要がある, + charエンコーディングのあるテキストをclobに変換する必要がある,charエンコーディングのあるテキストをclobに変換する必要がある' + - 'CHAR_FIELD16=charエンコーディングのあるcharテキスト,charエンコーディングのあるcharテキスト,charエンコーディングのあるcharテキスト,charエンコーディングのあるcharテキスト,charエンコーディングのあるcharテキスト'
\ No newline at end of file diff --git a/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_charset_kr_h2.yml b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_charset_kr_h2.yml new file mode 100644 index 000000000..a00a41b53 --- /dev/null +++ b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_charset_kr_h2.yml @@ -0,0 +1,43 @@ +#Licensed to the Apache Software Foundation (ASF) under one or more contributor +#license agreements. See the NOTICE file distributed with this work for additional +#information regarding copyright ownership. The ASF licenses this file to +#You under the Apache License, Version 2.0 (the "License"); you may not use +#this file except in compliance with the License. You may obtain a copy of +#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required +#by applicable law or agreed to in writing, software distributed under the +#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS +#OF ANY KIND, either express or implied. See the License for the specific +#language governing permissions and limitations under the License. 
+ +name: 'table1' + +type: 'charset' + +vectors: + - 'VARCHAR_FIELD13' + - 'CHAR_FIELD16' + - 'CLOB_FIELD15' + +rowCount: '5' + +charSet: 'EUC-KR' + +create: 'CREATE TABLE table1 (int_field1 INT, varchar_field13 VARCHAR(256), clob_field15 CLOB, char_field16 CHAR(128));' + +data: + - 'INSERT INTO table1 VALUES (101,''char 인코딩을 사용하는 일부 텍스트를 varchar로 변환해야합니다.'', ''clob로 변환해야하는 char 인코딩을 가진 텍스트'', ''char 인코딩을 사용한 char 텍스트'');' + - 'INSERT INTO table1 VALUES (101,''char 인코딩을 사용하는 일부 텍스트를 varchar로 변환해야합니다.'', ''clob로 변환해야하는 char 인코딩을 가진 텍스트'', ''char 인코딩을 사용한 char 텍스트'');' + - 'INSERT INTO table1 VALUES (101,''char 인코딩을 사용하는 일부 텍스트를 varchar로 변환해야합니다.'', ''clob로 변환해야하는 char 인코딩을 가진 텍스트'', ''char 인코딩을 사용한 char 텍스트'');' + - 'INSERT INTO table1 VALUES (101,''char 인코딩을 사용하는 일부 텍스트를 varchar로 변환해야합니다.'', ''clob로 변환해야하는 char 인코딩을 가진 텍스트'', ''char 인코딩을 사용한 char 텍스트'');' + - 'INSERT INTO table1 VALUES (101,''char 인코딩을 사용하는 일부 텍스트를 varchar로 변환해야합니다.'', ''clob로 변환해야하는 char 인코딩을 가진 텍스트'', ''char 인코딩을 사용한 char 텍스트'');' + +query: 'select varchar_field13, clob_field15, char_field16 from table1' + +drop: 'DROP table table1;' + +values: + - 'VARCHAR_FIELD13=char 인코딩을 사용하는 일부 텍스트를 varchar로 변환해야합니다.,char 인코딩을 사용하는 일부 텍스트를 varchar로 변환해야합니다.,char 인코딩을 사용하는 일부 텍스트를 varchar로 변환해야합니다., + char 인코딩을 사용하는 일부 텍스트를 varchar로 변환해야합니다.,char 인코딩을 사용하는 일부 텍스트를 varchar로 변환해야합니다.' + - 'CLOB_FIELD15=clob로 변환해야하는 char 인코딩을 가진 텍스트,clob로 변환해야하는 char 인코딩을 가진 텍스트,clob로 변환해야하는 char 인코딩을 가진 텍스트, + clob로 변환해야하는 char 인코딩을 가진 텍스트,clob로 변환해야하는 char 인코딩을 가진 텍스트' + - 'CHAR_FIELD16=char 인코딩을 사용한 char 텍스트,char 인코딩을 사용한 char 텍스트,char 인코딩을 사용한 char 텍스트,char 인코딩을 사용한 char 텍스트,char 인코딩을 사용한 char 텍스트'
\ No newline at end of file diff --git a/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_clob_h2.yml b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_clob_h2.yml new file mode 100644 index 000000000..57c69ffe0 --- /dev/null +++ b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_clob_h2.yml @@ -0,0 +1,46 @@ +#Licensed to the Apache Software Foundation (ASF) under one or more contributor +#license agreements. See the NOTICE file distributed with this work for additional +#information regarding copyright ownership. The ASF licenses this file to +#You under the Apache License, Version 2.0 (the "License"); you may not use +#this file except in compliance with the License. You may obtain a copy of +#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required +#by applicable law or agreed to in writing, software distributed under the +#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS +#OF ANY KIND, either express or implied. See the License for the specific +#language governing permissions and limitations under the License. 
+ +name: 'table1' + +type: 'clob' + +vector: 'CLOB_FIELD15' + +create: 'CREATE TABLE table1 (clob_field15 CLOB);' + +data: + - 'INSERT INTO table1 VALUES (''some text that needs to be converted to clob'');' + - 'INSERT INTO table1 VALUES (''some text that needs to be converted to clob'');' + - 'INSERT INTO table1 VALUES (''some text that needs to be converted to clob'');' + - 'INSERT INTO table1 VALUES (''some text that needs to be converted to clob'');' + - 'INSERT INTO table1 VALUES (''some text that needs to be converted to clob'');' + - 'INSERT INTO table1 VALUES (''some text that needs to be converted to clob'');' + - 'INSERT INTO table1 VALUES (''some text that needs to be converted to clob'');' + - 'INSERT INTO table1 VALUES (''some text that needs to be converted to clob'');' + - 'INSERT INTO table1 VALUES (''some text that needs to be converted to clob'');' + - 'INSERT INTO table1 VALUES (''some text that needs to be converted to clob'');' + +query: 'select CLOB_FIELD15 from table1;' + +drop: 'DROP table table1;' + +values: + - 'some text that needs to be converted to clob' + - 'some text that needs to be converted to clob' + - 'some text that needs to be converted to clob' + - 'some text that needs to be converted to clob' + - 'some text that needs to be converted to clob' + - 'some text that needs to be converted to clob' + - 'some text that needs to be converted to clob' + - 'some text that needs to be converted to clob' + - 'some text that needs to be converted to clob' + - 'some text that needs to be converted to clob' diff --git a/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_date_h2.yml b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_date_h2.yml new file mode 100644 index 000000000..bca886ceb --- /dev/null +++ b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_date_h2.yml @@ -0,0 +1,48 @@ +#Licensed to the Apache Software Foundation (ASF) under one or more contributor +#license agreements. 
See the NOTICE file distributed with this work for additional +#information regarding copyright ownership. The ASF licenses this file to +#You under the Apache License, Version 2.0 (the "License"); you may not use +#this file except in compliance with the License. You may obtain a copy of +#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required +#by applicable law or agreed to in writing, software distributed under the +#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS +#OF ANY KIND, either express or implied. See the License for the specific +#language governing permissions and limitations under the License. + +name: 'table1' + +type: 'date' + +vector: 'DATE_FIELD10' + +create: 'CREATE TABLE table1 (date_field10 DATE);' + +data: + - 'INSERT INTO table1 VALUES (''2018-02-12'');' + - 'INSERT INTO table1 VALUES (''2018-02-12'');' + - 'INSERT INTO table1 VALUES (''2018-02-12'');' + - 'INSERT INTO table1 VALUES (''2018-02-12'');' + - 'INSERT INTO table1 VALUES (''2018-02-12'');' + - 'INSERT INTO table1 VALUES (''2018-02-12'');' + - 'INSERT INTO table1 VALUES (''2018-02-12'');' + - 'INSERT INTO table1 VALUES (''2018-02-12'');' + - 'INSERT INTO table1 VALUES (''2018-02-12'');' + - 'INSERT INTO table1 VALUES (''2018-02-12'');' + - 'INSERT INTO table1 VALUES (''1969-01-01'');' + +query: 'select date_field10 from table1;' + +drop: 'DROP table table1;' + +values: + - '17574' + - '17574' + - '17574' + - '17574' + - '17574' + - '17574' + - '17574' + - '17574' + - '17574' + - '17574' + - '-365' diff --git a/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_decimal_h2.yml b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_decimal_h2.yml new file mode 100644 index 000000000..3ee15c409 --- /dev/null +++ b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_decimal_h2.yml @@ -0,0 +1,46 @@ +#Licensed to the Apache Software Foundation (ASF) under one or more contributor +#license agreements. 
See the NOTICE file distributed with this work for additional +#information regarding copyright ownership. The ASF licenses this file to +#You under the Apache License, Version 2.0 (the "License"); you may not use +#this file except in compliance with the License. You may obtain a copy of +#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required +#by applicable law or agreed to in writing, software distributed under the +#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS +#OF ANY KIND, either express or implied. See the License for the specific +#language governing permissions and limitations under the License. + +name: 'table1' + +type: 'decimal' + +vector: 'DECIMAL_FIELD6' + +create: 'CREATE TABLE table1 (decimal_field6 DECIMAL(20,2));' + +data: + - 'INSERT INTO table1 VALUES (17345667789.23);' + - 'INSERT INTO table1 VALUES (17345667789.23);' + - 'INSERT INTO table1 VALUES (17345667789.23);' + - 'INSERT INTO table1 VALUES (17345667789.23);' + - 'INSERT INTO table1 VALUES (17345667789.23);' + - 'INSERT INTO table1 VALUES (17345667789.23);' + - 'INSERT INTO table1 VALUES (17345667789.23);' + - 'INSERT INTO table1 VALUES (17345667789.23);' + - 'INSERT INTO table1 VALUES (17345667789.23);' + - 'INSERT INTO table1 VALUES (17345667789.23);' + +query: 'select decimal_field6 from table1;' + +drop: 'DROP table table1;' + +values: + - '17345667789.23' + - '17345667789.23' + - '17345667789.23' + - '17345667789.23' + - '17345667789.23' + - '17345667789.23' + - '17345667789.23' + - '17345667789.23' + - '17345667789.23' + - '17345667789.23' diff --git a/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_double_h2.yml b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_double_h2.yml new file mode 100644 index 000000000..f41900928 --- /dev/null +++ b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_double_h2.yml @@ -0,0 +1,46 @@ +#Licensed to the Apache Software Foundation (ASF) under one or more contributor 
+#license agreements. See the NOTICE file distributed with this work for additional +#information regarding copyright ownership. The ASF licenses this file to +#You under the Apache License, Version 2.0 (the "License"); you may not use +#this file except in compliance with the License. You may obtain a copy of +#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required +#by applicable law or agreed to in writing, software distributed under the +#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS +#OF ANY KIND, either express or implied. See the License for the specific +#language governing permissions and limitations under the License. + +name: 'table1' + +type: 'double' + +vector: 'DOUBLE_FIELD7' + +create: 'CREATE TABLE table1 (double_field7 DOUBLE);' + +data: + - 'INSERT INTO table1 VALUES (56478356785.345);' + - 'INSERT INTO table1 VALUES (56478356785.345);' + - 'INSERT INTO table1 VALUES (56478356785.345);' + - 'INSERT INTO table1 VALUES (56478356785.345);' + - 'INSERT INTO table1 VALUES (56478356785.345);' + - 'INSERT INTO table1 VALUES (56478356785.345);' + - 'INSERT INTO table1 VALUES (56478356785.345);' + - 'INSERT INTO table1 VALUES (56478356785.345);' + - 'INSERT INTO table1 VALUES (56478356785.345);' + - 'INSERT INTO table1 VALUES (56478356785.345);' + +query: 'select double_field7 from table1;' + +drop: 'DROP table table1;' + +values: + - '56478356785.345' + - '56478356785.345' + - '56478356785.345' + - '56478356785.345' + - '56478356785.345' + - '56478356785.345' + - '56478356785.345' + - '56478356785.345' + - '56478356785.345' + - '56478356785.345' diff --git a/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_est_date_h2.yml b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_est_date_h2.yml new file mode 100644 index 000000000..1868db3ad --- /dev/null +++ b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_est_date_h2.yml @@ -0,0 +1,48 @@ +#Licensed to the Apache Software Foundation (ASF) 
under one or more contributor +#license agreements. See the NOTICE file distributed with this work for additional +#information regarding copyright ownership. The ASF licenses this file to +#You under the Apache License, Version 2.0 (the "License"); you may not use +#this file except in compliance with the License. You may obtain a copy of +#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required +#by applicable law or agreed to in writing, software distributed under the +#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS +#OF ANY KIND, either express or implied. See the License for the specific +#language governing permissions and limitations under the License. + +name: 'table1' + +type: 'est_date' + +timezone: 'EST' + +vector: 'DATE_FIELD10' + +create: 'CREATE TABLE table1 (date_field10 DATE);' + +data: + - 'INSERT INTO table1 VALUES (''2018-02-12'');' + - 'INSERT INTO table1 VALUES (''2018-02-12'');' + - 'INSERT INTO table1 VALUES (''2018-02-12'');' + - 'INSERT INTO table1 VALUES (''2018-02-12'');' + - 'INSERT INTO table1 VALUES (''2018-02-12'');' + - 'INSERT INTO table1 VALUES (''2018-02-12'');' + - 'INSERT INTO table1 VALUES (''2018-02-12'');' + - 'INSERT INTO table1 VALUES (''2018-02-12'');' + - 'INSERT INTO table1 VALUES (''2018-02-12'');' + - 'INSERT INTO table1 VALUES (''2018-02-12'');' + +query: 'select date_field10 from table1;' + +drop: 'DROP table table1;' + +values: + - '17574' + - '17574' + - '17574' + - '17574' + - '17574' + - '17574' + - '17574' + - '17574' + - '17574' + - '17574' diff --git a/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_est_time_h2.yml b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_est_time_h2.yml new file mode 100644 index 000000000..c6fc7a180 --- /dev/null +++ b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_est_time_h2.yml @@ -0,0 +1,48 @@ +#Licensed to the Apache Software Foundation (ASF) under one or more contributor +#license agreements. 
See the NOTICE file distributed with this work for additional +#information regarding copyright ownership. The ASF licenses this file to +#You under the Apache License, Version 2.0 (the "License"); you may not use +#this file except in compliance with the License. You may obtain a copy of +#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required +#by applicable law or agreed to in writing, software distributed under the +#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS +#OF ANY KIND, either express or implied. See the License for the specific +#language governing permissions and limitations under the License. + +name: 'table1' + +type: 'est_time' + +timezone: 'EST' + +vector: 'TIME_FIELD9' + +create: 'CREATE TABLE table1 (time_field9 TIME);' + +data: + - 'INSERT INTO table1 VALUES (''12:45:35'');' + - 'INSERT INTO table1 VALUES (''12:45:35'');' + - 'INSERT INTO table1 VALUES (''12:45:35'');' + - 'INSERT INTO table1 VALUES (''12:45:35'');' + - 'INSERT INTO table1 VALUES (''12:45:35'');' + - 'INSERT INTO table1 VALUES (''12:45:35'');' + - 'INSERT INTO table1 VALUES (''12:45:35'');' + - 'INSERT INTO table1 VALUES (''12:45:35'');' + - 'INSERT INTO table1 VALUES (''12:45:35'');' + - 'INSERT INTO table1 VALUES (''12:45:35'');' + +query: 'select time_field9 from table1;' + +drop: 'DROP table table1;' + +values: + - '63935000' + - '63935000' + - '63935000' + - '63935000' + - '63935000' + - '63935000' + - '63935000' + - '63935000' + - '63935000' + - '63935000' diff --git a/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_est_timestamp_h2.yml b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_est_timestamp_h2.yml new file mode 100644 index 000000000..b0ec5b708 --- /dev/null +++ b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_est_timestamp_h2.yml @@ -0,0 +1,49 @@ +#Licensed to the Apache Software Foundation (ASF) under one or more contributor +#license agreements. 
See the NOTICE file distributed with this work for additional +#information regarding copyright ownership. The ASF licenses this file to +#You under the Apache License, Version 2.0 (the "License"); you may not use +#this file except in compliance with the License. You may obtain a copy of +#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required +#by applicable law or agreed to in writing, software distributed under the +#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS +#OF ANY KIND, either express or implied. See the License for the specific +#language governing permissions and limitations under the License. + +name: 'table1' + +type: 'est_timestamp' + +timezone: 'EST' + +vector: 'TIMESTAMP_FIELD11' + +create: 'CREATE TABLE table1 (timestamp_field11 TIMESTAMP);' + +data: + - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');' + - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');' + - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');' + - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');' + - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');' + - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');' + - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');' + - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');' + - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');' + - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');' + +query: 'select timestamp_field11 from table1;' + +drop: 'DROP table table1;' + +values: + - '1518457535000' + - '1518457535000' + - '1518457535000' + - '1518457535000' + - '1518457535000' + - '1518457535000' + - '1518457535000' + - '1518457535000' + - '1518457535000' + - '1518457535000' +
\ No newline at end of file diff --git a/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_gmt_date_h2.yml b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_gmt_date_h2.yml new file mode 100644 index 000000000..65824861a --- /dev/null +++ b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_gmt_date_h2.yml @@ -0,0 +1,48 @@ +#Licensed to the Apache Software Foundation (ASF) under one or more contributor +#license agreements. See the NOTICE file distributed with this work for additional +#information regarding copyright ownership. The ASF licenses this file to +#You under the Apache License, Version 2.0 (the "License"); you may not use +#this file except in compliance with the License. You may obtain a copy of +#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required +#by applicable law or agreed to in writing, software distributed under the +#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS +#OF ANY KIND, either express or implied. See the License for the specific +#language governing permissions and limitations under the License. + +name: 'table1' + +type: 'gmt_date' + +timezone: 'GMT' + +vector: 'DATE_FIELD10' + +create: 'CREATE TABLE table1 (date_field10 DATE);' + +data: + - 'INSERT INTO table1 VALUES (''2018-02-12'');' + - 'INSERT INTO table1 VALUES (''2018-02-12'');' + - 'INSERT INTO table1 VALUES (''2018-02-12'');' + - 'INSERT INTO table1 VALUES (''2018-02-12'');' + - 'INSERT INTO table1 VALUES (''2018-02-12'');' + - 'INSERT INTO table1 VALUES (''2018-02-12'');' + - 'INSERT INTO table1 VALUES (''2018-02-12'');' + - 'INSERT INTO table1 VALUES (''2018-02-12'');' + - 'INSERT INTO table1 VALUES (''2018-02-12'');' + - 'INSERT INTO table1 VALUES (''2018-02-12'');' + +query: 'select date_field10 from table1;' + +drop: 'DROP table table1;' + +values: + - '17574' + - '17574' + - '17574' + - '17574' + - '17574' + - '17574' + - '17574' + - '17574' + - '17574' + - '17574'
\ No newline at end of file diff --git a/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_gmt_time_h2.yml b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_gmt_time_h2.yml new file mode 100644 index 000000000..ae28c51e2 --- /dev/null +++ b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_gmt_time_h2.yml @@ -0,0 +1,48 @@ +#Licensed to the Apache Software Foundation (ASF) under one or more contributor +#license agreements. See the NOTICE file distributed with this work for additional +#information regarding copyright ownership. The ASF licenses this file to +#You under the Apache License, Version 2.0 (the "License"); you may not use +#this file except in compliance with the License. You may obtain a copy of +#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required +#by applicable law or agreed to in writing, software distributed under the +#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS +#OF ANY KIND, either express or implied. See the License for the specific +#language governing permissions and limitations under the License. + +name: 'table1' + +type: 'gmt_time' + +timezone: 'GMT' + +vector: 'TIME_FIELD9' + +create: 'CREATE TABLE table1 (time_field9 TIME);' + +data: + - 'INSERT INTO table1 VALUES (''12:45:35'');' + - 'INSERT INTO table1 VALUES (''12:45:35'');' + - 'INSERT INTO table1 VALUES (''12:45:35'');' + - 'INSERT INTO table1 VALUES (''12:45:35'');' + - 'INSERT INTO table1 VALUES (''12:45:35'');' + - 'INSERT INTO table1 VALUES (''12:45:35'');' + - 'INSERT INTO table1 VALUES (''12:45:35'');' + - 'INSERT INTO table1 VALUES (''12:45:35'');' + - 'INSERT INTO table1 VALUES (''12:45:35'');' + - 'INSERT INTO table1 VALUES (''12:45:35'');' + +query: 'select time_field9 from table1;' + +drop: 'DROP table table1;' + +values: + - '45935000' + - '45935000' + - '45935000' + - '45935000' + - '45935000' + - '45935000' + - '45935000' + - '45935000' + - '45935000' + - '45935000'
\ No newline at end of file diff --git a/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_gmt_timestamp_h2.yml b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_gmt_timestamp_h2.yml new file mode 100644 index 000000000..b468f5af9 --- /dev/null +++ b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_gmt_timestamp_h2.yml @@ -0,0 +1,48 @@ +#Licensed to the Apache Software Foundation (ASF) under one or more contributor +#license agreements. See the NOTICE file distributed with this work for additional +#information regarding copyright ownership. The ASF licenses this file to +#You under the Apache License, Version 2.0 (the "License"); you may not use +#this file except in compliance with the License. You may obtain a copy of +#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required +#by applicable law or agreed to in writing, software distributed under the +#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS +#OF ANY KIND, either express or implied. See the License for the specific +#language governing permissions and limitations under the License. 
+ +name: 'table1' + +type: 'gmt_timestamp' + +timezone: 'GMT' + +vector: 'TIMESTAMP_FIELD11' + +create: 'CREATE TABLE table1 (timestamp_field11 TIMESTAMP);' + +data: + - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');' + - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');' + - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');' + - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');' + - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');' + - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');' + - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');' + - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');' + - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');' + - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');' + +query: 'select timestamp_field11 from table1;' + +drop: 'DROP table table1;' + +values: + - '1518439535000' + - '1518439535000' + - '1518439535000' + - '1518439535000' + - '1518439535000' + - '1518439535000' + - '1518439535000' + - '1518439535000' + - '1518439535000' + - '1518439535000'
\ No newline at end of file diff --git a/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_int_h2.yml b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_int_h2.yml new file mode 100644 index 000000000..8b6e776cb --- /dev/null +++ b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_int_h2.yml @@ -0,0 +1,46 @@ +#Licensed to the Apache Software Foundation (ASF) under one or more contributor +#license agreements. See the NOTICE file distributed with this work for additional +#information regarding copyright ownership. The ASF licenses this file to +#You under the Apache License, Version 2.0 (the "License"); you may not use +#this file except in compliance with the License. You may obtain a copy of +#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required +#by applicable law or agreed to in writing, software distributed under the +#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS +#OF ANY KIND, either express or implied. See the License for the specific +#language governing permissions and limitations under the License. 
+ +name: 'table1' + +type: 'int' + +vector: 'INT_FIELD1' + +create: 'CREATE TABLE table1 (int_field1 INT);' + +data: + - 'INSERT INTO table1 VALUES (101);' + - 'INSERT INTO table1 VALUES (101);' + - 'INSERT INTO table1 VALUES (101);' + - 'INSERT INTO table1 VALUES (101);' + - 'INSERT INTO table1 VALUES (101);' + - 'INSERT INTO table1 VALUES (101);' + - 'INSERT INTO table1 VALUES (101);' + - 'INSERT INTO table1 VALUES (101);' + - 'INSERT INTO table1 VALUES (101);' + - 'INSERT INTO table1 VALUES (101);' + +values: + - '101' + - '101' + - '101' + - '101' + - '101' + - '101' + - '101' + - '101' + - '101' + - '101' + +query: 'select int_field1 from table1;' + +drop: 'DROP table table1;' diff --git a/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_null_and_notnull.yml b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_null_and_notnull.yml new file mode 100644 index 000000000..20e9e5e5e --- /dev/null +++ b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_null_and_notnull.yml @@ -0,0 +1,26 @@ +#Licensed to the Apache Software Foundation (ASF) under one or more contributor +#license agreements. See the NOTICE file distributed with this work for additional +#information regarding copyright ownership. The ASF licenses this file to +#You under the Apache License, Version 2.0 (the "License"); you may not use +#this file except in compliance with the License. You may obtain a copy of +#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required +#by applicable law or agreed to in writing, software distributed under the +#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS +#OF ANY KIND, either express or implied. See the License for the specific +#language governing permissions and limitations under the License. 
+ +name: 'table1' + +type: 'nullnotnull' + +create: 'CREATE TABLE table1 (int_field_null INT, int_field_notnull INT NOT NULL);' + +data: + - 'INSERT INTO table1 VALUES (0, 0);' + - 'INSERT INTO table1 VALUES (1, 1);' + +rowCount: '2' + +query: 'select int_field_null, int_field_notnull from table1;' + +drop: 'DROP table table1;'
\ No newline at end of file diff --git a/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_null_h2.yml b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_null_h2.yml new file mode 100644 index 000000000..37a9883f9 --- /dev/null +++ b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_null_h2.yml @@ -0,0 +1,36 @@ +#Licensed to the Apache Software Foundation (ASF) under one or more contributor +#license agreements. See the NOTICE file distributed with this work for additional +#information regarding copyright ownership. The ASF licenses this file to +#You under the Apache License, Version 2.0 (the "License"); you may not use +#this file except in compliance with the License. You may obtain a copy of +#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required +#by applicable law or agreed to in writing, software distributed under the +#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS +#OF ANY KIND, either express or implied. See the License for the specific +#language governing permissions and limitations under the License. 
+ +name: 'table1' + +type: 'null' + +vector: 'NULL_FIELD18' + +create: 'CREATE TABLE table1 (null_field18 NULL);' + +rowCount: '10' + +data: + - 'INSERT INTO table1 VALUES (null);' + - 'INSERT INTO table1 VALUES (null);' + - 'INSERT INTO table1 VALUES (null);' + - 'INSERT INTO table1 VALUES (null);' + - 'INSERT INTO table1 VALUES (null);' + - 'INSERT INTO table1 VALUES (null);' + - 'INSERT INTO table1 VALUES (null);' + - 'INSERT INTO table1 VALUES (null);' + - 'INSERT INTO table1 VALUES (null);' + - 'INSERT INTO table1 VALUES (null);' + +query: 'select null_field18 from table1;' + +drop: 'DROP table table1;' diff --git a/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_pst_date_h2.yml b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_pst_date_h2.yml new file mode 100644 index 000000000..798cfc7d6 --- /dev/null +++ b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_pst_date_h2.yml @@ -0,0 +1,48 @@ +#Licensed to the Apache Software Foundation (ASF) under one or more contributor +#license agreements. See the NOTICE file distributed with this work for additional +#information regarding copyright ownership. The ASF licenses this file to +#You under the Apache License, Version 2.0 (the "License"); you may not use +#this file except in compliance with the License. You may obtain a copy of +#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required +#by applicable law or agreed to in writing, software distributed under the +#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS +#OF ANY KIND, either express or implied. See the License for the specific +#language governing permissions and limitations under the License. 
+ +name: 'table1' + +type: 'pst_date' + +timezone: 'PST' + +vector: 'DATE_FIELD10' + +create: 'CREATE TABLE table1 (date_field10 DATE);' + +data: + - 'INSERT INTO table1 VALUES (''2018-02-12'');' + - 'INSERT INTO table1 VALUES (''2018-02-12'');' + - 'INSERT INTO table1 VALUES (''2018-02-12'');' + - 'INSERT INTO table1 VALUES (''2018-02-12'');' + - 'INSERT INTO table1 VALUES (''2018-02-12'');' + - 'INSERT INTO table1 VALUES (''2018-02-12'');' + - 'INSERT INTO table1 VALUES (''2018-02-12'');' + - 'INSERT INTO table1 VALUES (''2018-02-12'');' + - 'INSERT INTO table1 VALUES (''2018-02-12'');' + - 'INSERT INTO table1 VALUES (''2018-02-12'');' + +query: 'select date_field10 from table1;' + +drop: 'DROP table table1;' + +values: + - '17574' + - '17574' + - '17574' + - '17574' + - '17574' + - '17574' + - '17574' + - '17574' + - '17574' + - '17574'
\ No newline at end of file diff --git a/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_pst_time_h2.yml b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_pst_time_h2.yml new file mode 100644 index 000000000..058d54d20 --- /dev/null +++ b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_pst_time_h2.yml @@ -0,0 +1,48 @@ +#Licensed to the Apache Software Foundation (ASF) under one or more contributor +#license agreements. See the NOTICE file distributed with this work for additional +#information regarding copyright ownership. The ASF licenses this file to +#You under the Apache License, Version 2.0 (the "License"); you may not use +#this file except in compliance with the License. You may obtain a copy of +#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required +#by applicable law or agreed to in writing, software distributed under the +#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS +#OF ANY KIND, either express or implied. See the License for the specific +#language governing permissions and limitations under the License. + +name: 'table1' + +type: 'pst_time' + +timezone: 'PST' + +vector: 'TIME_FIELD9' + +create: 'CREATE TABLE table1 (time_field9 TIME);' + +data: + - 'INSERT INTO table1 VALUES (''12:45:35'');' + - 'INSERT INTO table1 VALUES (''12:45:35'');' + - 'INSERT INTO table1 VALUES (''12:45:35'');' + - 'INSERT INTO table1 VALUES (''12:45:35'');' + - 'INSERT INTO table1 VALUES (''12:45:35'');' + - 'INSERT INTO table1 VALUES (''12:45:35'');' + - 'INSERT INTO table1 VALUES (''12:45:35'');' + - 'INSERT INTO table1 VALUES (''12:45:35'');' + - 'INSERT INTO table1 VALUES (''12:45:35'');' + - 'INSERT INTO table1 VALUES (''12:45:35'');' + +query: 'select time_field9 from table1;' + +drop: 'DROP table table1;' + +values: + - '74735000' + - '74735000' + - '74735000' + - '74735000' + - '74735000' + - '74735000' + - '74735000' + - '74735000' + - '74735000' + - '74735000'
\ No newline at end of file diff --git a/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_pst_timestamp_h2.yml b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_pst_timestamp_h2.yml new file mode 100644 index 000000000..19b6b5f44 --- /dev/null +++ b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_pst_timestamp_h2.yml @@ -0,0 +1,48 @@ +#Licensed to the Apache Software Foundation (ASF) under one or more contributor +#license agreements. See the NOTICE file distributed with this work for additional +#information regarding copyright ownership. The ASF licenses this file to +#You under the Apache License, Version 2.0 (the "License"); you may not use +#this file except in compliance with the License. You may obtain a copy of +#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required +#by applicable law or agreed to in writing, software distributed under the +#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS +#OF ANY KIND, either express or implied. See the License for the specific +#language governing permissions and limitations under the License. 
+ +name: 'table1' + +type: 'pst_timestamp' + +timezone: 'PST' + +vector: 'TIMESTAMP_FIELD11' + +create: 'CREATE TABLE table1 (timestamp_field11 TIMESTAMP);' + +data: + - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');' + - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');' + - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');' + - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');' + - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');' + - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');' + - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');' + - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');' + - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');' + - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');' + +query: 'select timestamp_field11 from table1;' + +drop: 'DROP table table1;' + +values: + - '1518468335000' + - '1518468335000' + - '1518468335000' + - '1518468335000' + - '1518468335000' + - '1518468335000' + - '1518468335000' + - '1518468335000' + - '1518468335000' + - '1518468335000'
\ No newline at end of file diff --git a/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_real_h2.yml b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_real_h2.yml new file mode 100644 index 000000000..c8f8aeb78 --- /dev/null +++ b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_real_h2.yml @@ -0,0 +1,46 @@ +#Licensed to the Apache Software Foundation (ASF) under one or more contributor +#license agreements. See the NOTICE file distributed with this work for additional +#information regarding copyright ownership. The ASF licenses this file to +#You under the Apache License, Version 2.0 (the "License"); you may not use +#this file except in compliance with the License. You may obtain a copy of +#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required +#by applicable law or agreed to in writing, software distributed under the +#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS +#OF ANY KIND, either express or implied. See the License for the specific +#language governing permissions and limitations under the License. 
+ +name: 'table1' + +type: 'real' + +vector: 'REAL_FIELD8' + +create: 'CREATE TABLE table1 (real_field8 REAL);' + +data: + - 'INSERT INTO table1 VALUES (56478356785.345);' + - 'INSERT INTO table1 VALUES (56478356785.345);' + - 'INSERT INTO table1 VALUES (56478356785.345);' + - 'INSERT INTO table1 VALUES (56478356785.345);' + - 'INSERT INTO table1 VALUES (56478356785.345);' + - 'INSERT INTO table1 VALUES (56478356785.345);' + - 'INSERT INTO table1 VALUES (56478356785.345);' + - 'INSERT INTO table1 VALUES (56478356785.345);' + - 'INSERT INTO table1 VALUES (56478356785.345);' + - 'INSERT INTO table1 VALUES (56478356785.345);' + +query: 'select real_field8 from table1;' + +drop: 'DROP table table1;' + +values: + - '56478356785.345f' + - '56478356785.345f' + - '56478356785.345f' + - '56478356785.345f' + - '56478356785.345f' + - '56478356785.345f' + - '56478356785.345f' + - '56478356785.345f' + - '56478356785.345f' + - '56478356785.345f' diff --git a/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_selected_datatypes_null_h2.yml b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_selected_datatypes_null_h2.yml new file mode 100644 index 000000000..93b1aae55 --- /dev/null +++ b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_selected_datatypes_null_h2.yml @@ -0,0 +1,46 @@ +#Licensed to the Apache Software Foundation (ASF) under one or more contributor +#license agreements. See the NOTICE file distributed with this work for additional +#information regarding copyright ownership. The ASF licenses this file to +#You under the Apache License, Version 2.0 (the "License"); you may not use +#this file except in compliance with the License. You may obtain a copy of +#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required +#by applicable law or agreed to in writing, software distributed under the +#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS +#OF ANY KIND, either express or implied. 
See the License for the specific +#language governing permissions and limitations under the License. + +name: 'table1' + +type: 'selected_null_column' + +vectors: + - 'BIGINT_FIELD5' + - 'DECIMAL_FIELD6' + - 'DOUBLE_FIELD7' + - 'REAL_FIELD8' + - 'TIME_FIELD9' + - 'DATE_FIELD10' + - 'TIMESTAMP_FIELD11' + - 'BINARY_FIELD12' + - 'VARCHAR_FIELD13' + - 'BLOB_FIELD14' + - 'CLOB_FIELD15' + - 'CHAR_FIELD16' + - 'BIT_FIELD17' + +rowCount: '5' + +create: 'CREATE TABLE table1 (int_field1 INT, bool_field2 BOOLEAN, tinyint_field3 TINYINT, smallint_field4 SMALLINT, bigint_field5 BIGINT, + decimal_field6 DECIMAL(20,2), double_field7 DOUBLE, real_field8 REAL, time_field9 TIME, date_field10 DATE, timestamp_field11 TIMESTAMP, + binary_field12 BINARY(100), varchar_field13 VARCHAR(256), blob_field14 BLOB, clob_field15 CLOB, char_field16 CHAR(16), bit_field17 BIT);' + +data: + - 'INSERT INTO table1 (int_field1, bool_field2, tinyint_field3, smallint_field4) VALUES (102, 0, 46, 12001);' + - 'INSERT INTO table1 (int_field1, bool_field2, tinyint_field3, smallint_field4) VALUES (102, 0, 46, 12001);' + - 'INSERT INTO table1 (int_field1, bool_field2, tinyint_field3, smallint_field4) VALUES (102, 0, 46, 12001);' + - 'INSERT INTO table1 (int_field1, bool_field2, tinyint_field3, smallint_field4) VALUES (102, 0, 46, 12001);' + - 'INSERT INTO table1 (int_field1, bool_field2, tinyint_field3, smallint_field4) VALUES (102, 0, 46, 12001);' + +query: 'select bigint_field5, decimal_field6, double_field7, real_field8, time_field9, date_field10, timestamp_field11, binary_field12, varchar_field13, blob_field14, clob_field15, char_field16, bit_field17 from table1' + +drop: 'DROP table table1;'
\ No newline at end of file diff --git a/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_smallint_h2.yml b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_smallint_h2.yml new file mode 100644 index 000000000..887c74f4d --- /dev/null +++ b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_smallint_h2.yml @@ -0,0 +1,46 @@ +#Licensed to the Apache Software Foundation (ASF) under one or more contributor +#license agreements. See the NOTICE file distributed with this work for additional +#information regarding copyright ownership. The ASF licenses this file to +#You under the Apache License, Version 2.0 (the "License"); you may not use +#this file except in compliance with the License. You may obtain a copy of +#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required +#by applicable law or agreed to in writing, software distributed under the +#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS +#OF ANY KIND, either express or implied. See the License for the specific +#language governing permissions and limitations under the License. 
+ +name: 'table1' + +type: 'small_int' + +vector: 'SMALLINT_FIELD4' + +create: 'CREATE TABLE table1 (smallint_field4 SMALLINT);' + +data: + - 'INSERT INTO table1 VALUES (12000);' + - 'INSERT INTO table1 VALUES (12000);' + - 'INSERT INTO table1 VALUES (12000);' + - 'INSERT INTO table1 VALUES (12000);' + - 'INSERT INTO table1 VALUES (12000);' + - 'INSERT INTO table1 VALUES (12000);' + - 'INSERT INTO table1 VALUES (12000);' + - 'INSERT INTO table1 VALUES (12000);' + - 'INSERT INTO table1 VALUES (12000);' + - 'INSERT INTO table1 VALUES (12000);' + +query: 'select smallint_field4 from table1;' + +drop: 'DROP table table1;' + +values: + - '12000' + - '12000' + - '12000' + - '12000' + - '12000' + - '12000' + - '12000' + - '12000' + - '12000' + - '12000' diff --git a/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_time_h2.yml b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_time_h2.yml new file mode 100644 index 000000000..c9baaee8d --- /dev/null +++ b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_time_h2.yml @@ -0,0 +1,46 @@ +#Licensed to the Apache Software Foundation (ASF) under one or more contributor +#license agreements. See the NOTICE file distributed with this work for additional +#information regarding copyright ownership. The ASF licenses this file to +#You under the Apache License, Version 2.0 (the "License"); you may not use +#this file except in compliance with the License. You may obtain a copy of +#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required +#by applicable law or agreed to in writing, software distributed under the +#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS +#OF ANY KIND, either express or implied. See the License for the specific +#language governing permissions and limitations under the License. 
+ +name: 'table1' + +type: 'time' + +vector: 'TIME_FIELD9' + +create: 'CREATE TABLE table1 (time_field9 TIME);' + +data: + - 'INSERT INTO table1 VALUES (''12:45:35'');' + - 'INSERT INTO table1 VALUES (''12:45:35'');' + - 'INSERT INTO table1 VALUES (''12:45:35'');' + - 'INSERT INTO table1 VALUES (''12:45:35'');' + - 'INSERT INTO table1 VALUES (''12:45:35'');' + - 'INSERT INTO table1 VALUES (''12:45:35'');' + - 'INSERT INTO table1 VALUES (''12:45:35'');' + - 'INSERT INTO table1 VALUES (''12:45:35'');' + - 'INSERT INTO table1 VALUES (''12:45:35'');' + - 'INSERT INTO table1 VALUES (''12:45:35'');' + +query: 'select time_field9 from table1;' + +drop: 'DROP table table1;' + +values: + - '45935000' + - '45935000' + - '45935000' + - '45935000' + - '45935000' + - '45935000' + - '45935000' + - '45935000' + - '45935000' + - '45935000' diff --git a/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_timestamp_h2.yml b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_timestamp_h2.yml new file mode 100644 index 000000000..7d93faad1 --- /dev/null +++ b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_timestamp_h2.yml @@ -0,0 +1,46 @@ +#Licensed to the Apache Software Foundation (ASF) under one or more contributor +#license agreements. See the NOTICE file distributed with this work for additional +#information regarding copyright ownership. The ASF licenses this file to +#You under the Apache License, Version 2.0 (the "License"); you may not use +#this file except in compliance with the License. You may obtain a copy of +#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required +#by applicable law or agreed to in writing, software distributed under the +#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS +#OF ANY KIND, either express or implied. See the License for the specific +#language governing permissions and limitations under the License. 
+ +name: 'table1' + +type: 'timestamp' + +vector: 'TIMESTAMP_FIELD11' + +create: 'CREATE TABLE table1 (timestamp_field11 TIMESTAMP);' + +data: + - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');' + - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');' + - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');' + - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');' + - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');' + - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');' + - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');' + - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');' + - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');' + - 'INSERT INTO table1 VALUES (''2018-02-12 12:45:35'');' + +query: 'select timestamp_field11 from table1;' + +drop: 'DROP table table1;' + +values: + - '1518439535000' + - '1518439535000' + - '1518439535000' + - '1518439535000' + - '1518439535000' + - '1518439535000' + - '1518439535000' + - '1518439535000' + - '1518439535000' + - '1518439535000' diff --git a/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_tinyint_h2.yml b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_tinyint_h2.yml new file mode 100644 index 000000000..a419416c8 --- /dev/null +++ b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_tinyint_h2.yml @@ -0,0 +1,46 @@ +#Licensed to the Apache Software Foundation (ASF) under one or more contributor +#license agreements. See the NOTICE file distributed with this work for additional +#information regarding copyright ownership. The ASF licenses this file to +#You under the Apache License, Version 2.0 (the "License"); you may not use +#this file except in compliance with the License. 
You may obtain a copy of +#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required +#by applicable law or agreed to in writing, software distributed under the +#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS +#OF ANY KIND, either express or implied. See the License for the specific +#language governing permissions and limitations under the License. + +name: 'table1' + +type: 'tinyint' + +vector: 'TINYINT_FIELD3' + +create: 'CREATE TABLE table1 (tinyint_field3 TINYINT);' + +data: + - 'INSERT INTO table1 VALUES (45);' + - 'INSERT INTO table1 VALUES (45);' + - 'INSERT INTO table1 VALUES (45);' + - 'INSERT INTO table1 VALUES (45);' + - 'INSERT INTO table1 VALUES (45);' + - 'INSERT INTO table1 VALUES (45);' + - 'INSERT INTO table1 VALUES (45);' + - 'INSERT INTO table1 VALUES (45);' + - 'INSERT INTO table1 VALUES (45);' + - 'INSERT INTO table1 VALUES (45);' + +query: 'select tinyint_field3 from table1;' + +drop: 'DROP table table1;' + +values: + - '45' + - '45' + - '45' + - '45' + - '45' + - '45' + - '45' + - '45' + - '45' + - '45' diff --git a/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_varchar_h2.yml b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_varchar_h2.yml new file mode 100644 index 000000000..0bd142178 --- /dev/null +++ b/src/arrow/java/adapter/jdbc/src/test/resources/h2/test1_varchar_h2.yml @@ -0,0 +1,46 @@ +#Licensed to the Apache Software Foundation (ASF) under one or more contributor +#license agreements. See the NOTICE file distributed with this work for additional +#information regarding copyright ownership. The ASF licenses this file to +#You under the Apache License, Version 2.0 (the "License"); you may not use +#this file except in compliance with the License. 
You may obtain a copy of +#the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required +#by applicable law or agreed to in writing, software distributed under the +#License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS +#OF ANY KIND, either express or implied. See the License for the specific +#language governing permissions and limitations under the License. + +name: 'table1' + +type: 'varchar' + +vector: 'VARCHAR_FIELD13' + +create: 'CREATE TABLE table1 (varchar_field13 VARCHAR(256));' + +data: + - 'INSERT INTO table1 VALUES (''some text that needs to be converted to varchar'');' + - 'INSERT INTO table1 VALUES (''some text that needs to be converted to varchar'');' + - 'INSERT INTO table1 VALUES (''some text that needs to be converted to varchar'');' + - 'INSERT INTO table1 VALUES (''some text that needs to be converted to varchar'');' + - 'INSERT INTO table1 VALUES (''some text that needs to be converted to varchar'');' + - 'INSERT INTO table1 VALUES (''some text that needs to be converted to varchar'');' + - 'INSERT INTO table1 VALUES (''some text that needs to be converted to varchar'');' + - 'INSERT INTO table1 VALUES (''some text that needs to be converted to varchar'');' + - 'INSERT INTO table1 VALUES (''some text that needs to be converted to varchar'');' + - 'INSERT INTO table1 VALUES (''some text that needs to be converted to varchar'');' + +query: 'select varchar_field13 from table1;' + +drop: 'DROP table table1;' + +values: + - 'some text that needs to be converted to varchar' + - 'some text that needs to be converted to varchar' + - 'some text that needs to be converted to varchar' + - 'some text that needs to be converted to varchar' + - 'some text that needs to be converted to varchar' + - 'some text that needs to be converted to varchar' + - 'some text that needs to be converted to varchar' + - 'some text that needs to be converted to varchar' + - 'some text that needs to be converted to varchar' + - 'some text 
that needs to be converted to varchar' diff --git a/src/arrow/java/adapter/jdbc/src/test/resources/logback.xml b/src/arrow/java/adapter/jdbc/src/test/resources/logback.xml new file mode 100644 index 000000000..4c54d18a2 --- /dev/null +++ b/src/arrow/java/adapter/jdbc/src/test/resources/logback.xml @@ -0,0 +1,28 @@ +<?xml version="1.0" encoding="UTF-8" ?> +<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor + license agreements. See the NOTICE file distributed with this work for additional + information regarding copyright ownership. The ASF licenses this file to + You under the Apache License, Version 2.0 (the "License"); you may not use + this file except in compliance with the License. You may obtain a copy of + the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required + by applicable law or agreed to in writing, software distributed under the + License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS + OF ANY KIND, either express or implied. See the License for the specific + language governing permissions and limitations under the License. --> + +<configuration> + <statusListener class="ch.qos.logback.core.status.NopStatusListener"/> + <appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender"> + <!-- encoders are assigned the type + ch.qos.logback.classic.encoder.PatternLayoutEncoder by default --> + <encoder> + <pattern>%d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n</pattern> + </encoder> + </appender> + + <logger name="org.apache.arrow" additivity="false"> + <level value="info" /> + <appender-ref ref="STDOUT" /> + </logger> + +</configuration> diff --git a/src/arrow/java/adapter/orc/CMakeLists.txt b/src/arrow/java/adapter/orc/CMakeLists.txt new file mode 100644 index 000000000..e2d4655d7 --- /dev/null +++ b/src/arrow/java/adapter/orc/CMakeLists.txt @@ -0,0 +1,43 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# +# arrow_orc_java +# + +# Headers: top level + +project(arrow_orc_java) + +# Find java/jni +include(FindJava) +include(UseJava) +include(FindJNI) + +message("generating headers to ${JNI_HEADERS_DIR}") + +add_jar(arrow_orc_java + src/main/java/org/apache/arrow/adapter/orc/OrcReaderJniWrapper.java + src/main/java/org/apache/arrow/adapter/orc/OrcStripeReaderJniWrapper.java + src/main/java/org/apache/arrow/adapter/orc/OrcMemoryJniWrapper.java + src/main/java/org/apache/arrow/adapter/orc/OrcJniUtils.java + src/main/java/org/apache/arrow/adapter/orc/OrcRecordBatch.java + src/main/java/org/apache/arrow/adapter/orc/OrcFieldNode.java + GENERATE_NATIVE_HEADERS + arrow_orc_java-native + DESTINATION + ${JNI_HEADERS_DIR}) diff --git a/src/arrow/java/adapter/orc/pom.xml b/src/arrow/java/adapter/orc/pom.xml new file mode 100644 index 000000000..26f5f0c28 --- /dev/null +++ b/src/arrow/java/adapter/orc/pom.xml @@ -0,0 +1,113 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor + license agreements. See the NOTICE file distributed with this work for additional + information regarding copyright ownership. 
The ASF licenses this file to + You under the Apache License, Version 2.0 (the "License"); you may not use + this file except in compliance with the License. You may obtain a copy of + the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required + by applicable law or agreed to in writing, software distributed under the + License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS + OF ANY KIND, either express or implied. See the License for the specific + language governing permissions and limitations under the License. --> + +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <modelVersion>4.0.0</modelVersion> + <dependencies> + <dependency> + <groupId>org.apache.arrow</groupId> + <artifactId>arrow-memory-core</artifactId> + <version>${project.version}</version> + <scope>compile</scope> + </dependency> + <dependency> + <groupId>org.apache.arrow</groupId> + <artifactId>arrow-memory-netty</artifactId> + <version>${project.version}</version> + <scope>runtime</scope> + </dependency> + <dependency> + <groupId>org.apache.arrow</groupId> + <artifactId>arrow-vector</artifactId> + <version>${project.version}</version> + <scope>compile</scope> + <classifier>${arrow.vector.classifier}</classifier> + </dependency> + <dependency> + <groupId>org.apache.orc</groupId> + <artifactId>orc-core</artifactId> + <version>1.7.0</version> + <scope>test</scope> + <exclusions> + <exclusion> + <groupId>log4j</groupId> + <artifactId>log4j</artifactId> + </exclusion> + <exclusion> + <groupId>org.slf4j</groupId> + <artifactId>slf4j-log4j12</artifactId> + </exclusion> + <exclusion> + <groupId>commons-logging</groupId> + <artifactId>commons-logging</artifactId> + </exclusion> + </exclusions> + </dependency> + <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-common</artifactId> + 
<version>2.2.0</version> + <scope>test</scope> + <exclusions> + <exclusion> + <groupId>commons-logging</groupId> + <artifactId>commons-logging</artifactId> + </exclusion> + <exclusion> + <groupId>log4j</groupId> + <artifactId>log4j</artifactId> + </exclusion> + <exclusion> + <groupId>org.slf4j</groupId> + <artifactId>slf4j-log4j12</artifactId> + </exclusion> + <exclusion> + <groupId>javax.servlet</groupId> + <artifactId>servlet-api</artifactId> + </exclusion> + </exclusions> + </dependency> + <dependency> + <groupId>org.apache.hive</groupId> + <artifactId>hive-storage-api</artifactId> + <version>2.8.1</version> + <scope>test</scope> + </dependency> + </dependencies> + + <parent> + <groupId>org.apache.arrow</groupId> + <artifactId>arrow-java-root</artifactId> + <version>6.0.1</version> + <relativePath>../../pom.xml</relativePath> + </parent> + + <groupId>org.apache.arrow.orc</groupId> + <artifactId>arrow-orc</artifactId> + <name>Arrow Orc Adapter</name> + <description>(Experimental/Contrib)A JNI wrapper for the C++ ORC reader implementation.</description> + <packaging>jar</packaging> + <properties> + <arrow.cpp.build.dir>../../../cpp/release-build/</arrow.cpp.build.dir> + </properties> + + <build> + <resources> + <resource> + <directory>${arrow.cpp.build.dir}</directory> + <includes> + <include>**/libarrow_orc_jni.*</include> + </includes> + </resource> + </resources> + </build> +</project> diff --git a/src/arrow/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcFieldNode.java b/src/arrow/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcFieldNode.java new file mode 100644 index 000000000..716a13876 --- /dev/null +++ b/src/arrow/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcFieldNode.java @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
/**
 * Metadata describing one vector/array that is handed across the JNI interface:
 * how many values it holds and how many of those values are null.
 */
class OrcFieldNode {

  private final int nullCount;
  private final int length;

  /**
   * Create a field node.
   * @param length the number of values written.
   * @param nullCount the number of null values.
   */
  public OrcFieldNode(int length, int nullCount) {
    this.nullCount = nullCount;
    this.length = length;
  }

  /**
   * Return the number of values written.
   * @return value count
   */
  int getLength() {
    return this.length;
  }

  /**
   * Return the number of null values.
   * @return null count
   */
  int getNullCount() {
    return this.nullCount;
  }
}
/**
 * Helper class for JNI related operations.
 */
class OrcJniUtils {
  private static final String LIBRARY_NAME = "arrow_orc_jni";
  private static boolean isLoaded = false;

  private OrcJniUtils() {}

  /**
   * Load the native ORC adapter library that is bundled as a classpath resource.
   *
   * <p>The platform-specific library file name is derived with
   * {@link System#mapLibraryName(String)}, the resource is copied into the directory named by
   * the {@code java.io.tmpdir} system property, and the copy is loaded with
   * {@link System#load(String)}. The load is attempted only while the {@code isLoaded} flag is
   * unset; the flag is read and written under the class lock.
   *
   * @throws IOException if the library resource is missing or cannot be copied
   * @throws IllegalAccessException never thrown by this implementation; retained in the
   *     signature so existing callers keep compiling
   */
  static void loadOrcAdapterLibraryFromJar()
      throws IOException, IllegalAccessException {
    synchronized (OrcJniUtils.class) {
      if (!isLoaded) {
        final String libraryToLoad = System.mapLibraryName(LIBRARY_NAME);
        final File libraryFile = moveFileFromJarToTemp(
            System.getProperty("java.io.tmpdir"), libraryToLoad);
        System.load(libraryFile.getAbsolutePath());
        isLoaded = true;
      }
    }
  }

  /**
   * Copy a classpath resource into a freshly created temporary file.
   *
   * @param tmpDir directory in which the temporary file is created
   * @param libraryToLoad resource name of the library; also used as the temp file suffix
   * @return the temporary file holding the copied library
   * @throws FileNotFoundException if the resource is not on the classpath
   * @throws IOException if the temporary file cannot be created or written
   */
  private static File moveFileFromJarToTemp(final String tmpDir, String libraryToLoad)
      throws IOException {
    // Resolve the resource first so that a missing library does not leave an empty
    // temporary file behind.
    // NOTE(review): looked up through this class rather than OrcReaderJniWrapper; both live in
    // the same package, so the same class loader is expected - confirm for custom packaging.
    try (final InputStream is = OrcJniUtils.class.getClassLoader()
        .getResourceAsStream(libraryToLoad)) {
      if (is == null) {
        throw new FileNotFoundException(libraryToLoad);
      }
      // createTempFile(prefix, suffix, directory): tmpDir is the target directory, not the
      // file-name prefix.
      final File temp = File.createTempFile(LIBRARY_NAME, libraryToLoad, new File(tmpDir));
      // Best effort clean-up of the extracted library when the JVM exits.
      temp.deleteOnExit();
      Files.copy(is, temp.toPath(), StandardCopyOption.REPLACE_EXISTING);
      return temp;
    }
  }
}
/**
 * Handle on a chunk of orc memory that was allocated by native code.
 * Closing the handle passes its native instance id to the native {@code release} method.
 */
class OrcMemoryJniWrapper implements AutoCloseable {

  /** Unique id of the underlying memory. */
  private final long nativeInstanceId;

  /** Starting memory address of the underlying memory. */
  private final long memoryAddress;

  /** Size of the valid data. */
  private final long size;

  /** Allocated memory size. */
  private final long capacity;

  /**
   * Create a wrapper around natively allocated memory.
   * @param nativeInstanceId unique id of the underlying memory.
   * @param memoryAddress starting memory address of the underlying memory.
   * @param size size of the valid data.
   * @param capacity allocated memory size.
   */
  OrcMemoryJniWrapper(long nativeInstanceId, long memoryAddress, long size, long capacity) {
    this.capacity = capacity;
    this.size = size;
    this.memoryAddress = memoryAddress;
    this.nativeInstanceId = nativeInstanceId;
  }

  /**
   * Size of the part of the underlying memory that holds valid data.
   * @return valid data size
   */
  long getSize() {
    return this.size;
  }

  /**
   * Size of the whole chunk of memory managed by this OrcMemoryJniWrapper.
   * @return underlying memory size
   */
  long getCapacity() {
    return this.capacity;
  }

  /**
   * Address at which the underlying chunk of memory starts.
   * @return memory address
   */
  long getMemoryAddress() {
    return this.memoryAddress;
  }

  /**
   * Invoke the native release method with this wrapper's native instance id.
   */
  @Override
  public void close() {
    release(this.nativeInstanceId);
  }

  private native void release(long id);
}
+ */ +public class OrcReader implements AutoCloseable { + private final OrcReaderJniWrapper jniWrapper; + private BufferAllocator allocator; + + /** + * reference to native reader instance. + */ + private final long nativeInstanceId; + + /** + * Create an OrcReader that iterate over orc stripes. + * @param filePath file path to target file, currently only support local file. + * @param allocator allocator provided to ArrowReader. + * @throws IOException throws exception in case of file not found + */ + public OrcReader(String filePath, BufferAllocator allocator) throws IOException, IllegalAccessException { + this.allocator = allocator; + this.jniWrapper = OrcReaderJniWrapper.getInstance(); + this.nativeInstanceId = jniWrapper.open(filePath); + } + + /** + * Seek to designated row. Invoke NextStripeReader() after seek + * will return stripe reader starting from designated row. + * @param rowNumber the rows number to seek + * @return true if seek operation is succeeded + */ + public boolean seek(int rowNumber) throws IllegalArgumentException { + return jniWrapper.seek(nativeInstanceId, rowNumber); + } + + /** + * Get a stripe level ArrowReader with specified batchSize in each record batch. + * + * @param batchSize the number of rows loaded on each iteration + * @return ArrowReader that iterate over current stripes + */ + public ArrowReader nextStripeReader(long batchSize) throws IllegalArgumentException { + long stripeReaderId = jniWrapper.nextStripeReader(nativeInstanceId, batchSize); + if (stripeReaderId < 0) { + return null; + } + + return new OrcStripeReader(stripeReaderId, allocator); + } + + /** + * The number of stripes in the file. 
+ * + * @return number of stripes + */ + public int getNumberOfStripes() throws IllegalArgumentException { + return jniWrapper.getNumberOfStripes(nativeInstanceId); + } + + @Override + public void close() throws Exception { + jniWrapper.close(nativeInstanceId); + } +} diff --git a/src/arrow/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcReaderJniWrapper.java b/src/arrow/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcReaderJniWrapper.java new file mode 100644 index 000000000..ff449c343 --- /dev/null +++ b/src/arrow/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcReaderJniWrapper.java @@ -0,0 +1,79 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.adapter.orc; + +import java.io.IOException; + +/** + * JNI wrapper for Orc reader. 
+ */ +class OrcReaderJniWrapper { + + private static volatile OrcReaderJniWrapper INSTANCE; + + static OrcReaderJniWrapper getInstance() throws IOException, IllegalAccessException { + if (INSTANCE == null) { + synchronized (OrcReaderJniWrapper.class) { + if (INSTANCE == null) { + OrcJniUtils.loadOrcAdapterLibraryFromJar(); + INSTANCE = new OrcReaderJniWrapper(); + } + } + } + + return INSTANCE; + } + + /** + * Construct a orc file reader over the target file. + * @param fileName absolute file path of target file + * @return id of the orc reader instance if file opened successfully, + * otherwise return error code * -1. + */ + native long open(String fileName); + + /** + * Release resources associated with designated reader instance. + * @param readerId id of the reader instance. + */ + native void close(long readerId); + + /** + * Seek to designated row. Invoke nextStripeReader() after seek + * will return id of stripe reader starting from designated row. + * @param readerId id of the reader instance + * @param rowNumber the rows number to seek + * @return true if seek operation is succeeded + */ + native boolean seek(long readerId, int rowNumber); + + /** + * The number of stripes in the file. + * @param readerId id of the reader instance + * @return number of stripes + */ + native int getNumberOfStripes(long readerId); + + /** + * Get a stripe level ArrowReader with specified batchSize in each record batch. + * @param readerId id of the reader instance + * @param batchSize the number of rows loaded on each iteration + * @return id of the stripe reader instance. 
+ */ + native long nextStripeReader(long readerId, long batchSize); +} diff --git a/src/arrow/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcRecordBatch.java b/src/arrow/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcRecordBatch.java new file mode 100644 index 000000000..a006cacab --- /dev/null +++ b/src/arrow/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcRecordBatch.java @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.adapter.orc; + +import java.util.Arrays; +import java.util.List; + +/** + * Wrapper for record batch meta and native memory. + */ +class OrcRecordBatch { + final int length; + + /** + * Nodes correspond to the pre-ordered flattened logical schema. + */ + final List<OrcFieldNode> nodes; + + final List<OrcMemoryJniWrapper> buffers; + + /** + * Construct a new instance. 
+ * @param length number of records included in current batch + * @param nodes meta data for each fields + * @param buffers buffers for underlying data + */ + OrcRecordBatch(int length, OrcFieldNode[] nodes, OrcMemoryJniWrapper[] buffers) { + this.length = length; + this.nodes = Arrays.asList(nodes); + this.buffers = Arrays.asList(buffers); + } +} diff --git a/src/arrow/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcReferenceManager.java b/src/arrow/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcReferenceManager.java new file mode 100644 index 000000000..fdec337e8 --- /dev/null +++ b/src/arrow/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcReferenceManager.java @@ -0,0 +1,121 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.adapter.orc; + +import java.util.concurrent.atomic.AtomicInteger; + +import org.apache.arrow.memory.ArrowBuf; +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.OwnershipTransferResult; +import org.apache.arrow.memory.ReferenceManager; +import org.apache.arrow.util.Preconditions; + +/** + * A simple reference manager implementation for memory allocated by native code. 
+ * The underlying memory will be released when reference count reach zero. + */ +public class OrcReferenceManager implements ReferenceManager { + private final AtomicInteger bufRefCnt = new AtomicInteger(0); + + private OrcMemoryJniWrapper memory; + + OrcReferenceManager(OrcMemoryJniWrapper memory) { + this.memory = memory; + } + + @Override + public int getRefCount() { + return bufRefCnt.get(); + } + + @Override + public boolean release() { + return release(1); + } + + @Override + public boolean release(int decrement) { + Preconditions.checkState(decrement >= 1, + "ref count decrement should be greater than or equal to 1"); + // decrement the ref count + final int refCnt; + synchronized (this) { + refCnt = bufRefCnt.addAndGet(-decrement); + if (refCnt == 0) { + // refcount of this reference manager has dropped to 0 + // release the underlying memory + memory.close(); + } + } + // the new ref count should be >= 0 + Preconditions.checkState(refCnt >= 0, "RefCnt has gone negative"); + return refCnt == 0; + } + + @Override + public void retain() { + retain(1); + } + + @Override + public void retain(int increment) { + Preconditions.checkArgument(increment > 0, "retain(%s) argument is not positive", increment); + bufRefCnt.addAndGet(increment); + } + + @Override + public ArrowBuf retain(ArrowBuf srcBuffer, BufferAllocator targetAllocator) { + retain(); + return srcBuffer; + } + + @Override + public ArrowBuf deriveBuffer(ArrowBuf sourceBuffer, long index, long length) { + final long derivedBufferAddress = sourceBuffer.memoryAddress() + index; + + // create new ArrowBuf + final ArrowBuf derivedBuf = new ArrowBuf( + this, + null, + length, // length (in bytes) in the underlying memory chunk for this new ArrowBuf + derivedBufferAddress // starting byte address in the underlying memory for this new ArrowBuf, + ); + + return derivedBuf; + } + + @Override + public OwnershipTransferResult transferOwnership(ArrowBuf sourceBuffer, BufferAllocator targetAllocator) { + throw new 
UnsupportedOperationException(); + } + + @Override + public BufferAllocator getAllocator() { + return null; + } + + @Override + public long getSize() { + return memory.getSize(); + } + + @Override + public long getAccountedSize() { + return 0; + } +} diff --git a/src/arrow/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcStripeReader.java b/src/arrow/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcStripeReader.java new file mode 100644 index 000000000..484296d92 --- /dev/null +++ b/src/arrow/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcStripeReader.java @@ -0,0 +1,109 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.arrow.adapter.orc; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.stream.Collectors; + +import org.apache.arrow.memory.ArrowBuf; +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.vector.ipc.ArrowReader; +import org.apache.arrow.vector.ipc.ReadChannel; +import org.apache.arrow.vector.ipc.message.ArrowFieldNode; +import org.apache.arrow.vector.ipc.message.ArrowRecordBatch; +import org.apache.arrow.vector.ipc.message.MessageChannelReader; +import org.apache.arrow.vector.ipc.message.MessageResult; +import org.apache.arrow.vector.ipc.message.MessageSerializer; +import org.apache.arrow.vector.types.pojo.Schema; +import org.apache.arrow.vector.util.ByteArrayReadableSeekableByteChannel; + +/** + * Orc stripe that load data into ArrowRecordBatch. + */ +public class OrcStripeReader extends ArrowReader { + /** + * reference to native stripe reader instance. + */ + private final long nativeInstanceId; + + /** + * Construct a new instance. + * @param nativeInstanceId nativeInstanceId of the stripe reader instance, obtained by + * calling nextStripeReader from OrcReaderJniWrapper + * @param allocator memory allocator for accounting. 
+ */ + OrcStripeReader(long nativeInstanceId, BufferAllocator allocator) { + super(allocator); + this.nativeInstanceId = nativeInstanceId; + } + + @Override + public boolean loadNextBatch() throws IOException { + OrcRecordBatch recordBatch = OrcStripeReaderJniWrapper.next(nativeInstanceId); + if (recordBatch == null) { + return false; + } + + ArrayList<ArrowBuf> buffers = new ArrayList<>(); + for (OrcMemoryJniWrapper buffer : recordBatch.buffers) { + buffers.add(new ArrowBuf( + new OrcReferenceManager(buffer), + null, + (int) buffer.getSize(), + buffer.getMemoryAddress())); + } + + loadRecordBatch(new ArrowRecordBatch( + recordBatch.length, + recordBatch.nodes.stream() + .map(buf -> new ArrowFieldNode(buf.getLength(), buf.getNullCount())) + .collect(Collectors.toList()), + buffers)); + return true; + } + + @Override + public long bytesRead() { + return 0; + } + + + @Override + protected void closeReadSource() throws IOException { + OrcStripeReaderJniWrapper.close(nativeInstanceId); + } + + @Override + protected Schema readSchema() throws IOException { + byte[] schemaBytes = OrcStripeReaderJniWrapper.getSchema(nativeInstanceId); + + try (MessageChannelReader schemaReader = + new MessageChannelReader( + new ReadChannel( + new ByteArrayReadableSeekableByteChannel(schemaBytes)), allocator)) { + + MessageResult result = schemaReader.readNext(); + if (result == null) { + throw new IOException("Unexpected end of input. 
Missing schema."); + } + + return MessageSerializer.deserializeSchema(result.getMessage()); + } + } +} diff --git a/src/arrow/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcStripeReaderJniWrapper.java b/src/arrow/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcStripeReaderJniWrapper.java new file mode 100644 index 000000000..1dd969861 --- /dev/null +++ b/src/arrow/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcStripeReaderJniWrapper.java @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.adapter.orc; + +/** + * JNI wrapper for orc stripe reader. + */ +class OrcStripeReaderJniWrapper { + + /** + * Get the schema of current stripe. + * @param readerId id of the stripe reader instance. + * @return serialized schema. + */ + static native byte[] getSchema(long readerId); + + /** + * Load next record batch. + * @param readerId id of the stripe reader instance. + * @return loaded record batch, return null when reached + * the end of current stripe. + */ + static native OrcRecordBatch next(long readerId); + + /** + * Release resources of underlying reader. + * @param readerId id of the stripe reader instance. 
+ */ + static native void close(long readerId); +} diff --git a/src/arrow/java/adapter/orc/src/test/java/org/apache/arrow/adapter/orc/OrcReaderTest.java b/src/arrow/java/adapter/orc/src/test/java/org/apache/arrow/adapter/orc/OrcReaderTest.java new file mode 100644 index 000000000..4153a35a6 --- /dev/null +++ b/src/arrow/java/adapter/orc/src/test/java/org/apache/arrow/adapter/orc/OrcReaderTest.java @@ -0,0 +1,104 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.arrow.adapter.orc; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNull; + +import java.io.File; +import java.nio.charset.StandardCharsets; +import java.util.List; + + +import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.vector.FieldVector; +import org.apache.arrow.vector.IntVector; +import org.apache.arrow.vector.VarCharVector; +import org.apache.arrow.vector.VectorSchemaRoot; +import org.apache.arrow.vector.ipc.ArrowReader; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.orc.OrcFile; +import org.apache.orc.TypeDescription; +import org.apache.orc.Writer; +import org.junit.BeforeClass; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; + + +public class OrcReaderTest { + + @Rule + public TemporaryFolder testFolder = new TemporaryFolder(); + + private static final int MAX_ALLOCATION = 8 * 1024; + private static RootAllocator allocator; + + @BeforeClass + public static void beforeClass() { + allocator = new RootAllocator(MAX_ALLOCATION); + } + + @Test + public void testOrcJniReader() throws Exception { + TypeDescription schema = TypeDescription.fromString("struct<x:int,y:string>"); + File testFile = new File(testFolder.getRoot(), "test-orc"); + + Writer writer = OrcFile.createWriter(new Path(testFile.getAbsolutePath()), + OrcFile.writerOptions(new Configuration()).setSchema(schema)); + VectorizedRowBatch batch = schema.createRowBatch(); + LongColumnVector longColumnVector = (LongColumnVector) batch.cols[0]; + BytesColumnVector bytesColumnVector = (BytesColumnVector) batch.cols[1]; + for (int r = 0; r < 1024; ++r) { + int row = batch.size++; + 
longColumnVector.vector[row] = r; + byte[] buffer = ("Last-" + (r * 3)).getBytes(StandardCharsets.UTF_8); + bytesColumnVector.setRef(row, buffer, 0, buffer.length); + } + writer.addRowBatch(batch); + writer.close(); + + OrcReader reader = new OrcReader(testFile.getAbsolutePath(), allocator); + assertEquals(1, reader.getNumberOfStripes()); + + ArrowReader stripeReader = reader.nextStripeReader(1024); + VectorSchemaRoot schemaRoot = stripeReader.getVectorSchemaRoot(); + stripeReader.loadNextBatch(); + + List<FieldVector> fields = schemaRoot.getFieldVectors(); + assertEquals(2, fields.size()); + + IntVector intVector = (IntVector) fields.get(0); + VarCharVector varCharVector = (VarCharVector) fields.get(1); + for (int i = 0; i < 1024; ++i) { + assertEquals(i, intVector.get(i)); + assertEquals("Last-" + (i * 3), new String(varCharVector.get(i), StandardCharsets.UTF_8)); + } + + assertFalse(stripeReader.loadNextBatch()); + assertNull(reader.nextStripeReader(1024)); + + stripeReader.close(); + reader.close(); + } +} |