diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000 |
commit | e6918187568dbd01842d8d1d2c808ce16a894239 (patch) | |
tree | 64f88b554b444a49f656b6c656111a145cbbaa28 /src/arrow/js/test/unit/vector | |
parent | Initial commit. (diff) | |
download | ceph-e6918187568dbd01842d8d1d2c808ce16a894239.tar.xz ceph-e6918187568dbd01842d8d1d2c808ce16a894239.zip |
Adding upstream version 18.2.2.upstream/18.2.2
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/arrow/js/test/unit/vector')
-rw-r--r-- | src/arrow/js/test/unit/vector/bool-vector-tests.ts | 111 | ||||
-rw-r--r-- | src/arrow/js/test/unit/vector/date-vector-tests.ts | 102 | ||||
-rw-r--r-- | src/arrow/js/test/unit/vector/numeric-vector-tests.ts | 616 | ||||
-rw-r--r-- | src/arrow/js/test/unit/vector/vector-tests.ts | 127 |
4 files changed, 956 insertions, 0 deletions
diff --git a/src/arrow/js/test/unit/vector/bool-vector-tests.ts b/src/arrow/js/test/unit/vector/bool-vector-tests.ts new file mode 100644 index 000000000..41c53da60 --- /dev/null +++ b/src/arrow/js/test/unit/vector/bool-vector-tests.ts @@ -0,0 +1,111 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +import { Data, Bool, Vector, BoolVector } from 'apache-arrow'; + +const newBoolVector = (length: number, data: Uint8Array) => Vector.new(Data.Bool(new Bool(), 0, length, 0, null, data)); + +describe(`BoolVector`, () => { + const values = [true, true, false, true, true, false, false, false]; + const n = values.length; + const vector = newBoolVector(n, new Uint8Array([27, 0, 0, 0, 0, 0, 0, 0])); + test(`gets expected values`, () => { + let i = -1; + while (++i < n) { + expect(vector.get(i)).toEqual(values[i]); + } + }); + test(`iterates expected values`, () => { + let i = -1; + for (let v of vector) { + expect(++i).toBeLessThan(n); + expect(v).toEqual(values[i]); + } + }); + test(`indexOf returns expected values`, () => { + for (let test_value of [true, false]) { + const expected = values.indexOf(test_value); + expect(vector.indexOf(test_value)).toEqual(expected); + } + }); + test(`indexOf returns -1 when value not found`, () => { + const v = newBoolVector(3, new Uint8Array([0xFF])); + expect(v.indexOf(false)).toEqual(-1); + }); + test(`can set values to true and false`, () => { + const v = newBoolVector(n, new Uint8Array([27, 0, 0, 0, 0, 0, 0, 0])); + const expected1 = [true, true, false, true, true, false, false, false]; + const expected2 = [true, true, true, true, true, false, false, false]; + const expected3 = [true, true, false, false, false, false, true, true]; + function validate(expected: boolean[]) { + for (let i = -1; ++i < n;) { + expect(v.get(i)).toEqual(expected[i]); + } + } + validate(expected1); + v.set(2, true); + validate(expected2); + v.set(2, false); + validate(expected1); + v.set(3, false); + v.set(4, false); + v.set(6, true); + v.set(7, true); + validate(expected3); + v.set(3, true); + v.set(4, true); + v.set(6, false); + v.set(7, false); + validate(expected1); + }); + test(`packs 0 values`, () => { + const expected = new Uint8Array(64); + expect(BoolVector.from([]).values).toEqual(expected); + }); + test(`packs 3 values`, () => { + const 
expected = new Uint8Array(64); + expected[0] = 5; + expect(BoolVector.from([ + true, false, true + ]).values).toEqual(expected); + }); + test(`packs 8 values`, () => { + const expected = new Uint8Array(64); + expected[0] = 27; + expect(BoolVector.from([ + true, true, false, true, true, false, false, false + ]).values).toEqual(expected); + }); + test(`packs 25 values`, () => { + const expected = new Uint8Array(64); + expected[0] = 27; + expected[1] = 216; + expect(BoolVector.from([ + true, true, false, true, true, false, false, false, + false, false, false, true, true, false, true, true, + false + ]).values).toEqual(expected); + }); + test(`from with boolean Array packs values`, () => { + const expected = new Uint8Array(64); + expected[0] = 5; + expect(BoolVector + .from([true, false, true]) + .slice().values + ).toEqual(expected); + }); +}); diff --git a/src/arrow/js/test/unit/vector/date-vector-tests.ts b/src/arrow/js/test/unit/vector/date-vector-tests.ts new file mode 100644 index 000000000..4658633ba --- /dev/null +++ b/src/arrow/js/test/unit/vector/date-vector-tests.ts @@ -0,0 +1,102 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +import { Table, DateDay, DateMillisecond } from 'apache-arrow'; + +describe(`DateVector`, () => { + it('returns days since the epoch as correct JS Dates', () => { + const table = Table.from(test_data); + const expectedMillis = expectedMillis32(); + const date32 = table.getColumnAt<DateDay>(0)!; + for (const date of date32) { + const millis = expectedMillis.shift(); + expect(date).toEqual(millis === null ? null : new Date(millis!)); + } + }); + it('returns millisecond longs since the epoch as correct JS Dates', () => { + const table = Table.from(test_data); + const expectedMillis = expectedMillis64(); + const date64 = table.getColumnAt<DateMillisecond>(1)!; + for (const date of date64) { + const millis = expectedMillis.shift(); + expect(date).toEqual(millis === null ? null : new Date(millis!)); + } + }); +}); + +const expectedMillis32 = () => [ + 165247430400000, 34582809600000, 232604524800000, null, + 199808812800000, 165646771200000, 209557238400000, null +]; + +const expectedMillis64 = () => [ + 27990830234011, -41278585914325, 12694624797111, + null, null, 10761360520213, null, 1394015437000 +]; + +const test_data = { + 'schema': { + 'fields': [ + { + 'name': 'f0', + 'type': { + 'name': 'date', + 'unit': 'DAY' + }, + 'nullable': true, + 'children': [] + }, + { + 'name': 'f1', + 'type': { + 'name': 'date', + 'unit': 'MILLISECOND' + }, + 'nullable': true, + 'children': [] + } + ] + }, + 'batches': [ + { + 'count': 8, + 'columns': [ + { + 'name': 'f0', + 'count': 8, + 'VALIDITY': [1, 1, 1, 0, 1, 1, 1, 0], + 'DATA': [1912586, 400264, 2692182, 2163746, 2312602, 1917208, 2425431] + }, + { + 'name': 'f1', + 'count': 8, + 'VALIDITY': [1, 1, 1, 0, 0, 1, 0, 1], + 'DATA': [ + 27990830234011, + -41278585914325, + 12694624797111, + -38604948562547, + -37802308043516, + 10761360520213, + -25129181633384, + 1394015437000 // <-- the tricky one + ] + } + ] + } + ] +}; diff --git a/src/arrow/js/test/unit/vector/numeric-vector-tests.ts 
b/src/arrow/js/test/unit/vector/numeric-vector-tests.ts new file mode 100644 index 000000000..61418c431 --- /dev/null +++ b/src/arrow/js/test/unit/vector/numeric-vector-tests.ts @@ -0,0 +1,616 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +/* eslint-disable jest/no-identical-title */ + +import { + util, + Data, Vector, + Float, Float16, Float32, Float64, + Int, Int8, Int16, Int32, Int64, Uint8, Uint16, Uint32, Uint64, + FloatVector, Float16Vector, Float32Vector, Float64Vector, + IntVector, Int8Vector, Int16Vector, Int32Vector, Int64Vector, + Uint8Vector, Uint16Vector, Uint32Vector, Uint64Vector, +} from 'apache-arrow'; + +const { float64ToUint16, uint16ToFloat64 } = util; +import { VectorType as V } from 'apache-arrow/interfaces'; +import { TypedArray, TypedArrayConstructor } from 'apache-arrow/interfaces'; +import { BigIntArray, BigIntArrayConstructor } from 'apache-arrow/interfaces'; + +const { joinUint8Arrays, BN } = util; +const uint16ToFloat64Array = (b: ArrayBuffer) => new Float64Array([...new Uint16Array(b)].map(uint16ToFloat64)); +const randomBytes = (n: number) => new Uint16Array([ + ...Uint16Array.from([0, 65535]), + ...Uint16Array.from({ length: (n / 2) - 2 }, () => (Math.random() * 65536) | 0), +]).buffer; +const toBigNumsArray = (values: Int32Array | Uint32Array) => { + const array = new Array(values.length * 0.5); + for (let i = -1, n = values.length * 0.5; ++i < n;) { + array[i] = BN.new(values.subarray(i * 2, i * 2 + 2))[Symbol.toPrimitive](); + } + return array; +}; + +const testValueBuffers = Array.from({ length: 5 }, () => randomBytes(64)); +const testValuesBuffer = joinUint8Arrays(testValueBuffers.map((b) => new Uint8Array(b)))[0].buffer; + +const checkType = <T, R extends T>(Ctor: new (...args: any) => T, inst: R) => expect(inst).toBeInstanceOf(Ctor); +const valuesArray = <T extends TypedArray>(ArrayType: TypedArrayConstructor<T>) => [...valuesTyped<T>(ArrayType)]; +const valuesArray64 = <T extends TypedArray>(ArrayType: TypedArrayConstructor<T>) => { + const typed = valuesTyped<T>(ArrayType); + const array = new Array(typed.length * 0.5); + for (let i = -1, n = array.length; ++i < n;) { + // Interleave regular Arrays and TypedArrays to cover more surface area + array[i] = i % 
2 === 0 + ? [...typed.subarray(i * 2, (i + 1) * 2)] + : typed.subarray(i * 2, (i + 1) * 2); + } + return array; +}; +const valuesTyped = <T extends TypedArray>(ArrayType: TypedArrayConstructor<T>) => new ArrayType(testValuesBuffer); +const bigIntValuesTyped = <T extends BigIntArray>(ArrayType: BigIntArrayConstructor<T>) => new ArrayType(testValuesBuffer); +const bigIntValuesArray = <T extends BigIntArray>(ArrayType: BigIntArrayConstructor<T>) => [...bigIntValuesTyped<T>(ArrayType)]; + +describe(`FloatVector`, () => { + + describe(`FloatVector.from infers the type from the input TypedArray`, () => { + + const u16s = valuesTyped(Uint16Array).map((x) => float64ToUint16(uint16ToFloat64(x))); + const f16s = valuesArray(Uint16Array).map(uint16ToFloat64); + const f32s = valuesTyped(Float32Array); + const f64s = valuesTyped(Float64Array); + const f16Vec = FloatVector.from(u16s); + const f32Vec = FloatVector.from(valuesTyped(Float32Array)); + const f64Vec = FloatVector.from(valuesTyped(Float64Array)); + + // test strong typing at compile-time + test(`return type is correct`, () => checkType(Float16Vector, f16Vec)); + test(`return type is correct`, () => checkType(Float32Vector, f32Vec)); + test(`return type is correct`, () => checkType(Float64Vector, f64Vec)); + test(`throws on bad input`, () => { + expect(() => FloatVector.from(<any> {})).toThrow('Unrecognized FloatVector input'); + }); + + testAndValidateVector(f16Vec, u16s, f16s); + testAndValidateVector(f32Vec, f32s); + testAndValidateVector(f64Vec, f64s); + }); + + describe(`FloatVector.from casts the input values to the correct float type`, () => { + + const u16s = valuesTyped(Uint16Array).map((x) => float64ToUint16(uint16ToFloat64(x))); + const f16s = valuesArray(Uint16Array).map(uint16ToFloat64); + const f16Vec_ = FloatVector.from(u16s); + + const f16Vec = Float16Vector.from(f16Vec_); + const f32Vec = Float32Vector.from(f16Vec_); + const f64Vec = Float64Vector.from(f16Vec_); + + // test strong typing at compile-time 
+ test(`return type is correct`, () => checkType(Float16Vector, f16Vec)); + test(`return type is correct`, () => checkType(Float32Vector, f32Vec)); + test(`return type is correct`, () => checkType(Float64Vector, f64Vec)); + + testAndValidateVector(f16Vec, u16s, f16s); + testAndValidateVector(f32Vec, Float32Array.from(f16s)); + testAndValidateVector(f64Vec, Float64Array.from(f16s)); + }); + + describe(`Float16Vector`, () => { + testFloatVector(Float16, valuesArray(Uint16Array).map(uint16ToFloat64)); + describe(`Float16Vector.from accepts regular Arrays`, () => { + const u16s = valuesTyped(Uint16Array).map((x) => float64ToUint16(uint16ToFloat64(x))); + const f16s = valuesArray(Uint16Array).map(uint16ToFloat64); + const vector = Float16Vector.from(f16s); + test(`return type is correct`, () => checkType(Float16Vector, vector)); + testAndValidateVector(vector, u16s, f16s); + }); + describe(`Float16Vector.from accepts Uint16Arrays`, () => { + const u16s = valuesTyped(Uint16Array).map((x) => float64ToUint16(uint16ToFloat64(x))); + const f16s = valuesArray(Uint16Array).map(uint16ToFloat64); + const vector = Float16Vector.from(u16s); + test(`return type is correct`, () => checkType(Float16Vector, vector)); + testAndValidateVector(vector, u16s, f16s); + }); + }); + describe(`Float32Vector`, () => { + testFloatVector(Float32); + describe(`Float32Vector.from accepts regular Arrays`, () => { + const values = valuesArray(Float32Array); + const vector = Float32Vector.from(values); + testAndValidateVector(vector, valuesTyped(Float32Array), values); + test(`return type is correct`, () => checkType(Float32Vector, vector)); + }); + }); + describe(`Float64Vector`, () => { + testFloatVector(Float64); + describe(`Float64Vector.from accepts regular Arrays`, () => { + const values = valuesArray(Float64Array); + const vector = Float64Vector.from(values); + testAndValidateVector(vector, valuesTyped(Float64Array), values); + test(`return type is correct`, () => checkType(Float64Vector, 
vector)); + }); + }); +}); + +describe(`IntVector`, () => { + + describe(`IntVector.from infers the type from the input TypedArray`, () => { + + const i8s = valuesTyped(Int8Array); + const i16s = valuesTyped(Int16Array); + const i32s = valuesTyped(Int32Array); + const i64s = valuesTyped(Int32Array); + const u8s = valuesTyped(Uint8Array); + const u16s = valuesTyped(Uint16Array); + const u32s = valuesTyped(Uint32Array); + const u64s = valuesTyped(Uint32Array); + const i8Vec = IntVector.from(i8s); + const i16Vec = IntVector.from(i16s); + const i32Vec = IntVector.from(i32s); + const i64Vec = IntVector.from(i64s, true); + const u8Vec = IntVector.from(u8s); + const u16Vec = IntVector.from(u16s); + const u32Vec = IntVector.from(u32s); + const u64Vec = IntVector.from(u64s, true); + + // test strong typing at compile-time + test(`return type is correct`, () => checkType(Int8Vector, i8Vec)); + test(`return type is correct`, () => checkType(Int16Vector, i16Vec)); + test(`return type is correct`, () => checkType(Int32Vector, i32Vec)); + test(`return type is correct`, () => checkType(Int64Vector, i64Vec)); + test(`return type is correct`, () => checkType(Uint8Vector, u8Vec)); + test(`return type is correct`, () => checkType(Uint16Vector, u16Vec)); + test(`return type is correct`, () => checkType(Uint32Vector, u32Vec)); + test(`return type is correct`, () => checkType(Uint64Vector, u64Vec)); + test(`throws on bad input`, () => { + expect(() => IntVector.from(<any> {})).toThrow('Unrecognized IntVector input'); + }); + + const bigI64s = BigInt64Array.from(toBigNumsArray(i64s)); + const bigU64s = BigUint64Array.from(toBigNumsArray(u64s)); + + testAndValidateVector(i8Vec, i8s); + testAndValidateVector(i16Vec, i16s); + testAndValidateVector(i32Vec, i32s); + // This tests when values are represented as pairs of lo, hi + testAndValidateVector(i64Vec, i64s); + // This tests when values are represented as native JS bigints + testAndValidateVector(i64Vec, i64s, [...bigI64s]); + 
testAndValidateVector(u8Vec, u8s); + testAndValidateVector(u16Vec, u16s); + testAndValidateVector(u32Vec, u32s); + // This tests when values are represented as pairs of lo, hi + testAndValidateVector(u64Vec, u64s); + // This tests when values are represented as native JS bigints + testAndValidateVector(u64Vec, u64s, [...bigU64s]); + }); + + describe('IntVector.from casts the input values to the correct integer type', () => { + + const i8s = valuesTyped(Int8Array); + const i16s = valuesTyped(Int16Array); + const i32s = valuesTyped(Int32Array); + const i64s = valuesTyped(Int32Array); + const u8s = valuesTyped(Uint8Array); + const u16s = valuesTyped(Uint16Array); + const u32s = valuesTyped(Uint32Array); + const u64s = valuesTyped(Uint32Array); + const i8Vec_ = IntVector.from(i8s); + const i16Vec_ = IntVector.from(i16s); + const i32Vec_ = IntVector.from(i32s); + const i64Vec_ = IntVector.from(i64s, true); + const u8Vec_ = IntVector.from(u8s); + const u16Vec_ = IntVector.from(u16s); + const u32Vec_ = IntVector.from(u32s); + const u64Vec_ = IntVector.from(u64s, true); + + // Convert from a Vector of the opposite sign + const i8Vec = Int8Vector.from(u8Vec_); + const i16Vec = Int16Vector.from(u16Vec_); + const i32Vec = Int32Vector.from(u32Vec_); + const i64Vec = Int64Vector.from(u64Vec_); + const u8Vec = Uint8Vector.from(i8Vec_); + const u16Vec = Uint16Vector.from(i16Vec_); + const u32Vec = Uint32Vector.from(i32Vec_); + const u64Vec = Uint64Vector.from(i64Vec_); + + // test strong typing at compile-time + test(`return type is correct`, () => checkType(Int8Vector, i8Vec)); + test(`return type is correct`, () => checkType(Int16Vector, i16Vec)); + test(`return type is correct`, () => checkType(Int32Vector, i32Vec)); + test(`return type is correct`, () => checkType(Int64Vector, i64Vec)); + test(`return type is correct`, () => checkType(Uint8Vector, u8Vec)); + test(`return type is correct`, () => checkType(Uint16Vector, u16Vec)); + test(`return type is correct`, () => 
checkType(Uint32Vector, u32Vec)); + test(`return type is correct`, () => checkType(Uint64Vector, u64Vec)); + + const bigI64s = BigInt64Array.from(toBigNumsArray(u64s)); + const bigU64s = BigUint64Array.from(toBigNumsArray(i64s)); + + testAndValidateVector(i8Vec, Int8Array.from(u8s)); + testAndValidateVector(i16Vec, Int16Array.from(u16s)); + testAndValidateVector(i32Vec, Int32Array.from(u32s)); + // This tests when values are represented as pairs of lo, hi + testAndValidateVector(i64Vec, new Int32Array(bigI64s.buffer)); + // This tests when values are represented as native JS bigints + testAndValidateVector(i64Vec, new Int32Array(bigI64s.buffer), [...bigI64s]); + testAndValidateVector(u8Vec, Uint8Array.from(i8s)); + testAndValidateVector(u16Vec, Uint16Array.from(i16s)); + testAndValidateVector(u32Vec, Uint32Array.from(i32s)); + // This tests when values are represented as pairs of lo, hi + testAndValidateVector(u64Vec, new Uint32Array(bigU64s.buffer)); + // This tests when values are represented as native JS bigints + testAndValidateVector(u64Vec, new Uint32Array(bigU64s.buffer), [...bigU64s]); + }); + + describe(`Int8Vector`, () => { + testIntVector(Int8); + describe(`Int8Vector.from accepts regular Arrays`, () => { + const values = valuesArray(Int8Array); + const vector = Int8Vector.from(values); + testAndValidateVector(vector, valuesTyped(Int8Array), values); + test(`return type is correct`, () => checkType(Int8Vector, vector)); + }); + }); + describe(`Int16Vector`, () => { + testIntVector(Int16); + describe(`Int16Vector.from accepts regular Arrays`, () => { + const values = valuesArray(Int16Array); + const vector = Int16Vector.from(values); + testAndValidateVector(vector, valuesTyped(Int16Array), values); + test(`return type is correct`, () => checkType(Int16Vector, vector)); + }); + }); + describe(`Int32Vector`, () => { + testIntVector(Int32); + describe(`Int32Vector.from accepts regular Arrays`, () => { + const values = valuesArray(Int32Array); + const vector 
= Int32Vector.from(values); + testAndValidateVector(vector, valuesTyped(Int32Array), values); + test(`return type is correct`, () => checkType(Int32Vector, vector)); + }); + }); + describe(`Int64Vector`, () => { + testIntVector(Int64); + testIntVector(Int64, bigIntValuesArray(BigInt64Array)); + describe(`Int64Vector.from accepts regular Arrays`, () => { + const values = valuesArray64(Int32Array); + const vector = Int64Vector.from(values); + testAndValidateVector(vector, valuesTyped(Int32Array), values); + testAndValidateVector(vector, valuesTyped(Int32Array), bigIntValuesArray(BigInt64Array)); + test(`return type is correct`, () => checkType(Int64Vector, vector)); + }); + }); + describe(`Uint8Vector`, () => { + testIntVector(Uint8); + describe(`Uint8Vector.from accepts regular Arrays`, () => { + const values = valuesArray(Uint8Array); + const vector = Uint8Vector.from(values); + testAndValidateVector(vector, valuesTyped(Uint8Array), values); + test(`return type is correct`, () => checkType(Uint8Vector, vector)); + }); + }); + describe(`Uint16Vector`, () => { + testIntVector(Uint16); + describe(`Uint16Vector.from accepts regular Arrays`, () => { + const values = valuesArray(Uint16Array); + const vector = Uint16Vector.from(values); + testAndValidateVector(vector, valuesTyped(Uint16Array), values); + test(`return type is correct`, () => checkType(Uint16Vector, vector)); + }); + }); + describe(`Uint32Vector`, () => { + testIntVector(Uint32); + describe(`Uint32Vector.from accepts regular Arrays`, () => { + const values = valuesArray(Uint32Array); + const vector = Uint32Vector.from(values); + testAndValidateVector(vector, valuesTyped(Uint32Array), values); + test(`return type is correct`, () => checkType(Uint32Vector, vector)); + }); + }); + describe(`Uint64Vector`, () => { + testIntVector(Uint64); + testIntVector(Uint64, bigIntValuesArray(BigUint64Array)); + describe(`Uint64Vector.from accepts regular Arrays`, () => { + const values = valuesArray64(Uint32Array); + const 
vector = Uint64Vector.from(values); + testAndValidateVector(vector, valuesTyped(Uint32Array), values); + testAndValidateVector(vector, valuesTyped(Uint32Array), bigIntValuesArray(BigUint64Array)); + test(`return type is correct`, () => checkType(Uint64Vector, vector)); + }); + }); +}); + +function testIntVector<T extends Int>(DataType: new () => T, values?: Array<any>) { + + const type = new DataType(); + const ArrayType = type.ArrayType; + const stride = type.bitWidth < 64 ? 1 : 2; + + const typed = valuesTyped(ArrayType); + const jsArray = values || [...typed]; + const vector = Vector.new(Data.Int(type, 0, typed.length / stride, 0, null, typed)); + const chunked = testValueBuffers.map((b) => new ArrayType(b)) + .map((b) => Vector.new(Data.Int(type, 0, b.length / stride, 0, null, b))) + .reduce((v: any, v2) => v.concat(v2)); + + const vectorBegin = (vector.length * .25) | 0; + const vectorEnd = (vector.length * .75) | 0; + const typedBegin = vectorBegin * (typed.length / vector.length); + const typedEnd = vectorEnd * (typed.length / vector.length); + const jsArrayBegin = vectorBegin * (jsArray.length / vector.length); + const jsArrayEnd = vectorEnd * (jsArray.length / vector.length); + + const combos = [[`vector`, vector], [`chunked`, chunked]] as [string, V<T>][]; + combos.forEach(([chunksType, vector]) => { + describe(chunksType, () => { + // test base case no slicing + describe(`base case no slicing`, () => { testAndValidateVector(vector, typed, jsArray); }); + // test slicing without args + describe(`slicing without args`, () => { testAndValidateVector(vector.slice(), typed.slice(), jsArray.slice()); }); + // test slicing the middle half + describe(`slice the middle half`, () => { + testAndValidateVector( + vector.slice(vectorBegin, vectorEnd), + typed.slice(typedBegin, typedEnd), + jsArray.slice(jsArrayBegin, jsArrayEnd) + ); + }); + // test splicing out the middle half + describe(`splicing out the middle half`, () => { + testAndValidateVector( + 
vector.slice(0, vectorBegin).concat(vector.slice(vectorEnd)), + new ArrayType([...typed.slice(0, typedBegin), ...typed.slice(typedEnd)]), + [...jsArray.slice(0, jsArrayBegin), ...jsArray.slice(jsArrayEnd)] + ); + }); + }); + }); +} + +function testFloatVector<T extends Float>(DataType: new () => T, values?: Array<any>) { + + const type = new DataType(); + const ArrayType = type.ArrayType; + + const typed = valuesTyped(ArrayType); + const jsArray = values || [...typed]; + const vector = Vector.new(Data.Float(type, 0, typed.length, 0, null, typed)); + const chunked = testValueBuffers.map((b) => new ArrayType(b)) + .map((b) => Vector.new(Data.Float(type, 0, b.length, 0, null, b))) + .reduce((v: any, v2) => v.concat(v2)); + + const begin = (vector.length * .25) | 0; + const end = (vector.length * .75) | 0; + const combos = [[`vector`, vector], [`chunked`, chunked]] as [string, V<T>][]; + + combos.forEach(([chunksType, vector]) => { + describe(chunksType, () => { + // test base case no slicing + describe(`base case no slicing`, () => { testAndValidateVector(vector, typed, jsArray); }); + // test slicing without args + describe(`slicing without args`, () => { testAndValidateVector(vector.slice(), typed.slice(), jsArray.slice()); }); + // test slicing the middle half + describe(`slice the middle half`, () => { + testAndValidateVector( + vector.slice(begin, end), + typed.slice(begin, end), + jsArray.slice(begin, end) + ); + }); + // test splicing out the middle half + describe(`splicing out the middle half`, () => { + testAndValidateVector( + vector.slice(0, begin).concat(vector.slice(end)), + new ArrayType([...typed.slice(0, begin), ...typed.slice(end)]), + [...jsArray.slice(0, begin), ...jsArray.slice(end)] + ); + }); + }); + }); +} + +function testAndValidateVector<T extends Int | Float>(vector: Vector<T>, typed: T['TArray'], values: any[] = [...typed]) { + gets_expected_values(vector, typed, values); + iterates_expected_values(vector, typed, values); + 
indexof_returns_expected_values(vector, typed, values); + slice_returns_a_typedarray(vector); + slices_the_entire_array(vector, typed); + slices_from_minus_20_to_length(vector, typed); + slices_from_0_to_minus_20(vector, typed); + slices_the_array_from_0_to_length_minus_20(vector, typed); + slices_the_array_from_0_to_length_plus_20(vector, typed); +} + +function gets_expected_values<T extends Int | Float>(vector: Vector<T>, typed: T['TArray'], values: any[] = [...typed]) { + test(`gets expected values`, () => { + expect.hasAssertions(); + let i = -1, n = vector.length; + let stride = vector.stride; + try { + if (stride === 1) { + while (++i < n) { + expect(vector.get(i)).toEqual(values[i]); + } + } else if (typeof values[0] === 'bigint') { + while (++i < n) { + const x: any = vector.get(i)!; + expect(0n + x).toEqual(values[i]); + } + } else { + const vector64 = vector as Vector<Int64 | Uint64>; + const i64 = (() => typed.subarray(stride * i, stride * (i + 1))); + while (++i < n) { + expect((vector64.get(i) as any).subarray(0, stride)).toEqual(i64()); + } + } + } catch (e) { throw new Error(`${i}: ${e}`); } + }); +} + +function iterates_expected_values<T extends Int | Float>(vector: Vector<T>, typed: T['TArray'], values: any[] = [...typed]) { + test(`iterates expected values`, () => { + expect.hasAssertions(); + let i = -1, n = vector.length; + let stride = vector.stride; + try { + if (stride === 1) { + for (let v of vector) { + expect(++i).toBeLessThan(n); + expect(v).toEqual(values[i]); + } + } else if (typeof values[0] === 'bigint') { + let x: any; + for (let v of vector) { + x = v; + expect(++i).toBeLessThan(n); + expect(0n + x).toEqual(values[i]); + } + } else { + const vector64 = vector as Vector<Int64 | Uint64>; + const i64 = (() => typed.subarray(stride * i, stride * (i + 1))); + for (let v of vector64) { + expect(++i).toBeLessThan(n); + expect((v as any).subarray(0, stride)).toEqual(i64()); + } + } + } catch (e) { throw new Error(`${i}: ${e}`); } + }); +} + 
/**
 * Registers a test that checks `vector.indexOf()` against `Array.prototype.indexOf`
 * for every expected value plus a handful of randomly generated extra values.
 *
 * @param vector The Int or Float vector under test.
 * @param typed  The flat typed array holding the vector's expected contents.
 * @param values The expected contents in the representation `indexOf()` should be
 *               called with. Defaults to the elements of `typed`.
 */
function indexof_returns_expected_values<T extends Int | Float>(vector: Vector<T>, typed: T['TArray'], values: any = [...typed]) {
    test(`indexOf returns expected values`, () => {

        expect.hasAssertions();

        const stride = vector.stride;
        const BPE = vector.ArrayType.BYTES_PER_ELEMENT;
        const isBigInt = typeof values[0] === 'bigint';
        const isInt64 = util.compareTypes(vector.type, new Int64());
        const isFloat16 = util.compareTypes(vector.type, new Float16());

        // Create a few random values
        let missing: any = new vector.ArrayType(randomBytes(8 * 2 * BPE));

        // Special cases convert the values and/or missing to the
        // representations that indexOf() expects to receive

        if (isFloat16) {
            missing = uint16ToFloat64Array(missing);
        } else if (isBigInt) {
            const BigIntArray = isInt64 ? BigInt64Array : BigUint64Array;
            // The callback reads the typed array still bound to `missing`;
            // the reassignment only happens once Array.from() has returned.
            missing = Array.from({ length: missing.length / stride },
                (_, i) => new BigIntArray(missing.buffer, BPE * stride * i, 1)[0]);
        } else if (stride !== 1) {
            // Multi-element values are searched for as `stride`-long sub-arrays
            values = Array.from({ length: typed.length / stride },
                (_, i) => typed.slice(stride * i, stride * (i + 1)));
            missing = Array.from({ length: missing.length / stride },
                (_, i) => missing.slice(stride * i, stride * (i + 1)));
        }

        const original = values.slice();
        // Combine with the expected values and shuffle the order
        const shuffled = shuffle(values.concat([...missing]));
        const n = shuffled.length;
        let i = -1;

        try {
            if (!isBigInt) {
                while (++i < n) {
                    const search = shuffled[i];
                    if (typeof search !== 'number' || !isNaN(search)) {
                        expect(vector.indexOf(search)).toEqual(original.indexOf(search));
                    } else {
                        // Array#indexOf compares with ===, which never matches NaN,
                        // so the expected position is the first NaN element (or -1).
                        const expected = original.findIndex((value: number) => isNaN(value));
                        expect(vector.indexOf(search)).toEqual(expected);
                    }
                }
            } else {
                // Distinguish the bigint comparisons to ensure the indexOf type signature accepts bigints
                const shuffled64 = shuffled as bigint[];
                if (isInt64) {
                    const vector64 = (<unknown> vector) as Int64Vector;
                    while (++i < n) {
                        expect(vector64.indexOf(shuffled64[i])).toEqual(original.indexOf(shuffled64[i]));
                    }
                } else {
                    const vector64 = (<unknown> vector) as Uint64Vector;
                    while (++i < n) {
                        expect(vector64.indexOf(shuffled64[i])).toEqual(original.indexOf(shuffled64[i]));
                    }
                }
            }
        } catch (e) {
            // Re-throw with the failing position and value so the failure is identifiable
            throw new Error(`${i} (${shuffled[i]}): ${e}`);
        }
    });
}

/** Registers a test that `vector.slice().toArray()` is an instance of `vector.ArrayType`. */
function slice_returns_a_typedarray<T extends Int | Float>(vector: Vector<T>) {
    test(`slice returns a TypedArray`, () => {
        expect.hasAssertions();
        expect(vector.slice().toArray()).toBeInstanceOf(vector.ArrayType);
    });
}

/** Registers a test that an argument-less `slice()` yields all of `values`. */
function slices_the_entire_array<T extends Int | Float>(vector: Vector<T>, values: T['TArray']) {
    test(`slices the entire array`, () => {
        expect.hasAssertions();
        expect(vector.slice().toArray()).toEqual(values);
    });
}

/**
 * Registers a test that `slice(-20)` yields the last 20 vector elements,
 * i.e. the last `20 * vector.stride` entries of `values`.
 */
function slices_from_minus_20_to_length<T extends Int | Float>(vector: Vector<T>, values: T['TArray']) {
    test(`slices from -20 to length`, () => {
        expect.hasAssertions();
        expect(vector.slice(-20).toArray()).toEqual(values.slice(-(20 * vector.stride)));
    });
}

/**
 * Registers a test that `slice(0, -20)` drops the last 20 vector elements,
 * i.e. the last `20 * vector.stride` entries of `values`.
 */
function slices_from_0_to_minus_20<T extends Int | Float>(vector: Vector<T>, values: T['TArray']) {
    test(`slices from 0 to -20`, () => {
        expect.hasAssertions();
        expect(vector.slice(0, -20).toArray()).toEqual(values.slice(0, -(20 * vector.stride)));
    });
}

/**
 * Registers a test that `slice(0, length - 20)` matches the same range of
 * `values`, with the offset scaled by `vector.stride`.
 */
function slices_the_array_from_0_to_length_minus_20<T extends Int | Float>(vector: Vector<T>, values: T['TArray']) {
    test(`slices the array from 0 to length - 20`, () => {
        expect.hasAssertions();
        expect(vector.slice(0, vector.length - 20).toArray()).toEqual(values.slice(0, values.length - (20 * vector.stride)));
    });
}

/**
 * Registers a test that an end index past the vector's length
 * (`length + 20`) gives the same result as the equivalent slice of `values`.
 */
function slices_the_array_from_0_to_length_plus_20<T extends Int | Float>(vector: Vector<T>, values: T['TArray']) {
    test(`slices the array from 0 to length + 20`, () => {
        expect.hasAssertions();
        expect(vector.slice(0, vector.length + 20).toArray()).toEqual(values.slice(0, values.length + (20 * vector.stride)));
    });
}

/**
 * Returns a randomly reordered copy of `input`; `input` itself is not modified.
 *
 * The element type defaults to `any` so that callers passing untyped arrays
 * keep receiving an `any[]`.
 *
 * @param input The array to copy and shuffle.
 * @returns A new array containing the same elements in random order.
 */
function shuffle<T = any>(input: readonly T[]): T[] {
    const result = input.slice();
    // Walk from the last slot down to index 1, swapping each slot with a
    // randomly chosen slot at or before it.
    for (let i = result.length - 1; i > 0; --i) {
        const j = (Math.random() * (i + 1)) | 0;
        [result[i], result[j]] = [result[j], result[i]];
    }
    return result;
}

// diff --git a/src/arrow/js/test/unit/vector/vector-tests.ts b/src/arrow/js/test/unit/vector/vector-tests.ts
// new file mode 100644
// index 000000000..60bff94f8
// --- /dev/null
// +++ b/src/arrow/js/test/unit/vector/vector-tests.ts
// @@ -0,0 +1,127 @@

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
import {
    Int32, Dictionary, DateUnit, util,
    Data, Vector, Utf8Vector, DateVector, DictionaryVector,
} from 'apache-arrow';

describe(`DateVector`, () => {
    // Dates that appear in neither fixture below, used as indexOf() misses
    const extras = [
        new Date(2000, 0, 1),
        new Date(1991, 5, 28, 12, 11, 10)
    ];
    describe(`unit = MILLISECOND`, () => {
        const values = [
            new Date(1989, 5, 22, 1, 2, 3),
            new Date(1988, 3, 25, 4, 5, 6),
            new Date(1987, 2, 24, 7, 8, 9),
            new Date(2018, 4, 12, 17, 30, 0)
        ];
        const vector = DateVector.from(values);
        basicVectorTests(vector, values, extras);
    });
    describe(`unit = DAY`, () => {
        // Use UTC to ensure that dates are always at midnight
        const values = [
            new Date(Date.UTC(1989, 5, 22)),
            new Date(Date.UTC(1988, 3, 25)),
            new Date(Date.UTC(1987, 2, 24)),
            new Date(Date.UTC(2018, 4, 12))
        ];
        const vector = DateVector.from(values, DateUnit.DAY);
        basicVectorTests(vector, values, extras);
    });
});

describe(`DictionaryVector`, () => {

    const dictionary = ['foo', 'bar', 'baz'];
    const extras = ['abc', '123']; // values to search for that should NOT be found
    const dictionary_vec = Utf8Vector.from(dictionary);

    // 50 random dictionary indices, and a random validity flag for each of them
    const indices = Array.from({ length: 50 }, () => Math.random() * 3 | 0);
    const validity = Array.from({ length: indices.length }, () => Math.random() > 0.2);

    describe(`index with nullCount == 0`, () => {

        const values = indices.map((d) => dictionary[d]);
        const vector = DictionaryVector.from(dictionary_vec, new Int32(), indices);

        basicVectorTests(vector, values, extras);

        describe(`sliced`, () => {
            basicVectorTests(vector.slice(10, 20), values.slice(10, 20), extras);
        });
    });

    describe(`index with nullCount > 0`, () => {

        const nullBitmap = util.packBools(validity);
        const nullCount = validity.reduce((acc, d) => acc + (d ? 0 : 1), 0);
        // Slots flagged invalid are expected to read back as null
        const values = indices.map((d, i) => validity[i] ? dictionary[d] : null);
        const type = new Dictionary(dictionary_vec.type, new Int32(), null, null);
        const vector = Vector.new(Data.Dictionary(type, 0, indices.length, nullCount, nullBitmap, indices, dictionary_vec));

        basicVectorTests(vector, values, extras);

        describe(`sliced`, () => {
            basicVectorTests(vector.slice(10, 20), values.slice(10, 20), extras);
        });
    });
});

describe(`Utf8Vector`, () => {
    const values = ['foo', 'bar', 'baz', 'foo bar', 'bar'];
    const vector = Utf8Vector.from(values);
    basicVectorTests(vector, values, ['abc', '123']);
    describe(`sliced`, () => {
        // 'foo' is in the full vector but outside the slice, so it must not be found
        basicVectorTests(vector.slice(1, 3), values.slice(1, 3), ['foo', 'abc']);
    });
});

/**
 * Creates some basic tests for the given vector.
 * Verifies that:
 * - `get` and the native iterator return the same data as `values`
 * - the native iterator yields exactly `values.length` items
 * - `indexOf` returns the same indices as `values.indexOf`, for `values` and `extras`
 *
 * @param vector The vector under test.
 * @param values The values the vector is expected to contain, in order.
 * @param extras Additional values to look up with `indexOf`.
 */
function basicVectorTests(vector: Vector, values: any[], extras: any[]) {

    const n = values.length;

    test(`gets expected values`, () => {
        let i = -1;
        while (++i < n) {
            expect(vector.get(i)).toEqual(values[i]);
        }
    });
    test(`iterates expected values`, () => {
        expect.hasAssertions();
        let i = -1;
        for (const v of vector) {
            expect(++i).toBeLessThan(n);
            expect(v).toEqual(values[i]);
        }
        // The per-item checks above cannot detect an iterator that stops early,
        // so verify the number of items that were actually yielded.
        expect(i + 1).toBe(n);
    });
    test(`indexOf returns expected values`, () => {
        const testValues = values.concat(extras);

        for (const value of testValues) {
            const actual = vector.indexOf(value);
            const expected = values.indexOf(value);
            expect(actual).toEqual(expected);
        }
    });
}