path: root/src/arrow/js/test
Diffstat (limited to 'src/arrow/js/test')
-rw-r--r--  src/arrow/js/test/.eslintrc.js  31
-rw-r--r--  src/arrow/js/test/Arrow.ts  20
-rw-r--r--  src/arrow/js/test/data/tables.ts  84
-rw-r--r--  src/arrow/js/test/generate-test-data.ts  720
-rw-r--r--  src/arrow/js/test/inference/column.ts  62
-rw-r--r--  src/arrow/js/test/inference/nested.ts  62
-rw-r--r--  src/arrow/js/test/inference/visitor/get.ts  56
-rw-r--r--  src/arrow/js/test/jest-extensions.ts  162
-rw-r--r--  src/arrow/js/test/tsconfig.json  24
-rw-r--r--  src/arrow/js/test/tsconfig/tsconfig.apache-arrow.json  8
-rw-r--r--  src/arrow/js/test/tsconfig/tsconfig.base.json  26
-rw-r--r--  src/arrow/js/test/tsconfig/tsconfig.coverage.json  6
-rw-r--r--  src/arrow/js/test/tsconfig/tsconfig.es2015.cjs.json  8
-rw-r--r--  src/arrow/js/test/tsconfig/tsconfig.es2015.esm.json  8
-rw-r--r--  src/arrow/js/test/tsconfig/tsconfig.es2015.umd.json  11
-rw-r--r--  src/arrow/js/test/tsconfig/tsconfig.es5.cjs.json  9
-rw-r--r--  src/arrow/js/test/tsconfig/tsconfig.es5.esm.json  9
-rw-r--r--  src/arrow/js/test/tsconfig/tsconfig.es5.umd.json  12
-rw-r--r--  src/arrow/js/test/tsconfig/tsconfig.esnext.cjs.json  8
-rw-r--r--  src/arrow/js/test/tsconfig/tsconfig.esnext.esm.json  8
-rw-r--r--  src/arrow/js/test/tsconfig/tsconfig.esnext.umd.json  11
-rw-r--r--  src/arrow/js/test/tsconfig/tsconfig.src.json  8
-rw-r--r--  src/arrow/js/test/tsconfig/tsconfig.ts.json  8
-rw-r--r--  src/arrow/js/test/unit/bit-tests.ts  41
-rw-r--r--  src/arrow/js/test/unit/builders/builder-tests.ts  268
-rw-r--r--  src/arrow/js/test/unit/builders/date-tests.ts  106
-rw-r--r--  src/arrow/js/test/unit/builders/dictionary-tests.ts  65
-rw-r--r--  src/arrow/js/test/unit/builders/int64-tests.ts  91
-rw-r--r--  src/arrow/js/test/unit/builders/primitive-tests.ts  154
-rw-r--r--  src/arrow/js/test/unit/builders/uint64-tests.ts  91
-rw-r--r--  src/arrow/js/test/unit/builders/utf8-tests.ts  62
-rw-r--r--  src/arrow/js/test/unit/builders/utils.ts  221
-rw-r--r--  src/arrow/js/test/unit/dataframe-tests.ts  282
-rw-r--r--  src/arrow/js/test/unit/generated-data-tests.ts  61
-rw-r--r--  src/arrow/js/test/unit/generated-data-validators.ts  184
-rw-r--r--  src/arrow/js/test/unit/int-tests.ts  241
-rw-r--r--  src/arrow/js/test/unit/ipc/helpers.ts  202
-rw-r--r--  src/arrow/js/test/unit/ipc/message-reader-tests.ts  109
-rw-r--r--  src/arrow/js/test/unit/ipc/reader/file-reader-tests.ts  123
-rw-r--r--  src/arrow/js/test/unit/ipc/reader/from-inference-tests.ts  150
-rw-r--r--  src/arrow/js/test/unit/ipc/reader/json-reader-tests.ts  40
-rw-r--r--  src/arrow/js/test/unit/ipc/reader/stream-reader-tests.ts  65
-rw-r--r--  src/arrow/js/test/unit/ipc/reader/streams-dom-tests.ts  227
-rw-r--r--  src/arrow/js/test/unit/ipc/reader/streams-node-tests.ts  219
-rw-r--r--  src/arrow/js/test/unit/ipc/validate.ts  74
-rw-r--r--  src/arrow/js/test/unit/ipc/writer/file-writer-tests.ts  46
-rw-r--r--  src/arrow/js/test/unit/ipc/writer/json-writer-tests.ts  46
-rw-r--r--  src/arrow/js/test/unit/ipc/writer/stream-writer-tests.ts  119
-rw-r--r--  src/arrow/js/test/unit/ipc/writer/streams-dom-tests.ts  272
-rw-r--r--  src/arrow/js/test/unit/ipc/writer/streams-node-tests.ts  274
-rw-r--r--  src/arrow/js/test/unit/math-tests.ts  47
-rw-r--r--  src/arrow/js/test/unit/recordbatch/record-batch-tests.ts  130
-rw-r--r--  src/arrow/js/test/unit/table-tests.ts  406
-rw-r--r--  src/arrow/js/test/unit/table/assign-tests.ts  80
-rw-r--r--  src/arrow/js/test/unit/table/serialize-tests.ts  167
-rw-r--r--  src/arrow/js/test/unit/utils-tests.ts  32
-rw-r--r--  src/arrow/js/test/unit/utils.ts  21
-rw-r--r--  src/arrow/js/test/unit/vector/bool-vector-tests.ts  111
-rw-r--r--  src/arrow/js/test/unit/vector/date-vector-tests.ts  102
-rw-r--r--  src/arrow/js/test/unit/vector/numeric-vector-tests.ts  616
-rw-r--r--  src/arrow/js/test/unit/vector/vector-tests.ts  127
-rw-r--r--  src/arrow/js/test/unit/visitor-tests.ts  169
62 files changed, 7202 insertions(+), 0 deletions(-)
diff --git a/src/arrow/js/test/.eslintrc.js b/src/arrow/js/test/.eslintrc.js
new file mode 100644
index 000000000..311a356e2
--- /dev/null
+++ b/src/arrow/js/test/.eslintrc.js
@@ -0,0 +1,31 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+module.exports = {
+ rules: {
+ "@typescript-eslint/no-require-imports": "off",
+ "@typescript-eslint/no-inferrable-types": "off",
+ "@typescript-eslint/naming-convention": "off",
+ "prefer-const": "off",
+ "max-len": "off",
+
+ "jest/no-export": "off",
+ "jest/valid-title": "off",
+ "jest/expect-expect": "off",
+ "jest/no-conditional-expect": "off",
+ },
+};
diff --git a/src/arrow/js/test/Arrow.ts b/src/arrow/js/test/Arrow.ts
new file mode 100644
index 000000000..de2bc58c7
--- /dev/null
+++ b/src/arrow/js/test/Arrow.ts
@@ -0,0 +1,20 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import 'web-streams-polyfill';
+
+export * from 'apache-arrow';
diff --git a/src/arrow/js/test/data/tables.ts b/src/arrow/js/test/data/tables.ts
new file mode 100644
index 000000000..6ce2c861d
--- /dev/null
+++ b/src/arrow/js/test/data/tables.ts
@@ -0,0 +1,84 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import { vecs } from '../generate-test-data';
+import * as generate from '../generate-test-data';
+import { Schema, Field, Dictionary } from '../Arrow';
+
+const listVectorGeneratorNames = ['list', 'fixedSizeList'];
+const nestedVectorGeneratorNames = ['struct', 'denseUnion', 'sparseUnion', 'map'];
+const dictionaryKeyGeneratorNames = ['int8', 'int16', 'int32', 'uint8', 'uint16', 'uint32'];
+const valueVectorGeneratorNames = [
+ 'null_', 'bool', 'int8', 'int16', 'int32', 'int64', 'uint8', 'uint16', 'uint32', 'uint64',
+ 'float16', 'float32', 'float64', 'utf8', 'binary', 'fixedSizeBinary', 'dateDay', 'dateMillisecond',
+ 'timestampSecond', 'timestampMillisecond', 'timestampMicrosecond', 'timestampNanosecond',
+ 'timeSecond', 'timeMillisecond', 'timeMicrosecond', 'timeNanosecond', 'decimal',
+ 'dictionary', 'intervalDayTime', 'intervalYearMonth'
+];
+
+const vectorGeneratorNames = [...valueVectorGeneratorNames, ...listVectorGeneratorNames, ...nestedVectorGeneratorNames];
+
+export function* generateRandomTables(batchLengths = [1000, 2000, 3000], minCols = 1, maxCols = 5) {
+
+ let numCols = 0;
+ let allNames = shuffle(vectorGeneratorNames);
+
+ do {
+ numCols = Math.max(Math.min(
+ Math.random() * maxCols | 0, allNames.length), minCols);
+
+ let names = allNames.slice(0, numCols);
+ let types = names.map((fn) => vecs[fn](0).vector.type);
+ let schema = new Schema(names.map((name, i) => new Field(name, types[i])));
+
+ yield generate.table(batchLengths, schema).table;
+
+ } while ((allNames = allNames.slice(numCols)).length > 0);
+}
+
+/**
+ * Yields a series of tables, each containing a single Dictionary-encoded column.
+ * Each yielded table is a unique combination of dictionary value type and key
+ * (index) type, so consuming all of the tables exercises dictionary-encoding
+ * across the supported Arrow types.
+ *
+ * @param batchLengths number[] The number and length of the RecordBatches to generate
+ */
+export function* generateDictionaryTables(batchLengths = [100, 200, 300]) {
+ for (const dictName of valueVectorGeneratorNames) {
+ if (dictName === 'dictionary') { continue; }
+ const dictionary = vecs[dictName](100).vector;
+ for (const keys of dictionaryKeyGeneratorNames) {
+ const valsType = dictionary.type;
+ const keysType = vecs[keys](0).vector.type;
+ const dictType = new Dictionary(valsType, keysType);
+ const schema = new Schema([new Field(`dict[${keys}]`, dictType, true)]);
+ yield generate.table(batchLengths, schema).table;
+ }
+ }
+}
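+
+// A hedged usage sketch (added for illustration; not part of the original file).
+// Both generators yield fully-built Tables, e.g. for round-trip IPC tests:
+//
+//   for (const table of generateRandomTables([10, 20])) {
+//       console.log(table.length, table.numCols);
+//   }
+//   for (const table of generateDictionaryTables([10])) {
+//       console.log(table.schema.fields[0].name);
+//   }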
+
+function shuffle(input: any[]) {
+ const result = input.slice();
+ let j, tmp, i = result.length;
+ while (--i > 0) {
+ j = (Math.random() * (i + 1)) | 0;
+ tmp = result[i];
+ result[i] = result[j];
+ result[j] = tmp;
+ }
+ return result;
+}
diff --git a/src/arrow/js/test/generate-test-data.ts b/src/arrow/js/test/generate-test-data.ts
new file mode 100644
index 000000000..030176e62
--- /dev/null
+++ b/src/arrow/js/test/generate-test-data.ts
@@ -0,0 +1,720 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import randomatic from 'randomatic';
+import { VectorType as V } from 'apache-arrow/interfaces';
+
+import {
+ Data, Vector, Visitor, DataType,
+ Table, Schema, Field, RecordBatch,
+ Null,
+ Bool,
+ Int, Int8, Int16, Int32, Int64, Uint8, Uint16, Uint32, Uint64,
+ Float, Float16, Float32, Float64,
+ Utf8,
+ Binary,
+ FixedSizeBinary,
+ Date_, DateDay, DateMillisecond,
+ Timestamp, TimestampSecond, TimestampMillisecond, TimestampMicrosecond, TimestampNanosecond,
+ Time, TimeSecond, TimeMillisecond, TimeMicrosecond, TimeNanosecond,
+ Decimal,
+ List,
+ Struct,
+ Union, DenseUnion, SparseUnion,
+ Dictionary,
+ Interval, IntervalDayTime, IntervalYearMonth,
+ FixedSizeList,
+ Map_,
+ DateUnit, TimeUnit, UnionMode,
+ util
+} from './Arrow';
+
+type TKeys = Int8 | Int16 | Int32 | Uint8 | Uint16 | Uint32;
+
+interface TestDataVectorGenerator extends Visitor {
+
+ visit<T extends Null> (type: T, length?: number): GeneratedVector<V<T>>;
+ visit<T extends Bool> (type: T, length?: number, nullCount?: number): GeneratedVector<V<T>>;
+ visit<T extends Int> (type: T, length?: number, nullCount?: number): GeneratedVector<V<T>>;
+ visit<T extends Float> (type: T, length?: number, nullCount?: number): GeneratedVector<V<T>>;
+ visit<T extends Utf8> (type: T, length?: number, nullCount?: number): GeneratedVector<V<T>>;
+ visit<T extends Binary> (type: T, length?: number, nullCount?: number): GeneratedVector<V<T>>;
+ visit<T extends FixedSizeBinary> (type: T, length?: number, nullCount?: number): GeneratedVector<V<T>>;
+ visit<T extends Date_> (type: T, length?: number, nullCount?: number): GeneratedVector<V<T>>;
+ visit<T extends Timestamp> (type: T, length?: number, nullCount?: number): GeneratedVector<V<T>>;
+ visit<T extends Time> (type: T, length?: number, nullCount?: number): GeneratedVector<V<T>>;
+ visit<T extends Decimal> (type: T, length?: number, nullCount?: number): GeneratedVector<V<T>>;
+ visit<T extends Interval> (type: T, length?: number, nullCount?: number): GeneratedVector<V<T>>;
+ visit<T extends List> (type: T, length?: number, nullCount?: number, child?: Vector): GeneratedVector<V<T>>;
+ visit<T extends FixedSizeList> (type: T, length?: number, nullCount?: number, child?: Vector): GeneratedVector<V<T>>;
+ visit<T extends Dictionary> (type: T, length?: number, nullCount?: number, dictionary?: Vector): GeneratedVector<V<T>>;
+ visit<T extends Union> (type: T, length?: number, nullCount?: number, children?: Vector[]): GeneratedVector<V<T>>;
+ visit<T extends Struct> (type: T, length?: number, nullCount?: number, children?: Vector[]): GeneratedVector<V<T>>;
+ visit<T extends Map_> (type: T, length?: number, nullCount?: number, child?: Vector): GeneratedVector<V<T>>;
+ visit<T extends DataType> (type: T, length?: number, ...args: any[]): GeneratedVector<V<T>>;
+
+ visitNull: typeof generateNull;
+ visitBool: typeof generateBool;
+ visitInt: typeof generateInt;
+ visitFloat: typeof generateFloat;
+ visitUtf8: typeof generateUtf8;
+ visitBinary: typeof generateBinary;
+ visitFixedSizeBinary: typeof generateFixedSizeBinary;
+ visitDate: typeof generateDate;
+ visitTimestamp: typeof generateTimestamp;
+ visitTime: typeof generateTime;
+ visitDecimal: typeof generateDecimal;
+ visitList: typeof generateList;
+ visitStruct: typeof generateStruct;
+ visitUnion: typeof generateUnion;
+ visitDictionary: typeof generateDictionary;
+ visitInterval: typeof generateInterval;
+ visitFixedSizeList: typeof generateFixedSizeList;
+ visitMap: typeof generateMap;
+}
+
+class TestDataVectorGenerator extends Visitor {}
+
+TestDataVectorGenerator.prototype.visitNull = generateNull;
+TestDataVectorGenerator.prototype.visitBool = generateBool;
+TestDataVectorGenerator.prototype.visitInt = generateInt;
+TestDataVectorGenerator.prototype.visitFloat = generateFloat;
+TestDataVectorGenerator.prototype.visitUtf8 = generateUtf8;
+TestDataVectorGenerator.prototype.visitBinary = generateBinary;
+TestDataVectorGenerator.prototype.visitFixedSizeBinary = generateFixedSizeBinary;
+TestDataVectorGenerator.prototype.visitDate = generateDate;
+TestDataVectorGenerator.prototype.visitTimestamp = generateTimestamp;
+TestDataVectorGenerator.prototype.visitTime = generateTime;
+TestDataVectorGenerator.prototype.visitDecimal = generateDecimal;
+TestDataVectorGenerator.prototype.visitList = generateList;
+TestDataVectorGenerator.prototype.visitStruct = generateStruct;
+TestDataVectorGenerator.prototype.visitUnion = generateUnion;
+TestDataVectorGenerator.prototype.visitDictionary = generateDictionary;
+TestDataVectorGenerator.prototype.visitInterval = generateInterval;
+TestDataVectorGenerator.prototype.visitFixedSizeList = generateFixedSizeList;
+TestDataVectorGenerator.prototype.visitMap = generateMap;
+
+const vectorGenerator = new TestDataVectorGenerator();
+
+const defaultListChild = new Field('list[Int32]', new Int32());
+
+const defaultRecordBatchChildren = () => [
+ new Field('i32', new Int32()),
+ new Field('f32', new Float32()),
+ new Field('dict', new Dictionary(new Utf8(), new Int32()))
+];
+
+const defaultStructChildren = () => [
+ new Field('struct[0]', new Int32()),
+ new Field('struct[1]', new Utf8()),
+ new Field('struct[2]', new List(new Field('list[DateDay]', new DateDay())))
+];
+
+const defaultMapChild = () => [
+ new Field('', new Struct<{ key: Utf8; value: Float32 }>([
+ new Field('key', new Utf8()),
+ new Field('value', new Float32())
+ ]))
+][0];
+
+const defaultUnionChildren = () => [
+ new Field('union[0]', new Float64()),
+ new Field('union[1]', new Dictionary(new Uint32(), new Int32())),
+ new Field('union[2]', new Map_(defaultMapChild()))
+];
+
+export interface GeneratedTable {
+ table: Table;
+ rows: () => any[][];
+ cols: () => any[][];
+ keys: () => number[][];
+ rowBatches: (() => any[][])[];
+ colBatches: (() => any[][])[];
+ keyBatches: (() => number[][])[];
+}
+
+export interface GeneratedRecordBatch {
+ recordBatch: RecordBatch;
+ rows: () => any[][];
+ cols: () => any[][];
+ keys: () => number[][];
+}
+
+export type GeneratedVector<TVec extends Vector = Vector> = {
+ vector: TVec;
+ keys?: number[];
+ values: () => (TVec['TValue'] | null)[];
+};
+
+export const table = (lengths = [100], schema: Schema = new Schema(defaultRecordBatchChildren(), new Map([['foo', 'bar']]))): GeneratedTable => {
+ const generated = lengths.map((length) => recordBatch(length, schema));
+ const rowBatches = generated.map(({ rows }) => rows);
+ const colBatches = generated.map(({ cols }) => cols);
+ const keyBatches = generated.map(({ keys }) => keys);
+ const rows = memoize(() => rowBatches.reduce((rows: any[][], batch) => [...rows, ...batch()], []));
+ const keys = memoize(() => keyBatches.reduce((keys: any[][], batch) => (
+ !keys.length ? batch() : keys.map((idxs, i) => [...(idxs || []), ...(batch()[i] || [])])
+ ), []));
+ const cols = memoize(() => colBatches.reduce((cols: any[][], batch) => (
+ !cols.length ? batch() : cols.map((vals, i) => [...vals, ...batch()[i]])
+ ), []));
+
+ return { rows, cols, keys, rowBatches, colBatches, keyBatches, table: new Table(schema, generated.map(({ recordBatch }) => recordBatch)) };
+};
+
+export const recordBatch = (length = 100, schema: Schema = new Schema(defaultRecordBatchChildren())): GeneratedRecordBatch => {
+
+ const generated = schema.fields.map((f) => vectorGenerator.visit(f.type, length));
+ const vecs = generated.map(({ vector }) => vector);
+
+ const keys = memoize(() => generated.map(({ keys }) => keys));
+ const cols = memoize(() => generated.map(({ values }) => values()));
+ const rows = ((_cols: () => any[][]) => memoize((rows: any[][] = [], cols: any[][] = _cols()) => {
+ for (let i = -1; ++i < length; rows[i] = cols.map((vals) => vals[i]));
+ return rows;
+ }))(cols);
+
+ return { rows, cols, keys, recordBatch: new RecordBatch(schema, length, vecs) };
+};
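+
+// A hedged usage sketch (illustrative; not part of the original source). The
+// returned closures materialize the expected JS values, so tests can compare
+// the generated Arrow data against plain-JS expectations:
+//
+//   const { table: t, rows } = table([10, 20]);
+//   // t.length === 30, rows().length === 30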
+
+export const null_ = (length = 100) => vectorGenerator.visit(new Null(), length);
+export const bool = (length = 100, nullCount = length * 0.2 | 0) => vectorGenerator.visit(new Bool(), length, nullCount);
+export const int8 = (length = 100, nullCount = length * 0.2 | 0) => vectorGenerator.visit(new Int8(), length, nullCount);
+export const int16 = (length = 100, nullCount = length * 0.2 | 0) => vectorGenerator.visit(new Int16(), length, nullCount);
+export const int32 = (length = 100, nullCount = length * 0.2 | 0) => vectorGenerator.visit(new Int32(), length, nullCount);
+export const int64 = (length = 100, nullCount = length * 0.2 | 0) => vectorGenerator.visit(new Int64(), length, nullCount);
+export const uint8 = (length = 100, nullCount = length * 0.2 | 0) => vectorGenerator.visit(new Uint8(), length, nullCount);
+export const uint16 = (length = 100, nullCount = length * 0.2 | 0) => vectorGenerator.visit(new Uint16(), length, nullCount);
+export const uint32 = (length = 100, nullCount = length * 0.2 | 0) => vectorGenerator.visit(new Uint32(), length, nullCount);
+export const uint64 = (length = 100, nullCount = length * 0.2 | 0) => vectorGenerator.visit(new Uint64(), length, nullCount);
+export const float16 = (length = 100, nullCount = length * 0.2 | 0) => vectorGenerator.visit(new Float16(), length, nullCount);
+export const float32 = (length = 100, nullCount = length * 0.2 | 0) => vectorGenerator.visit(new Float32(), length, nullCount);
+export const float64 = (length = 100, nullCount = length * 0.2 | 0) => vectorGenerator.visit(new Float64(), length, nullCount);
+export const utf8 = (length = 100, nullCount = length * 0.2 | 0) => vectorGenerator.visit(new Utf8(), length, nullCount);
+export const binary = (length = 100, nullCount = length * 0.2 | 0) => vectorGenerator.visit(new Binary(), length, nullCount);
+export const fixedSizeBinary = (length = 100, nullCount = length * 0.2 | 0, byteWidth = 8) => vectorGenerator.visit(new FixedSizeBinary(byteWidth), length, nullCount);
+export const dateDay = (length = 100, nullCount = length * 0.2 | 0) => vectorGenerator.visit(new DateDay(), length, nullCount);
+export const dateMillisecond = (length = 100, nullCount = length * 0.2 | 0) => vectorGenerator.visit(new DateMillisecond(), length, nullCount);
+export const timestampSecond = (length = 100, nullCount = length * 0.2 | 0) => vectorGenerator.visit(new TimestampSecond(), length, nullCount);
+export const timestampMillisecond = (length = 100, nullCount = length * 0.2 | 0) => vectorGenerator.visit(new TimestampMillisecond(), length, nullCount);
+export const timestampMicrosecond = (length = 100, nullCount = length * 0.2 | 0) => vectorGenerator.visit(new TimestampMicrosecond(), length, nullCount);
+export const timestampNanosecond = (length = 100, nullCount = length * 0.2 | 0) => vectorGenerator.visit(new TimestampNanosecond(), length, nullCount);
+export const timeSecond = (length = 100, nullCount = length * 0.2 | 0) => vectorGenerator.visit(new TimeSecond(), length, nullCount);
+export const timeMillisecond = (length = 100, nullCount = length * 0.2 | 0) => vectorGenerator.visit(new TimeMillisecond(), length, nullCount);
+export const timeMicrosecond = (length = 100, nullCount = length * 0.2 | 0) => vectorGenerator.visit(new TimeMicrosecond(), length, nullCount);
+export const timeNanosecond = (length = 100, nullCount = length * 0.2 | 0) => vectorGenerator.visit(new TimeNanosecond(), length, nullCount);
+export const decimal = (length = 100, nullCount = length * 0.2 | 0, scale = 2, precision = 9) => vectorGenerator.visit(new Decimal(scale, precision), length, nullCount);
+export const list = (length = 100, nullCount = length * 0.2 | 0, child = defaultListChild) => vectorGenerator.visit(new List(child), length, nullCount);
+export const struct = <T extends { [key: string]: DataType } = any>(length = 100, nullCount = length * 0.2 | 0, children: Field<T[keyof T]>[] = <any> defaultStructChildren()) => vectorGenerator.visit(new Struct<T>(children), length, nullCount);
+export const denseUnion = (length = 100, nullCount = length * 0.2 | 0, children: Field[] = defaultUnionChildren()) => vectorGenerator.visit(new DenseUnion(children.map((f) => f.typeId), children), length, nullCount);
+export const sparseUnion = (length = 100, nullCount = length * 0.2 | 0, children: Field[] = defaultUnionChildren()) => vectorGenerator.visit(new SparseUnion(children.map((f) => f.typeId), children), length, nullCount);
+export const dictionary = <T extends DataType = Utf8, TKey extends TKeys = Int32> (length = 100, nullCount = length * 0.2 | 0, dict: T = <any> new Utf8(), keys: TKey = <any> new Int32()) => vectorGenerator.visit(new Dictionary(dict, keys), length, nullCount);
+export const intervalDayTime = (length = 100, nullCount = length * 0.2 | 0) => vectorGenerator.visit(new IntervalDayTime(), length, nullCount);
+export const intervalYearMonth = (length = 100, nullCount = length * 0.2 | 0) => vectorGenerator.visit(new IntervalYearMonth(), length, nullCount);
+export const fixedSizeList = (length = 100, nullCount = length * 0.2 | 0, listSize = 2, child = defaultListChild) => vectorGenerator.visit(new FixedSizeList(listSize, child), length, nullCount);
+export const map = <TKey extends DataType = any, TValue extends DataType = any>(length = 100, nullCount = length * 0.2 | 0, child: Field<Struct<{key: TKey; value: TValue}>> = <any> defaultMapChild()) => vectorGenerator.visit(new Map_<TKey, TValue>(child), length, nullCount);
+
+export const vecs = {
+ null_, bool, int8, int16, int32, int64, uint8, uint16, uint32, uint64, float16, float32, float64, utf8, binary, fixedSizeBinary, dateDay, dateMillisecond, timestampSecond, timestampMillisecond, timestampMicrosecond, timestampNanosecond, timeSecond, timeMillisecond, timeMicrosecond, timeNanosecond, decimal, list, struct, denseUnion, sparseUnion, dictionary, intervalDayTime, intervalYearMonth, fixedSizeList, map
+} as { [k: string]: (...args: any[]) => any };
+
+function generateNull<T extends Null>(this: TestDataVectorGenerator, type: T, length = 100): GeneratedVector<V<T>> {
+ return { values: () => Array.from({ length }, () => null), vector: Vector.new(Data.Null(type, 0, length)) };
+}
+
+function generateBool<T extends Bool>(this: TestDataVectorGenerator, type: T, length = 100, nullCount = length * 0.2 | 0): GeneratedVector<V<T>> {
+ const data = createBitmap(length, length / 2 | 0);
+ const nullBitmap = createBitmap(length, nullCount);
+ const values = memoize(() => {
+ const values = [] as (boolean | null)[];
+ iterateBitmap(length, nullBitmap, (i, valid) => values[i] = !valid ? null : isValid(data, i));
+ return values;
+ });
+ iterateBitmap(length, nullBitmap, (i, valid) => !valid && (data[i >> 3] &= ~(1 << (i % 8))));
+
+ return { values, vector: Vector.new(Data.Bool(type, 0, length, nullCount, nullBitmap, data)) };
+}
+
+function generateInt<T extends Int>(this: TestDataVectorGenerator, type: T, length = 100, nullCount = length * 0.2 | 0): GeneratedVector<V<T>> {
+ const ArrayType = type.ArrayType;
+ const stride = 1 + Number(type.bitWidth > 32);
+ const nullBitmap = createBitmap(length, nullCount);
+ const data = fillRandom(ArrayType as any, length * stride);
+ const values = memoize(() => {
+ const values = [] as (number | null)[];
+ iterateBitmap(length, nullBitmap, (i, valid) => {
+ values[i] = !valid ? null
+ : stride === 1 ? data[i]
+ : data.subarray(i * stride, (i + 1) * stride);
+ });
+ return values;
+ });
+ iterateBitmap(length, nullBitmap, (i, valid) => !valid && (data.set(new Uint8Array(stride), i * stride)));
+ return { values, vector: Vector.new(Data.Int(type, 0, length, nullCount, nullBitmap, data)) };
+}
+
+function generateFloat<T extends Float>(this: TestDataVectorGenerator, type: T, length = 100, nullCount = length * 0.2 | 0): GeneratedVector<V<T>> {
+ const ArrayType = type.ArrayType;
+ const precision = type.precision;
+ const data = fillRandom(ArrayType as any, length);
+ const nullBitmap = createBitmap(length, nullCount);
+ const values = memoize(() => {
+ const values = [] as (number | null)[];
+ iterateBitmap(length, nullBitmap, (i, valid) => {
+ values[i] = !valid ? null : precision > 0 ? data[i] : util.uint16ToFloat64(data[i]);
+ });
+ return values;
+ });
+ iterateBitmap(length, nullBitmap, (i, valid) => data[i] = !valid ? 0 : data[i] * Math.random());
+ return { values, vector: Vector.new(Data.Float(type, 0, length, nullCount, nullBitmap, data)) };
+}
+
+function generateUtf8<T extends Utf8>(this: TestDataVectorGenerator, type: T, length = 100, nullCount = length * 0.2 | 0): GeneratedVector<V<T>> {
+ const nullBitmap = createBitmap(length, nullCount);
+ const offsets = createVariableWidthOffsets(length, nullBitmap, undefined, undefined, nullCount != 0);
+ const values: string[] = new Array(offsets.length - 1).fill(null);
+ [...offsets.slice(1)]
+ .map((o, i) => isValid(nullBitmap, i) ? o - offsets[i] : null)
+ .reduce((map, length, i) => {
+ if (length !== null) {
+ if (length > 0) {
+ do {
+ values[i] = randomString(length);
+ } while (map.has(values[i]));
+ return map.set(values[i], i);
+ }
+ values[i] = '';
+ }
+ return map;
+ }, new Map<string, number>());
+ const data = createVariableWidthBytes(length, nullBitmap, offsets, (i) => encodeUtf8(values[i]));
+ return { values: () => values, vector: Vector.new(Data.Utf8(type, 0, length, nullCount, nullBitmap, offsets, data)) };
+}
+
+function generateBinary<T extends Binary>(this: TestDataVectorGenerator, type: T, length = 100, nullCount = length * 0.2 | 0): GeneratedVector<V<T>> {
+ const nullBitmap = createBitmap(length, nullCount);
+ const offsets = createVariableWidthOffsets(length, nullBitmap, undefined, undefined, nullCount != 0);
+ const values = [...offsets.slice(1)]
+ .map((o, i) => isValid(nullBitmap, i) ? o - offsets[i] : null)
+ .map((length) => length == null ? null : randomBytes(length));
+ const data = createVariableWidthBytes(length, nullBitmap, offsets, (i) => values[i]!);
+ return { values: () => values, vector: Vector.new(Data.Binary(type, 0, length, nullCount, nullBitmap, offsets, data)) };
+}
+
+function generateFixedSizeBinary<T extends FixedSizeBinary>(this: TestDataVectorGenerator, type: T, length = 100, nullCount = length * 0.2 | 0): GeneratedVector<V<T>> {
+ const nullBitmap = createBitmap(length, nullCount);
+ const data = fillRandom(Uint8Array, length * type.byteWidth);
+ const values = memoize(() => {
+ const values = [] as (Uint8Array | null)[];
+ iterateBitmap(length, nullBitmap, (i, valid) => {
+ values[i] = !valid ? null : data.subarray(i * type.byteWidth, (i + 1) * type.byteWidth);
+ });
+ return values;
+ });
+ iterateBitmap(length, nullBitmap, (i, valid) => !valid && data.set(new Uint8Array(type.byteWidth), i * type.byteWidth));
+ return { values, vector: Vector.new(Data.FixedSizeBinary(type, 0, length, nullCount, nullBitmap, data)) };
+}
+
+function generateDate<T extends Date_>(this: TestDataVectorGenerator, type: T, length = 100, nullCount = length * 0.2 | 0): GeneratedVector<V<T>> {
+ const values = [] as (number | null)[];
+ const nullBitmap = createBitmap(length, nullCount);
+ const data = type.unit === DateUnit.DAY
+ ? createDate32(length, nullBitmap, values)
+ : createDate64(length, nullBitmap, values);
+ return {
+ values: () => values.map((x) => x == null ? null : new Date(x)),
+ vector: Vector.new(Data.Date(type, 0, length, nullCount, nullBitmap, data))
+ };
+}
+
+function generateTimestamp<T extends Timestamp>(this: TestDataVectorGenerator, type: T, length = 100, nullCount = length * 0.2 | 0): GeneratedVector<V<T>> {
+ const values = [] as (number | null)[];
+ const nullBitmap = createBitmap(length, nullCount);
+ const multiple = type.unit === TimeUnit.NANOSECOND ? 1000000000 :
+ type.unit === TimeUnit.MICROSECOND ? 1000000 :
+ type.unit === TimeUnit.MILLISECOND ? 1000 : 1;
+ const data = createTimestamp(length, nullBitmap, multiple, values);
+ return { values: () => values, vector: Vector.new(Data.Timestamp(type, 0, length, nullCount, nullBitmap, data)) };
+}
+
+function generateTime<T extends Time>(this: TestDataVectorGenerator, type: T, length = 100, nullCount = length * 0.2 | 0): GeneratedVector<V<T>> {
+ const values = [] as (Int32Array | number | null)[];
+ const nullBitmap = createBitmap(length, nullCount);
+ const multiple = type.unit === TimeUnit.NANOSECOND ? 1000000000 :
+ type.unit === TimeUnit.MICROSECOND ? 1000000 :
+ type.unit === TimeUnit.MILLISECOND ? 1000 : 1;
+ const data = type.bitWidth === 32
+ ? createTime32(length, nullBitmap, multiple, values as (number | null)[])
+ : createTime64(length, nullBitmap, multiple, values as (Int32Array | null)[]);
+ return { values: () => values, vector: Vector.new(Data.Time(type, 0, length, nullCount, nullBitmap, data)) };
+}
+
+function generateDecimal<T extends Decimal>(this: TestDataVectorGenerator, type: T, length = 100, nullCount = length * 0.2 | 0): GeneratedVector<V<T>> {
+ const data = fillRandom(Uint32Array, length * 4);
+ const nullBitmap = createBitmap(length, nullCount);
+ const view = new DataView(data.buffer, 0, data.byteLength);
+ const values = memoize(() => {
+ const values = [] as (Uint32Array | null)[];
+ iterateBitmap(length, nullBitmap, (i, valid) => {
+ values[i] = !valid ? null : new Uint32Array(data.buffer, 16 * i, 4);
+ });
+ return values;
+ });
+ iterateBitmap(length, nullBitmap, (i, valid) => {
+ if (!valid) {
+ // each Decimal value spans 16 bytes; zero both 8-byte halves of null slots
+ view.setFloat64(16 * i + 0, 0, true);
+ view.setFloat64(16 * i + 8, 0, true);
+ }
+ });
+ return { values, vector: Vector.new(Data.Decimal(type, 0, length, nullCount, nullBitmap, data))};
+}
+
+function generateInterval<T extends Interval>(this: TestDataVectorGenerator, type: T, length = 100, nullCount = length * 0.2 | 0): GeneratedVector<V<T>> {
+ const stride = (1 + type.unit);
+ const nullBitmap = createBitmap(length, nullCount);
+ const data = fillRandom(Int32Array, length * stride);
+ const values = memoize(() => {
+ const values = [] as (Int32Array | null)[];
+ iterateBitmap(length, nullBitmap, (i: number, valid: boolean) => {
+ values[i] = !valid ? null : stride === 2
+ ? new Int32Array(data.buffer, 4 * i * stride, stride)
+ : new Int32Array([data[i] / 12 | 0, data[i] % 12 | 0]);
+ });
+ return values;
+ });
+ iterateBitmap(length, nullBitmap, (i: number, valid: boolean) => {
+ !valid && data.set(new Int32Array(stride), i * stride);
+ });
+ return { values, vector: Vector.new(Data.Interval(type, 0, length, nullCount, nullBitmap, data)) };
+}
+
+function generateList<T extends List>(this: TestDataVectorGenerator, type: T, length = 100, nullCount = length * 0.2 | 0, child = this.visit(type.children[0].type, length * 3, nullCount * 3)): GeneratedVector<V<T>> {
+ const childVec = child.vector;
+ const nullBitmap = createBitmap(length, nullCount);
+ const stride = childVec.length / (length - nullCount);
+ const offsets = createVariableWidthOffsets(length, nullBitmap, childVec.length, stride);
+ const values = memoize(() => {
+ const childValues = child.values();
+ const values: (T['valueType'] | null)[] = [...offsets.slice(1)]
+ .map((offset, i) => isValid(nullBitmap, i) ? offset : null)
+ .map((o, i) => o == null ? null : childValues.slice(offsets[i], o));
+ return values;
+ });
+ return { values, vector: Vector.new(Data.List(type, 0, length, nullCount, nullBitmap, offsets, childVec)) };
+}
+
+function generateFixedSizeList<T extends FixedSizeList>(this: TestDataVectorGenerator, type: T, length = 100, nullCount = length * 0.2 | 0, child = this.visit(type.children[0].type, length * type.listSize, nullCount * type.listSize)): GeneratedVector<V<T>> {
+ const nullBitmap = createBitmap(length, nullCount);
+ const values = memoize(() => {
+ const childValues = child.values();
+ const values = [] as (T['valueType'] | null)[];
+ for (let i = -1, stride = type.listSize; ++i < length;) {
+ values[i] = isValid(nullBitmap, i) ? childValues.slice(i * stride, (i + 1) * stride) : null;
+ }
+ return values;
+ });
+ return { values, vector: Vector.new(Data.FixedSizeList(type, 0, length, nullCount, nullBitmap, child.vector)) };
+}
+
+function generateDictionary<T extends Dictionary>(this: TestDataVectorGenerator, type: T, length = 100, nullCount = length * 0.2 | 0, dictionary = this.visit(type.dictionary, length, 0)): GeneratedVector<V<T>> {
+
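+ // If this Dictionary type already carries a dictionary vector from a previous
+ // batch, append the newly generated values to it, so keys from every batch
+ // resolve against one combined dictionary.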
+ const t = <any> type;
+ const currValues = t.dictionaryValues;
+ const hasDict = t.dictionaryVector && t.dictionaryVector.length > 0;
+ const dict = hasDict ? t.dictionaryVector.concat(dictionary.vector) : dictionary.vector;
+ const vals = hasDict ? (() => [...currValues(), ...dictionary.values()]) : dictionary.values;
+
+ const maxIdx = dict.length - 1;
+ const keys = new t.indices.ArrayType(length);
+ const nullBitmap = createBitmap(length, nullCount);
+
+ const values = memoize(() => {
+ const dict = vals();
+ const values = [] as (T['TValue'] | null)[];
+ iterateBitmap(length, nullBitmap, (i, valid) => {
+ values[i] = !valid ? null : dict[keys[i]];
+ });
+ return values;
+ });
+
+ iterateBitmap(length, nullBitmap, (i, valid) => {
+ keys[i] = !valid ? 0 : rand() * maxIdx | 0;
+ });
+
+ t.dictionaryVector = dict;
+ t.dictionaryValues = vals;
+
+ return { values, keys, vector: Vector.new(Data.Dictionary(type, 0, length, nullCount, nullBitmap, keys, dict)) };
+}
+
+function generateUnion<T extends Union>(this: TestDataVectorGenerator, type: T, length = 100, nullCount = length * 0.2 | 0, children?: GeneratedVector<any>[]): GeneratedVector<V<T>> {
+
+ const numChildren = type.children.length;
+
+ if (!children) {
+ if (type.mode === UnionMode.Sparse) {
+ children = type.children.map((f) => this.visit(f.type, length, nullCount));
+ } else {
+ const childLength = Math.ceil(length / numChildren);
+ const childNullCount = (nullCount / childLength) | 0;
+ children = type.children.map((f) => this.visit(f.type, childLength, childNullCount));
+ }
+ }
+
+ const typeIds = type.typeIds;
+ const typeIdsBuffer = new Int8Array(length);
+ const vecs = children.map(({ vector }) => vector);
+ const cols = children.map(({ values }) => values);
+ const nullBitmap = createBitmap(length, nullCount);
+ const typeIdToChildIndex = typeIds.reduce((map, typeId, idx) => {
+ map[typeId] = idx;
+ return map;
+ }, Object.create(null) as { [key: number]: number });
+
+ if (type.mode === UnionMode.Sparse) {
+ const values = memoize(() => {
+ const values = [] as any[];
+ const childValues = cols.map((x) => x());
+ iterateBitmap(length, nullBitmap, (i, valid) => {
+ values[i] = !valid ? null : childValues[typeIdToChildIndex[typeIdsBuffer[i]]][i];
+ });
+ return values;
+ });
+ iterateBitmap(length, nullBitmap, (i, valid) => {
+ typeIdsBuffer[i] = !valid ? 0 : typeIds[rand() * numChildren | 0];
+ });
+ return { values, vector: Vector.new(Data.Union(type as SparseUnion, 0, length, nullCount, nullBitmap, typeIdsBuffer, vecs)) } as GeneratedVector<V<T>>;
+ }
+
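+ // Dense mode: each slot stores a child type id plus an offset into that
+ // child's vector (the children were generated at ~length / numChildren each).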
+ const offsets = new Int32Array(length);
+ const values = memoize(() => {
+ const values = [] as any[];
+ const childValues = cols.map((x) => x());
+ iterateBitmap(length, nullBitmap, (i, valid) => {
+ values[i] = !valid ? null : childValues[typeIdToChildIndex[typeIdsBuffer[i]]][offsets[i]];
+ });
+ return values;
+ });
+ iterateBitmap(length, nullBitmap, (i, valid) => {
+ if (!valid) {
+ offsets[i] = 0;
+ typeIdsBuffer[i] = 0;
+ } else {
+ const colIdx = rand() * numChildren | 0;
+ offsets[i] = i / numChildren | 0;
+ typeIdsBuffer[i] = typeIds[colIdx];
+ }
+ });
+ return { values, vector: Vector.new(Data.Union(type as DenseUnion, 0, length, nullCount, nullBitmap, typeIdsBuffer, offsets, vecs)) } as GeneratedVector<V<T>>;
+}
+
+function generateStruct<T extends Struct>(this: TestDataVectorGenerator, type: T, length = 100, nullCount = length * 0.2 | 0, children = type.children.map((f) => this.visit(f.type, length, nullCount))): GeneratedVector<V<T>> {
+ const vecs = children.map(({ vector }) => vector);
+ const cols = children.map(({ values }) => values);
+ const nullBitmap = createBitmap(length, nullCount);
+ const values = memoize(() => {
+ const values = [] as any[];
+ const childValues = cols.map((x) => x());
+ const names = type.children.map((f) => f.name);
+ iterateBitmap(length, nullBitmap, (i, valid) => {
+ values[i] = !valid ? null : childValues.reduce((row, col, j) => ({
+ ...row, [names[j]]: col[i]
+ }), {});
+ });
+ return values;
+ });
+ return { values, vector: Vector.new(Data.Struct(type, 0, length, nullCount, nullBitmap, vecs)) };
+}
+
+function generateMap<T extends Map_>(this: TestDataVectorGenerator,
+ type: T, length = 100, nullCount = length * 0.2 | 0,
+ child = this.visit(type.children[0].type, length * 3, 0, [
+ this.visit(type.children[0].type.children[0].type, length * 3, 0),
+ this.visit(type.children[0].type.children[1].type, length * 3, nullCount * 3)
+ ])): GeneratedVector<V<T>> {
+
+ type K = T['keyType']['TValue'];
+ type V = T['valueType']['TValue'];
+
+ const childVec = child.vector;
+ const nullBitmap = createBitmap(length, nullCount);
+ const stride = childVec.length / (length - nullCount);
+ const offsets = createVariableWidthOffsets(length, nullBitmap, childVec.length, stride);
+ const values = memoize(() => {
+ const childValues = child.values() as { key: K; value: V }[];
+ const values: (T['TValue'] | null)[] = [...offsets.slice(1)]
+ .map((offset, i) => isValid(nullBitmap, i) ? offset : null)
+ .map((o, i) => o == null ? null : (() => {
+ const slice = childValues.slice(offsets[i], o);
+ const pairs = slice.map(({ key, value }) => [key, value]);
+ return new Map<K, V>(pairs as any as (readonly [K, V])[]);
+ })());
+ return values;
+ });
+ return { values, vector: Vector.new(Data.Map(type, 0, length, nullCount, nullBitmap, offsets, childVec)) };
+}
+
+type TypedArrayConstructor =
+ (typeof Int8Array) |
+ (typeof Int16Array) |
+ (typeof Int32Array) |
+ (typeof Uint8Array) |
+ (typeof Uint16Array) |
+ (typeof Uint32Array) |
+ (typeof Float32Array) |
+ (typeof Float64Array);
+
+
+const rand = Math.random.bind(Math);
+const randomBytes = (length: number) => fillRandom(Uint8Array, length);
+const randomString = (length: number) => randomatic('?', length, { chars: `abcdefghijklmnopqrstuvwxyz0123456789_` });
+
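+// Computes `fn()` once on first call and caches the result (note: a falsy result would be recomputed).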
+const memoize = (fn: () => any) => ((x?: any) => () => x || (x = fn()))();
+
+const encodeUtf8 = ((encoder) =>
+ encoder.encode.bind(encoder) as (input?: string, options?: { stream?: boolean }) => Uint8Array
+)(new TextEncoder());
+
+function fillRandom<T extends TypedArrayConstructor>(ArrayType: T, length: number) {
+ const BPE = ArrayType.BYTES_PER_ELEMENT;
+ const array = new ArrayType(length);
+ const max = (2 ** (8 * BPE)) - 1;
+ for (let i = -1; ++i < length; array[i] = rand() * max * (rand() > 0.5 ? -1 : 1));
+ return array as InstanceType<T>;
+}
+
+function isValid(bitmap: Uint8Array, i: number) {
+ return (bitmap[i >> 3] & 1 << (i % 8)) !== 0;
+}
+
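+// Invokes `fn(index, valid)` for each of `length` slots, reading the validity bits of `bitmap` LSB-first within each byte.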
+function iterateBitmap(length: number, bitmap: Uint8Array, fn: (index: number, valid: boolean) => any) {
+ let byteIndex = 0, valueIndex = 0;
+ for (let bit = 0; length > 0; bit = 0) {
+ let byte = bitmap[byteIndex++];
+ do {
+ fn(valueIndex++, (byte & 1 << bit) !== 0);
+ } while (--length > 0 && ++bit < 8);
+ }
+}
+
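+// Allocates a validity bitmap padded to 64-bit alignment, then clears `nullCount` distinct, randomly chosen bits.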
+function createBitmap(length: number, nullCount: number) {
+ const nulls = Object.create(null) as { [key: number]: boolean };
+ const bytes = new Uint8Array(((((length + 7) >> 3) + 7) & ~7) || 8).fill(255);
+ for (let i, j = -1; ++j < nullCount;) {
+ while (nulls[i = (rand() * length) | 0]);
+ nulls[i] = true;
+ bytes[i >> 3] &= ~(1 << (i % 8)); // false
+ }
+ return bytes;
+}
+
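+// Builds the Int32 offsets buffer for variable-width vectors: null slots repeat the previous offset, and valid slots advance by a random width below `stride` (never zero when `allowEmpty` is false), clamped to `max`.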
+function createVariableWidthOffsets(length: number, nullBitmap: Uint8Array, max = Infinity, stride = 20, allowEmpty = true) {
+ const offsets = new Int32Array(length + 1);
+ iterateBitmap(length, nullBitmap, (i, valid) => {
+ if (!valid) {
+ offsets[i + 1] = offsets[i];
+ } else {
+ do {
+ offsets[i + 1] = Math.min(max, offsets[i] + (rand() * stride | 0));
+ } while (!allowEmpty && offsets[i + 1] === offsets[i]);
+ }
+ });
+ return offsets;
+}
+
+function createVariableWidthBytes(length: number, nullBitmap: Uint8Array, offsets: Int32Array, getBytes: (index: number) => Uint8Array) {
+ const bytes = new Uint8Array(offsets[length]);
+ iterateBitmap(length, nullBitmap, (i, valid) => {
+ valid && bytes.set(getBytes(i), offsets[i]);
+ });
+ return bytes;
+}
+
+function createDate32(length: number, nullBitmap: Uint8Array, values: (number | null)[] = []) {
+ const data = new Int32Array(length).fill(Date.now() / 86400000 | 0);
+ iterateBitmap(length, nullBitmap, (i, valid) => {
+ if (!valid) {
+ data[i] = 0;
+ values[i] = null;
+ } else {
+ data[i] = data[i] + (rand() * 10000 * (rand() > 0.5 ? -1 : 1)) | 0;
+ values[i] = data[i] * 86400000;
+ }
+ });
+ return data;
+}
+
+function createDate64(length: number, nullBitmap: Uint8Array, values: (number | null)[] = []) {
+ const data = new Int32Array(length * 2).fill(0);
+ const data32 = createDate32(length, nullBitmap, values);
+ iterateBitmap(length, nullBitmap, (i, valid) => {
+ if (valid) {
+ const value = data32[i] * 86400000;
+ const hi = (value / 4294967296) | 0;
+ const lo = (value - 4294967296 * hi) | 0;
+ values[i] = value;
+ data[i * 2 + 0] = lo;
+ data[i * 2 + 1] = hi;
+ }
+ });
+ return data;
+}
+
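+// Derives timestamps from random date32 values, storing each 64-bit result as little-endian [lo, hi] Int32 halves to match Arrow's 64-bit buffer layout.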
+function createTimestamp(length: number, nullBitmap: Uint8Array, multiple: number, values: (number | null)[] = []) {
+ const mult = 86400 * multiple;
+ const data = new Int32Array(length * 2).fill(0);
+ const data32 = createDate32(length, nullBitmap, values);
+ iterateBitmap(length, nullBitmap, (i, valid) => {
+ if (valid) {
+ const value = data32[i] * mult;
+ const hi = (value / 4294967296) | 0;
+ const lo = (value - 4294967296 * hi) | 0;
+ data[i * 2 + 0] = lo;
+ data[i * 2 + 1] = hi;
+ }
+ });
+ return data;
+}
+
+function createTime32(length: number, nullBitmap: Uint8Array, multiple: number, values: (number | null)[] = []) {
+ const data = new Int32Array(length).fill(0);
+ iterateBitmap(length, nullBitmap, (i, valid) => {
+ if (!valid) {
+ data[i] = 0;
+ values[i] = null;
+ } else {
+ values[i] = data[i] = (((1000 * rand()) | 0) * multiple) * (rand() > 0.5 ? -1 : 1);
+ }
+ });
+ return data;
+}
+
+function createTime64(length: number, nullBitmap: Uint8Array, multiple: number, values: (Int32Array | null)[] = []) {
+ const data = new Int32Array(length * 2).fill(0);
+ iterateBitmap(length, nullBitmap, (i, valid) => {
+ if (!valid) {
+ values[i] = null;
+ } else {
+ const value = ((1000 * rand()) | 0) * multiple;
+ const hi = (value / 4294967296) | 0;
+ const lo = (value - 4294967296 * hi) | 0;
+ data[i * 2 + 0] = lo;
+ data[i * 2 + 1] = hi;
+ values[i] = data.subarray(i * 2, (i + 1) * 2);
+ }
+ });
+ return data;
+}
diff --git a/src/arrow/js/test/inference/column.ts b/src/arrow/js/test/inference/column.ts
new file mode 100644
index 000000000..440116b69
--- /dev/null
+++ b/src/arrow/js/test/inference/column.ts
@@ -0,0 +1,62 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+/* eslint-disable jest/no-standalone-expect */
+
+import { Data } from 'apache-arrow/data';
+import { Field } from 'apache-arrow/schema';
+import { Column } from 'apache-arrow/column';
+import { Vector } from 'apache-arrow/vector';
+import { Bool, Int8, Utf8, List, Dictionary, Struct } from 'apache-arrow/type';
+
+const boolType = new Bool();
+const boolVector = Vector.new(Data.Bool(boolType, 0, 10, 0, null, new Uint8Array(2)));
+
+const boolColumn = new Column(new Field('bool', boolType), [
+ Vector.new(Data.Bool(boolType, 0, 10, 0, null, new Uint8Array(2))),
+ Vector.new(Data.Bool(boolType, 0, 10, 0, null, new Uint8Array(2))),
+ Vector.new(Data.Bool(boolType, 0, 10, 0, null, new Uint8Array(2))),
+]);
+
+expect(typeof boolVector.get(0) === 'boolean').toBe(true);
+expect(typeof boolColumn.get(0) === 'boolean').toBe(true);
+
+type IndexSchema = {
+ 0: Int8;
+ 1: Utf8;
+ 2: Dictionary<List<Bool>>;
+};
+
+const structChildFields = [
+ { name: 0, type: new Int8() },
+ { name: 1, type: new Utf8() },
+ { name: 2, type: new Dictionary<List<Bool>>(null!, null!) }
+].map(({ name, type }) => new Field('' + name, type));
+
+const structType = new Struct<IndexSchema>(structChildFields);
+const structVector = Vector.new(Data.Struct(structType, 0, 0, 0, null, []));
+const structColumn = new Column(new Field('struct', structType), [
+ Vector.new(Data.Struct(structType, 0, 0, 0, null, [])),
+ Vector.new(Data.Struct(structType, 0, 0, 0, null, [])),
+ Vector.new(Data.Struct(structType, 0, 0, 0, null, [])),
+]);
+
+const [x1, y1, z1] = structVector.get(0)!;
+const [x2, y2, z2] = structColumn.get(0)!;
+
+console.log(x1, y1, z1);
+console.log(x2, y2, z2);
diff --git a/src/arrow/js/test/inference/nested.ts b/src/arrow/js/test/inference/nested.ts
new file mode 100644
index 000000000..0e3dc95e3
--- /dev/null
+++ b/src/arrow/js/test/inference/nested.ts
@@ -0,0 +1,62 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import { Data } from 'apache-arrow/data';
+import { Field } from 'apache-arrow/schema';
+import { DataType } from 'apache-arrow/type';
+import { Vector, BoolVector } from 'apache-arrow/vector/index';
+import { Bool, Int8, Utf8, List, Dictionary, Struct } from 'apache-arrow/type';
+
+type NamedSchema = { a: Int8; b: Utf8; c: Dictionary<List<Bool>>; [idx: string]: DataType };
+type IndexSchema = { 0: Int8; 1: Utf8; 2: Dictionary<List<Bool>>; [idx: number]: DataType };
+
+checkIndexTypes({ 0: new Int8(), 1: new Utf8(), 2: new Dictionary<List<Bool>>(null!, null!) } as IndexSchema);
+checkNamedTypes({ a: new Int8(), b: new Utf8(), c: new Dictionary<List<Bool>>(null!, null!) } as NamedSchema);
+
+function checkIndexTypes(schema: IndexSchema) {
+
+ const data = Data.Struct(new Struct<IndexSchema>(
+ Object.keys(schema).map((x) => new Field(x, schema[(<any> x)]))
+ ), 0, 0, 0, null, []);
+
+ const row = Vector.new(data).bind(0);
+
+ const check_0 = (x = row[0]) => expect(typeof x === 'number').toBe(true);
+ const check_1 = (x = row[1]) => expect(typeof x === 'string').toBe(true);
+ const check_2 = (x = row[2]) => expect(x instanceof BoolVector).toBe(true);
+
+ check_0(); check_0(row[0]); check_0(row.get(0));
+ check_1(); check_1(row[1]); check_1(row.get(1));
+ check_2(); check_2(row[2]); check_2(row.get(2));
+}
+
+function checkNamedTypes(schema: NamedSchema) {
+
+ const data = Data.Struct(new Struct<NamedSchema>(
+ Object.keys(schema).map((x) => new Field(x, schema[x]))
+ ), 0, 0, 0, null, []);
+
+ const row = Vector.new(data).bind(0);
+
+ const check_a = (x = row.a) => expect(typeof x === 'number').toBe(true);
+ const check_b = (x = row.b) => expect(typeof x === 'string').toBe(true);
+ const check_c = (x = row.c) => expect(x instanceof BoolVector).toBe(true);
+
+ check_a(); check_a(row.a); check_a(row.get('a'));
+ check_b(); check_b(row.b); check_b(row.get('b'));
+ check_c(); check_c(row.c); check_c(row.get('c'));
+}
diff --git a/src/arrow/js/test/inference/visitor/get.ts b/src/arrow/js/test/inference/visitor/get.ts
new file mode 100644
index 000000000..a983d94d1
--- /dev/null
+++ b/src/arrow/js/test/inference/visitor/get.ts
@@ -0,0 +1,56 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import {
+ Data, Vector,
+ Bool, List, Dictionary
+} from '../../Arrow';
+
+import { instance as getVisitor } from 'apache-arrow/visitor/get';
+
+const data_Bool = new Data(new Bool(), 0, 0);
+const data_List_Bool = new Data(new List<Bool>(null as any), 0, 0);
+const data_Dictionary_Bool = new Data(new Dictionary<Bool>(null!, null!), 0, 0);
+const data_Dictionary_List_Bool = new Data(new Dictionary<List<Bool>>(null!, null!), 0, 0);
+
+const boolVec = Vector.new(data_Bool);
+const boolVec_getRaw = boolVec.get(0);
+const boolVec_getVisit = getVisitor.visit(boolVec, 0);
+const boolVec_getFactory = getVisitor.getVisitFn(boolVec)(boolVec, 0);
+const boolVec_getFactoryData = getVisitor.getVisitFn(boolVec.data)(boolVec, 0);
+const boolVec_getFactoryType = getVisitor.getVisitFn(boolVec.type)(boolVec, 0);
+
+const listVec = Vector.new(data_List_Bool);
+const listVec_getRaw = listVec.get(0);
+const listVec_getVisit = getVisitor.visit(listVec, 0);
+const listVec_getFactory = getVisitor.getVisitFn(listVec)(listVec, 0);
+const listVec_getFactoryData = getVisitor.getVisitFn(listVec.data)(listVec, 0);
+const listVec_getFactoryType = getVisitor.getVisitFn(listVec.type)(listVec, 0);
+
+const dictVec = Vector.new(data_Dictionary_Bool);
+const dictVec_getRaw = dictVec.get(0);
+const dictVec_getVisit = getVisitor.visit(dictVec, 0);
+const dictVec_getFactory = getVisitor.getVisitFn(dictVec)(dictVec, 0);
+const dictVec_getFactoryData = getVisitor.getVisitFn(dictVec.data)(dictVec, 0);
+const dictVec_getFactoryType = getVisitor.getVisitFn(dictVec.type)(dictVec, 0);
+
+const dictOfListVec = Vector.new(data_Dictionary_List_Bool);
+const dictOfListVec_getRaw = dictOfListVec.get(0);
+const dictOfListVec_getVisit = getVisitor.visit(dictOfListVec, 0);
+const dictOfListVec_getFactory = getVisitor.getVisitFn(dictOfListVec)(dictOfListVec, 0);
+const dictOfListVec_getFactoryData = getVisitor.getVisitFn(dictOfListVec.data)(dictOfListVec, 0);
+const dictOfListVec_getFactoryType = getVisitor.getVisitFn(dictOfListVec.type)(dictOfListVec, 0);
diff --git a/src/arrow/js/test/jest-extensions.ts b/src/arrow/js/test/jest-extensions.ts
new file mode 100644
index 000000000..6adde0b83
--- /dev/null
+++ b/src/arrow/js/test/jest-extensions.ts
@@ -0,0 +1,162 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import { zip } from 'ix/iterable/zip';
+import { Table, Vector, RecordBatch, Column, util } from './Arrow';
+
+declare global {
+ namespace jest {
+ interface Matchers<R> {
+ toArrowCompare(expected: any): CustomMatcherResult;
+ toEqualTable(expected: Table): CustomMatcherResult;
+ toEqualRecordBatch(expected: RecordBatch): CustomMatcherResult;
+ toEqualVector(expected: Vector | [Vector | null, string?, string?]): CustomMatcherResult;
+ }
+ }
+}
+
+expect.extend({
+ toEqualTable,
+ toEqualVector,
+ toArrowCompare,
+ toEqualRecordBatch
+});
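+
+// Hedged usage sketch (illustrative only):
+//   expect(actualTable).toEqualTable(expectedTable);
+//   expect(actualBatch).toEqualRecordBatch(expectedBatch);
+//   expect([actualVec, 'actual', 'name']).toEqualVector([expectedVec, 'expected', 'name']);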
+
+function format(jest: jest.MatcherUtils, actual: any, expected: any, msg = ' ') {
+ return `${
+ jest.utils.printReceived(actual)
+ }${msg}${
+ jest.utils.printExpected(expected)
+ }`;
+}
+
+function toArrowCompare(this: jest.MatcherUtils, actual: any, expected: any) {
+ if (!util.createElementComparator(expected)(actual)) {
+ return { pass: false, message: () => format(this, actual, expected, ' should equal ') };
+ }
+ return { pass: true, message: () => '' };
+}
+
+function toEqualTable(this: jest.MatcherUtils, actual: Table, expected: Table) {
+ const failures = [] as string[];
+ try { expect(actual).toHaveLength(expected.length); } catch (e) { failures.push(`${e}`); }
+ try { expect(actual.numCols).toEqual(expected.numCols); } catch (e) { failures.push(`${e}`); }
+ try { expect(actual.schema.metadata).toEqual(expected.schema.metadata); } catch (e) { failures.push(`${e}`); }
+ (() => {
+ for (let i = -1, n = actual.numCols; ++i < n;) {
+ const v1 = actual.getColumnAt(i);
+ const v2 = expected.getColumnAt(i);
+ const name = actual.schema.fields[i].name;
+ try {
+ expect([v1, `actual`, name]).toEqualVector([v2, `expected`, name]);
+ } catch (e) { failures.push(`${e}`); }
+ }
+ })();
+ return {
+ pass: failures.length === 0,
+ message: () => failures.join('\n'),
+ };
+}
+
+function toEqualRecordBatch(this: jest.MatcherUtils, actual: RecordBatch, expected: RecordBatch) {
+ const failures = [] as string[];
+ try { expect(actual).toHaveLength(expected.length); } catch (e) { failures.push(`${e}`); }
+ try { expect(actual.numCols).toEqual(expected.numCols); } catch (e) { failures.push(`${e}`); }
+ (() => {
+ for (let i = -1, n = actual.numCols; ++i < n;) {
+ const v1 = actual.getChildAt(i);
+ const v2 = expected.getChildAt(i);
+ const name = actual.schema.fields[i].name;
+ try {
+ expect([v1, `actual`, name]).toEqualVector([v2, `expected`, name]);
+ } catch (e) { failures.push(`${e}`); }
+ }
+ })();
+ return {
+ pass: failures.length === 0,
+ message: () => failures.join('\n'),
+ };
+}
+
+function toEqualVector<
+ TActual extends Vector | [Vector | null, string?, string?],
+ TExpected extends Vector | [Vector | null, string?]
+>(this: jest.MatcherUtils, actual: TActual, expected: TExpected) {
+
+ let [v1, format1 = '', columnName = ''] = Array.isArray(actual) ? actual : [actual];
+ let [v2, format2 = ''] = Array.isArray(expected) ? expected : [expected];
+
+ if (v1 instanceof Column && columnName === '') { columnName = v1.name; }
+
+ if (v1 == null || v2 == null) {
+ return {
+ pass: false,
+ message: () => [
+ [columnName, `(${format(this, format1, format2, ' !== ')})`].filter(Boolean).join(':'),
+ `${v1 == null ? 'actual' : 'expected'} is null`
+ ].join('\n')
+ };
+ }
+
+ let getFailures = new Array<string>();
+ let propsFailures = new Array<string>();
+ let iteratorFailures = new Array<string>();
+ let allFailures = [
+ { title: 'get', failures: getFailures },
+ { title: 'props', failures: propsFailures },
+ { title: 'iterator', failures: iteratorFailures }
+ ];
+
+ let props: (keyof Vector)[] = ['type', 'length', 'nullCount'];
+
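+    // pass 1: compare summary props by their string representations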
+ (() => {
+ for (let i = -1, n = props.length; ++i < n;) {
+ const prop = props[i];
+ if (`${v1[prop]}` !== `${v2[prop]}`) {
+ propsFailures.push(`${prop}: ${format(this, v1[prop], v2[prop], ' !== ')}`);
+ }
+ }
+ })();
+
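+    // pass 2: element-wise comparison via get(i)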
+ (() => {
+ for (let i = -1, n = v1.length; ++i < n;) {
+ let x1 = v1.get(i), x2 = v2.get(i);
+ if (!util.createElementComparator(x2)(x1)) {
+ getFailures.push(`${i}: ${format(this, x1, x2, ' !== ')}`);
+ }
+ }
+ })();
+
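+    // pass 3: element-wise comparison via the iterator protocol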
+ (() => {
+ let i = -1;
+ for (let [x1, x2] of zip(v1, v2)) {
+ ++i;
+ if (!util.createElementComparator(x2)(x1)) {
+ iteratorFailures.push(`${i}: ${format(this, x1, x2, ' !== ')}`);
+ }
+ }
+ })();
+
+ return {
+ pass: allFailures.every(({ failures }) => failures.length === 0),
+ message: () => [
+ [columnName, `(${format(this, format1, format2, ' !== ')})`].filter(Boolean).join(':'),
+ ...allFailures.map(({ failures, title }) =>
+ !failures.length ? `` : [`${title}:`, ...failures].join(`\n`))
+ ].join('\n')
+ };
+}
diff --git a/src/arrow/js/test/tsconfig.json b/src/arrow/js/test/tsconfig.json
new file mode 100644
index 000000000..8cf2e7e7b
--- /dev/null
+++ b/src/arrow/js/test/tsconfig.json
@@ -0,0 +1,24 @@
+{
+ "extends": "../tsconfig.json",
+ "include": ["../src/**/*.ts", "../test/**/*.ts"],
+ "compilerOptions": {
+ "target": "esnext",
+ "module": "es2020",
+ "allowJs": true,
+ "declaration": false,
+ "declarationMap": false,
+ "importHelpers": false,
+ "noEmit": true,
+ "noEmitHelpers": false,
+ "noEmitOnError": false,
+ "sourceMap": true,
+ "inlineSources": false,
+ "inlineSourceMap": false,
+ "downlevelIteration": false,
+ "baseUrl": "../",
+ "paths": {
+ "apache-arrow": ["src/Arrow.node"],
+ "apache-arrow/*": ["src/*"]
+ }
+ }
+}
diff --git a/src/arrow/js/test/tsconfig/tsconfig.apache-arrow.json b/src/arrow/js/test/tsconfig/tsconfig.apache-arrow.json
new file mode 100644
index 000000000..161374e02
--- /dev/null
+++ b/src/arrow/js/test/tsconfig/tsconfig.apache-arrow.json
@@ -0,0 +1,8 @@
+// TypeScript configuration for the apache-arrow target's tests
+{
+ "extends": "./tsconfig.base.json",
+ "compilerOptions": {
+ "target": "esnext",
+ "module": "commonjs"
+ }
+}
diff --git a/src/arrow/js/test/tsconfig/tsconfig.base.json b/src/arrow/js/test/tsconfig/tsconfig.base.json
new file mode 100644
index 000000000..fcae71fb4
--- /dev/null
+++ b/src/arrow/js/test/tsconfig/tsconfig.base.json
@@ -0,0 +1,26 @@
+// Base TypeScript configuration for all targets' tests
+{
+ "extends": "../../tsconfig/tsconfig.base.json",
+ "compilerOptions": {
+ "target": "esnext",
+ "module": "commonjs",
+ "allowJs": true,
+ "declaration": false,
+ "importHelpers": false,
+ "noEmit": false,
+ "noEmitHelpers": false,
+ "noEmitOnError": false,
+ "sourceMap": true,
+ "inlineSources": false,
+ "inlineSourceMap": false,
+ "downlevelIteration": false,
+ "esModuleInterop": true,
+ "baseUrl": "../../",
+ "paths": {
+ "apache-arrow": ["src/Arrow.node"],
+ "apache-arrow/*": ["src/*"]
+ }
+ },
+ "exclude": ["../../node_modules"],
+ "include": ["../../src/**/*.ts"]
+}
diff --git a/src/arrow/js/test/tsconfig/tsconfig.coverage.json b/src/arrow/js/test/tsconfig/tsconfig.coverage.json
new file mode 100644
index 000000000..e903aa1e5
--- /dev/null
+++ b/src/arrow/js/test/tsconfig/tsconfig.coverage.json
@@ -0,0 +1,6 @@
+{
+ "extends": "./tsconfig.json",
+ "compilerOptions": {
+ "target": "esnext"
+ }
+}
diff --git a/src/arrow/js/test/tsconfig/tsconfig.es2015.cjs.json b/src/arrow/js/test/tsconfig/tsconfig.es2015.cjs.json
new file mode 100644
index 000000000..ed600bc24
--- /dev/null
+++ b/src/arrow/js/test/tsconfig/tsconfig.es2015.cjs.json
@@ -0,0 +1,8 @@
+// TypeScript configuration for the ES2015 CommonJS target's tests
+{
+ "extends": "./tsconfig.base.json",
+ "compilerOptions": {
+ "target": "esnext",
+ "module": "commonjs"
+ }
+}
diff --git a/src/arrow/js/test/tsconfig/tsconfig.es2015.esm.json b/src/arrow/js/test/tsconfig/tsconfig.es2015.esm.json
new file mode 100644
index 000000000..a030beba7
--- /dev/null
+++ b/src/arrow/js/test/tsconfig/tsconfig.es2015.esm.json
@@ -0,0 +1,8 @@
+// TypeScript configuration for the ES2015 ESModules target's tests
+{
+ "extends": "./tsconfig.base.json",
+ "compilerOptions": {
+ "target": "esnext",
+ "module": "es2020"
+ }
+}
diff --git a/src/arrow/js/test/tsconfig/tsconfig.es2015.umd.json b/src/arrow/js/test/tsconfig/tsconfig.es2015.umd.json
new file mode 100644
index 000000000..3e4de6f3c
--- /dev/null
+++ b/src/arrow/js/test/tsconfig/tsconfig.es2015.umd.json
@@ -0,0 +1,11 @@
+// TypeScript configuration for the ES2015 Closure Compiler target's tests
+{
+ "extends": "./tsconfig.base.json",
+ "compilerOptions": {
+ "target": "esnext",
+ "module": "umd",
+ "declaration": false,
+ "noEmitHelpers": true,
+ "importHelpers": true
+ }
+}
diff --git a/src/arrow/js/test/tsconfig/tsconfig.es5.cjs.json b/src/arrow/js/test/tsconfig/tsconfig.es5.cjs.json
new file mode 100644
index 000000000..edcd69773
--- /dev/null
+++ b/src/arrow/js/test/tsconfig/tsconfig.es5.cjs.json
@@ -0,0 +1,9 @@
+// TypeScript configuration for the ES5 CommonJS target's tests
+{
+ "extends": "./tsconfig.base.json",
+ "compilerOptions": {
+ "target": "esnext",
+ "module": "commonjs",
+ "downlevelIteration": true
+ }
+}
diff --git a/src/arrow/js/test/tsconfig/tsconfig.es5.esm.json b/src/arrow/js/test/tsconfig/tsconfig.es5.esm.json
new file mode 100644
index 000000000..01af8fabd
--- /dev/null
+++ b/src/arrow/js/test/tsconfig/tsconfig.es5.esm.json
@@ -0,0 +1,9 @@
+// TypeScript configuration for the ES5 ESModules target's tests
+{
+ "extends": "./tsconfig.base.json",
+ "compilerOptions": {
+ "target": "esnext",
+ "module": "es2020",
+ "downlevelIteration": true
+ }
+}
diff --git a/src/arrow/js/test/tsconfig/tsconfig.es5.umd.json b/src/arrow/js/test/tsconfig/tsconfig.es5.umd.json
new file mode 100644
index 000000000..445ec8809
--- /dev/null
+++ b/src/arrow/js/test/tsconfig/tsconfig.es5.umd.json
@@ -0,0 +1,12 @@
+// TypeScript configuration for the ES5 Closure Compiler target's tests
+{
+ "extends": "./tsconfig.base.json",
+ "compilerOptions": {
+ "target": "esnext",
+ "module": "umd",
+ "declaration": false,
+ "noEmitHelpers": true,
+ "importHelpers": true,
+ "downlevelIteration": true
+ }
+}
diff --git a/src/arrow/js/test/tsconfig/tsconfig.esnext.cjs.json b/src/arrow/js/test/tsconfig/tsconfig.esnext.cjs.json
new file mode 100644
index 000000000..6f21fd56c
--- /dev/null
+++ b/src/arrow/js/test/tsconfig/tsconfig.esnext.cjs.json
@@ -0,0 +1,8 @@
+// TypeScript configuration for the ESNext CommonJS target's tests
+{
+ "extends": "./tsconfig.base.json",
+ "compilerOptions": {
+ "target": "esnext",
+ "module": "commonjs"
+ }
+}
diff --git a/src/arrow/js/test/tsconfig/tsconfig.esnext.esm.json b/src/arrow/js/test/tsconfig/tsconfig.esnext.esm.json
new file mode 100644
index 000000000..3a9c27745
--- /dev/null
+++ b/src/arrow/js/test/tsconfig/tsconfig.esnext.esm.json
@@ -0,0 +1,8 @@
+// TypeScript configuration for the ESNext ESModules target's tests
+{
+ "extends": "./tsconfig.base.json",
+ "compilerOptions": {
+ "target": "esnext",
+ "module": "es2020"
+ }
+}
diff --git a/src/arrow/js/test/tsconfig/tsconfig.esnext.umd.json b/src/arrow/js/test/tsconfig/tsconfig.esnext.umd.json
new file mode 100644
index 000000000..baccc6994
--- /dev/null
+++ b/src/arrow/js/test/tsconfig/tsconfig.esnext.umd.json
@@ -0,0 +1,11 @@
+// TypeScript configuration for the ESNext Closure Compiler target's tests
+{
+ "extends": "./tsconfig.base.json",
+ "compilerOptions": {
+ "target": "esnext",
+ "module": "umd",
+ "declaration": false,
+ "noEmitHelpers": true,
+ "importHelpers": true
+ }
+}
diff --git a/src/arrow/js/test/tsconfig/tsconfig.src.json b/src/arrow/js/test/tsconfig/tsconfig.src.json
new file mode 100644
index 000000000..5413898f7
--- /dev/null
+++ b/src/arrow/js/test/tsconfig/tsconfig.src.json
@@ -0,0 +1,8 @@
+// TypeScript configuration for the source target's tests
+{
+ "extends": "./tsconfig.base.json",
+ "compilerOptions": {
+ "target": "esnext",
+ "module": "es2020"
+ }
+}
diff --git a/src/arrow/js/test/tsconfig/tsconfig.ts.json b/src/arrow/js/test/tsconfig/tsconfig.ts.json
new file mode 100644
index 000000000..1e053698e
--- /dev/null
+++ b/src/arrow/js/test/tsconfig/tsconfig.ts.json
@@ -0,0 +1,8 @@
+// TypeScript configuration for the TypeScript target's tests
+{
+ "extends": "./tsconfig.base.json",
+ "compilerOptions": {
+ "target": "esnext",
+ "module": "es2020"
+ }
+}
diff --git a/src/arrow/js/test/unit/bit-tests.ts b/src/arrow/js/test/unit/bit-tests.ts
new file mode 100644
index 000000000..cdfb37c16
--- /dev/null
+++ b/src/arrow/js/test/unit/bit-tests.ts
@@ -0,0 +1,41 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import * as Arrow from 'apache-arrow';
+const { BitIterator, getBool } = Arrow.util;
+
+describe('Bits', () => {
+ test('BitIterator produces correct bits for single byte', () => {
+ const byte = new Uint8Array([0b11110000]);
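+        // BitIterator yields bits least-significant-bit first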
+ expect([...new BitIterator(byte, 0, 8, null, getBool)]).toEqual(
+ [false, false, false, false, true, true, true, true]);
+
+ expect([...new BitIterator(byte, 2, 5, null, getBool)]).toEqual(
+ [false, false, true, true, true]);
+ });
+
+ test('BitIterator produces correct bits for multiple bytes', () => {
+ const byte = new Uint8Array([0b11110000, 0b10101010]);
+ expect([...new BitIterator(byte, 0, 16, null, getBool)]).toEqual(
+ [false, false, false, false, true, true, true, true,
+ false, true, false, true, false, true, false, true]);
+
+ expect([...new BitIterator(byte, 2, 11, null, getBool)]).toEqual(
+ [false, false, true, true, true, true,
+ false, true, false, true, false]);
+ });
+});
diff --git a/src/arrow/js/test/unit/builders/builder-tests.ts b/src/arrow/js/test/unit/builders/builder-tests.ts
new file mode 100644
index 000000000..b6fa60271
--- /dev/null
+++ b/src/arrow/js/test/unit/builders/builder-tests.ts
@@ -0,0 +1,268 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import '../../jest-extensions';
+import { from, fromDOMStream, toArray } from 'ix/asynciterable';
+import { fromNodeStream } from 'ix/asynciterable/fromnodestream';
+import { validateVector } from './utils';
+import * as generate from '../../generate-test-data';
+import { Type, DataType, Chunked, util, Builder, UnionVector } from 'apache-arrow';
+
+const testDOMStreams = process.env.TEST_DOM_STREAMS === 'true';
+const testNodeStreams = process.env.TEST_NODE_STREAMS === 'true';
+
+describe('Generated Test Data', () => {
+ describe('NullBuilder', () => { validateBuilder(generate.null_); });
+ describe('BoolBuilder', () => { validateBuilder(generate.bool); });
+ describe('Int8Builder', () => { validateBuilder(generate.int8); });
+ describe('Int16Builder', () => { validateBuilder(generate.int16); });
+ describe('Int32Builder', () => { validateBuilder(generate.int32); });
+ describe('Int64Builder', () => { validateBuilder(generate.int64); });
+ describe('Uint8Builder', () => { validateBuilder(generate.uint8); });
+ describe('Uint16Builder', () => { validateBuilder(generate.uint16); });
+ describe('Uint32Builder', () => { validateBuilder(generate.uint32); });
+ describe('Uint64Builder', () => { validateBuilder(generate.uint64); });
+ describe('Float16Builder', () => { validateBuilder(generate.float16); });
+ describe('Float32Builder', () => { validateBuilder(generate.float32); });
+ describe('Float64Builder', () => { validateBuilder(generate.float64); });
+ describe('Utf8Builder', () => { validateBuilder(generate.utf8); });
+ describe('BinaryBuilder', () => { validateBuilder(generate.binary); });
+ describe('FixedSizeBinaryBuilder', () => { validateBuilder(generate.fixedSizeBinary); });
+ describe('DateDayBuilder', () => { validateBuilder(generate.dateDay); });
+ describe('DateMillisecondBuilder', () => { validateBuilder(generate.dateMillisecond); });
+ describe('TimestampSecondBuilder', () => { validateBuilder(generate.timestampSecond); });
+ describe('TimestampMillisecondBuilder', () => { validateBuilder(generate.timestampMillisecond); });
+ describe('TimestampMicrosecondBuilder', () => { validateBuilder(generate.timestampMicrosecond); });
+ describe('TimestampNanosecondBuilder', () => { validateBuilder(generate.timestampNanosecond); });
+ describe('TimeSecondBuilder', () => { validateBuilder(generate.timeSecond); });
+ describe('TimeMillisecondBuilder', () => { validateBuilder(generate.timeMillisecond); });
+ describe('TimeMicrosecondBuilder', () => { validateBuilder(generate.timeMicrosecond); });
+ describe('TimeNanosecondBuilder', () => { validateBuilder(generate.timeNanosecond); });
+ describe('DecimalBuilder', () => { validateBuilder(generate.decimal); });
+ describe('ListBuilder', () => { validateBuilder(generate.list); });
+ describe('StructBuilder', () => { validateBuilder(generate.struct); });
+ describe('DenseUnionBuilder', () => { validateBuilder(generate.denseUnion); });
+ describe('SparseUnionBuilder', () => { validateBuilder(generate.sparseUnion); });
+ describe('DictionaryBuilder', () => { validateBuilder(generate.dictionary); });
+ describe('IntervalDayTimeBuilder', () => { validateBuilder(generate.intervalDayTime); });
+ describe('IntervalYearMonthBuilder', () => { validateBuilder(generate.intervalYearMonth); });
+ describe('FixedSizeListBuilder', () => { validateBuilder(generate.fixedSizeList); });
+ describe('MapBuilder', () => { validateBuilder(generate.map); });
+});
+
+function validateBuilder(generate: (length?: number, nullCount?: number, ...args: any[]) => generate.GeneratedVector) {
+
+ const type = generate(0, 0).vector.type;
+
+    validateBuilderWithNullValues(`no nulls`, [], generate(100, 0));
+    validateBuilderWithNullValues(`with nulls`, [null], generate(100));
+    if (DataType.isUtf8(type)) {
+        validateBuilderWithNullValues(`with \\0`, ['\0'], generate(100));
+        validateBuilderWithNullValues(`with n/a`, ['n/a'], generate(100));
+    } else if (DataType.isFloat(type)) {
+        validateBuilderWithNullValues(`with NaNs`, [NaN], generate(100));
+    } else if (DataType.isInt(type)) {
+        validateBuilderWithNullValues(`with MAX_INT`, [
+            type.bitWidth < 64 ? 0x7fffffff :
+                new Uint32Array([0x7fffffff, 0x7fffffff])], generate(100));
+    }
+}
+
+const countQueueingStrategy = { highWaterMark: 10 };
+const byteLengthQueueingStrategy = { highWaterMark: 64 };
+
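+// Union vectors can't infer which child a value belongs to, so the builder
+// options below supply a valueToChildTypeId that replays the typeIds recorded
+// on the generated vector, tracking the running offset as each chunk flushes.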
+const unionValueToChildTypeId = <T extends DataType = any>({ typeIds }: UnionVector) => {
+    let lastChunkLength = 0, chunksLength = 0;
+    return (builder: Builder<T>, _value: any, index: number) => {
+        if (index === 0) {
+            chunksLength += lastChunkLength;
+        }
+        lastChunkLength = builder.length + 1;
+        return typeIds[chunksLength + index];
+    };
+};
+
+const iterableBuilderOptions = <T extends DataType = any>({ vector }: generate.GeneratedVector, { type, ...opts }: BuilderOptions<T>) => ({
+    ...opts, type,
+    valueToChildTypeId: !DataType.isUnion(type) ? undefined : unionValueToChildTypeId<T>(vector as UnionVector)
+});
+
+const domStreamBuilderOptions = <T extends DataType = any>({ vector }: generate.GeneratedVector, { type, queueingStrategy, ...opts }: Partial<BuilderTransformOptions<T>>) => ({
+    ...opts, type,
+    valueToChildTypeId: !DataType.isUnion(type) ? undefined : unionValueToChildTypeId<T>(vector as UnionVector),
+    queueingStrategy,
+    readableStrategy: queueingStrategy === 'bytes' ? byteLengthQueueingStrategy : countQueueingStrategy,
+    writableStrategy: queueingStrategy === 'bytes' ? byteLengthQueueingStrategy : countQueueingStrategy,
+});
+
+const nodeStreamBuilderOptions = <T extends DataType = any>({ vector }: generate.GeneratedVector, { type, queueingStrategy, ...opts }: Partial<BuilderDuplexOptions<T>>) => ({
+    ...opts, type,
+    valueToChildTypeId: !DataType.isUnion(type) ? undefined : unionValueToChildTypeId<T>(vector as UnionVector),
+    queueingStrategy,
+    highWaterMark: queueingStrategy === 'bytes' ? 64 : 10
+});
+
+function validateBuilderWithNullValues(suiteName: string, nullValues: any[], generated: generate.GeneratedVector) {
+
+ const type = generated.vector.type;
+ const referenceNullValues = nullValues.slice();
+ const originalValues = generated.values().slice();
+ const typeName = Type[type.typeId].toLowerCase();
+
+ let values: any[];
+ const opts: any = { type, nullValues };
+
+ if (DataType.isNull(type) || (nullValues.length === 1 && nullValues[0] === null)) {
+ values = originalValues.slice();
+ } else if (nullValues.length > 0) {
+ values = fillNA(originalValues, nullValues);
+ } else {
+ values = fillNADefault(originalValues, [originalValues.find((x) => x !== null)]);
+ }
+
+ if (DataType.isInt(type) && type.bitWidth === 64 && ArrayBuffer.isView(nullValues[0])) {
+ referenceNullValues[0] = util.BN.new<any>(nullValues[0])[Symbol.toPrimitive]('default');
+ }
+
+ describe(suiteName, () => {
+ it(`encodes ${typeName} single`, async () => {
+ const opts_ = iterableBuilderOptions(generated, { ...opts });
+ const vector = await encodeSingle(values.slice(), opts_);
+ validateVector(values, vector, referenceNullValues);
+ });
+ it(`encodes ${typeName} chunks by count`, async () => {
+ const highWaterMark = Math.max(5, (Math.random() * values.length - 5) | 0);
+ const opts_ = iterableBuilderOptions(generated, { ...opts, highWaterMark, queueingStrategy: 'count' });
+ const vector = await encodeChunks(values.slice(), opts_);
+ validateVector(values, vector, referenceNullValues);
+ });
+ it(`encodes ${typeName} chunks by bytes`, async () => {
+ const highWaterMark = 64;
+ const opts_ = iterableBuilderOptions(generated, { ...opts, highWaterMark, queueingStrategy: 'bytes' });
+ const vector = await encodeChunks(values.slice(), opts_);
+ validateVector(values, vector, referenceNullValues);
+ });
+ if (testDOMStreams) {
+ it(`encodes ${typeName} chunks from a DOM stream by count`, async () => {
+ const opts_ = domStreamBuilderOptions(generated, { ...opts, queueingStrategy: 'count' });
+ const vector = await encodeChunksDOM(values.slice(), opts_);
+ validateVector(values, vector, referenceNullValues);
+ });
+ it(`encodes ${typeName} chunks from a DOM stream by bytes`, async () => {
+ const opts_ = domStreamBuilderOptions(generated, { ...opts, queueingStrategy: 'bytes' });
+ const vector = await encodeChunksDOM(values.slice(), opts_);
+ validateVector(values, vector, referenceNullValues);
+ });
+ }
+ if (testNodeStreams) {
+ it(`encodes ${typeName} chunks from a Node stream by count`, async () => {
+ const opts_ = nodeStreamBuilderOptions(generated, { ...opts, queueingStrategy: 'count' });
+ const vector = await encodeChunksNode(values.slice(), opts_);
+ validateVector(values, vector, referenceNullValues);
+ });
+ it(`encodes ${typeName} chunks from a Node stream by bytes`, async () => {
+ const opts_ = nodeStreamBuilderOptions(generated, { ...opts, queueingStrategy: 'bytes' });
+ const vector = await encodeChunksNode(values.slice(), opts_);
+ validateVector(values, vector, referenceNullValues);
+ });
+ }
+ });
+}
+
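+// replaces each null with a value drawn at random from the given substitutes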
+function fillNA(values: any[], nulls: any[]): any[] {
+ const n = nulls.length - 1;
+ return values.map((x) => {
+ if (x === null) {
+ return nulls[Math.round(n * Math.random())];
+ }
+ return x;
+ });
+}
+
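+// like fillNA, but recurses into nested arrays, substituting each array's
+// first non-null value (or 0) for its nulls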
+function fillNADefault(values: any[], nulls: any[]): any[] {
+ const n = nulls.length - 1;
+ return values.map((x) => {
+ if (x === null) {
+ return nulls[Math.round(n * Math.random())];
+ } else if (Array.isArray(x) && x.length > 0) {
+ let defaultValue = x.find((y) => y !== null);
+ if (defaultValue === undefined) { defaultValue = 0; }
+ return fillNADefault(x, [defaultValue]);
+ }
+ return x;
+ });
+}
+
+type BuilderOptions<T extends DataType = any, TNull = any> = import('apache-arrow/builder').BuilderOptions<T, TNull>;
+type BuilderDuplexOptions<T extends DataType = any, TNull = any> = import('apache-arrow/io/node/builder').BuilderDuplexOptions<T, TNull>;
+type BuilderTransformOptions<T extends DataType = any, TNull = any> = import('apache-arrow/io/whatwg/builder').BuilderTransformOptions<T, TNull>;
+
+async function encodeSingle<T extends DataType, TNull = any>(values: (T['TValue'] | TNull)[], options: BuilderOptions<T, TNull>) {
+ const builder = Builder.new(options);
+ values.forEach((x) => builder.append(x));
+ return builder.finish().toVector();
+}
+
+async function encodeChunks<T extends DataType, TNull = any>(values: (T['TValue'] | TNull)[], options: BuilderOptions<T, TNull>) {
+ return Chunked.concat(...Builder.throughIterable(options)(values));
+}
+
+async function encodeChunksDOM<T extends DataType, TNull = any>(values: (T['TValue'] | TNull)[], options: BuilderTransformOptions<T, TNull>) {
+
+ const stream = from(values).toDOMStream()
+ .pipeThrough(Builder.throughDOM(options));
+
+ const chunks = await fromDOMStream(stream).pipe(toArray);
+
+ return Chunked.concat(...chunks);
+}
+
+async function encodeChunksNode<T extends DataType, TNull = any>(values: (T['TValue'] | TNull)[], options: BuilderDuplexOptions<T, TNull>) {
+
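+    // Node object-mode streams treat a pushed null as end-of-stream, so
+    // undefined stands in for null both in the values and in nullValues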
+ if (options.nullValues) {
+ options.nullValues = [...options.nullValues, undefined] as TNull[];
+ }
+
+ const stream = from(fillNA(values, [undefined]))
+ .toNodeStream({ objectMode: true })
+ .pipe(Builder.throughNode(options));
+
+ const chunks: any[] = await fromNodeStream(stream, options.highWaterMark).pipe(toArray);
+
+ return Chunked.concat(...chunks);
+}
diff --git a/src/arrow/js/test/unit/builders/date-tests.ts b/src/arrow/js/test/unit/builders/date-tests.ts
new file mode 100644
index 000000000..5a9cc092b
--- /dev/null
+++ b/src/arrow/js/test/unit/builders/date-tests.ts
@@ -0,0 +1,106 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import { validateVector } from './utils';
+import { Vector, DateDay, DateMillisecond } from 'apache-arrow';
+import {
+ encodeAll,
+ encodeEach,
+ encodeEachDOM,
+ encodeEachNode,
+ date32sNoNulls,
+ date64sNoNulls,
+ date32sWithNulls,
+ date64sWithNulls
+} from './utils';
+
+const testDOMStreams = process.env.TEST_DOM_STREAMS === 'true';
+const testNodeStreams = process.env.TEST_NODE_STREAMS === 'true';
+
+describe('DateDayBuilder', () => {
+ runTestsWithEncoder('encodeAll', encodeAll(() => new DateDay()));
+ runTestsWithEncoder('encodeEach: 5', encodeEach(() => new DateDay(), 5));
+ runTestsWithEncoder('encodeEach: 25', encodeEach(() => new DateDay(), 25));
+ runTestsWithEncoder('encodeEach: undefined', encodeEach(() => new DateDay()));
+ testDOMStreams && runTestsWithEncoder('encodeEachDOM: 25', encodeEachDOM(() => new DateDay(), 25));
+ testNodeStreams && runTestsWithEncoder('encodeEachNode: 25', encodeEachNode(() => new DateDay(), 25));
+
+ function runTestsWithEncoder(name: string, encode: (vals: (Date | null)[], nullVals?: any[]) => Promise<Vector<DateDay>>) {
+        describe(`${name}`, () => {
+ it(`encodes dates no nulls`, async () => {
+ const vals = date32sNoNulls(20);
+ validateVector(vals, await encode(vals, []), []);
+ });
+ it(`encodes dates with nulls`, async () => {
+ const vals = date32sWithNulls(20);
+ validateVector(vals, await encode(vals, [null]), [null]);
+ });
+ });
+ }
+});
+
+describe('DateMillisecondBuilder', () => {
+ runTestsWithEncoder('encodeAll', encodeAll(() => new DateMillisecond()));
+ runTestsWithEncoder('encodeEach: 5', encodeEach(() => new DateMillisecond(), 5));
+ runTestsWithEncoder('encodeEach: 25', encodeEach(() => new DateMillisecond(), 25));
+ runTestsWithEncoder('encodeEach: undefined', encodeEach(() => new DateMillisecond()));
+ testDOMStreams && runTestsWithEncoder('encodeEachDOM: 25', encodeEachDOM(() => new DateMillisecond(), 25));
+ testNodeStreams && runTestsWithEncoder('encodeEachNode: 25', encodeEachNode(() => new DateMillisecond(), 25));
+
+ function runTestsWithEncoder(name: string, encode: (vals: (Date | null)[], nullVals?: any[]) => Promise<Vector<DateMillisecond>>) {
+        describe(`${name}`, () => {
+ it(`encodes dates no nulls`, async () => {
+ const vals = date64sNoNulls(20);
+ validateVector(vals, await encode(vals, []), []);
+ });
+ it(`encodes dates with nulls`, async () => {
+ const vals = date64sWithNulls(20);
+ validateVector(vals, await encode(vals, [null]), [null]);
+ });
+ });
+ }
+});
+
+describe('DateMillisecondBuilder with nulls', () => {
+ const encode = encodeAll(() => new DateMillisecond());
+ const dates = [
+ null,
+ '2019-03-19T13:40:14.746Z',
+ '2019-03-06T21:12:50.912Z',
+ '2019-03-22T12:50:56.854Z',
+ '2019-02-25T03:34:30.916Z',
+ null,
+ null,
+ null,
+ null,
+ null,
+ null,
+ '2019-03-18T18:12:37.293Z',
+ '2019-03-26T21:58:35.307Z',
+ '2019-04-02T03:03:46.464Z',
+ '2019-03-24T18:45:25.763Z',
+ null,
+ '2019-03-19T01:10:59.189Z',
+ '2019-03-10T21:15:32.237Z',
+ '2019-03-21T07:25:34.864Z',
+ null
+ ].map((x) => x === null ? x : new Date(x));
+ it(`encodes dates with nulls`, async () => {
+ const vals = dates.slice();
+ validateVector(vals, await encode(vals, [null]), [null]);
+ });
+});
diff --git a/src/arrow/js/test/unit/builders/dictionary-tests.ts b/src/arrow/js/test/unit/builders/dictionary-tests.ts
new file mode 100644
index 000000000..19b3603bc
--- /dev/null
+++ b/src/arrow/js/test/unit/builders/dictionary-tests.ts
@@ -0,0 +1,65 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import { validateVector } from './utils';
+import { Dictionary, Utf8, Int32, Vector } from 'apache-arrow';
+import {
+ encodeAll,
+ encodeEach,
+ encodeEachDOM,
+ encodeEachNode,
+ duplicateItems,
+ stringsNoNulls,
+ stringsWithNAs,
+ stringsWithNulls,
+ stringsWithEmpties
+} from './utils';
+
+const testDOMStreams = process.env.TEST_DOM_STREAMS === 'true';
+const testNodeStreams = process.env.TEST_NODE_STREAMS === 'true';
+
+describe('DictionaryBuilder', () => {
+ describe('<Utf8, Int32>', () => {
+ runTestsWithEncoder('encodeAll', encodeAll(() => new Dictionary(new Utf8(), new Int32())));
+ runTestsWithEncoder('encodeEach: 5', encodeEach(() => new Dictionary(new Utf8(), new Int32()), 5));
+ runTestsWithEncoder('encodeEach: 25', encodeEach(() => new Dictionary(new Utf8(), new Int32()), 25));
+ runTestsWithEncoder('encodeEach: undefined', encodeEach(() => new Dictionary(new Utf8(), new Int32()), void 0));
+ testDOMStreams && runTestsWithEncoder('encodeEachDOM: 25', encodeEachDOM(() => new Dictionary(new Utf8(), new Int32()), 25));
+ testNodeStreams && runTestsWithEncoder('encodeEachNode: 25', encodeEachNode(() => new Dictionary(new Utf8(), new Int32()), 25));
+ });
+});
+
+function runTestsWithEncoder(name: string, encode: (vals: (string | null)[], nullVals?: any[]) => Promise<Vector<Dictionary<Utf8, Int32>>>) {
+    describe(`${name}`, () => {
+ it(`dictionary-encodes strings no nulls`, async () => {
+ const vals = duplicateItems(20, stringsNoNulls(10));
+ validateVector(vals, await encode(vals, []), []);
+ });
+ it(`dictionary-encodes strings with nulls`, async () => {
+ const vals = duplicateItems(20, stringsWithNulls(10));
+ validateVector(vals, await encode(vals, [null]), [null]);
+ });
+ it(`dictionary-encodes strings using n/a as the null value rep`, async () => {
+ const vals = duplicateItems(20, stringsWithNAs(10));
+ validateVector(vals, await encode(vals, ['n/a']), ['n/a']);
+ });
+ it(`dictionary-encodes strings using \\0 as the null value rep`, async () => {
+ const vals = duplicateItems(20, stringsWithEmpties(10));
+ validateVector(vals, await encode(vals, ['\0']), ['\0']);
+ });
+ });
+}
diff --git a/src/arrow/js/test/unit/builders/int64-tests.ts b/src/arrow/js/test/unit/builders/int64-tests.ts
new file mode 100644
index 000000000..876ce7030
--- /dev/null
+++ b/src/arrow/js/test/unit/builders/int64-tests.ts
@@ -0,0 +1,91 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import { util, Vector, DataType, Int64 } from 'apache-arrow';
+import {
+ validateVector,
+ encodeAll, encodeEach, encodeEachDOM, encodeEachNode,
+ int64sNoNulls, int64sWithNulls, int64sWithMaxInts,
+} from './utils';
+
+const testDOMStreams = process.env.TEST_DOM_STREAMS === 'true';
+const testNodeStreams = process.env.TEST_NODE_STREAMS === 'true';
+
+const typeFactory = () => new Int64();
+const valueName = `Int64`.toLowerCase();
+const encode0 = encodeAll(typeFactory);
+const encode1 = encodeEach(typeFactory);
+const encode2 = encodeEach(typeFactory, 5);
+const encode3 = encodeEach(typeFactory, 25);
+const encode4 = encodeEachDOM(typeFactory, 25);
+const encode5 = encodeEachNode(typeFactory, 25);
+
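+// 64-bit MAX_INT sentinel: the raw two-word form is what the builder receives,
+// the BN primitive form is what the decoded values are validated against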
+const nulls0: any[] = [new Uint32Array([0x7fffffff, 0x7fffffff])];
+const nulls1: any[] = [util.BN.new(nulls0[0])[Symbol.toPrimitive]()];
+
+type EncodeValues<T extends DataType> = (values: (T['TValue'] | null)[], nullVals?: any[]) => Promise<Vector<T>>;
+
+function encodeAndValidate<T extends DataType>(encode: EncodeValues<T>, providedNulls: any[] = [], expectedNulls = providedNulls) {
+ return (values: any[]) => {
+ return async () => {
+ const vector = await encode(values, providedNulls);
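+            // normalize numbers and bigints to the two-word (lo, hi) form used for comparison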
+ const expected = values.map((x) => {
+ switch (typeof x) {
+ case 'number': return new Int32Array([x, 0]);
+ case 'bigint': return new Int32Array(new BigInt64Array([x]).buffer);
+ }
+ return x ? x.slice() : x;
+ });
+ return validateVector(expected, vector, expectedNulls);
+ };
+ };
+}
+
+describe(`Int64Builder`, () => {
+ describe(`encode single chunk`, () => {
+ it(`encodes ${valueName} no nulls`, encodeAndValidate(encode0, [], [])(int64sNoNulls(20)));
+ it(`encodes ${valueName} with nulls`, encodeAndValidate(encode0, [null], [null])(int64sWithNulls(20)));
+ it(`encodes ${valueName} with MAX_INT`, encodeAndValidate(encode0, nulls0, nulls1)(int64sWithMaxInts(20)));
+ });
+ describe(`encode chunks length default`, () => {
+ it(`encodes ${valueName} no nulls`, encodeAndValidate(encode1, [], [])(int64sNoNulls(20)));
+ it(`encodes ${valueName} with nulls`, encodeAndValidate(encode1, [null], [null])(int64sWithNulls(20)));
+ it(`encodes ${valueName} with MAX_INT`, encodeAndValidate(encode1, nulls0, nulls1)(int64sWithMaxInts(20)));
+ });
+ describe(`encode chunks length 5`, () => {
+ it(`encodes ${valueName} no nulls`, encodeAndValidate(encode2, [], [])(int64sNoNulls(20)));
+ it(`encodes ${valueName} with nulls`, encodeAndValidate(encode2, [null], [null])(int64sWithNulls(20)));
+ it(`encodes ${valueName} with MAX_INT`, encodeAndValidate(encode2, nulls0, nulls1)(int64sWithMaxInts(20)));
+ });
+ describe(`encode chunks length 25`, () => {
+ it(`encodes ${valueName} no nulls`, encodeAndValidate(encode3, [], [])(int64sNoNulls(20)));
+ it(`encodes ${valueName} with nulls`, encodeAndValidate(encode3, [null], [null])(int64sWithNulls(20)));
+ it(`encodes ${valueName} with MAX_INT`, encodeAndValidate(encode3, nulls0, nulls1)(int64sWithMaxInts(20)));
+ });
+ testDOMStreams && describe(`encode chunks length 25, WhatWG stream`, () => {
+ it(`encodes ${valueName} no nulls`, encodeAndValidate(encode4, [], [])(int64sNoNulls(20)));
+ it(`encodes ${valueName} with nulls`, encodeAndValidate(encode4, [null], [null])(int64sWithNulls(20)));
+ it(`encodes ${valueName} with MAX_INT`, encodeAndValidate(encode4, nulls0, nulls1)(int64sWithMaxInts(20)));
+ });
+ testNodeStreams && describe(`encode chunks length 25, NodeJS stream`, () => {
+ it(`encodes ${valueName} no nulls`, encodeAndValidate(encode5, [], [])(int64sNoNulls(20)));
+ it(`encodes ${valueName} with nulls`, encodeAndValidate(encode5, [null], [null])(int64sWithNulls(20)));
+ it(`encodes ${valueName} with MAX_INT`, encodeAndValidate(encode5, nulls0, nulls1)(int64sWithMaxInts(20)));
+ });
+});
diff --git a/src/arrow/js/test/unit/builders/primitive-tests.ts b/src/arrow/js/test/unit/builders/primitive-tests.ts
new file mode 100644
index 000000000..3fd515bf4
--- /dev/null
+++ b/src/arrow/js/test/unit/builders/primitive-tests.ts
@@ -0,0 +1,154 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import {
+ Vector, DataType,
+ Bool, Int8, Int16, Int32, Uint8, Uint16, Uint32, Float16, Float32, Float64
+} from 'apache-arrow';
+
+import {
+ validateVector,
+ encodeAll, encodeEach, encodeEachDOM, encodeEachNode,
+ boolsNoNulls, boolsWithNulls,
+ int8sNoNulls, int8sWithNulls, int8sWithMaxInts,
+ int16sNoNulls, int16sWithNulls, int16sWithMaxInts,
+ int32sNoNulls, int32sWithNulls, int32sWithMaxInts,
+ uint8sNoNulls, uint8sWithNulls, uint8sWithMaxInts,
+ uint16sNoNulls, uint16sWithNulls, uint16sWithMaxInts,
+ uint32sNoNulls, uint32sWithNulls, uint32sWithMaxInts,
+ float16sNoNulls, float16sWithNulls, float16sWithNaNs,
+    float32sNoNulls, float32sWithNulls, float32sWithNaNs,
+    float64sNoNulls, float64sWithNulls, float64sWithNaNs,
+} from './utils';
+
+const testDOMStreams = process.env.TEST_DOM_STREAMS === 'true';
+const testNodeStreams = process.env.TEST_NODE_STREAMS === 'true';
+
+describe('BoolBuilder', () => {
+
+    runTestsWithEncoder('encodeAll', encodeAll(() => new Bool()));
+ runTestsWithEncoder('encodeEach: 5', encodeEach(() => new Bool(), 5));
+ runTestsWithEncoder('encodeEach: 25', encodeEach(() => new Bool(), 25));
+ runTestsWithEncoder('encodeEach: undefined', encodeEach(() => new Bool()));
+ testDOMStreams && runTestsWithEncoder('encodeEachDOM: 25', encodeEachDOM(() => new Bool(), 25));
+ testNodeStreams && runTestsWithEncoder('encodeEachNode: 25', encodeEachNode(() => new Bool(), 25));
+
+ function runTestsWithEncoder<T extends DataType>(name: string, encode: (vals: (T['TValue'] | null)[], nullVals?: any[]) => Promise<Vector<T>>) {
+        describe(`${name}`, () => {
+ it(`encodes bools no nulls`, async () => {
+ const vals = boolsNoNulls(20);
+ validateVector(vals, await encode(vals, []), []);
+ });
+ it(`encodes bools with nulls`, async () => {
+ const vals = boolsWithNulls(20);
+ validateVector(vals, await encode(vals, [null]), [null]);
+ });
+ });
+ }
+});
+
+type PrimitiveTypeOpts<T extends DataType> = [
+ new (...args: any[]) => T,
+ (count: number) => (T['TValue'] | null)[],
+ (count: number) => (T['TValue'] | null)[],
+ (count: number) => (T['TValue'] | null)[]
+];
+
+[
+ [Int8, int8sNoNulls, int8sWithNulls, int8sWithMaxInts] as PrimitiveTypeOpts<Int8>,
+ [Int16, int16sNoNulls, int16sWithNulls, int16sWithMaxInts] as PrimitiveTypeOpts<Int16>,
+ [Int32, int32sNoNulls, int32sWithNulls, int32sWithMaxInts] as PrimitiveTypeOpts<Int32>,
+ [Uint8, uint8sNoNulls, uint8sWithNulls, uint8sWithMaxInts] as PrimitiveTypeOpts<Uint8>,
+ [Uint16, uint16sNoNulls, uint16sWithNulls, uint16sWithMaxInts] as PrimitiveTypeOpts<Uint16>,
+ [Uint32, uint32sNoNulls, uint32sWithNulls, uint32sWithMaxInts] as PrimitiveTypeOpts<Uint32>,
+].forEach(([TypeCtor, noNulls, withNulls, withMaxInts]) => {
+
+ describe(`${TypeCtor.name}Builder`, () => {
+
+ const typeFactory = () => new TypeCtor();
+ const valueName = TypeCtor.name.toLowerCase();
+
+ runTestsWithEncoder('encodeAll', encodeAll(typeFactory));
+ runTestsWithEncoder('encodeEach: 5', encodeEach(typeFactory, 5));
+ runTestsWithEncoder('encodeEach: 25', encodeEach(typeFactory, 25));
+ runTestsWithEncoder('encodeEach: undefined', encodeEach(typeFactory));
+ testDOMStreams && runTestsWithEncoder('encodeEachDOM: 25', encodeEachDOM(typeFactory, 25));
+ testNodeStreams && runTestsWithEncoder('encodeEachNode: 25', encodeEachNode(typeFactory, 25));
+
+ function runTestsWithEncoder<T extends DataType>(name: string, encode: (vals: (T['TValue'] | null)[], nullVals?: any[]) => Promise<Vector<T>>) {
+ describe(`${name}`, () => {
+ it(`encodes ${valueName} no nulls`, async () => {
+ const vals = noNulls(20);
+ validateVector(vals, await encode(vals, []), []);
+ });
+ it(`encodes ${valueName} with nulls`, async () => {
+ const vals = withNulls(20);
+ validateVector(vals, await encode(vals, [null]), [null]);
+ });
+ it(`encodes ${valueName} with MAX_INT`, async () => {
+                    const vals = withMaxInts(20);
+ validateVector(vals, await encode(vals, [0x7fffffff]), [0x7fffffff]);
+ });
+ });
+ }
+ });
+});
+
+[
+ [Float16, float16sNoNulls, float16sWithNulls, float16sWithNaNs] as PrimitiveTypeOpts<Float16>,
+ [Float32, float32sNoNulls, float32sWithNulls, float32sWithNaNs] as PrimitiveTypeOpts<Float32>,
+ [Float64, float64sNoNulls, float64sWithNulls, float64sWithNaNs] as PrimitiveTypeOpts<Float64>,
+].forEach(([TypeCtor, noNulls, withNulls, withNaNs]) => {
+
+ describe(`${TypeCtor.name}Builder`, () => {
+
+ const typeFactory = () => new TypeCtor();
+ const valueName = TypeCtor.name.toLowerCase();
+
+ runTestsWithEncoder('encodeAll', encodeAll(typeFactory));
+ runTestsWithEncoder('encodeEach: 5', encodeEach(typeFactory, 5));
+ runTestsWithEncoder('encodeEach: 25', encodeEach(typeFactory, 25));
+ runTestsWithEncoder('encodeEach: undefined', encodeEach(typeFactory));
+ testDOMStreams && runTestsWithEncoder('encodeEachDOM: 25', encodeEachDOM(typeFactory, 25));
+ testNodeStreams && runTestsWithEncoder('encodeEachNode: 25', encodeEachNode(typeFactory, 25));
+
+ function runTestsWithEncoder<T extends DataType>(name: string, encode: (vals: (T['TValue'] | null)[], nullVals?: any[]) => Promise<Vector<T>>) {
+ describe(`${name}`, () => {
+ it(`encodes ${valueName} no nulls`, async () => {
+ const vals = noNulls(20);
+ validateVector(vals, await encode(vals, []), []);
+ });
+ it(`encodes ${valueName} with nulls`, async () => {
+ const vals = withNulls(20);
+ validateVector(vals, await encode(vals, [null]), [null]);
+ });
+ it(`encodes ${valueName} with NaNs`, async () => {
+ const vals = withNaNs(20);
+ validateVector(vals, await encode(vals, [NaN]), [NaN]);
+ });
+ });
+ }
+ });
+});
+
+describe('Float16Builder', () => {
+ const encode = encodeAll(() => new Float16());
+ it(`encodes the weird values`, async () => {
+ const vals = [0, 5.960464477539063e-8, NaN, 65504, 2, -0];
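+        // 2^-24 (smallest positive float16 subnormal), NaN, 65504 (largest finite float16), and -0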
+ validateVector(vals, await encode(vals, []), []);
+ });
+});
diff --git a/src/arrow/js/test/unit/builders/uint64-tests.ts b/src/arrow/js/test/unit/builders/uint64-tests.ts
new file mode 100644
index 000000000..e08e25b5c
--- /dev/null
+++ b/src/arrow/js/test/unit/builders/uint64-tests.ts
@@ -0,0 +1,91 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import { util, Vector, DataType, Uint64 } from 'apache-arrow';
+import {
+ validateVector,
+ encodeAll, encodeEach, encodeEachDOM, encodeEachNode,
+ uint64sNoNulls, uint64sWithNulls, uint64sWithMaxInts,
+} from './utils';
+
+const testDOMStreams = process.env.TEST_DOM_STREAMS === 'true';
+const testNodeStreams = process.env.TEST_NODE_STREAMS === 'true';
+
+const typeFactory = () => new Uint64();
+const valueName = `Uint64`.toLowerCase();
+const encode0 = encodeAll(typeFactory);
+const encode1 = encodeEach(typeFactory);
+const encode2 = encodeEach(typeFactory, 5);
+const encode3 = encodeEach(typeFactory, 25);
+const encode4 = encodeEachDOM(typeFactory, 25);
+const encode5 = encodeEachNode(typeFactory, 25);
+
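+// 64-bit MAX_INT sentinel: the raw two-word form is what the builder receives,
+// the BN primitive form is what the decoded values are validated against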
+const nulls0: any[] = [new Uint32Array([0x7fffffff, 0x7fffffff])];
+const nulls1: any[] = [util.BN.new(nulls0[0])[Symbol.toPrimitive]()];
+
+type ValuesToVector<T extends DataType> = (values: (T['TValue'] | null)[], nullVals?: any[]) => Promise<Vector<T>>;
+
+function encodeAndValidate<T extends DataType>(encode: ValuesToVector<T>, providedNulls: any[] = [], expectedNulls = providedNulls) {
+ return (values: any[]) => {
+ return async () => {
+ const vector = await encode(values, providedNulls);
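+            // normalize numbers and bigints to the two-word (lo, hi) form used for comparison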
+ const expected = values.map((x) => {
+ switch (typeof x) {
+ case 'number': return new Uint32Array([x, 0]);
+ case 'bigint': return new Uint32Array(new BigUint64Array([x]).buffer);
+ }
+ return x ? x.slice() : x;
+ });
+ return validateVector(expected, vector, expectedNulls);
+ };
+ };
+}
+
+describe(`Uint64Builder`, () => {
+ describe(`encode single chunk`, () => {
+ it(`encodes ${valueName} no nulls`, encodeAndValidate(encode0, [], [])(uint64sNoNulls(20)));
+ it(`encodes ${valueName} with nulls`, encodeAndValidate(encode0, [null], [null])(uint64sWithNulls(20)));
+ it(`encodes ${valueName} with MAX_INT`, encodeAndValidate(encode0, nulls0, nulls1)(uint64sWithMaxInts(20)));
+ });
+ describe(`encode chunks length default`, () => {
+ it(`encodes ${valueName} no nulls`, encodeAndValidate(encode1, [], [])(uint64sNoNulls(20)));
+ it(`encodes ${valueName} with nulls`, encodeAndValidate(encode1, [null], [null])(uint64sWithNulls(20)));
+ it(`encodes ${valueName} with MAX_INT`, encodeAndValidate(encode1, nulls0, nulls1)(uint64sWithMaxInts(20)));
+ });
+ describe(`encode chunks length 5`, () => {
+ it(`encodes ${valueName} no nulls`, encodeAndValidate(encode2, [], [])(uint64sNoNulls(20)));
+ it(`encodes ${valueName} with nulls`, encodeAndValidate(encode2, [null], [null])(uint64sWithNulls(20)));
+ it(`encodes ${valueName} with MAX_INT`, encodeAndValidate(encode2, nulls0, nulls1)(uint64sWithMaxInts(20)));
+ });
+ describe(`encode chunks length 25`, () => {
+ it(`encodes ${valueName} no nulls`, encodeAndValidate(encode3, [], [])(uint64sNoNulls(20)));
+ it(`encodes ${valueName} with nulls`, encodeAndValidate(encode3, [null], [null])(uint64sWithNulls(20)));
+ it(`encodes ${valueName} with MAX_INT`, encodeAndValidate(encode3, nulls0, nulls1)(uint64sWithMaxInts(20)));
+ });
+ testDOMStreams && describe(`encode chunks length 25, WhatWG stream`, () => {
+ it(`encodes ${valueName} no nulls`, encodeAndValidate(encode4, [], [])(uint64sNoNulls(20)));
+ it(`encodes ${valueName} with nulls`, encodeAndValidate(encode4, [null], [null])(uint64sWithNulls(20)));
+ it(`encodes ${valueName} with MAX_INT`, encodeAndValidate(encode4, nulls0, nulls1)(uint64sWithMaxInts(20)));
+ });
+ testNodeStreams && describe(`encode chunks length 25, NodeJS stream`, () => {
+ it(`encodes ${valueName} no nulls`, encodeAndValidate(encode5, [], [])(uint64sNoNulls(20)));
+ it(`encodes ${valueName} with nulls`, encodeAndValidate(encode5, [null], [null])(uint64sWithNulls(20)));
+ it(`encodes ${valueName} with MAX_INT`, encodeAndValidate(encode5, nulls0, nulls1)(uint64sWithMaxInts(20)));
+ });
+});
diff --git a/src/arrow/js/test/unit/builders/utf8-tests.ts b/src/arrow/js/test/unit/builders/utf8-tests.ts
new file mode 100644
index 000000000..212879ab4
--- /dev/null
+++ b/src/arrow/js/test/unit/builders/utf8-tests.ts
@@ -0,0 +1,62 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import { validateVector } from './utils';
+import { Vector, Utf8 } from 'apache-arrow';
+import {
+ encodeAll,
+ encodeEach,
+ encodeEachDOM,
+ encodeEachNode,
+ stringsNoNulls,
+ stringsWithNAs,
+ stringsWithNulls,
+ stringsWithEmpties
+} from './utils';
+
+const testDOMStreams = process.env.TEST_DOM_STREAMS === 'true';
+const testNodeStreams = process.env.TEST_NODE_STREAMS === 'true';
+
+describe('Utf8Builder', () => {
+ runTestsWithEncoder('encodeAll', encodeAll(() => new Utf8()));
+ runTestsWithEncoder('encodeEach: 5', encodeEach(() => new Utf8(), 5));
+ runTestsWithEncoder('encodeEach: 25', encodeEach(() => new Utf8(), 25));
+ runTestsWithEncoder('encodeEach: undefined', encodeEach(() => new Utf8(), void 0));
+ testDOMStreams && runTestsWithEncoder('encodeEachDOM: 25', encodeEachDOM(() => new Utf8(), 25));
+ testNodeStreams && runTestsWithEncoder('encodeEachNode: 25', encodeEachNode(() => new Utf8(), 25));
+});
+
+function runTestsWithEncoder(name: string, encode: (vals: (string | null)[], nullVals?: any[]) => Promise<Vector<Utf8>>) {
+    describe(`${name}`, () => {
+ it(`encodes strings no nulls`, async () => {
+ const vals = stringsNoNulls(20);
+ validateVector(vals, await encode(vals, []), []);
+ });
+ it(`encodes strings with nulls`, async () => {
+ const vals = stringsWithNulls(20);
+ validateVector(vals, await encode(vals, [null]), [null]);
+ });
+ it(`encodes strings using n/a as the null value rep`, async () => {
+ const vals = stringsWithNAs(20);
+ validateVector(vals, await encode(vals, ['n/a']), ['n/a']);
+ });
+ it(`encodes strings using \\0 as the null value rep`, async () => {
+ const vals = stringsWithEmpties(20);
+ validateVector(vals, await encode(vals, ['\0']), ['\0']);
+ });
+ });
+}
diff --git a/src/arrow/js/test/unit/builders/utils.ts b/src/arrow/js/test/unit/builders/utils.ts
new file mode 100644
index 000000000..9bd16fff3
--- /dev/null
+++ b/src/arrow/js/test/unit/builders/utils.ts
@@ -0,0 +1,221 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import '../../jest-extensions';
+import { from, fromDOMStream, toArray } from 'ix/asynciterable';
+import { fromNodeStream } from 'ix/asynciterable/fromnodestream';
+import 'ix/Ix.node';
+import { util } from 'apache-arrow';
+import { Builder } from 'apache-arrow';
+import { DataType, Vector, Chunked } from 'apache-arrow';
+import randstr from 'randomatic';
+
+const rand = Math.random.bind(Math);
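+// replaces roughly 25% of values with the given sentinel (null by default)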
+const randnulls = <T, TNull = null>(values: T[], n: TNull = <any> null) => values.map((x) => Math.random() > 0.25 ? x : n) as (T | TNull)[];
+
+export const randomBytes = (length: number) => fillRandom(Uint8Array, length);
+export const randomString = ((opts) => (length: number) =>
+ randstr('?', length, opts)
+)({ chars: `abcdefghijklmnopqrstuvwxyz0123456789_` });
+
+export const stringsNoNulls = (length = 20) => Array.from({ length }, (_) => randomString(1 + (Math.random() * 19 | 0)));
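+// millisecond timestamps aligned to whole-day boundaries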
+export const timestamp32sNoNulls = (length = 20, now = Date.now() / 86400000 | 0) =>
+ Array.from({ length }, (_) => (now + (rand() * 10000 * (rand() > 0.5 ? -1 : 1)) | 0) * 86400000);
+
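+// 64-bit millisecond timestamps, represented as [lo, hi] pairs of 32-bit words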
+export const timestamp64sNoNulls = (length = 20, now = Date.now()) => Array.from({ length }, (_) => {
+ const ms = now + (rand() * 31557600000 * (rand() > 0.5 ? -1 : 1) | 0);
+ return new Int32Array([(ms % 4294967296) | 0, (ms / 4294967296) | 0]);
+});
+
+export const timestamp32sWithNulls = (length = 20) => randnulls(timestamp32sNoNulls(length), null);
+export const timestamp64sWithNulls = (length = 20) => randnulls(timestamp64sNoNulls(length), null);
+export const timestamp32sWithMaxInts = (length = 20) => randnulls(timestamp32sNoNulls(length), 0x7fffffff);
+export const timestamp64sWithMaxInts = (length = 20) => randnulls(timestamp64sNoNulls(length), new Int32Array([0x7fffffff, 0x7fffffff]));
+
+export const boolsNoNulls = (length = 20) => Array.from({ length }, () => rand() > 0.5);
+export const date32sNoNulls = (length = 20) => timestamp32sNoNulls(length).map((x) => new Date(x));
+export const date64sNoNulls = (length = 20) => timestamp64sNoNulls(length).map((x) => new Date(4294967296 * x[1] + (x[0] >>> 0)));
+export const int8sNoNulls = (length = 20) => Array.from(new Int8Array(randomBytes(length * Int8Array.BYTES_PER_ELEMENT).buffer));
+export const int16sNoNulls = (length = 20) => Array.from(new Int16Array(randomBytes(length * Int16Array.BYTES_PER_ELEMENT).buffer));
+export const int32sNoNulls = (length = 20) => Array.from(new Int32Array(randomBytes(length * Int32Array.BYTES_PER_ELEMENT).buffer));
+export const int64sNoNulls = (length = 20) => Array.from({ length }, (_, i) => {
+ const bn = util.BN.new(new Int32Array(randomBytes(2 * 4).buffer));
+ // Evenly distribute the three types of arguments we support in the Int64
+ // builder
+ switch (i % 3) {
+ // Int32Array (util.BN is-a Int32Array)
+ case 0: return bn;
+ // BigInt
+ case 1: return bn[Symbol.toPrimitive]();
+ // number
+ case 2:
+ default: return bn[0];
+ }
+});
+
+export const uint8sNoNulls = (length = 20) => Array.from(new Uint8Array(randomBytes(length * Uint8Array.BYTES_PER_ELEMENT).buffer));
+export const uint16sNoNulls = (length = 20) => Array.from(new Uint16Array(randomBytes(length * Uint16Array.BYTES_PER_ELEMENT).buffer));
+export const uint32sNoNulls = (length = 20) => Array.from(new Uint32Array(randomBytes(length * Uint32Array.BYTES_PER_ELEMENT).buffer));
+export const uint64sNoNulls = (length = 20) => Array.from({ length }, (_, i) => {
+ const bn = util.BN.new(new Uint32Array(randomBytes(2 * 4).buffer));
+ // Evenly distribute the three types of arguments we support in the Uint64
+ // builder
+ switch (i % 3) {
+ // UInt32Array (util.BN is-a Uint32Array)
+ case 0: return bn;
+ // BigInt
+ case 1: return bn[Symbol.toPrimitive]();
+ // number
+ case 2:
+ default: return bn[0];
+ }
+});
+export const float16sNoNulls = (length = 20) => Array.from(new Uint16Array(randomBytes(length * Uint16Array.BYTES_PER_ELEMENT).buffer)).map(util.uint16ToFloat64);
+export const float32sNoNulls = (length = 20) => Array.from(new Float32Array(randomBytes(length * Float32Array.BYTES_PER_ELEMENT).buffer));
+export const float64sNoNulls = (length = 20) => Array.from(new Float64Array(randomBytes(length * Float64Array.BYTES_PER_ELEMENT).buffer));
+
+export const stringsWithNAs = (length = 20) => randnulls(stringsNoNulls(length), 'n/a');
+export const stringsWithNulls = (length = 20) => randnulls(stringsNoNulls(length), null);
+export const stringsWithEmpties = (length = 20) => randnulls(stringsNoNulls(length), '\0');
+
+export const boolsWithNulls = (length = 20) => randnulls(boolsNoNulls(length), null);
+export const date32sWithNulls = (length = 20) => randnulls(date32sNoNulls(length), null);
+export const date64sWithNulls = (length = 20) => randnulls(date64sNoNulls(length), null);
+export const int8sWithNulls = (length = 20) => randnulls(int8sNoNulls(length), null);
+export const int16sWithNulls = (length = 20) => randnulls(int16sNoNulls(length), null);
+export const int32sWithNulls = (length = 20) => randnulls(int32sNoNulls(length), null);
+export const int64sWithNulls = (length = 20) => randnulls(int64sNoNulls(length), null);
+export const uint8sWithNulls = (length = 20) => randnulls(uint8sNoNulls(length), null);
+export const uint16sWithNulls = (length = 20) => randnulls(uint16sNoNulls(length), null);
+export const uint32sWithNulls = (length = 20) => randnulls(uint32sNoNulls(length), null);
+export const uint64sWithNulls = (length = 20) => randnulls(uint64sNoNulls(length), null);
+export const float16sWithNulls = (length = 20) => randnulls(float16sNoNulls(length), null);
+export const float32sWithNulls = (length = 20) => randnulls(float32sNoNulls(length), null);
+export const float64sWithNulls = (length = 20) => randnulls(float64sNoNulls(length), null);
+
+export const int8sWithMaxInts = (length = 20) => randnulls(int8sNoNulls(length), 0x7fffffff);
+export const int16sWithMaxInts = (length = 20) => randnulls(int16sNoNulls(length), 0x7fffffff);
+export const int32sWithMaxInts = (length = 20) => randnulls(int32sNoNulls(length), 0x7fffffff);
+export const int64sWithMaxInts = (length = 20) => randnulls(int64sNoNulls(length), new Int32Array([0x7fffffff, 0x7fffffff]));
+export const uint8sWithMaxInts = (length = 20) => randnulls(uint8sNoNulls(length), 0x7fffffff);
+export const uint16sWithMaxInts = (length = 20) => randnulls(uint16sNoNulls(length), 0x7fffffff);
+export const uint32sWithMaxInts = (length = 20) => randnulls(uint32sNoNulls(length), 0x7fffffff);
+export const uint64sWithMaxInts = (length = 20) => randnulls(uint64sNoNulls(length), new Uint32Array([0x7fffffff, 0x7fffffff]));
+export const float16sWithNaNs = (length = 20) => randnulls(float16sNoNulls(length), NaN);
+export const float32sWithNaNs = (length = 20) => randnulls(float32sNoNulls(length), NaN);
+export const float64sWithNaNs = (length = 20) => randnulls(float64sNoNulls(length), NaN);
+
+export const duplicateItems = (n: number, xs: (any | null)[]) => {
+ const out = new Array<string | null>(n);
+ for (let i = -1, k = xs.length; ++i < n;) {
+ out[i] = xs[Math.random() * k | 0];
+ }
+ return out;
+};
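+
+// Illustrative usage (not exercised directly here): duplicateItems builds the
+// low-cardinality inputs the dictionary tests need, e.g. sampling 20 values
+// from a three-item pool:
+//
+//   const dictionaryValues = duplicateItems(20, ['foo', 'bar', 'baz']);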
+
+export function encodeAll<T extends DataType>(typeFactory: () => T) {
+ return async function encodeAll<TNull = any>(values: (T['TValue'] | TNull)[], nullValues?: TNull[]) {
+ const type = typeFactory();
+ const builder = Builder.new({ type, nullValues });
+ values.forEach(builder.append.bind(builder));
+ return builder.finish().toVector();
+ };
+}
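+
+// Usage sketch (illustrative only): pair encodeAll with a concrete type
+// factory, e.g. the Int32 type exported by apache-arrow:
+//
+//   const vector = await encodeAll(() => new Int32())([1, null, 3], [null]);
+//   validateVector([1, null, 3], vector, [null]);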
+
+export function encodeEach<T extends DataType>(typeFactory: () => T, chunkLen?: number) {
+ return async function encodeEach<TNull = any>(vals: (T['TValue'] | TNull)[], nullValues?: TNull[]) {
+ const type = typeFactory();
+ const opts = { type, nullValues, highWaterMark: chunkLen };
+ const chunks = [...Builder.throughIterable(opts)(vals)];
+ return Chunked.concat(...chunks) as Chunked<T>;
+ };
+}
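+
+// Unlike encodeAll, encodeEach exercises the chunked path:
+// Builder.throughIterable flushes a chunk each time `highWaterMark` values
+// have been appended (the default queueing strategy counts values), so a
+// chunkLen of 5 over 20 values should produce a Chunked vector of 4 chunks.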
+
+export function encodeEachDOM<T extends DataType>(typeFactory: () => T, chunkLen?: number) {
+ return async function encodeEachDOM<TNull = any>(vals: (T['TValue'] | TNull)[], nullValues?: TNull[]) {
+ const type = typeFactory();
+ const strategy = { highWaterMark: chunkLen };
+ const source = from(vals).toDOMStream();
+ const builder = Builder.throughDOM({ type, nullValues, readableStrategy: strategy, writableStrategy: strategy });
+ const chunks = await fromDOMStream(source.pipeThrough(builder)).pipe(toArray);
+ return Chunked.concat(...chunks) as Chunked<T>;
+ };
+}
+
+export function encodeEachNode<T extends DataType>(typeFactory: () => T, chunkLen?: number) {
+ return async function encodeEachNode<TNull = any>(vals: (T['TValue'] | TNull)[], nullValues?: TNull[]) {
+ const type = typeFactory();
+ const vals_ = vals.map((x) => x === null ? undefined : x);
+ const source = from(vals_).toNodeStream({ objectMode: true });
+ const nulls_ = nullValues ? nullValues.map((x) => x === null ? undefined : x) : nullValues;
+ const builder = Builder.throughNode({ type, nullValues: nulls_, highWaterMark: chunkLen });
+ const chunks: any[] = await fromNodeStream(source.pipe(builder), chunkLen).pipe(toArray);
+ return Chunked.concat(...chunks) as Chunked<T>;
+ };
+}
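+
+// Note: the null -> undefined mapping above is necessary because pushing
+// `null` into a Node object-mode stream signals end-of-stream, so null
+// sentinels must travel through the pipe as `undefined` instead.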
+
+const isInt64Null = (nulls: Map<any, any>, x: any) => {
+ if (ArrayBuffer.isView(x)) {
+ const bn = util.BN.new<Int32Array>(x as Int32Array);
+ return nulls.has((<any> bn)[Symbol.toPrimitive]('default'));
+ }
+ return false;
+};
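+
+// 64-bit values are generated above as two-element Int32Arrays, so a
+// typed-array null sentinel can't match by Map identity; comparing the BN's
+// primitive form is what lets it line up with the registered null values.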
+
+export function validateVector<T extends DataType>(vals: (T['TValue'] | null)[], vec: Vector, nullVals: any[]) {
+ let i = 0, x: T['TValue'] | null, y: T['TValue'] | null;
+ const nulls = nullVals.reduce((m, x) => m.set(x, x), new Map());
+ try {
+ for (x of vec) {
+ if (nulls.has(y = vals[i])) {
+ expect(x).toBeNull();
+ } else if (isInt64Null(nulls, y)) {
+ expect(x).toBeNull();
+ } else {
+ expect(x).toArrowCompare(y);
+ }
+ i++;
+ }
+ } catch (e) {
+ // Uncomment these two lines to catch and debug the value retrieval that failed
+ // debugger;
+ // vec.get(i);
+ throw new Error([
+ `${(vec as any).VectorName}[${i}]: ${e?.stack || e}`,
+ `nulls: [${nullVals.join(', ')}]`,
+ `values: [${vals.join(', ')}]`,
+ ].join('\n'));
+ }
+}
+
+function fillRandom<T extends TypedArrayConstructor>(ArrayType: T, length: number) {
+ const BPE = ArrayType.BYTES_PER_ELEMENT;
+ const array = new ArrayType(length);
+ const max = (2 ** (8 * BPE)) - 1;
+ for (let i = -1; ++i < length; array[i] = rand() * max * (rand() > 0.5 ? -1 : 1)) { }
+ return array as InstanceType<T>;
+}
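+
+// fillRandom scales values to the element's full bit width and relies on
+// normal TypedArray coercion to wrap negatives, so the same helper serves
+// both the signed and unsigned constructors below.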
+
+type TypedArrayConstructor =
+ (typeof Int8Array) |
+ (typeof Int16Array) |
+ (typeof Int32Array) |
+ (typeof Uint8Array) |
+ (typeof Uint16Array) |
+ (typeof Uint32Array) |
+ (typeof Float32Array) |
+ (typeof Float64Array);
diff --git a/src/arrow/js/test/unit/dataframe-tests.ts b/src/arrow/js/test/unit/dataframe-tests.ts
new file mode 100644
index 000000000..9e87e372d
--- /dev/null
+++ b/src/arrow/js/test/unit/dataframe-tests.ts
@@ -0,0 +1,282 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import '../jest-extensions';
+import {
+ predicate, DataFrame, RecordBatch
+} from 'apache-arrow';
+import { test_data } from './table-tests';
+import { jest } from '@jest/globals';
+
+const { col, lit, custom, and, or, And, Or } = predicate;
+
+const F32 = 0, I32 = 1, DICT = 2;
+
+describe(`DataFrame`, () => {
+
+ for (let datum of test_data) {
+ describe(datum.name, () => {
+
+ describe(`scan()`, () => {
+ test(`yields all values`, () => {
+ const df = new DataFrame(datum.table());
+ let expected_idx = 0;
+ df.scan((idx, batch) => {
+ const columns = batch.schema.fields.map((_, i) => batch.getChildAt(i)!);
+ expect(columns.map((c) => c.get(idx))).toEqual(values[expected_idx++]);
+ });
+ });
+ test(`calls bind function with every batch`, () => {
+ const df = new DataFrame(datum.table());
+ let bind = jest.fn();
+ df.scan(() => { }, bind);
+ for (let batch of df.chunks) {
+ expect(bind).toHaveBeenCalledWith(batch);
+ }
+ });
+ });
+ describe(`scanReverse()`, () => {
+ test(`yields all values`, () => {
+ const df = new DataFrame(datum.table());
+ let expected_idx = values.length;
+ df.scanReverse((idx, batch) => {
+ const columns = batch.schema.fields.map((_, i) => batch.getChildAt(i)!);
+ expect(columns.map((c) => c.get(idx))).toEqual(values[--expected_idx]);
+ });
+ });
+ test(`calls bind function with every batch`, () => {
+ const df = new DataFrame(datum.table());
+ let bind = jest.fn();
+ df.scanReverse(() => { }, bind);
+ for (let batch of df.chunks) {
+ expect(bind).toHaveBeenCalledWith(batch);
+ }
+ });
+ });
+ test(`count() returns the correct length`, () => {
+ const df = new DataFrame(datum.table());
+ const values = datum.values();
+ expect(df.count()).toEqual(values.length);
+ });
+ test(`getColumnIndex`, () => {
+ const df = new DataFrame(datum.table());
+ expect(df.getColumnIndex('i32')).toEqual(I32);
+ expect(df.getColumnIndex('f32')).toEqual(F32);
+ expect(df.getColumnIndex('dictionary')).toEqual(DICT);
+ });
+ const df = new DataFrame(datum.table());
+ const values = datum.values();
+ let get_i32: (idx: number) => number, get_f32: (idx: number) => number;
+ const filter_tests = [
+ {
+ name: `filter on f32 >= 0`,
+ filtered: df.filter(col('f32').ge(0)),
+ expected: values.filter((row) => row[F32] >= 0)
+ }, {
+ name: `filter on 0 <= f32`,
+ filtered: df.filter(lit(0).le(col('f32'))),
+ expected: values.filter((row) => 0 <= row[F32])
+ }, {
+ name: `filter on i32 <= 0`,
+ filtered: df.filter(col('i32').le(0)),
+ expected: values.filter((row) => row[I32] <= 0)
+ }, {
+ name: `filter on 0 >= i32`,
+ filtered: df.filter(lit(0).ge(col('i32'))),
+ expected: values.filter((row) => 0 >= row[I32])
+ }, {
+ name: `filter on f32 < 0`,
+ filtered: df.filter(col('f32').lt(0)),
+ expected: values.filter((row) => row[F32] < 0)
+ }, {
+            name: `filter on i32 > 0`,
+ filtered: df.filter(col('i32').gt(0)),
+ expected: values.filter((row) => row[I32] > 0)
+ }, {
+            name: `filter on f32 <= -.25 || f32 >= .25`,
+ filtered: df.filter(col('f32').le(-.25).or(col('f32').ge(.25))),
+ expected: values.filter((row) => row[F32] <= -.25 || row[F32] >= .25)
+ }, {
+            name: `filter on !(f32 <= -.25 || f32 >= .25) (not)`,
+ filtered: df.filter(col('f32').le(-.25).or(col('f32').ge(.25)).not()),
+ expected: values.filter((row) => !(row[F32] <= -.25 || row[F32] >= .25))
+ }, {
+ name: `filter method combines predicates (f32 >= 0 && i32 <= 0)`,
+ filtered: df.filter(col('i32').le(0)).filter(col('f32').ge(0)),
+ expected: values.filter((row) => row[I32] <= 0 && row[F32] >= 0)
+ }, {
+ name: `filter on dictionary == 'a'`,
+ filtered: df.filter(col('dictionary').eq('a')),
+ expected: values.filter((row) => row[DICT] === 'a')
+ }, {
+ name: `filter on 'a' == dictionary (commutativity)`,
+ filtered: df.filter(lit('a').eq(col('dictionary'))),
+ expected: values.filter((row) => row[DICT] === 'a')
+ }, {
+ name: `filter on dictionary != 'b'`,
+ filtered: df.filter(col('dictionary').ne('b')),
+ expected: values.filter((row) => row[DICT] !== 'b')
+ }, {
+ name: `filter on f32 >= i32`,
+ filtered: df.filter(col('f32').ge(col('i32'))),
+ expected: values.filter((row) => row[F32] >= row[I32])
+ }, {
+ name: `filter on f32 <= i32`,
+ filtered: df.filter(col('f32').le(col('i32'))),
+ expected: values.filter((row) => row[F32] <= row[I32])
+ }, {
+ name: `filter on f32*i32 > 0 (custom predicate)`,
+ filtered: df.filter(custom(
+ (idx: number) => (get_f32(idx) * get_i32(idx) > 0),
+ (batch: RecordBatch) => {
+ get_f32 = col('f32').bind(batch);
+ get_i32 = col('i32').bind(batch);
+ })),
+ expected: values.filter((row) => (row[F32] as number) * (row[I32] as number) > 0)
+ }, {
+ name: `filter out all records`,
+ filtered: df.filter(lit(1).eq(0)),
+ expected: []
+ }
+ ];
+ for (let this_test of filter_tests) {
+ const { name, filtered, expected } = this_test;
+ describe(name, () => {
+ test(`count() returns the correct length`, () => {
+ expect(filtered.count()).toEqual(expected.length);
+ });
+ describe(`scan()`, () => {
+ test(`iterates over expected values`, () => {
+ let expected_idx = 0;
+ filtered.scan((idx, batch) => {
+ const columns = batch.schema.fields.map((_, i) => batch.getChildAt(i)!);
+ expect(columns.map((c) => c.get(idx))).toEqual(expected[expected_idx++]);
+ });
+ });
+ test(`calls bind function lazily`, () => {
+ let bind = jest.fn();
+ filtered.scan(() => { }, bind);
+ if (expected.length) {
+ expect(bind).toHaveBeenCalled();
+ } else {
+ expect(bind).not.toHaveBeenCalled();
+ }
+ });
+ });
+ describe(`scanReverse()`, () => {
+ test(`iterates over expected values in reverse`, () => {
+ let expected_idx = expected.length;
+ filtered.scanReverse((idx, batch) => {
+ const columns = batch.schema.fields.map((_, i) => batch.getChildAt(i)!);
+ expect(columns.map((c) => c.get(idx))).toEqual(expected[--expected_idx]);
+ });
+ });
+ test(`calls bind function lazily`, () => {
+ let bind = jest.fn();
+ filtered.scanReverse(() => { }, bind);
+ if (expected.length) {
+ expect(bind).toHaveBeenCalled();
+ } else {
+ expect(bind).not.toHaveBeenCalled();
+ }
+ });
+ });
+ });
+ }
+ test(`countBy on dictionary returns the correct counts`, () => {
+ // Make sure countBy works both with and without the Col wrapper
+ // class
+ let expected: { [key: string]: number } = { 'a': 0, 'b': 0, 'c': 0 };
+ for (let row of values) {
+ expected[row[DICT]] += 1;
+ }
+
+ expect(df.countBy(col('dictionary')).toJSON()).toEqual(expected);
+ expect(df.countBy('dictionary').toJSON()).toEqual(expected);
+ });
+ test(`countBy on dictionary with filter returns the correct counts`, () => {
+ let expected: { [key: string]: number } = { 'a': 0, 'b': 0, 'c': 0 };
+ for (let row of values) {
+ if (row[I32] === 1) { expected[row[DICT]] += 1; }
+ }
+
+ expect(df.filter(col('i32').eq(1)).countBy('dictionary').toJSON()).toEqual(expected);
+ });
+ test(`countBy on non dictionary column throws error`, () => {
+ expect(() => { df.countBy('i32'); }).toThrow();
+            expect(() => { df.filter(col('dictionary').eq('a')).countBy('i32'); }).toThrow();
+ });
+ test(`countBy on non-existent column throws error`, () => {
+ expect(() => { df.countBy('FAKE' as any); }).toThrow();
+ });
+ test(`table.select() basic tests`, () => {
+ let selected = df.select('f32', 'dictionary');
+ expect(selected.schema.fields).toHaveLength(2);
+ expect(selected.schema.fields[0]).toEqual(df.schema.fields[0]);
+ expect(selected.schema.fields[1]).toEqual(df.schema.fields[2]);
+
+ expect(selected).toHaveLength(values.length);
+ let idx = 0, expected_row;
+ for (let row of selected) {
+ expected_row = values[idx++];
+ expect(row.f32).toEqual(expected_row[F32]);
+ expect(row.dictionary).toEqual(expected_row[DICT]);
+ }
+ });
+ test(`table.filter(..).count() on always false predicates returns 0`, () => {
+ expect(df.filter(col('i32').ge(100)).count()).toEqual(0);
+ expect(df.filter(col('dictionary').eq('z')).count()).toEqual(0);
+ });
+ describe(`lit-lit comparison`, () => {
+ test(`always-false count() returns 0`, () => {
+ expect(df.filter(lit('abc').eq('def')).count()).toEqual(0);
+ expect(df.filter(lit(0).ge(1)).count()).toEqual(0);
+ });
+ test(`always-true count() returns length`, () => {
+ expect(df.filter(lit('abc').eq('abc')).count()).toEqual(df.length);
+ expect(df.filter(lit(-100).le(0)).count()).toEqual(df.length);
+ });
+ });
+ describe(`col-col comparison`, () => {
+ test(`always-false count() returns 0`, () => {
+ expect(df.filter(col('dictionary').eq(col('i32'))).count()).toEqual(0);
+ });
+ test(`always-true count() returns length`, () => {
+ expect(df.filter(col('dictionary').eq(col('dictionary'))).count()).toEqual(df.length);
+ });
+ });
+ });
+ }
+});
+
+describe(`Predicate`, () => {
+ const p1 = col('a').gt(100);
+ const p2 = col('a').lt(1000);
+ const p3 = col('b').eq('foo');
+ const p4 = col('c').eq('bar');
+ const expected = [p1, p2, p3, p4];
+ test(`and flattens children`, () => {
+ expect(and(p1, p2, p3, p4).children).toEqual(expected);
+ expect(and(p1.and(p2), new And(p3, p4)).children).toEqual(expected);
+ expect(and(p1.and(p2, p3, p4)).children).toEqual(expected);
+ });
+ test(`or flattens children`, () => {
+ expect(or(p1, p2, p3, p4).children).toEqual(expected);
+ expect(or(p1.or(p2), new Or(p3, p4)).children).toEqual(expected);
+ expect(or(p1.or(p2, p3, p4)).children).toEqual(expected);
+ });
+});
diff --git a/src/arrow/js/test/unit/generated-data-tests.ts b/src/arrow/js/test/unit/generated-data-tests.ts
new file mode 100644
index 000000000..ab1276f76
--- /dev/null
+++ b/src/arrow/js/test/unit/generated-data-tests.ts
@@ -0,0 +1,61 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import '../jest-extensions';
+import * as generate from '../generate-test-data';
+import { validateTable, validateRecordBatch, validateVector } from './generated-data-validators';
+
+describe('Generated Test Data', () => {
+ describe('Table', () => { validateTable(generate.table([100, 150, 75])).run(); });
+ describe('RecordBatch', () => { validateRecordBatch(generate.recordBatch()).run(); });
+ describe('NullVector', () => { validateVector(generate.null_()).run(); });
+ describe('BoolVector', () => { validateVector(generate.bool()).run(); });
+ describe('Int8Vector', () => { validateVector(generate.int8()).run(); });
+ describe('Int16Vector', () => { validateVector(generate.int16()).run(); });
+ describe('Int32Vector', () => { validateVector(generate.int32()).run(); });
+ describe('Int64Vector', () => { validateVector(generate.int64()).run(); });
+ describe('Uint8Vector', () => { validateVector(generate.uint8()).run(); });
+ describe('Uint16Vector', () => { validateVector(generate.uint16()).run(); });
+ describe('Uint32Vector', () => { validateVector(generate.uint32()).run(); });
+ describe('Uint64Vector', () => { validateVector(generate.uint64()).run(); });
+ describe('Float16Vector', () => { validateVector(generate.float16()).run(); });
+ describe('Float32Vector', () => { validateVector(generate.float32()).run(); });
+ describe('Float64Vector', () => { validateVector(generate.float64()).run(); });
+ describe('Utf8Vector', () => { validateVector(generate.utf8()).run(); });
+ describe('BinaryVector', () => { validateVector(generate.binary()).run(); });
+ describe('FixedSizeBinaryVector', () => { validateVector(generate.fixedSizeBinary()).run(); });
+ describe('DateDayVector', () => { validateVector(generate.dateDay()).run(); });
+ describe('DateMillisecondVector', () => { validateVector(generate.dateMillisecond()).run(); });
+ describe('TimestampSecondVector', () => { validateVector(generate.timestampSecond()).run(); });
+ describe('TimestampMillisecondVector', () => { validateVector(generate.timestampMillisecond()).run(); });
+ describe('TimestampMicrosecondVector', () => { validateVector(generate.timestampMicrosecond()).run(); });
+ describe('TimestampNanosecondVector', () => { validateVector(generate.timestampNanosecond()).run(); });
+ describe('TimeSecondVector', () => { validateVector(generate.timeSecond()).run(); });
+ describe('TimeMillisecondVector', () => { validateVector(generate.timeMillisecond()).run(); });
+ describe('TimeMicrosecondVector', () => { validateVector(generate.timeMicrosecond()).run(); });
+ describe('TimeNanosecondVector', () => { validateVector(generate.timeNanosecond()).run(); });
+ describe('DecimalVector', () => { validateVector(generate.decimal()).run(); });
+ describe('ListVector', () => { validateVector(generate.list()).run(); });
+ describe('StructVector', () => { validateVector(generate.struct()).run(); });
+ describe('DenseUnionVector', () => { validateVector(generate.denseUnion()).run(); });
+ describe('SparseUnionVector', () => { validateVector(generate.sparseUnion()).run(); });
+ describe('DictionaryVector', () => { validateVector(generate.dictionary()).run(); });
+ describe('IntervalDayTimeVector', () => { validateVector(generate.intervalDayTime()).run(); });
+ describe('IntervalYearMonthVector', () => { validateVector(generate.intervalYearMonth()).run(); });
+ describe('FixedSizeListVector', () => { validateVector(generate.fixedSizeList()).run(); });
+ describe('MapVector', () => { validateVector(generate.map()).run(); });
+});
diff --git a/src/arrow/js/test/unit/generated-data-validators.ts b/src/arrow/js/test/unit/generated-data-validators.ts
new file mode 100644
index 000000000..910386d4a
--- /dev/null
+++ b/src/arrow/js/test/unit/generated-data-validators.ts
@@ -0,0 +1,184 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import '../jest-extensions';
+import {
+ GeneratedTable,
+ GeneratedRecordBatch,
+ GeneratedVector
+} from '../generate-test-data';
+
+import { util } from 'apache-arrow';
+const { createElementComparator: compare } = util;
+
+type DeferredTest = { description: string; tests?: DeferredTest[]; run: (...args: any[]) => any };
+
+function deferTest(description: string, run: (...args: any[]) => any) {
+ return { description, run: () => test(description, run) } as DeferredTest;
+}
+
+function deferDescribe(description: string, tests: DeferredTest | DeferredTest[]) {
+ const t = (Array.isArray(tests) ? tests : [tests]).filter(Boolean);
+    return { description, tests: t, run: () => describe(description, () => { t.forEach((x) => x.run()); }) };
+}
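+
+// Sketch of the deferred pattern (illustrative): build the tree first, then
+// call run() to register everything with jest in one pass:
+//
+//   const suite = deferDescribe('outer', [
+//       deferTest('inner', () => expect(1 + 1).toBe(2))
+//   ]);
+//   suite.run(); // describe('outer', () => test('inner', ...))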
+
+export function validateTable({ keys, rows, cols, rowBatches, colBatches, keyBatches, table }: GeneratedTable) {
+ return deferDescribe(`Table: ${table.schema}`, ([] as DeferredTest[]).concat(
+ validateVector({ values: rows, vector: table }),
+ table.chunks.map((recordBatch, i) =>
+ deferDescribe(`recordBatch ${i}`, validateRecordBatch({
+ keys: keyBatches[i], rows: rowBatches[i], cols: colBatches[i], recordBatch
+ }))
+ ),
+ table.schema.fields.map((field, i) =>
+ deferDescribe(`column ${i}: ${field}`, validateVector({
+ keys: keys()[i],
+ values: () => cols()[i],
+ vector: table.getColumnAt(i)!
+ }))
+ )
+ ));
+}
+
+export function validateRecordBatch({ rows, cols, keys, recordBatch }: GeneratedRecordBatch) {
+ return deferDescribe(`RecordBatch: ${recordBatch.schema}`, ([] as DeferredTest[]).concat(
+ validateVector({ values: rows, vector: recordBatch }),
+ recordBatch.schema.fields.map((field, i) =>
+ deferDescribe(`Field: ${field}`, validateVector({
+ keys: keys()[i],
+ values: () => cols()[i],
+ vector: recordBatch.getChildAt(i)!
+ }))
+ )
+ ));
+}
+
+export function validateVector({ values: createTestValues, vector, keys }: GeneratedVector, sliced = false) {
+
+ const values = createTestValues();
+ const suites = [
+ deferDescribe(`Validate ${vector.type} (sliced=${sliced})`, [
+ deferTest(`length is correct`, () => {
+ expect(vector).toHaveLength(values.length);
+ }),
+ deferTest(`gets expected values`, () => {
+ expect.hasAssertions();
+ let i = -1, n = vector.length, actual, expected;
+ try {
+ while (++i < n) {
+ actual = vector.get(i);
+ expected = values[i];
+ expect(actual).toArrowCompare(expected);
+ }
+ } catch (e) { throw new Error(`${vector}[${i}]: ${e}`); }
+ }),
+ (keys && keys.length > 0) && deferTest(`dictionary indices should match`, () => {
+ expect.hasAssertions();
+ let indices = (vector as any).indices;
+ let i = -1, n = indices.length;
+ try {
+ while (++i < n) {
+ indices.isValid(i)
+ ? expect(indices.get(i)).toBe(keys[i])
+ : expect(indices.get(i)).toBeNull();
+ }
+ } catch (e) { throw new Error(`${indices}[${i}]: ${e}`); }
+ }) || null as any as DeferredTest,
+ deferTest(`sets expected values`, () => {
+ expect.hasAssertions();
+ let i = -1, n = vector.length, actual, expected;
+ try {
+ while (++i < n) {
+ expected = vector.get(i);
+ vector.set(i, expected);
+ actual = vector.get(i);
+ expect(actual).toArrowCompare(expected);
+ }
+ } catch (e) { throw new Error(`${vector}[${i}]: ${e}`); }
+ }),
+ deferTest(`iterates expected values`, () => {
+ expect.hasAssertions();
+ let i = -1, actual, expected;
+ try {
+ for (actual of vector) {
+ expected = values[++i];
+ expect(actual).toArrowCompare(expected);
+ }
+ } catch (e) { throw new Error(`${vector}[${i}]: ${e}`); }
+ }),
+ deferTest(`indexOf returns expected values`, () => {
+ expect.hasAssertions();
+ let i = -1, n = vector.length;
+ const shuffled = shuffle(values);
+ let value: any, actual, expected;
+ try {
+ while (++i < n) {
+ value = shuffled[i];
+ actual = vector.indexOf(value);
+ expected = values.findIndex(compare(value));
+ expect(actual).toBe(expected);
+ }
+ // I would be pretty surprised if randomatic ever generates these values
+ expect(vector.indexOf('purple elephants')).toBe(-1);
+ expect(vector.indexOf('whistling wombats')).toBe(-1);
+ expect(vector.indexOf('carnivorous novices')).toBe(-1);
+ } catch (e) { throw new Error(`${vector}[${i}]: ${e}`); }
+ })
+ ])
+ ] as DeferredTest[];
+
+ if (!sliced) {
+ const begin = (values.length * .25) | 0;
+ const end = (values.length * .75) | 0;
+ suites.push(
+ // test slice with no args
+ validateVector({
+ vector: vector.slice(),
+ values: () => values.slice(),
+ keys: keys ? keys.slice() : undefined
+ }, true),
+ // test slicing half the array
+ validateVector({
+ vector: vector.slice(begin, end),
+ values: () => values.slice(begin, end),
+ keys: keys ? keys.slice(begin, end) : undefined
+ }, true),
+        // test concatenating the two ends together
+ validateVector({
+ vector: vector.slice(0, begin).concat(vector.slice(end)),
+ values: () => values.slice(0, begin).concat(values.slice(end)),
+ keys: keys ? [...keys.slice(0, begin), ...keys.slice(end)] : undefined
+ }, true)
+ );
+
+ return deferDescribe(`Vector`, suites);
+ }
+
+ return suites[0];
+}
+
+function shuffle(input: any[]) {
+ const result = input.slice();
+ let j, tmp, i = result.length;
+ while (--i > 0) {
+ j = (Math.random() * (i + 1)) | 0;
+ tmp = result[i];
+ result[i] = result[j];
+ result[j] = tmp;
+ }
+ return result;
+}
diff --git a/src/arrow/js/test/unit/int-tests.ts b/src/arrow/js/test/unit/int-tests.ts
new file mode 100644
index 000000000..15c75e1a1
--- /dev/null
+++ b/src/arrow/js/test/unit/int-tests.ts
@@ -0,0 +1,241 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import * as Arrow from 'apache-arrow';
+const { Int64, Uint64, Int128 } = Arrow.util;
+
+describe(`Uint64`, () => {
+ test(`gets expected high/low bytes`, () => {
+ let i = new Uint64(new Uint32Array([5, 0]));
+ expect(i.high()).toEqual(0);
+ expect(i.low()).toEqual(5);
+ });
+ test(`adds 32-bit numbers`, () => {
+ let a = new Uint64(new Uint32Array([5, 0]));
+ let b = new Uint64(new Uint32Array([9, 0]));
+ let expected = new Uint64(new Uint32Array([14, 0]));
+ expect(a.plus(b)).toEqual(expected);
+ });
+ test(`addition overflows 32-bit numbers`, () => {
+ let a = new Uint64(new Uint32Array([0xffffffff, 0]));
+ let b = new Uint64(new Uint32Array([9, 0]));
+ let expected = new Uint64(new Uint32Array([8, 1]));
+ expect(a.plus(b)).toEqual(expected);
+ });
+ test(`multiplies 32-bit numbers`, () => {
+ let a = new Uint64(new Uint32Array([5, 0]));
+ let b = new Uint64(new Uint32Array([9, 0]));
+ let expected = new Uint64(new Uint32Array([45, 0]));
+ expect(a.times(b)).toEqual(expected);
+ });
+ test(`multiplication overflows 32-bit numbers`, () => {
+ let a = new Uint64(new Uint32Array([0x80000000, 0]));
+ let b = new Uint64(new Uint32Array([3, 0]));
+ let expected = new Uint64(new Uint32Array([0x80000000, 1]));
+ expect(a.times(b)).toEqual(expected);
+ });
+  test(`multiplication is commutative`, () => {
+ let a = new Uint64(new Uint32Array([0x80000000, 0]));
+ let b = new Uint64(new Uint32Array([3, 0]));
+ expect(Uint64.multiply(a, b)).toEqual(Uint64.multiply(b,a));
+ });
+ test(`lessThan works on 32-bit numbers`, () => {
+ let a = new Uint64(new Uint32Array([0x0000abcd, 0]));
+ let b = new Uint64(new Uint32Array([0x0000abcf, 0]));
+ expect(a.lessThan(b)).toBeTruthy();
+ });
+ test(`lessThan works on 64-bit numbers`, () => {
+ let a = new Uint64(new Uint32Array([123, 32]));
+ let b = new Uint64(new Uint32Array([568, 32]));
+ expect(a.lessThan(b)).toBeTruthy();
+ });
+ test(`fromString parses string`, () => {
+    expect(Uint64.fromString('6789123456789')).toEqual(new Uint64(new Uint32Array([0xb74abf15, 0x62c])));
+ });
+ test(`fromString parses big (full unsigned 64-bit) string`, () => {
+ expect(Uint64.fromString('18364758544493064720')).toEqual(new Uint64(new Uint32Array([0x76543210, 0xfedcba98])));
+ });
+ test(`fromNumber converts 53-ish bit number`, () => {
+ expect(Uint64.fromNumber(8086463330923024)).toEqual(new Uint64(new Uint32Array([0x76543210, 0x001cba98])));
+ });
+});
+
+describe(`Int64`, () => {
+ test(`gets expected high/low bytes`, () => {
+ let i = new Int64(new Uint32Array([5, 0]));
+ expect(i.high()).toEqual(0);
+ expect(i.low()).toEqual(5);
+ });
+ test(`adds 32-bit numbers`, () => {
+ let a = new Int64(new Uint32Array([5, 0]));
+ let b = new Int64(new Uint32Array([9, 0]));
+ let expected = new Int64(new Uint32Array([14, 0]));
+ expect(a.plus(b)).toEqual(expected);
+ });
+ test(`adds negative 32-bit numbers`, () => {
+ let a = new Int64(new Uint32Array([56789 , 0]));
+ let b = new Int64(new Uint32Array([-66789, -1]));
+ let expected = new Int64(new Uint32Array([-10000, -1]));
+ expect(a.plus(b)).toEqual(expected);
+ });
+ test(`addition overflows 32-bit numbers`, () => {
+ let a = new Int64(new Uint32Array([0xffffffff, 0]));
+ let b = new Int64(new Uint32Array([9, 0]));
+ let expected = new Int64(new Uint32Array([8, 1]));
+ expect(a.plus(b)).toEqual(expected);
+ });
+ test(`multiplies 32-bit numbers`, () => {
+ let a = new Int64(new Uint32Array([5, 0]));
+ let b = new Int64(new Uint32Array([9, 0]));
+ let expected = new Int64(new Uint32Array([45, 0]));
+ expect(a.times(b)).toEqual(expected);
+ });
+ test(`multiplication overflows 32-bit numbers`, () => {
+ let a = new Int64(new Uint32Array([0x80000000, 0]));
+ let b = new Int64(new Uint32Array([3, 0]));
+ let expected = new Int64(new Uint32Array([0x80000000, 1]));
+ expect(a.times(b)).toEqual(expected);
+ });
+ test(`multiplication works on negative numbers`, () => {
+ let a = new Int64(new Uint32Array([-5, -1]));
+ let b = new Int64(new Uint32Array([-100, -1]));
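+    // times() mutates the receiver, so each assertion below multiplies the
+    // running product held in `a` by b again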
+ expect(a.times(b)).toEqual(new Int64(new Uint32Array([ 500, 0])));
+ expect(a.times(b)).toEqual(new Int64(new Uint32Array([ -50000, -1])));
+ expect(a.times(b)).toEqual(new Int64(new Uint32Array([5000000, 0])));
+ });
+  test(`multiplication is commutative`, () => {
+ let a = new Int64(new Uint32Array([0x80000000, 0]));
+ let b = new Int64(new Uint32Array([3, 0]));
+ expect(Int64.multiply(a, b)).toEqual(Int64.multiply(b,a));
+ });
+ test(`lessThan works on 32-bit numbers`, () => {
+ let a = new Int64(new Uint32Array([0x0000abcd, 0]));
+ let b = new Int64(new Uint32Array([0x0000abcf, 0]));
+ expect(a.lessThan(b)).toBeTruthy();
+ });
+ test(`lessThan works on 64-bit numbers`, () => {
+ let a = new Int64(new Uint32Array([123, 32]));
+ let b = new Int64(new Uint32Array([568, 32]));
+ expect(a.lessThan(b)).toBeTruthy();
+ });
+ test(`lessThan works on negative numbers`, () => {
+ let a = new Int64(new Uint32Array([0, -158]));
+ let b = new Int64(new Uint32Array([-3, -1]));
+ expect(a.lessThan(b)).toBeTruthy();
+ });
+ test(`lessThan works on mixed numbers`, () => {
+ let a = new Int64(new Uint32Array([-3, -1]));
+ let b = new Int64(new Uint32Array([ 0, 3]));
+ expect(a.lessThan(b)).toBeTruthy();
+ });
+ test(`negate works on 32-bit number`, () => {
+    expect(new Int64(new Uint32Array([123456, 0])).negate()).toEqual(new Int64(new Uint32Array([-123456, -1])));
+ });
+ test(`double negation is noop`, () => {
+ let test = new Int64(new Uint32Array([6789, 12345]));
+ let expected = new Int64(new Uint32Array([6789, 12345]));
+ expect(test.negate().negate()).toEqual(expected);
+ });
+ test(`negate works on 64-bit number`, () => {
+    expect(new Int64(new Uint32Array([0xb74abf15, 0x62c])).negate()).toEqual(new Int64(new Uint32Array([0x48b540eb, 0xfffff9d3])));
+ });
+ test(`fromString parses string`, () => {
+ expect(Int64.fromString('6789123456789')).toEqual(new Int64(new Uint32Array([0xb74abf15, 0x62c])));
+ });
+ test(`fromString parses negative string`, () => {
+ expect(Int64.fromString('-6789123456789')).toEqual(new Int64(new Uint32Array([0x48b540eb, 0xfffff9d3])));
+ });
+ test(`fromNumber converts 53-ish bit number`, () => {
+ expect(Int64.fromNumber(8086463330923024)).toEqual(new Int64(new Uint32Array([0x76543210, 0x001cba98])));
+ expect(Int64.fromNumber(-8086463330923024)).toEqual(new Int64(new Uint32Array([0x89abcdf0, 0xffe34567])));
+ });
+});
+
+describe(`Int128`, () => {
+ test(`gets expected bytes`, () => {
+ let i = new Int128(new Uint32Array([4, 3, 2, 1]));
+ expect(i.high().high()).toEqual(1);
+ expect(i.high().low() ).toEqual(2);
+ expect(i.low().high() ).toEqual(3);
+ expect(i.low().low() ).toEqual(4);
+ });
+ test(`adds 32-bit numbers`, () => {
+ let a = new Int128(new Uint32Array([5, 0, 0, 0]));
+ let b = new Int128(new Uint32Array([9, 0, 0, 0]));
+ let expected = new Int128(new Uint32Array([14, 0, 0, 0]));
+ expect(a.plus(b)).toEqual(expected);
+ });
+ test(`adds negative 32-bit numbers`, () => {
+ let a = new Int128(new Uint32Array([56789 , 0, 0, 0]));
+ let b = new Int128(new Uint32Array([-66789, -1, -1, -1]));
+ let expected = new Int128(new Uint32Array([-10000, -1, -1, -1]));
+ expect(a.plus(b)).toEqual(expected);
+ });
+ test(`addition overflows 32-bit numbers`, () => {
+ let a = new Int128(new Uint32Array([0xffffffff, 0, 0, 0]));
+ let b = new Int128(new Uint32Array([9, 0, 0, 0]));
+ let expected = new Int128(new Uint32Array([8, 1, 0, 0]));
+ expect(a.plus(b)).toEqual(expected);
+ });
+ test(`multiplies 32-bit numbers`, () => {
+ let a = new Int128(new Uint32Array([5, 0, 0, 0]));
+ let b = new Int128(new Uint32Array([9, 0, 0, 0]));
+ let expected = new Int128(new Uint32Array([45, 0, 0, 0]));
+ expect(a.times(b)).toEqual(expected);
+ });
+ test(`multiplication overflows 32-bit numbers`, () => {
+ let a = new Int128(new Uint32Array([0x80000000, 0, 0, 0]));
+ let b = new Int128(new Uint32Array([3, 0, 0, 0]));
+ let expected = new Int128(new Uint32Array([0x80000000, 1, 0, 0]));
+ expect(a.times(b)).toEqual(expected);
+ });
+ test(`multiplication works on negative numbers`, () => {
+ let a = new Int128(new Uint32Array([-5, -1, -1, -1]));
+ let b = new Int128(new Uint32Array([-100, -1, -1, -1]));
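+    // as with Int64, times() mutates the receiver, compounding the product
+    // across the three assertions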
+ expect(a.times(b)).toEqual(new Int128(new Uint32Array([ 500, 0, 0, 0])));
+ expect(a.times(b)).toEqual(new Int128(new Uint32Array([ -50000, -1, -1, -1])));
+ expect(a.times(b)).toEqual(new Int128(new Uint32Array([5000000, 0, 0, 0])));
+ });
+  test(`multiplication is commutative`, () => {
+ let a = new Int128(new Uint32Array([4, 3, 2, 1]));
+ let b = new Int128(new Uint32Array([3, 0, 0, 0]));
+ expect(Int128.multiply(a, b)).toEqual(Int128.multiply(b,a));
+ });
+ test(`multiplication can produce 128-bit number`, () => {
+ let a = new Int128(new Uint32Array([0, 0xf0000000, 0, 0]));
+ let b = new Int128(new Uint32Array([0, 0x10000000, 0, 0]));
+ expect(a.times(b)).toEqual(new Int128(new Uint32Array([0x00000000, 0x00000000, 0x00000000, 0xf000000])));
+ });
+ test(`fromString parses string`, () => {
+ expect(Int128.fromString('1002111867823618826746863804903129070'))
+        .toEqual(new Int128(new Uint32Array([0x00c0ffee,
+                                             0x00c0ffee,
+                                             0x00c0ffee,
+                                             0x00c0ffee])));
+ });
+ test(`fromString parses negative string`, () => {
+ expect(Int128.fromString('-12345678901234567890123456789012345678'))
+        .toEqual(new Int128(new Uint32Array([0x21c70cb2,
+                                             0x3bb66faf,
+                                             0x0ffdccec,
+                                             0xf6b64f09])));
+ });
+ test(`fromNumber converts 53-ish bit number`, () => {
+ expect(Int128.fromNumber(8086463330923024)).toEqual(new Int128(new Uint32Array([0x76543210, 0x001cba98, 0, 0])));
+ expect(Int128.fromNumber(-8086463330923024)).toEqual(new Int128(new Uint32Array([0x89abcdf0, 0xffe34567, 0xffffffff, 0xffffffff])));
+ });
+});
diff --git a/src/arrow/js/test/unit/ipc/helpers.ts b/src/arrow/js/test/unit/ipc/helpers.ts
new file mode 100644
index 000000000..9fccefec9
--- /dev/null
+++ b/src/arrow/js/test/unit/ipc/helpers.ts
@@ -0,0 +1,202 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import '../../jest-extensions';
+
+import {
+ Table,
+ RecordBatchWriter,
+ RecordBatchFileWriter,
+ RecordBatchJSONWriter,
+ RecordBatchStreamWriter,
+} from 'apache-arrow';
+
+import * as fs from 'fs';
+import { fs as memfs } from 'memfs';
+import { Readable, PassThrough } from 'stream';
+import randomatic from 'randomatic';
+
+export abstract class ArrowIOTestHelper {
+
+ constructor(public table: Table) {}
+
+ public static file(table: Table) { return new ArrowFileIOTestHelper(table); }
+ public static json(table: Table) { return new ArrowJsonIOTestHelper(table); }
+ public static stream(table: Table) { return new ArrowStreamIOTestHelper(table); }
+
+ protected abstract writer(table: Table): RecordBatchWriter;
+ protected async filepath(table: Table): Promise<fs.PathLike> {
+ const path = `/${randomatic('a0', 20)}.arrow`;
+ const data = await this.writer(table).toUint8Array();
+ await memfs.promises.writeFile(path, data);
+ return path;
+ }
+
+ buffer(testFn: (buffer: Uint8Array) => void | Promise<void>) {
+ return async () => {
+ expect.hasAssertions();
+ await testFn(await this.writer(this.table).toUint8Array());
+ };
+ }
+ iterable(testFn: (iterable: Generator<Uint8Array>) => void | Promise<void>) {
+ return async () => {
+ expect.hasAssertions();
+ await testFn(chunkedIterable(await this.writer(this.table).toUint8Array()));
+ };
+ }
+ asyncIterable(testFn: (asyncIterable: AsyncGenerator<Uint8Array>) => void | Promise<void>) {
+ return async () => {
+ expect.hasAssertions();
+ await testFn(asyncChunkedIterable(await this.writer(this.table).toUint8Array()));
+ };
+ }
+ fsFileHandle(testFn: (handle: fs.promises.FileHandle) => void | Promise<void>) {
+ return async () => {
+ expect.hasAssertions();
+ const path = await this.filepath(this.table);
+ await testFn(<any> await memfs.promises.open(path, 'r'));
+ await memfs.promises.unlink(path);
+ };
+ }
+ fsReadableStream(testFn: (stream: fs.ReadStream) => void | Promise<void>) {
+ return async () => {
+ expect.hasAssertions();
+ const path = await this.filepath(this.table);
+ await testFn(<any> memfs.createReadStream(path));
+ await memfs.promises.unlink(path);
+ };
+ }
+ nodeReadableStream(testFn: (stream: NodeJS.ReadableStream) => void | Promise<void>) {
+ return async () => {
+ expect.hasAssertions();
+ const sink = new PassThrough();
+ sink.end(await this.writer(this.table).toUint8Array());
+ await testFn(sink);
+ };
+ }
+ whatwgReadableStream(testFn: (stream: ReadableStream) => void | Promise<void>) {
+ return async () => {
+ expect.hasAssertions();
+ const path = await this.filepath(this.table);
+ await testFn(nodeToDOMStream(memfs.createReadStream(path)));
+ await memfs.promises.unlink(path);
+ };
+ }
+ whatwgReadableByteStream(testFn: (stream: ReadableStream) => void | Promise<void>) {
+ return async () => {
+ expect.hasAssertions();
+ const path = await this.filepath(this.table);
+ await testFn(nodeToDOMStream(memfs.createReadStream(path), { type: 'bytes' }));
+ await memfs.promises.unlink(path);
+ };
+ }
+}
+
+class ArrowFileIOTestHelper extends ArrowIOTestHelper {
+ constructor(table: Table) { super(table); }
+ protected writer(table: Table) {
+ return RecordBatchFileWriter.writeAll(table);
+ }
+}
+
+class ArrowJsonIOTestHelper extends ArrowIOTestHelper {
+ constructor(table: Table) { super(table); }
+ protected writer(table: Table) {
+ return RecordBatchJSONWriter.writeAll(table);
+ }
+}
+
+class ArrowStreamIOTestHelper extends ArrowIOTestHelper {
+ constructor(table: Table) { super(table); }
+ protected writer(table: Table) {
+ return RecordBatchStreamWriter.writeAll(table);
+ }
+}
+
+export function* chunkedIterable(buffer: Uint8Array) {
+ let offset = 0, size = 0;
+ while (offset < buffer.byteLength) {
+ size = yield buffer.subarray(offset, offset +=
+ (isNaN(+size) ? buffer.byteLength - offset : size));
+ }
+}
+
+export async function* asyncChunkedIterable(buffer: Uint8Array) {
+ let offset = 0, size = 0;
+ while (offset < buffer.byteLength) {
+ size = yield buffer.subarray(offset, offset +=
+ (isNaN(+size) ? buffer.byteLength - offset : size));
+ }
+}
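+
+// Both generators speak a small pull protocol (illustrative): the value
+// passed to next() sets the size of the next chunk, and a missing size
+// drains the remainder in one piece:
+//
+//   const it = chunkedIterable(buffer);
+//   it.next();   // primes the generator (first chunk is empty: size = 0)
+//   it.next(64); // yields the next 64 bytes
+//   it.next();   // no size -> NaN -> yields everything that's left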
+
+export async function concatBuffersAsync(iterator: AsyncIterable<Uint8Array> | ReadableStream) {
+ if (iterator instanceof ReadableStream) {
+ iterator = readableDOMStreamToAsyncIterator(iterator);
+ }
+ let chunks = [], total = 0;
+ for await (const chunk of iterator) {
+ chunks.push(chunk);
+ total += chunk.byteLength;
+ }
+ return chunks.reduce((x, buffer) => {
+ x.buffer.set(buffer, x.offset);
+ x.offset += buffer.byteLength;
+ return x;
+ }, { offset: 0, buffer: new Uint8Array(total) }).buffer;
+}
+
+export async function* readableDOMStreamToAsyncIterator<T>(stream: ReadableStream<T>) {
+ // Get a lock on the stream
+ const reader = stream.getReader();
+ try {
+ while (true) {
+ // Read from the stream
+ const { done, value } = await reader.read();
+ // Exit if we're done
+ if (done) { break; }
+ // Else yield the chunk
+ yield value as T;
+ }
+ } finally {
+ try { stream.locked && reader.releaseLock(); } catch (e) {}
+ }
+}
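+
+// Usage sketch (illustrative): adapt a WHATWG stream for `for await`:
+//
+//   for await (const chunk of readableDOMStreamToAsyncIterator(stream)) {
+//       // consume chunks; the lock is released when iteration ends
+//   }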
+
+export function nodeToDOMStream<T = any>(stream: NodeJS.ReadableStream, opts: any = {}) {
+ stream = new Readable((stream as any)._readableState).wrap(stream);
+ return new ReadableStream<T>({
+ ...opts,
+ start(controller) {
+ stream.pause();
+ stream.on('data', (chunk) => {
+ controller.enqueue(chunk);
+ stream.pause();
+ });
+ stream.on('end', () => controller.close());
+ stream.on('error', e => controller.error(e));
+ },
+ pull() { stream.resume(); },
+ cancel(reason) {
+ stream.pause();
+ if (typeof (stream as any).cancel === 'function') {
+ return (stream as any).cancel(reason);
+ } else if (typeof (stream as any).destroy === 'function') {
+ return (stream as any).destroy(reason);
+ }
+ }
+ });
+}
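+
+// Backpressure note: the 'data' handler re-pauses the Node stream after every
+// chunk and pull() resumes it, so the DOM side receives roughly one chunk per
+// pull request rather than being flooded.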
diff --git a/src/arrow/js/test/unit/ipc/message-reader-tests.ts b/src/arrow/js/test/unit/ipc/message-reader-tests.ts
new file mode 100644
index 000000000..c48aa2ce1
--- /dev/null
+++ b/src/arrow/js/test/unit/ipc/message-reader-tests.ts
@@ -0,0 +1,109 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// import * as fs from 'fs';
+import {
+ generateRandomTables,
+ // generateDictionaryTables
+} from '../../data/tables';
+
+import { ArrowIOTestHelper } from './helpers';
+import { MessageReader, AsyncMessageReader } from 'apache-arrow';
+
+for (const table of generateRandomTables([10, 20, 30])) {
+
+ const io = ArrowIOTestHelper.stream(table);
+ const name = `[\n ${table.schema.fields.join(',\n ')}\n]`;
+ const numMessages = table.chunks.reduce((numMessages, batch) => {
+ return numMessages +
+ /* recordBatch message */ 1 +
+ /* dictionary messages */ batch.dictionaries.size;
+ }, /* schema message */ 1);
+
+ const validate = validateMessageReader.bind(0, numMessages);
+ const validateAsync = validateAsyncMessageReader.bind(0, numMessages);
+
+ describe(`MessageReader (${name})`, () => {
+ describe(`should read all Messages`, () => {
+ test(`Uint8Array`, io.buffer(validate));
+ test(`Iterable`, io.iterable(validate));
+ });
+ });
+
+ describe(`AsyncMessageReader (${name})`, () => {
+ describe(`should read all Messages`, () => {
+ test('AsyncIterable', io.asyncIterable(validateAsync));
+ test('fs.FileHandle', io.fsFileHandle(validateAsync));
+ test('fs.ReadStream', io.fsReadableStream(validateAsync));
+ test('stream.Readable', io.nodeReadableStream(validateAsync));
+ test('whatwg.ReadableStream', io.whatwgReadableStream(validateAsync));
+ test('whatwg.ReadableByteStream', io.whatwgReadableByteStream(validateAsync));
+ });
+ });
+}
+
+export function validateMessageReader(numMessages: number, source: any) {
+ const reader = new MessageReader(source);
+ let index = 0;
+ for (let message of reader) {
+
+ if (index === 0) {
+ expect(message.isSchema()).toBe(true);
+ expect(message.bodyLength).toBe(0);
+ } else {
+ expect(message.isSchema()).toBe(false);
+ expect(message.isRecordBatch() || message.isDictionaryBatch()).toBe(true);
+ }
+
+ try {
+ expect(message.bodyLength % 8).toBe(0);
+ } catch (e) { throw new Error(`bodyLength: ${e}`); }
+
+ const body = reader.readMessageBody(message.bodyLength);
+ expect(body).toBeInstanceOf(Uint8Array);
+ expect(body.byteLength).toBe(message.bodyLength);
+ expect(index++).toBeLessThan(numMessages);
+ }
+ expect(index).toBe(numMessages);
+ reader.return();
+}
+
+export async function validateAsyncMessageReader(numMessages: number, source: any) {
+ const reader = new AsyncMessageReader(source);
+ let index = 0;
+ for await (let message of reader) {
+
+ if (index === 0) {
+ expect(message.isSchema()).toBe(true);
+ expect(message.bodyLength).toBe(0);
+ } else {
+ expect(message.isSchema()).toBe(false);
+ expect(message.isRecordBatch() || message.isDictionaryBatch()).toBe(true);
+ }
+
+ try {
+ expect(message.bodyLength % 8).toBe(0);
+ } catch (e) { throw new Error(`bodyLength: ${e}`); }
+
+ const body = await reader.readMessageBody(message.bodyLength);
+ expect(body).toBeInstanceOf(Uint8Array);
+ expect(body.byteLength).toBe(message.bodyLength);
+ expect(index++).toBeLessThan(numMessages);
+ }
+ expect(index).toBe(numMessages);
+ await reader.return();
+}
diff --git a/src/arrow/js/test/unit/ipc/reader/file-reader-tests.ts b/src/arrow/js/test/unit/ipc/reader/file-reader-tests.ts
new file mode 100644
index 000000000..a7ddfc940
--- /dev/null
+++ b/src/arrow/js/test/unit/ipc/reader/file-reader-tests.ts
@@ -0,0 +1,123 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import {
+ generateRandomTables,
+ // generateDictionaryTables
+} from '../../../data/tables';
+import { ArrowIOTestHelper } from '../helpers';
+import { toArray } from 'ix/asynciterable/toarray';
+
+import {
+ validateRecordBatchReader,
+ validateAsyncRecordBatchReader
+} from '../validate';
+
+import {
+ RecordBatchReader,
+ RecordBatchFileReader,
+ AsyncRecordBatchFileReader
+} from 'apache-arrow';
+
+for (const table of generateRandomTables([10, 20, 30])) {
+
+ const io = ArrowIOTestHelper.file(table);
+ const name = `[\n ${table.schema.fields.join(',\n ')}\n]`;
+
+ const validate = (source: any) => { validateRecordBatchReader('file', 3, RecordBatchReader.from(source)); };
+ const validateAsync = async (source: any) => { await validateAsyncRecordBatchReader('file', 3, await RecordBatchReader.from(source)); };
+ const validateAsyncWrapped = async (source: any) => { await validateAsyncRecordBatchReader('file', 3, await RecordBatchReader.from(Promise.resolve(source))); };
+
+ describe(`RecordBatchFileReader (${name})`, () => {
+ describe(`should read all RecordBatches`, () => {
+ test(`Uint8Array`, io.buffer(validate));
+ test(`Iterable`, io.iterable(validate));
+ });
+ describe(`should allow random access to record batches after iterating when autoDestroy=false`, () => {
+ test(`Uint8Array`, io.buffer(validateRandomAccess));
+ test(`Iterable`, io.iterable(validateRandomAccess));
+ });
+ });
+
+ describe(`AsyncRecordBatchFileReader (${name})`, () => {
+ describe(`should read all RecordBatches`, () => {
+
+ test('AsyncIterable', io.asyncIterable(validateAsync));
+ test('fs.FileHandle', io.fsFileHandle(validateAsync));
+ test('fs.ReadStream', io.fsReadableStream(validateAsync));
+ test('stream.Readable', io.nodeReadableStream(validateAsync));
+ test('whatwg.ReadableStream', io.whatwgReadableStream(validateAsync));
+ test('whatwg.ReadableByteStream', io.whatwgReadableByteStream(validateAsync));
+
+ test('Promise<AsyncIterable>', io.asyncIterable(validateAsyncWrapped));
+ test('Promise<fs.FileHandle>', io.fsFileHandle(validateAsyncWrapped));
+ test('Promise<fs.ReadStream>', io.fsReadableStream(validateAsyncWrapped));
+ test('Promise<stream.Readable>', io.nodeReadableStream(validateAsyncWrapped));
+ test('Promise<ReadableStream>', io.whatwgReadableStream(validateAsyncWrapped));
+ test('Promise<ReadableByteStream>', io.whatwgReadableByteStream(validateAsyncWrapped));
+ });
+
+ describe(`should allow random access to record batches after iterating when autoDestroy=false`, () => {
+
+ test('AsyncIterable', io.asyncIterable(validateRandomAccessAsync));
+ test('fs.FileHandle', io.fsFileHandle(validateRandomAccessAsync));
+ test('fs.ReadStream', io.fsReadableStream(validateRandomAccessAsync));
+ test('stream.Readable', io.nodeReadableStream(validateRandomAccessAsync));
+ test('whatwg.ReadableStream', io.whatwgReadableStream(validateRandomAccessAsync));
+ test('whatwg.ReadableByteStream', io.whatwgReadableByteStream(validateRandomAccessAsync));
+
+ test('Promise<AsyncIterable>', io.asyncIterable(validateRandomAccessAsync));
+ test('Promise<fs.FileHandle>', io.fsFileHandle(validateRandomAccessAsync));
+ test('Promise<fs.ReadStream>', io.fsReadableStream(validateRandomAccessAsync));
+ test('Promise<stream.Readable>', io.nodeReadableStream(validateRandomAccessAsync));
+ test('Promise<ReadableStream>', io.whatwgReadableStream(validateRandomAccessAsync));
+ test('Promise<ReadableByteStream>', io.whatwgReadableByteStream(validateRandomAccessAsync));
+ });
+ });
+}
+
+function validateRandomAccess(source: any) {
+ const reader = RecordBatchReader.from(source) as RecordBatchFileReader;
+ const schema = reader.open({ autoDestroy: false }).schema;
+ const batches = [...reader];
+ expect(reader.closed).toBe(false);
+ expect(reader.schema).toBe(schema);
+ while (batches.length > 0) {
+ const expected = batches.pop()!;
+ const actual = reader.readRecordBatch(batches.length);
+ expect(actual).toEqualRecordBatch(expected);
+ }
+ reader.cancel();
+ expect(reader.closed).toBe(true);
+ expect(reader.schema).toBeUndefined();
+}
+
+async function validateRandomAccessAsync(source: any) {
+ const reader = (await RecordBatchReader.from(source)) as AsyncRecordBatchFileReader;
+ const schema = (await reader.open({ autoDestroy: false })).schema;
+ const batches = await toArray(reader);
+ expect(reader.closed).toBe(false);
+ expect(reader.schema).toBe(schema);
+ while (batches.length > 0) {
+ const expected = batches.pop()!;
+ const actual = await reader.readRecordBatch(batches.length);
+ expect(actual).toEqualRecordBatch(expected);
+ }
+ await reader.cancel();
+ expect(reader.closed).toBe(true);
+ expect(reader.schema).toBeUndefined();
+}
diff --git a/src/arrow/js/test/unit/ipc/reader/from-inference-tests.ts b/src/arrow/js/test/unit/ipc/reader/from-inference-tests.ts
new file mode 100644
index 000000000..c444b78fc
--- /dev/null
+++ b/src/arrow/js/test/unit/ipc/reader/from-inference-tests.ts
@@ -0,0 +1,150 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import {
+ generateRandomTables,
+ // generateDictionaryTables
+} from '../../../data/tables';
+
+import { ArrowIOTestHelper } from '../helpers';
+import {
+ RecordBatchReader,
+ RecordBatchFileReader,
+ RecordBatchStreamReader,
+ AsyncRecordBatchFileReader,
+ AsyncRecordBatchStreamReader
+} from 'apache-arrow';
+
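+// For every generated table, check that RecordBatchReader.from() infers the
+// right reader implementation (JSON, file, or stream) from each source type.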
+for (const table of generateRandomTables([10, 20, 30])) {
+ const name = `[\n ${table.schema.fields.join(',\n ')}\n]`;
+ // eslint-disable-next-line jest/valid-describe
+ describe('RecordBatchReader.from', ((table, name) => () => {
+ testFromFile(ArrowIOTestHelper.file(table), name);
+ testFromJSON(ArrowIOTestHelper.json(table), name);
+ testFromStream(ArrowIOTestHelper.stream(table), name);
+ })(table, name));
+}
+
+function testFromJSON(io: ArrowIOTestHelper, name: string) {
+ describe(`should return a RecordBatchJSONReader (${name})`, () => {
+ test(`Uint8Array`, io.buffer((buffer) => {
+ const json = JSON.parse(`${Buffer.from(buffer)}`);
+ const reader = RecordBatchReader.from(json);
+ expect(reader.isSync()).toEqual(true);
+ expect(reader.isAsync()).toEqual(false);
+ expect(reader).toBeInstanceOf(RecordBatchStreamReader);
+ }));
+ });
+}
+
+function testFromFile(io: ArrowIOTestHelper, name: string) {
+
+ describe(`should return a RecordBatchFileReader (${name})`, () => {
+
+ test(`Uint8Array`, io.buffer(syncSync));
+ test(`Iterable`, io.iterable(syncSync));
+ test('AsyncIterable', io.asyncIterable(asyncSync));
+ test('fs.FileHandle', io.fsFileHandle(asyncAsync));
+ test('fs.ReadStream', io.fsReadableStream(asyncSync));
+ test('stream.Readable', io.nodeReadableStream(asyncSync));
+ test('whatwg.ReadableStream', io.whatwgReadableStream(asyncSync));
+ test('whatwg.ReadableByteStream', io.whatwgReadableByteStream(asyncSync));
+
+ test(`Promise<Uint8Array>`, io.buffer((source) => asyncSync(Promise.resolve(source))));
+ test(`Promise<Iterable>`, io.iterable((source) => asyncSync(Promise.resolve(source))));
+ test('Promise<AsyncIterable>', io.asyncIterable((source) => asyncSync(Promise.resolve(source))));
+ test('Promise<fs.FileHandle>', io.fsFileHandle((source) => asyncAsync(Promise.resolve(source))));
+ test('Promise<fs.ReadStream>', io.fsReadableStream((source) => asyncSync(Promise.resolve(source))));
+ test('Promise<stream.Readable>', io.nodeReadableStream((source) => asyncSync(Promise.resolve(source))));
+ test('Promise<whatwg.ReadableStream>', io.whatwgReadableStream((source) => asyncSync(Promise.resolve(source))));
+ test('Promise<whatwg.ReadableByteStream>', io.whatwgReadableByteStream((source) => asyncSync(Promise.resolve(source))));
+ });
+
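+    // Helper naming: the first word is the sync-ness of RecordBatchReader.from's
+    // return value, the second the sync-ness of the resolved reader, e.g.
+    // asyncSync = from() returns a Promise that resolves to a synchronous reader.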
+ function syncSync(source: any) {
+ const reader = RecordBatchReader.from(source);
+ expect(reader.isSync()).toEqual(true);
+ expect(reader.isAsync()).toEqual(false);
+ expect(reader).toBeInstanceOf(RecordBatchFileReader);
+ }
+
+ async function asyncSync(source: any) {
+ const pending = RecordBatchReader.from(source);
+ expect(pending).toBeInstanceOf(Promise);
+ const reader = await pending;
+ expect(reader.isSync()).toEqual(true);
+ expect(reader.isAsync()).toEqual(false);
+ expect(reader).toBeInstanceOf(RecordBatchFileReader);
+ }
+
+ async function asyncAsync(source: any) {
+ const pending = RecordBatchReader.from(source);
+ expect(pending).toBeInstanceOf(Promise);
+ const reader = await pending;
+ expect(reader.isSync()).toEqual(false);
+ expect(reader.isAsync()).toEqual(true);
+ expect(reader).toBeInstanceOf(AsyncRecordBatchFileReader);
+ }
+}
+
+function testFromStream(io: ArrowIOTestHelper, name: string) {
+
+ describe(`should return a RecordBatchStreamReader (${name})`, () => {
+
+ test(`Uint8Array`, io.buffer(syncSync));
+ test(`Iterable`, io.iterable(syncSync));
+ test('AsyncIterable', io.asyncIterable(asyncAsync));
+ test('fs.FileHandle', io.fsFileHandle(asyncAsync));
+ test('fs.ReadStream', io.fsReadableStream(asyncAsync));
+ test('stream.Readable', io.nodeReadableStream(asyncAsync));
+ test('whatwg.ReadableStream', io.whatwgReadableStream(asyncAsync));
+ test('whatwg.ReadableByteStream', io.whatwgReadableByteStream(asyncAsync));
+
+ test(`Promise<Uint8Array>`, io.buffer((source) => asyncSync(Promise.resolve(source))));
+ test(`Promise<Iterable>`, io.iterable((source) => asyncSync(Promise.resolve(source))));
+ test('Promise<AsyncIterable>', io.asyncIterable((source) => asyncAsync(Promise.resolve(source))));
+ test('Promise<fs.FileHandle>', io.fsFileHandle((source) => asyncAsync(Promise.resolve(source))));
+ test('Promise<fs.ReadStream>', io.fsReadableStream((source) => asyncAsync(Promise.resolve(source))));
+ test('Promise<stream.Readable>', io.nodeReadableStream((source) => asyncAsync(Promise.resolve(source))));
+ test('Promise<whatwg.ReadableStream>', io.whatwgReadableStream((source) => asyncAsync(Promise.resolve(source))));
+ test('Promise<whatwg.ReadableByteStream>', io.whatwgReadableByteStream((source) => asyncAsync(Promise.resolve(source))));
+ });
+
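+    // For stream sources the main difference from the file cases above is that
+    // async inputs resolve to an AsyncRecordBatchStreamReader rather than a
+    // synchronous reader.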
+ function syncSync(source: any) {
+ const reader = RecordBatchReader.from(source);
+ expect(reader.isSync()).toEqual(true);
+ expect(reader.isAsync()).toEqual(false);
+ expect(reader).toBeInstanceOf(RecordBatchStreamReader);
+ }
+
+ async function asyncSync(source: any) {
+ const pending = RecordBatchReader.from(source);
+ expect(pending).toBeInstanceOf(Promise);
+ const reader = await pending;
+ expect(reader.isSync()).toEqual(true);
+ expect(reader.isAsync()).toEqual(false);
+ expect(reader).toBeInstanceOf(RecordBatchStreamReader);
+ }
+
+ async function asyncAsync(source: any) {
+ const pending = RecordBatchReader.from(source);
+ expect(pending).toBeInstanceOf(Promise);
+ const reader = await pending;
+ expect(reader.isSync()).toEqual(false);
+ expect(reader.isAsync()).toEqual(true);
+ expect(reader).toBeInstanceOf(AsyncRecordBatchStreamReader);
+ }
+}
diff --git a/src/arrow/js/test/unit/ipc/reader/json-reader-tests.ts b/src/arrow/js/test/unit/ipc/reader/json-reader-tests.ts
new file mode 100644
index 000000000..9bd1e3466
--- /dev/null
+++ b/src/arrow/js/test/unit/ipc/reader/json-reader-tests.ts
@@ -0,0 +1,40 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import {
+ generateRandomTables,
+ // generateDictionaryTables
+} from '../../../data/tables';
+
+import { ArrowIOTestHelper } from '../helpers';
+import { RecordBatchReader } from 'apache-arrow';
+import { validateRecordBatchReader } from '../validate';
+
+for (const table of generateRandomTables([10, 20, 30])) {
+
+ const io = ArrowIOTestHelper.json(table);
+ const name = `[\n ${table.schema.fields.join(',\n ')}\n]`;
+
+ describe(`RecordBatchJSONReader (${name})`, () => {
+ describe(`should read all RecordBatches`, () => {
+ test(`Uint8Array`, io.buffer((buffer) => {
+ const json = JSON.parse(Buffer.from(buffer).toString());
+ validateRecordBatchReader('json', 3, RecordBatchReader.from(json));
+ }));
+ });
+ });
+}
diff --git a/src/arrow/js/test/unit/ipc/reader/stream-reader-tests.ts b/src/arrow/js/test/unit/ipc/reader/stream-reader-tests.ts
new file mode 100644
index 000000000..23879cf79
--- /dev/null
+++ b/src/arrow/js/test/unit/ipc/reader/stream-reader-tests.ts
@@ -0,0 +1,65 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import {
+ generateRandomTables,
+ // generateDictionaryTables
+} from '../../../data/tables';
+
+import {
+ validateRecordBatchReader,
+ validateAsyncRecordBatchReader
+} from '../validate';
+
+import { ArrowIOTestHelper } from '../helpers';
+import { RecordBatchReader } from 'apache-arrow';
+
+for (const table of generateRandomTables([10, 20, 30])) {
+
+ const io = ArrowIOTestHelper.stream(table);
+ const name = `[\n ${table.schema.fields.join(',\n ')}\n]`;
+
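+    // Each validator expects exactly 3 record batches, one per batch length in
+    // the [10, 20, 30] argument to generateRandomTables above.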
+ const validate = (source: any) => { validateRecordBatchReader('stream', 3, RecordBatchReader.from(source)); };
+ const validateAsync = async (source: any) => { await validateAsyncRecordBatchReader('stream', 3, await RecordBatchReader.from(source)); };
+ const validateAsyncWrapped = async (source: any) => { await validateAsyncRecordBatchReader('stream', 3, await RecordBatchReader.from(Promise.resolve(source))); };
+
+ describe(`RecordBatchStreamReader (${name})`, () => {
+ describe(`should read all RecordBatches`, () => {
+ test(`Uint8Array`, io.buffer(validate));
+ test(`Iterable`, io.iterable(validate));
+ });
+ });
+
+ describe(`AsyncRecordBatchStreamReader (${name})`, () => {
+ describe(`should read all RecordBatches`, () => {
+
+ test('AsyncIterable', io.asyncIterable(validateAsync));
+ test('fs.FileHandle', io.fsFileHandle(validateAsync));
+ test('fs.ReadStream', io.fsReadableStream(validateAsync));
+ test('stream.Readable', io.nodeReadableStream(validateAsync));
+ test('whatwg.ReadableStream', io.whatwgReadableStream(validateAsync));
+ test('whatwg.ReadableByteStream', io.whatwgReadableByteStream(validateAsync));
+
+ test('Promise<AsyncIterable>', io.asyncIterable(validateAsyncWrapped));
+ test('Promise<fs.FileHandle>', io.fsFileHandle(validateAsyncWrapped));
+ test('Promise<fs.ReadStream>', io.fsReadableStream(validateAsyncWrapped));
+ test('Promise<stream.Readable>', io.nodeReadableStream(validateAsyncWrapped));
+ test('Promise<ReadableStream>', io.whatwgReadableStream(validateAsyncWrapped));
+ test('Promise<ReadableByteStream>', io.whatwgReadableByteStream(validateAsyncWrapped));
+ });
+ });
+}
diff --git a/src/arrow/js/test/unit/ipc/reader/streams-dom-tests.ts b/src/arrow/js/test/unit/ipc/reader/streams-dom-tests.ts
new file mode 100644
index 000000000..a380e1619
--- /dev/null
+++ b/src/arrow/js/test/unit/ipc/reader/streams-dom-tests.ts
@@ -0,0 +1,227 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import {
+ generateRandomTables,
+ // generateDictionaryTables
+} from '../../../data/tables';
+
+import {
+ Table,
+ RecordBatchReader,
+ RecordBatchStreamWriter
+} from 'apache-arrow';
+
+import { validateRecordBatchAsyncIterator } from '../validate';
+import { ArrowIOTestHelper, readableDOMStreamToAsyncIterator } from '../helpers';
+
+(() => {
+
+ if (process.env.TEST_DOM_STREAMS !== 'true') {
+ return test('not testing DOM streams because process.env.TEST_DOM_STREAMS !== "true"', () => {});
+ }
+
+ for (const table of generateRandomTables([10, 20, 30])) {
+
+ const file = ArrowIOTestHelper.file(table);
+ const json = ArrowIOTestHelper.json(table);
+ const stream = ArrowIOTestHelper.stream(table);
+ const name = `[\n ${table.schema.fields.join(',\n ')}\n]`;
+
+ describe(`RecordBatchReader.throughDOM (${name})`, () => {
+ describe('file', () => {
+ test('ReadableStream', file.whatwgReadableStream(validate));
+ test('ReadableByteStream', file.whatwgReadableByteStream(validate));
+ });
+ describe('stream', () => {
+ test('ReadableStream', stream.whatwgReadableStream(validate));
+ test('ReadableByteStream', stream.whatwgReadableByteStream(validate));
+ });
+ async function validate(source: ReadableStream) {
+ const stream = source.pipeThrough(RecordBatchReader.throughDOM());
+ await validateRecordBatchAsyncIterator(3, readableDOMStreamToAsyncIterator(stream));
+ }
+ });
+
+ describe(`toDOMStream (${name})`, () => {
+
+ describe(`RecordBatchJSONReader`, () => {
+ test('Uint8Array', json.buffer((source) => validate(JSON.parse(`${Buffer.from(source)}`))));
+ });
+
+ describe(`RecordBatchFileReader`, () => {
+ test(`Uint8Array`, file.buffer(validate));
+ test(`Iterable`, file.iterable(validate));
+ test('AsyncIterable', file.asyncIterable(validate));
+ test('fs.FileHandle', file.fsFileHandle(validate));
+ test('fs.ReadStream', file.fsReadableStream(validate));
+ test('stream.Readable', file.nodeReadableStream(validate));
+ test('whatwg.ReadableStream', file.whatwgReadableStream(validate));
+ test('whatwg.ReadableByteStream', file.whatwgReadableByteStream(validate));
+ test('Promise<AsyncIterable>', file.asyncIterable((source) => validate(Promise.resolve(source))));
+ test('Promise<fs.FileHandle>', file.fsFileHandle((source) => validate(Promise.resolve(source))));
+ test('Promise<fs.ReadStream>', file.fsReadableStream((source) => validate(Promise.resolve(source))));
+ test('Promise<stream.Readable>', file.nodeReadableStream((source) => validate(Promise.resolve(source))));
+ test('Promise<ReadableStream>', file.whatwgReadableStream((source) => validate(Promise.resolve(source))));
+ test('Promise<ReadableByteStream>', file.whatwgReadableByteStream((source) => validate(Promise.resolve(source))));
+ });
+
+ describe(`RecordBatchStreamReader`, () => {
+ test(`Uint8Array`, stream.buffer(validate));
+ test(`Iterable`, stream.iterable(validate));
+ test('AsyncIterable', stream.asyncIterable(validate));
+ test('fs.FileHandle', stream.fsFileHandle(validate));
+ test('fs.ReadStream', stream.fsReadableStream(validate));
+ test('stream.Readable', stream.nodeReadableStream(validate));
+ test('whatwg.ReadableStream', stream.whatwgReadableStream(validate));
+ test('whatwg.ReadableByteStream', stream.whatwgReadableByteStream(validate));
+ test('Promise<AsyncIterable>', stream.asyncIterable((source) => validate(Promise.resolve(source))));
+ test('Promise<fs.FileHandle>', stream.fsFileHandle((source) => validate(Promise.resolve(source))));
+ test('Promise<fs.ReadStream>', stream.fsReadableStream((source) => validate(Promise.resolve(source))));
+ test('Promise<stream.Readable>', stream.nodeReadableStream((source) => validate(Promise.resolve(source))));
+ test('Promise<ReadableStream>', stream.whatwgReadableStream((source) => validate(Promise.resolve(source))));
+ test('Promise<ReadableByteStream>', stream.whatwgReadableByteStream((source) => validate(Promise.resolve(source))));
+ });
+
+ async function validate(source: any) {
+ const reader: RecordBatchReader = await RecordBatchReader.from(source);
+ const iterator = readableDOMStreamToAsyncIterator(reader.toDOMStream());
+ await validateRecordBatchAsyncIterator(3, iterator);
+ }
+ });
+ }
+
+ it('readAll() should pipe to separate WhatWG WritableStreams', async () => {
+ // @ts-ignore
+ const { concatStream } = await import('@openpgp/web-stream-tools');
+
+ expect.hasAssertions();
+
+ const tables = [...generateRandomTables([10, 20, 30])];
+
+ const stream = concatStream(tables.map((table, i) =>
+ RecordBatchStreamWriter.writeAll(table).toDOMStream({
+                // Alternate between bytes mode and regular mode so both code paths get coverage
+ type: i % 2 === 0 ? 'bytes' : undefined
+ })
+ )) as ReadableStream<Uint8Array>;
+
+ let tableIndex = -1;
+ let reader: RecordBatchReader | undefined;
+
+ for await (reader of RecordBatchReader.readAll(stream)) {
+
+ validateStreamState(reader, stream, false);
+
+ const output = reader
+ .pipeThrough(RecordBatchStreamWriter.throughDOM())
+ .pipeThrough(new TransformStream());
+
+ validateStreamState(reader, output, false, false);
+
+ const sourceTable = tables[++tableIndex];
+ const streamTable = await Table.from(output);
+ expect(streamTable).toEqualTable(sourceTable);
+ expect(output.locked).toBe(false);
+ }
+
+ expect(reader).toBeDefined();
+ validateStreamState(reader!, stream, true);
+ expect(tableIndex).toBe(tables.length - 1);
+ });
+
+ it('should not close the underlying WhatWG ReadableStream when reading multiple tables to completion', async () => {
+ // @ts-ignore
+ const { concatStream } = await import('@openpgp/web-stream-tools');
+
+ expect.hasAssertions();
+
+ const tables = [...generateRandomTables([10, 20, 30])];
+
+ const stream = concatStream(tables.map((table, i) =>
+ RecordBatchStreamWriter.writeAll(table).toDOMStream({
+                // Alternate between bytes mode and regular mode so both code paths get coverage
+ type: i % 2 === 0 ? 'bytes' : undefined
+ })
+ )) as ReadableStream<Uint8Array>;
+
+ let tableIndex = -1;
+ let reader = await RecordBatchReader.from(stream);
+
+ validateStreamState(reader, stream, false);
+
+ for await (reader of RecordBatchReader.readAll(reader)) {
+
+ validateStreamState(reader, stream, false);
+
+ const sourceTable = tables[++tableIndex];
+ const streamTable = await Table.from(reader);
+ expect(streamTable).toEqualTable(sourceTable);
+ }
+
+ validateStreamState(reader, stream, true);
+ expect(tableIndex).toBe(tables.length - 1);
+ });
+
+ it('should close the underlying WhatWG ReadableStream when reading multiple tables and we break early', async () => {
+ // @ts-ignore
+ const { concatStream } = await import('@openpgp/web-stream-tools');
+
+ expect.hasAssertions();
+
+ const tables = [...generateRandomTables([10, 20, 30])];
+
+ const stream = concatStream(tables.map((table, i) =>
+ RecordBatchStreamWriter.writeAll(table).toDOMStream({
+                // Alternate between bytes mode and regular mode so both code paths get coverage
+ type: i % 2 === 0 ? 'bytes' : undefined
+ })
+ )) as ReadableStream<Uint8Array>;
+
+ let tableIndex = -1;
+ let reader = await RecordBatchReader.from(stream);
+
+ validateStreamState(reader, stream, false);
+
+ for await (reader of RecordBatchReader.readAll(reader)) {
+
+ validateStreamState(reader, stream, false);
+
+ let batchIndex = -1;
+ const sourceTable = tables[++tableIndex];
+ const breakEarly = tableIndex === (tables.length / 2 | 0);
+
+ for await (const streamBatch of reader) {
+ expect(streamBatch).toEqualRecordBatch(sourceTable.chunks[++batchIndex]);
+ if (breakEarly && batchIndex === 1) { break; }
+ }
+ if (breakEarly) {
+ // the reader should stay open until we break from the outermost loop
+ validateStreamState(reader, stream, false);
+ break;
+ }
+ }
+
+ validateStreamState(reader, stream, true);
+ expect(tableIndex).toBe(tables.length / 2 | 0);
+ });
+})();
+
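+// A WhatWG stream is locked while a reader holds it; once the RecordBatchReader
+// is closed it should have released its lock, hence locked defaults to the
+// inverse of closed unless a caller overrides it.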
+function validateStreamState(reader: RecordBatchReader, stream: ReadableStream, closed: boolean, locked = !closed) {
+ expect(reader.closed).toBe(closed);
+ expect(stream.locked).toBe(locked);
+}
diff --git a/src/arrow/js/test/unit/ipc/reader/streams-node-tests.ts b/src/arrow/js/test/unit/ipc/reader/streams-node-tests.ts
new file mode 100644
index 000000000..822f99350
--- /dev/null
+++ b/src/arrow/js/test/unit/ipc/reader/streams-node-tests.ts
@@ -0,0 +1,219 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import {
+ generateRandomTables
+} from '../../../data/tables';
+
+import {
+ Table,
+ RecordBatchReader,
+ RecordBatchStreamWriter
+} from 'apache-arrow';
+
+import { ArrowIOTestHelper } from '../helpers';
+import { validateRecordBatchAsyncIterator } from '../validate';
+
+(() => {
+
+ if (process.env.TEST_NODE_STREAMS !== 'true') {
+ return test('not testing node streams because process.env.TEST_NODE_STREAMS !== "true"', () => {});
+ }
+
+ for (const table of generateRandomTables([10, 20, 30])) {
+
+ const file = ArrowIOTestHelper.file(table);
+ const json = ArrowIOTestHelper.json(table);
+ const stream = ArrowIOTestHelper.stream(table);
+ const name = `[\n ${table.schema.fields.join(',\n ')}\n]`;
+
+ describe(`RecordBatchReader.throughNode (${name})`, () => {
+ describe('file', () => {
+ test('fs.ReadStream', file.fsReadableStream(validate));
+ test('stream.Readable', file.nodeReadableStream(validate));
+ });
+ describe('stream', () => {
+                test('fs.ReadStream', stream.fsReadableStream(validate));
+                test('stream.Readable', stream.nodeReadableStream(validate));
+ });
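+            // throughNode() yields a Node transform stream that parses raw IPC
+            // bytes into RecordBatches, consumed here via its async iterator.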
+ async function validate(source: NodeJS.ReadableStream) {
+ const stream = source.pipe(RecordBatchReader.throughNode());
+ await validateRecordBatchAsyncIterator(3, stream[Symbol.asyncIterator]());
+ }
+ });
+
+ describe(`toNodeStream (${name})`, () => {
+
+ describe(`RecordBatchJSONReader`, () => {
+ test('Uint8Array', json.buffer((source) => validate(JSON.parse(`${Buffer.from(source)}`))));
+ });
+
+ describe(`RecordBatchFileReader`, () => {
+ test(`Uint8Array`, file.buffer(validate));
+ test(`Iterable`, file.iterable(validate));
+ test('AsyncIterable', file.asyncIterable(validate));
+ test('fs.FileHandle', file.fsFileHandle(validate));
+ test('fs.ReadStream', file.fsReadableStream(validate));
+ test('stream.Readable', file.nodeReadableStream(validate));
+ test('whatwg.ReadableStream', file.whatwgReadableStream(validate));
+ test('whatwg.ReadableByteStream', file.whatwgReadableByteStream(validate));
+ test('Promise<AsyncIterable>', file.asyncIterable((source) => validate(Promise.resolve(source))));
+ test('Promise<fs.FileHandle>', file.fsFileHandle((source) => validate(Promise.resolve(source))));
+ test('Promise<fs.ReadStream>', file.fsReadableStream((source) => validate(Promise.resolve(source))));
+ test('Promise<stream.Readable>', file.nodeReadableStream((source) => validate(Promise.resolve(source))));
+ test('Promise<ReadableStream>', file.whatwgReadableStream((source) => validate(Promise.resolve(source))));
+ test('Promise<ReadableByteStream>', file.whatwgReadableByteStream((source) => validate(Promise.resolve(source))));
+ });
+
+ describe(`RecordBatchStreamReader`, () => {
+ test(`Uint8Array`, stream.buffer(validate));
+ test(`Iterable`, stream.iterable(validate));
+ test('AsyncIterable', stream.asyncIterable(validate));
+ test('fs.FileHandle', stream.fsFileHandle(validate));
+ test('fs.ReadStream', stream.fsReadableStream(validate));
+ test('stream.Readable', stream.nodeReadableStream(validate));
+ test('whatwg.ReadableStream', stream.whatwgReadableStream(validate));
+ test('whatwg.ReadableByteStream', stream.whatwgReadableByteStream(validate));
+ test('Promise<AsyncIterable>', stream.asyncIterable((source) => validate(Promise.resolve(source))));
+ test('Promise<fs.FileHandle>', stream.fsFileHandle((source) => validate(Promise.resolve(source))));
+ test('Promise<fs.ReadStream>', stream.fsReadableStream((source) => validate(Promise.resolve(source))));
+ test('Promise<stream.Readable>', stream.nodeReadableStream((source) => validate(Promise.resolve(source))));
+ test('Promise<ReadableStream>', stream.whatwgReadableStream((source) => validate(Promise.resolve(source))));
+ test('Promise<ReadableByteStream>', stream.whatwgReadableByteStream((source) => validate(Promise.resolve(source))));
+ });
+
+ async function validate(source: any) {
+ const reader: RecordBatchReader = await RecordBatchReader.from(source);
+ await validateRecordBatchAsyncIterator(3, reader.toNodeStream()[Symbol.asyncIterator]());
+ }
+ });
+ }
+
+ it('readAll() should pipe to separate NodeJS WritableStreams', async () => {
+ // @ts-ignore
+ const { default: MultiStream } = await import('multistream');
+ const { PassThrough } = await import('stream');
+
+ expect.hasAssertions();
+
+ const tables = [...generateRandomTables([10, 20, 30])];
+
+ const stream = new MultiStream(tables.map((table) =>
+ () => RecordBatchStreamWriter.writeAll(table).toNodeStream()
+ )) as NodeJS.ReadableStream;
+
+ let tableIndex = -1;
+ let reader: RecordBatchReader | undefined;
+
+ for await (reader of RecordBatchReader.readAll(stream)) {
+
+ validateStreamState(reader, stream, false);
+
+ const output = reader
+ .pipe(RecordBatchStreamWriter.throughNode())
+ .pipe(new PassThrough());
+
+ validateStreamState(reader, output, false);
+
+ const sourceTable = tables[++tableIndex];
+ const streamTable = await Table.from(output);
+ expect(streamTable).toEqualTable(sourceTable);
+ expect(Boolean(output.readableFlowing)).toBe(false);
+ }
+
+ expect(reader).toBeDefined();
+ validateStreamState(reader!, stream, true);
+ expect(tableIndex).toBe(tables.length - 1);
+ });
+
+ it('should not close the underlying NodeJS ReadableStream when reading multiple tables to completion', async () => {
+ // @ts-ignore
+ const { default: MultiStream } = await import('multistream');
+
+ expect.hasAssertions();
+
+ const tables = [...generateRandomTables([10, 20, 30])];
+
+ const stream = new MultiStream(tables.map((table) =>
+ () => RecordBatchStreamWriter.writeAll(table).toNodeStream()
+ )) as NodeJS.ReadableStream;
+
+ let tableIndex = -1;
+ let reader = await RecordBatchReader.from(stream);
+
+ validateStreamState(reader, stream, false);
+
+ for await (reader of RecordBatchReader.readAll(reader)) {
+
+ validateStreamState(reader, stream, false);
+
+ const sourceTable = tables[++tableIndex];
+ const streamTable = await Table.from(reader);
+ expect(streamTable).toEqualTable(sourceTable);
+ }
+
+ validateStreamState(reader, stream, true);
+ expect(tableIndex).toBe(tables.length - 1);
+ });
+
+ it('should close the underlying NodeJS ReadableStream when reading multiple tables and we break early', async () => {
+ // @ts-ignore
+ const { default: MultiStream } = await import('multistream');
+
+ expect.hasAssertions();
+
+ const tables = [...generateRandomTables([10, 20, 30])];
+
+ const stream = new MultiStream(tables.map((table) =>
+ () => RecordBatchStreamWriter.writeAll(table).toNodeStream()
+ )) as NodeJS.ReadableStream;
+
+ let tableIndex = -1;
+ let reader = await RecordBatchReader.from(stream);
+
+ validateStreamState(reader, stream, false);
+
+ for await (reader of RecordBatchReader.readAll(reader)) {
+
+ validateStreamState(reader, stream, false);
+
+ let batchIndex = -1;
+ const sourceTable = tables[++tableIndex];
+ const breakEarly = tableIndex === (tables.length / 2 | 0);
+
+ for await (const streamBatch of reader) {
+ expect(streamBatch).toEqualRecordBatch(sourceTable.chunks[++batchIndex]);
+ if (breakEarly && batchIndex === 1) { break; }
+ }
+ if (breakEarly) {
+ // the reader should stay open until we break from the outermost loop
+ validateStreamState(reader, stream, false);
+ break;
+ }
+ }
+
+ validateStreamState(reader, stream, true, true);
+ expect(tableIndex).toBe(tables.length / 2 | 0);
+ });
+})();
+
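+// Node streams surface their state differently from WhatWG streams: a fully
+// consumed or destroyed stream stops being readable, and readableFlowing stays
+// false here because these tests pull with async iteration, not 'data' listeners.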
+function validateStreamState(reader: RecordBatchReader, stream: NodeJS.ReadableStream, closed: boolean, readable = !closed) {
+ expect(reader.closed).toBe(closed);
+ expect(Boolean(stream.readable)).toBe(readable);
+ expect(Boolean((stream as any).destroyed)).toBe(closed);
+ expect(Boolean((stream as any).readableFlowing)).toBe(false);
+}
diff --git a/src/arrow/js/test/unit/ipc/validate.ts b/src/arrow/js/test/unit/ipc/validate.ts
new file mode 100644
index 000000000..aedf87a2d
--- /dev/null
+++ b/src/arrow/js/test/unit/ipc/validate.ts
@@ -0,0 +1,74 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import '../../jest-extensions';
+
+import {
+ Schema,
+ RecordBatch,
+ RecordBatchReader,
+ RecordBatchFileReader,
+ RecordBatchStreamReader,
+} from 'apache-arrow';
+
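+// open() reads the schema ahead of iteration; after the batches are exhausted
+// the reader is closed only if autoDestroy is set, which the final assertion
+// (closed === autoDestroy) pins down.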
+export function validateRecordBatchReader<T extends RecordBatchFileReader | RecordBatchStreamReader>(type: 'json' | 'file' | 'stream', numBatches: number, r: T) {
+ const reader = r.open();
+ expect(reader).toBeInstanceOf(RecordBatchReader);
+ expect(type === 'file' ? reader.isFile() : reader.isStream()).toBe(true);
+ expect(reader.schema).toBeInstanceOf(Schema);
+ validateRecordBatchIterator(numBatches, reader[Symbol.iterator]());
+ expect(reader.closed).toBe(reader.autoDestroy);
+ return reader;
+}
+
+export async function validateAsyncRecordBatchReader<T extends RecordBatchReader>(type: 'json' | 'file' | 'stream', numBatches: number, r: T) {
+ const reader = await r.open();
+ expect(reader).toBeInstanceOf(RecordBatchReader);
+ expect(reader.schema).toBeInstanceOf(Schema);
+ expect(type === 'file' ? reader.isFile() : reader.isStream()).toBe(true);
+ await validateRecordBatchAsyncIterator(numBatches, reader[Symbol.asyncIterator]());
+ expect(reader.closed).toBe(reader.autoDestroy);
+ return reader;
+}
+
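+// Drains an iterator of RecordBatches, asserting it yields exactly numBatches
+// items, and calls return() afterwards so any underlying source is released.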
+export function validateRecordBatchIterator(numBatches: number, iterator: Iterable<RecordBatch> | IterableIterator<RecordBatch>) {
+ let i = 0;
+ try {
+ for (const recordBatch of iterator) {
+ expect(recordBatch).toBeInstanceOf(RecordBatch);
+ expect(i++).toBeLessThan(numBatches);
+ }
+ } catch (e) { throw new Error(`${i}: ${e}`); }
+ expect(i).toBe(numBatches);
+ if (typeof (iterator as any).return === 'function') {
+ (iterator as any).return();
+ }
+}
+
+export async function validateRecordBatchAsyncIterator(numBatches: number, iterator: AsyncIterable<RecordBatch> | AsyncIterableIterator<RecordBatch>) {
+ let i = 0;
+ try {
+ for await (const recordBatch of iterator) {
+ expect(recordBatch).toBeInstanceOf(RecordBatch);
+ expect(i++).toBeLessThan(numBatches);
+ }
+ } catch (e) { throw new Error(`${i}: ${e}`); }
+ expect(i).toBe(numBatches);
+ if (typeof (iterator as any).return === 'function') {
+ await (iterator as any).return();
+ }
+}
diff --git a/src/arrow/js/test/unit/ipc/writer/file-writer-tests.ts b/src/arrow/js/test/unit/ipc/writer/file-writer-tests.ts
new file mode 100644
index 000000000..fa639e5f6
--- /dev/null
+++ b/src/arrow/js/test/unit/ipc/writer/file-writer-tests.ts
@@ -0,0 +1,46 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import {
+ generateRandomTables,
+ generateDictionaryTables
+} from '../../../data/tables';
+
+import { validateRecordBatchIterator } from '../validate';
+import { Table, RecordBatchFileWriter } from 'apache-arrow';
+
+describe('RecordBatchFileWriter', () => {
+ for (const table of generateRandomTables([10, 20, 30])) {
+ testFileWriter(table, `[${table.schema.fields.join(', ')}]`);
+ }
+ for (const table of generateDictionaryTables([10, 20, 30])) {
+ testFileWriter(table, `${table.schema.fields[0]}`);
+ }
+});
+
+function testFileWriter(table: Table, name: string) {
+ describe(`should write the Arrow IPC file format (${name})`, () => {
+ test(`Table`, validateTable.bind(0, table));
+ });
+}
+
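+// Round-trip: serialize the table to the IPC file format, rebuild it from the
+// resulting Uint8Array, and compare it (and the source's 3 chunks) for equality.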
+async function validateTable(source: Table) {
+ const writer = RecordBatchFileWriter.writeAll(source);
+ const result = await Table.from(writer.toUint8Array());
+ validateRecordBatchIterator(3, source.chunks);
+ expect(result).toEqualTable(source);
+}
diff --git a/src/arrow/js/test/unit/ipc/writer/json-writer-tests.ts b/src/arrow/js/test/unit/ipc/writer/json-writer-tests.ts
new file mode 100644
index 000000000..05be0e272
--- /dev/null
+++ b/src/arrow/js/test/unit/ipc/writer/json-writer-tests.ts
@@ -0,0 +1,46 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import {
+ generateRandomTables,
+ generateDictionaryTables
+} from '../../../data/tables';
+
+import { validateRecordBatchIterator } from '../validate';
+import { Table, RecordBatchJSONWriter } from 'apache-arrow';
+
+describe('RecordBatchJSONWriter', () => {
+ for (const table of generateRandomTables([10, 20, 30])) {
+ testJSONWriter(table, `[${table.schema.fields.join(', ')}]`);
+ }
+ for (const table of generateDictionaryTables([10, 20, 30])) {
+ testJSONWriter(table, `${table.schema.fields[0]}`);
+ }
+});
+
+function testJSONWriter(table: Table, name: string) {
+ describe(`should write the Arrow IPC JSON format (${name})`, () => {
+ test(`Table`, validateTable.bind(0, table));
+ });
+}
+
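+// Same round-trip through the Arrow JSON format: the writer emits a JSON
+// string, so it is parsed before being read back into a Table.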
+async function validateTable(source: Table) {
+ const writer = RecordBatchJSONWriter.writeAll(source);
+ const result = Table.from(JSON.parse(await writer.toString()));
+ validateRecordBatchIterator(3, source.chunks);
+ expect(result).toEqualTable(source);
+}
diff --git a/src/arrow/js/test/unit/ipc/writer/stream-writer-tests.ts b/src/arrow/js/test/unit/ipc/writer/stream-writer-tests.ts
new file mode 100644
index 000000000..a83aa39da
--- /dev/null
+++ b/src/arrow/js/test/unit/ipc/writer/stream-writer-tests.ts
@@ -0,0 +1,119 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import {
+ generateRandomTables,
+ generateDictionaryTables
+} from '../../../data/tables';
+
+import * as generate from '../../../generate-test-data';
+import { validateRecordBatchIterator } from '../validate';
+import { RecordBatchStreamWriterOptions } from 'apache-arrow/ipc/writer';
+import { DictionaryVector, Dictionary, Uint32, Int32 } from 'apache-arrow';
+import { Table, Schema, Field, Chunked, Builder, RecordBatch, RecordBatchReader, RecordBatchStreamWriter } from 'apache-arrow';
+
+describe('RecordBatchStreamWriter', () => {
+
+ (() => {
+ const type = generate.sparseUnion(0, 0).vector.type;
+ const schema = new Schema([new Field('dictSparseUnion', type)]);
+ const table = generate.table([10, 20, 30], schema).table;
+ const testName = `[${table.schema.fields.join(', ')}]`;
+ testStreamWriter(table, testName, { writeLegacyIpcFormat: true });
+ testStreamWriter(table, testName, { writeLegacyIpcFormat: false });
+ })();
+
+ for (const table of generateRandomTables([10, 20, 30])) {
+ const testName = `[${table.schema.fields.join(', ')}]`;
+ testStreamWriter(table, testName, { writeLegacyIpcFormat: true });
+ testStreamWriter(table, testName, { writeLegacyIpcFormat: false });
+ }
+
+ for (const table of generateDictionaryTables([10, 20, 30])) {
+ const testName = `${table.schema.fields[0]}`;
+ testStreamWriter(table, testName, { writeLegacyIpcFormat: true });
+ testStreamWriter(table, testName, { writeLegacyIpcFormat: false });
+ }
+
+ it(`should write multiple tables to the same output stream`, async () => {
+ const tables = [] as Table[];
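+        // autoDestroy=false keeps the writer open across writeAll() calls, so
+        // several tables can be written to one IPC byte stream until close().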
+ const writer = new RecordBatchStreamWriter({ autoDestroy: false });
+ const validate = (async () => {
+ for await (const reader of RecordBatchReader.readAll(writer)) {
+ const sourceTable = tables.shift()!;
+ const streamTable = await Table.from(reader);
+ expect(streamTable).toEqualTable(sourceTable);
+ }
+ })();
+ for (const table of generateRandomTables([10, 20, 30])) {
+ tables.push(table);
+ await writer.writeAll((async function*() {
+ for (const chunk of table.chunks) {
+                    yield chunk;
+                    await new Promise((r) => setTimeout(r, 1)); // insert some asynchrony between chunks
+ }
+ }()));
+ }
+ writer.close();
+ await validate;
+ });
+
+ it('should write delta dictionary batches', async () => {
+
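+        // 1000 values flushed every 50 by the count-based queueing strategy
+        // produce 20 chunks; each chunk introduces new dictionary entries that
+        // are written as delta dictionary batches, so the round-tripped column's
+        // dictionary comes back as a Chunked with 20 chunks.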
+ const name = 'dictionary_encoded_uint32';
+ const chunks: DictionaryVector<Uint32, Int32>[] = [];
+ const {
+ vector: sourceVector, values: sourceValues,
+ } = generate.dictionary(1000, 20, new Uint32(), new Int32());
+
+ const writer = RecordBatchStreamWriter.writeAll((function* () {
+ const transform = Builder.throughIterable({
+ type: sourceVector.type, nullValues: [null],
+ queueingStrategy: 'count', highWaterMark: 50,
+ });
+ for (const chunk of transform(sourceValues())) {
+ chunks.push(chunk);
+ yield RecordBatch.new({ [name]: chunk });
+ }
+ })());
+
+ expect(Chunked.concat(chunks)).toEqualVector(sourceVector);
+
+ type T = { [name]: Dictionary<Uint32, Int32> };
+ const sourceTable = Table.new({ [name]: sourceVector });
+ const resultTable = await Table.from<T>(writer.toUint8Array());
+
+ const { dictionary } = resultTable.getColumn(name);
+
+ expect(resultTable).toEqualTable(sourceTable);
+ expect((dictionary as Chunked)).toBeInstanceOf(Chunked);
+ expect((dictionary as Chunked).chunks).toHaveLength(20);
+ });
+});
+
+function testStreamWriter(table: Table, name: string, options: RecordBatchStreamWriterOptions) {
+ describe(`should write the Arrow IPC stream format (${name})`, () => {
+ test(`Table`, validateTable.bind(0, table, options));
+ });
+}
+
+async function validateTable(source: Table, options: RecordBatchStreamWriterOptions) {
+ const writer = RecordBatchStreamWriter.writeAll(source, options);
+ const result = await Table.from(writer.toUint8Array());
+ validateRecordBatchIterator(3, source.chunks);
+ expect(result).toEqualTable(source);
+}
diff --git a/src/arrow/js/test/unit/ipc/writer/streams-dom-tests.ts b/src/arrow/js/test/unit/ipc/writer/streams-dom-tests.ts
new file mode 100644
index 000000000..a19ddcdd7
--- /dev/null
+++ b/src/arrow/js/test/unit/ipc/writer/streams-dom-tests.ts
@@ -0,0 +1,272 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import {
+ generateRandomTables,
+ // generateDictionaryTables
+} from '../../../data/tables';
+
+import { from, as } from 'ix/asynciterable';
+import { tap, flatMap } from 'ix/asynciterable/operators';
+
+import {
+ Table,
+ RecordBatchReader,
+ RecordBatchWriter,
+ RecordBatchFileWriter,
+ RecordBatchJSONWriter,
+ RecordBatchStreamWriter,
+} from 'apache-arrow';
+
+import {
+ ArrowIOTestHelper,
+ concatBuffersAsync,
+ readableDOMStreamToAsyncIterator
+} from '../helpers';
+
+import {
+ validateRecordBatchReader,
+ validateAsyncRecordBatchReader,
+ validateRecordBatchAsyncIterator
+} from '../validate';
+
+(() => {
+
+ if (process.env.TEST_DOM_STREAMS !== 'true') {
+ return test('not testing DOM streams because process.env.TEST_DOM_STREAMS !== "true"', () => {});
+ }
+
+ for (const table of generateRandomTables([10, 20, 30])) {
+
+ const file = ArrowIOTestHelper.file(table);
+ const json = ArrowIOTestHelper.json(table);
+ const stream = ArrowIOTestHelper.stream(table);
+ const name = `[\n ${table.schema.fields.join(',\n ')}\n]`;
+
+ describe(`RecordBatchWriter.throughDOM (${name})`, () => {
+
+ describe('file', () => {
+ describe(`convert`, () => {
+ test('ReadableStream', file.whatwgReadableStream(validateConvert.bind(0, RecordBatchStreamWriter)));
+ test('ReadableByteStream', file.whatwgReadableByteStream(validateConvert.bind(0, RecordBatchStreamWriter)));
+ });
+ describe(`through`, () => {
+ test('ReadableStream', file.whatwgReadableStream(validateThrough.bind(0, RecordBatchFileWriter)));
+ test('ReadableByteStream', file.whatwgReadableByteStream(validateThrough.bind(0, RecordBatchFileWriter)));
+ });
+ });
+
+ describe('stream', () => {
+ describe(`convert`, () => {
+ test('ReadableStream', stream.whatwgReadableStream(validateConvert.bind(0, RecordBatchFileWriter)));
+ test('ReadableByteStream', stream.whatwgReadableByteStream(validateConvert.bind(0, RecordBatchFileWriter)));
+ });
+ describe(`through`, () => {
+ test('ReadableStream', stream.whatwgReadableStream(validateThrough.bind(0, RecordBatchStreamWriter)));
+ test('ReadableByteStream', stream.whatwgReadableByteStream(validateThrough.bind(0, RecordBatchStreamWriter)));
+ });
+ });
+
+ async function validateConvert(RBWImplementation: typeof RecordBatchWriter, source: ReadableStream) {
+ const stream = source
+ .pipeThrough(RecordBatchReader.throughDOM())
+ .pipeThrough(RBWImplementation.throughDOM());
+ const type = RBWImplementation === RecordBatchFileWriter ? 'file' : 'stream';
+ await validateAsyncRecordBatchReader(type, 3, await RecordBatchReader.from(stream));
+ }
+
+ async function validateThrough(RBWImplementation: typeof RecordBatchWriter, source: ReadableStream) {
+ const stream = source
+ .pipeThrough(RecordBatchReader.throughDOM())
+ .pipeThrough(RBWImplementation.throughDOM())
+ .pipeThrough(RecordBatchReader.throughDOM());
+ await validateRecordBatchAsyncIterator(3, readableDOMStreamToAsyncIterator(stream));
+ }
+ });
+
+ describe(`toDOMStream (${name})`, () => {
+
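+            // Wraps a source in Promise.resolve before validating, covering the
+            // RecordBatchReader.from() overloads that accept promised sources.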
+ const wrapArgInPromise = (fn: (p: Promise<any>) => any) => (x: any) => fn(Promise.resolve(x));
+
+ describe(`RecordBatchJSONWriter`, () => {
+
+ const toJSON = (x: any): { schema: any } => JSON.parse(`${Buffer.from(x)}`);
+
+ test('Uint8Array', json.buffer((source) => validate(toJSON(source))));
+ test('Promise<Uint8Array>', json.buffer((source) => validate(Promise.resolve(toJSON(source)))));
+
+ async function validate(source: { schema: any } | Promise<{ schema: any }>) {
+ const reader = await RecordBatchReader.from(<any> source);
+ const writer = await RecordBatchJSONWriter.writeAll(reader);
+ const buffer = await concatBuffersAsync(writer.toDOMStream());
+ validateRecordBatchReader('json', 3, RecordBatchReader.from(toJSON(buffer)));
+ }
+ });
+
+ describe(`RecordBatchFileWriter`, () => {
+
+ describe(`sync write/read`, () => {
+
+ test(`Uint8Array`, file.buffer(validate));
+ test(`Iterable`, file.iterable(validate));
+ test('AsyncIterable', file.asyncIterable(validate));
+ test('fs.FileHandle', file.fsFileHandle(validate));
+ test('fs.ReadStream', file.fsReadableStream(validate));
+ test('stream.Readable', file.nodeReadableStream(validate));
+ test('whatwg.ReadableStream', file.whatwgReadableStream(validate));
+ test('whatwg.ReadableByteStream', file.whatwgReadableByteStream(validate));
+ test('Promise<AsyncIterable>', file.asyncIterable(wrapArgInPromise(validate)));
+ test('Promise<fs.FileHandle>', file.fsFileHandle(wrapArgInPromise(validate)));
+ test('Promise<fs.ReadStream>', file.fsReadableStream(wrapArgInPromise(validate)));
+ test('Promise<stream.Readable>', file.nodeReadableStream(wrapArgInPromise(validate)));
+ test('Promise<ReadableStream>', file.whatwgReadableStream(wrapArgInPromise(validate)));
+ test('Promise<ReadableByteStream>', file.whatwgReadableByteStream(wrapArgInPromise(validate)));
+
+ async function validate(source: any) {
+ const reader = await RecordBatchReader.from(source);
+ const writer = await RecordBatchFileWriter.writeAll(reader);
+ const stream = await RecordBatchReader.from(writer.toDOMStream());
+ await validateAsyncRecordBatchReader('file', 3, stream);
+ }
+ });
+
+ describe(`async write/read`, () => {
+
+ test(`Uint8Array`, file.buffer(validate));
+ test(`Iterable`, file.iterable(validate));
+ test('AsyncIterable', file.asyncIterable(validate));
+ test('fs.FileHandle', file.fsFileHandle(validate));
+ test('fs.ReadStream', file.fsReadableStream(validate));
+ test('stream.Readable', file.nodeReadableStream(validate));
+ test('whatwg.ReadableStream', file.whatwgReadableStream(validate));
+ test('whatwg.ReadableByteStream', file.whatwgReadableByteStream(validate));
+ test('Promise<AsyncIterable>', file.asyncIterable(wrapArgInPromise(validate)));
+ test('Promise<fs.FileHandle>', file.fsFileHandle(wrapArgInPromise(validate)));
+ test('Promise<fs.ReadStream>', file.fsReadableStream(wrapArgInPromise(validate)));
+ test('Promise<stream.Readable>', file.nodeReadableStream(wrapArgInPromise(validate)));
+ test('Promise<ReadableStream>', file.whatwgReadableStream(wrapArgInPromise(validate)));
+ test('Promise<ReadableByteStream>', file.whatwgReadableByteStream(wrapArgInPromise(validate)));
+
+ async function validate(source: any) {
+ const writer = new RecordBatchFileWriter();
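+                    // Deliberately not awaited: reading writer.toDOMStream()
+                    // below drains the writer while the batches are still
+                    // being written.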
+ /* no await */ writer.writeAll(await RecordBatchReader.from(source));
+ const reader = await RecordBatchReader.from(writer.toDOMStream());
+ await validateAsyncRecordBatchReader('file', 3, reader);
+ }
+ });
+ });
+
+ describe(`RecordBatchStreamWriter`, () => {
+
+ describe(`sync write/read`, () => {
+
+ test(`Uint8Array`, stream.buffer(validate));
+ test(`Iterable`, stream.iterable(validate));
+ test('AsyncIterable', stream.asyncIterable(validate));
+ test('fs.FileHandle', stream.fsFileHandle(validate));
+ test('fs.ReadStream', stream.fsReadableStream(validate));
+ test('stream.Readable', stream.nodeReadableStream(validate));
+ test('whatwg.ReadableStream', stream.whatwgReadableStream(validate));
+ test('whatwg.ReadableByteStream', stream.whatwgReadableByteStream(validate));
+ test('Promise<AsyncIterable>', stream.asyncIterable(wrapArgInPromise(validate)));
+ test('Promise<fs.FileHandle>', stream.fsFileHandle(wrapArgInPromise(validate)));
+ test('Promise<fs.ReadStream>', stream.fsReadableStream(wrapArgInPromise(validate)));
+ test('Promise<stream.Readable>', stream.nodeReadableStream(wrapArgInPromise(validate)));
+ test('Promise<ReadableStream>', stream.whatwgReadableStream(wrapArgInPromise(validate)));
+ test('Promise<ReadableByteStream>', stream.whatwgReadableByteStream(wrapArgInPromise(validate)));
+
+ async function validate(source: any) {
+ const reader = await RecordBatchReader.from(source);
+ const writer = await RecordBatchStreamWriter.writeAll(reader);
+ const stream = await RecordBatchReader.from(writer.toDOMStream());
+ await validateAsyncRecordBatchReader('stream', 3, stream);
+ }
+ });
+
+ describe(`async write/read`, () => {
+
+ test(`Uint8Array`, stream.buffer(validate));
+ test(`Iterable`, stream.iterable(validate));
+ test('AsyncIterable', stream.asyncIterable(validate));
+ test('fs.FileHandle', stream.fsFileHandle(validate));
+ test('fs.ReadStream', stream.fsReadableStream(validate));
+ test('stream.Readable', stream.nodeReadableStream(validate));
+ test('whatwg.ReadableStream', stream.whatwgReadableStream(validate));
+ test('whatwg.ReadableByteStream', stream.whatwgReadableByteStream(validate));
+ test('Promise<AsyncIterable>', stream.asyncIterable(wrapArgInPromise(validate)));
+ test('Promise<fs.FileHandle>', stream.fsFileHandle(wrapArgInPromise(validate)));
+ test('Promise<fs.ReadStream>', stream.fsReadableStream(wrapArgInPromise(validate)));
+ test('Promise<stream.Readable>', stream.nodeReadableStream(wrapArgInPromise(validate)));
+ test('Promise<ReadableStream>', stream.whatwgReadableStream(wrapArgInPromise(validate)));
+ test('Promise<ReadableByteStream>', stream.whatwgReadableByteStream(wrapArgInPromise(validate)));
+
+ async function validate(source: any) {
+ const writer = new RecordBatchStreamWriter();
+ /* no await */ writer.writeAll(await RecordBatchReader.from(source));
+ const reader = await RecordBatchReader.from(writer.toDOMStream());
+ await validateAsyncRecordBatchReader('stream', 3, reader);
+ }
+ });
+ });
+ });
+ }
+
+ describe(`RecordBatchStreamWriter.throughDOM`, () => {
+
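+        // autoDestroy=false so one throughDOM() transform can carry several
+        // tables' worth of batches before the stream closes.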
+ const opts = { autoDestroy: false };
+ const sleep = (n: number) => new Promise((r) => setTimeout(r, n));
+
+ it(`should write a stream of tables to the same output stream`, async () => {
+
+ const tables = [] as Table[];
+ const stream: ReadableStream<any> = from(generateRandomTables([10, 20, 30]))
+ // insert some asynchrony
+ .pipe(tap({ async next(table: Table) { tables.push(table); await sleep(1); } }))
+ .pipeThrough(RecordBatchStreamWriter.throughDOM(opts));
+
+ for await (const reader of RecordBatchReader.readAll(stream)) {
+ const sourceTable = tables.shift()!;
+ const streamTable = await Table.from(reader);
+ expect(streamTable).toEqualTable(sourceTable);
+ }
+
+ expect(tables).toHaveLength(0);
+ expect(stream.locked).toBe(false);
+ });
+
+ it(`should write a stream of record batches to the same output stream`, async () => {
+
+ const tables = [] as Table[];
+ const stream = from(generateRandomTables([10, 20, 30]))
+ // insert some asynchrony
+ .pipe(tap({ async next(table: Table) { tables.push(table); await sleep(1); } }))
+ // flatMap from Table -> RecordBatches[]
+ .pipe(flatMap((table) => as(table.chunks)))
+ .pipeThrough(RecordBatchStreamWriter.throughDOM(opts));
+
+ for await (const reader of RecordBatchReader.readAll(stream)) {
+ const sourceTable = tables.shift()!;
+ const streamTable = await Table.from(reader);
+ expect(streamTable).toEqualTable(sourceTable);
+ }
+
+ expect(tables).toHaveLength(0);
+ expect(stream.locked).toBe(false);
+ });
+ });
+
+})();
diff --git a/src/arrow/js/test/unit/ipc/writer/streams-node-tests.ts b/src/arrow/js/test/unit/ipc/writer/streams-node-tests.ts
new file mode 100644
index 000000000..662129b1b
--- /dev/null
+++ b/src/arrow/js/test/unit/ipc/writer/streams-node-tests.ts
@@ -0,0 +1,274 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import {
+ generateRandomTables,
+ // generateDictionaryTables
+} from '../../../data/tables';
+
+import { from, as } from 'ix/asynciterable';
+import { tap, flatMap } from 'ix/asynciterable/operators';
+import 'ix/Ix.node';
+
+import {
+ Table,
+ RecordBatchReader,
+ RecordBatchWriter,
+ RecordBatchFileWriter,
+ RecordBatchJSONWriter,
+ RecordBatchStreamWriter,
+} from 'apache-arrow';
+
+import {
+ ArrowIOTestHelper,
+ concatBuffersAsync
+} from '../helpers';
+
+import {
+ validateRecordBatchReader,
+ validateAsyncRecordBatchReader,
+ validateRecordBatchAsyncIterator
+} from '../validate';
+
+(() => {
+
+ if (process.env.TEST_NODE_STREAMS !== 'true') {
+ return test('not testing node streams because process.env.TEST_NODE_STREAMS !== "true"', () => {});
+ }
+
+ for (const table of generateRandomTables([10, 20, 30])) {
+
+ const file = ArrowIOTestHelper.file(table);
+ const json = ArrowIOTestHelper.json(table);
+ const stream = ArrowIOTestHelper.stream(table);
+ const name = `[\n ${table.schema.fields.join(',\n ')}\n]`;
+
+ describe(`RecordBatchWriter.throughNode (${name})`, () => {
+
+ describe('file', () => {
+ describe(`convert`, () => {
+ test('fs.ReadStream', file.fsReadableStream(validateConvert.bind(0, RecordBatchStreamWriter)));
+ test('stream.Readable', file.nodeReadableStream(validateConvert.bind(0, RecordBatchStreamWriter)));
+ });
+ describe(`through`, () => {
+ test('fs.ReadStream', file.fsReadableStream(validateThrough.bind(0, RecordBatchFileWriter)));
+ test('stream.Readable', file.nodeReadableStream(validateThrough.bind(0, RecordBatchFileWriter)));
+ });
+ });
+
+ describe('stream', () => {
+ describe(`convert`, () => {
+ test('fs.ReadStream', stream.fsReadableStream(validateConvert.bind(0, RecordBatchFileWriter)));
+ test('stream.Readable', stream.nodeReadableStream(validateConvert.bind(0, RecordBatchFileWriter)));
+ });
+ describe(`through`, () => {
+ test('fs.ReadStream', stream.fsReadableStream(validateThrough.bind(0, RecordBatchStreamWriter)));
+ test('stream.Readable', stream.nodeReadableStream(validateThrough.bind(0, RecordBatchStreamWriter)));
+ });
+ });
+
+ async function validateConvert(RBWImplementation: typeof RecordBatchWriter, source: NodeJS.ReadableStream) {
+ const stream = source
+ .pipe(RecordBatchReader.throughNode())
+ .pipe(RBWImplementation.throughNode());
+ const type = RBWImplementation === RecordBatchFileWriter ? 'file' : 'stream';
+ await validateAsyncRecordBatchReader(type, 3, await RecordBatchReader.from(stream));
+ }
+
+ async function validateThrough(RBWImplementation: typeof RecordBatchWriter, source: NodeJS.ReadableStream) {
+ const stream = source
+ .pipe(RecordBatchReader.throughNode())
+ .pipe(RBWImplementation.throughNode())
+ .pipe(RecordBatchReader.throughNode());
+ await validateRecordBatchAsyncIterator(3, stream[Symbol.asyncIterator]());
+ }
+ });
+
+ describe(`toNodeStream (${name})`, () => {
+
+ const wrapArgInPromise = (fn: (p: Promise<any>) => any) => (x: any) => fn(Promise.resolve(x));
+
+ describe(`RecordBatchJSONWriter`, () => {
+
+ const toJSON = (x: any): { schema: any } => JSON.parse(`${Buffer.from(x)}`);
+
+ test('Uint8Array', json.buffer((source) => validate(toJSON(source))));
+ test('Promise<Uint8Array>', json.buffer((source) => validate(Promise.resolve(toJSON(source)))));
+
+ async function validate(source: { schema: any } | Promise<{ schema: any }>) {
+ const reader = await RecordBatchReader.from(<any> source);
+ const writer = await RecordBatchJSONWriter.writeAll(reader);
+ const buffer = await concatBuffersAsync(writer.toNodeStream());
+ validateRecordBatchReader('json', 3, RecordBatchReader.from(toJSON(buffer)));
+ }
+ });
+
+ describe(`RecordBatchFileWriter`, () => {
+
+ describe(`sync write/read`, () => {
+
+ test(`Uint8Array`, file.buffer(validate));
+ test(`Iterable`, file.iterable(validate));
+ test('AsyncIterable', file.asyncIterable(validate));
+ test('fs.FileHandle', file.fsFileHandle(validate));
+ test('fs.ReadStream', file.fsReadableStream(validate));
+ test('stream.Readable', file.nodeReadableStream(validate));
+ test('whatwg.ReadableStream', file.whatwgReadableStream(validate));
+ test('whatwg.ReadableByteStream', file.whatwgReadableByteStream(validate));
+ test('Promise<AsyncIterable>', file.asyncIterable(wrapArgInPromise(validate)));
+ test('Promise<fs.FileHandle>', file.fsFileHandle(wrapArgInPromise(validate)));
+ test('Promise<fs.ReadStream>', file.fsReadableStream(wrapArgInPromise(validate)));
+ test('Promise<stream.Readable>', file.nodeReadableStream(wrapArgInPromise(validate)));
+ test('Promise<ReadableStream>', file.whatwgReadableStream(wrapArgInPromise(validate)));
+ test('Promise<ReadableByteStream>', file.whatwgReadableByteStream(wrapArgInPromise(validate)));
+
+ async function validate(source: any) {
+ const reader = await RecordBatchReader.from(source);
+ const writer = await RecordBatchFileWriter.writeAll(reader);
+ const stream = await RecordBatchReader.from(writer.toNodeStream());
+ await validateAsyncRecordBatchReader('file', 3, stream);
+ }
+ });
+
+ describe(`async write/read`, () => {
+
+ test(`Uint8Array`, file.buffer(validate));
+ test(`Iterable`, file.iterable(validate));
+ test('AsyncIterable', file.asyncIterable(validate));
+ test('fs.FileHandle', file.fsFileHandle(validate));
+ test('fs.ReadStream', file.fsReadableStream(validate));
+ test('stream.Readable', file.nodeReadableStream(validate));
+ test('whatwg.ReadableStream', file.whatwgReadableStream(validate));
+ test('whatwg.ReadableByteStream', file.whatwgReadableByteStream(validate));
+ test('Promise<AsyncIterable>', file.asyncIterable(wrapArgInPromise(validate)));
+ test('Promise<fs.FileHandle>', file.fsFileHandle(wrapArgInPromise(validate)));
+ test('Promise<fs.ReadStream>', file.fsReadableStream(wrapArgInPromise(validate)));
+ test('Promise<stream.Readable>', file.nodeReadableStream(wrapArgInPromise(validate)));
+ test('Promise<ReadableStream>', file.whatwgReadableStream(wrapArgInPromise(validate)));
+ test('Promise<ReadableByteStream>', file.whatwgReadableByteStream(wrapArgInPromise(validate)));
+
+ async function validate(source: any) {
+ const writer = new RecordBatchFileWriter();
+ /* no await */ writer.writeAll(await RecordBatchReader.from(source));
+ const reader = await RecordBatchReader.from(writer.toNodeStream());
+ await validateAsyncRecordBatchReader('file', 3, reader);
+ }
+ });
+ });
+
+ describe(`RecordBatchStreamWriter`, () => {
+
+ describe(`sync write/read`, () => {
+
+ test(`Uint8Array`, stream.buffer(validate));
+ test(`Iterable`, stream.iterable(validate));
+ test('AsyncIterable', stream.asyncIterable(validate));
+ test('fs.FileHandle', stream.fsFileHandle(validate));
+ test('fs.ReadStream', stream.fsReadableStream(validate));
+ test('stream.Readable', stream.nodeReadableStream(validate));
+ test('whatwg.ReadableStream', stream.whatwgReadableStream(validate));
+ test('whatwg.ReadableByteStream', stream.whatwgReadableByteStream(validate));
+ test('Promise<AsyncIterable>', stream.asyncIterable(wrapArgInPromise(validate)));
+ test('Promise<fs.FileHandle>', stream.fsFileHandle(wrapArgInPromise(validate)));
+ test('Promise<fs.ReadStream>', stream.fsReadableStream(wrapArgInPromise(validate)));
+ test('Promise<stream.Readable>', stream.nodeReadableStream(wrapArgInPromise(validate)));
+ test('Promise<ReadableStream>', stream.whatwgReadableStream(wrapArgInPromise(validate)));
+ test('Promise<ReadableByteStream>', stream.whatwgReadableByteStream(wrapArgInPromise(validate)));
+
+ async function validate(source: any) {
+ const reader = await RecordBatchReader.from(source);
+ const writer = await RecordBatchStreamWriter.writeAll(reader);
+ const stream = await RecordBatchReader.from(writer.toNodeStream());
+ await validateAsyncRecordBatchReader('stream', 3, stream);
+ }
+ });
+
+ describe(`async write/read`, () => {
+
+ test(`Uint8Array`, stream.buffer(validate));
+ test(`Iterable`, stream.iterable(validate));
+ test('AsyncIterable', stream.asyncIterable(validate));
+ test('fs.FileHandle', stream.fsFileHandle(validate));
+ test('fs.ReadStream', stream.fsReadableStream(validate));
+ test('stream.Readable', stream.nodeReadableStream(validate));
+ test('whatwg.ReadableStream', stream.whatwgReadableStream(validate));
+ test('whatwg.ReadableByteStream', stream.whatwgReadableByteStream(validate));
+ test('Promise<AsyncIterable>', stream.asyncIterable(wrapArgInPromise(validate)));
+ test('Promise<fs.FileHandle>', stream.fsFileHandle(wrapArgInPromise(validate)));
+ test('Promise<fs.ReadStream>', stream.fsReadableStream(wrapArgInPromise(validate)));
+ test('Promise<stream.Readable>', stream.nodeReadableStream(wrapArgInPromise(validate)));
+ test('Promise<ReadableStream>', stream.whatwgReadableStream(wrapArgInPromise(validate)));
+ test('Promise<ReadableByteStream>', stream.whatwgReadableByteStream(wrapArgInPromise(validate)));
+
+ async function validate(source: any) {
+ const writer = new RecordBatchStreamWriter();
+ /* no await */ writer.writeAll(await RecordBatchReader.from(source));
+ const reader = await RecordBatchReader.from(writer.toNodeStream());
+ await validateAsyncRecordBatchReader('stream', 3, reader);
+ }
+ });
+ });
+ });
+ }
+
+ describe(`RecordBatchStreamWriter.throughNode`, () => {
+
+ const sleep = (n: number) => new Promise((r) => setTimeout(r, n));
+
+ it(`should write a stream of tables to the same output stream`, async () => {
+
+ const tables = [] as Table[];
+ const writer = RecordBatchStreamWriter.throughNode({ autoDestroy: false });
+ const stream = from(generateRandomTables([10, 20, 30]))
+ // insert some asynchrony
+ .pipe(tap({ async next(table: Table) { tables.push(table); await sleep(1); } }))
+ .pipe(writer);
+
+ for await (const reader of RecordBatchReader.readAll(stream)) {
+ const sourceTable = tables.shift()!;
+ const streamTable = await Table.from(reader);
+ expect(streamTable).toEqualTable(sourceTable);
+ }
+
+ expect(tables).toHaveLength(0);
+ expect(writer.readable).toBe(false);
+ expect((writer as any).destroyed).toBe(true);
+ });
+
+ it(`should write a stream of record batches to the same output stream`, async () => {
+
+ const tables = [] as Table[];
+ const writer = RecordBatchStreamWriter.throughNode({ autoDestroy: false });
+ const stream = from(generateRandomTables([10, 20, 30]))
+ // insert some asynchrony
+ .pipe(tap({ async next(table: Table) { tables.push(table); await sleep(1); } }))
+ .pipe(flatMap((table) => as(table.chunks)))
+ .pipe(writer);
+
+ for await (const reader of RecordBatchReader.readAll(stream)) {
+ const sourceTable = tables.shift()!;
+ const streamTable = await Table.from(reader);
+ expect(streamTable).toEqualTable(sourceTable);
+ }
+
+ expect(tables).toHaveLength(0);
+ expect(writer.readable).toBe(false);
+ expect((writer as any).destroyed).toBe(true);
+ });
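+
+ // `autoDestroy: false` above is what lets a single writer serve many
+ // tables: rather than closing after the first one, the writer resets and
+ // appends each table as its own complete IPC stream, which is why
+ // RecordBatchReader.readAll(stream) yields one reader per source table.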
+
+ });
+})();
diff --git a/src/arrow/js/test/unit/math-tests.ts b/src/arrow/js/test/unit/math-tests.ts
new file mode 100644
index 000000000..7e3ffcd8f
--- /dev/null
+++ b/src/arrow/js/test/unit/math-tests.ts
@@ -0,0 +1,47 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import * as Arrow from 'apache-arrow';
+const { float64ToUint16, uint16ToFloat64 } = Arrow.util;
+
+describe('Float16', () => {
+ test('Uint16 to Float64 works', () => {
+
+ const uNaN = 0x7E00 /* NaN */;
+ const pInf = 0x7C00 /* 1/0 */;
+ const nInf = 0xFC00 /*-1/0 */;
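+
+ // A float16 is 1 sign + 5 exponent + 10 significand bits (1.0 is 0x3C00).
+ // Every one of the 65536 bit patterns should survive the
+ // uint16 -> float64 -> uint16 round trip, except that all 2046 NaN
+ // payloads are expected to collapse to the canonical quiet NaN 0x7E00.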
+ let value = 0, expected = value;
+
+ do {
+
+ expected = value;
+
+ // if exponent is all 1s, either Infinity or NaN
+ if ((value & 0x7C00) === 0x7C00) {
+ // if significand, must be NaN
+ if (((value << 6) & 0xFFFF) !== 0) {
+ expected = uNaN;
+ } else {
+ // otherwise +/- Infinity
+ expected = (value >>> 15) !== 0 ? nInf : pInf;
+ }
+ }
+
+ expect(float64ToUint16(uint16ToFloat64(value))).toEqual(expected);
+ } while (++value < 65536);
+ });
+});
diff --git a/src/arrow/js/test/unit/recordbatch/record-batch-tests.ts b/src/arrow/js/test/unit/recordbatch/record-batch-tests.ts
new file mode 100644
index 000000000..520c04f84
--- /dev/null
+++ b/src/arrow/js/test/unit/recordbatch/record-batch-tests.ts
@@ -0,0 +1,130 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import '../../jest-extensions';
+import {
+ Data, RecordBatch,
+ Vector, Int32Vector, Float32Vector, Float32, Int32,
+} from 'apache-arrow';
+import { arange } from '../utils';
+
+function numsRecordBatch(i32Len: number, f32Len: number) {
+ return RecordBatch.new({
+ i32: Int32Vector.from(new Int32Array(arange(new Array(i32Len)))) as Int32Vector,
+ f32: Float32Vector.from(new Float32Array(arange(new Array(f32Len)))) as Float32Vector
+ });
+}
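+
+// e.g. numsRecordBatch(3, 3) is a two-column batch whose 'i32' and 'f32'
+// children both hold [0, 1, 2], since arange fills an array with its indices.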
+
+describe(`RecordBatch`, () => {
+ describe(`new()`, () => {
+
+ test(`creates a new RecordBatch from a Vector`, () => {
+
+ const i32s = new Int32Array(arange(new Array<number>(10)));
+
+ let i32 = Vector.new(Data.Int(new Int32(), 0, i32s.length, 0, null, i32s));
+ expect(i32).toHaveLength(i32s.length);
+ expect(i32.nullCount).toBe(0);
+
+ const batch = RecordBatch.new([i32], ['i32']);
+ i32 = batch.getChildAt(0) as Int32Vector;
+
+ expect(batch.schema.fields[0].name).toBe('i32');
+ expect(i32).toHaveLength(i32s.length);
+ expect(i32.nullCount).toBe(0);
+
+ expect(i32).toEqualVector(Int32Vector.from(i32s));
+ });
+
+ test(`creates a new RecordBatch from Vectors`, () => {
+
+ const i32s = new Int32Array(arange(new Array<number>(10)));
+ const f32s = new Float32Array(arange(new Array<number>(10)));
+
+ let i32 = Vector.new(Data.Int(new Int32(), 0, i32s.length, 0, null, i32s));
+ let f32 = Vector.new(Data.Float(new Float32(), 0, f32s.length, 0, null, f32s));
+ expect(i32).toHaveLength(i32s.length);
+ expect(f32).toHaveLength(f32s.length);
+ expect(i32.nullCount).toBe(0);
+ expect(f32.nullCount).toBe(0);
+
+ const batch = RecordBatch.new([i32, f32], ['i32', 'f32']);
+ i32 = batch.getChildAt(0) as Int32Vector;
+ f32 = batch.getChildAt(1) as Float32Vector;
+
+ expect(batch.schema.fields[0].name).toBe('i32');
+ expect(batch.schema.fields[1].name).toBe('f32');
+ expect(i32).toHaveLength(i32s.length);
+ expect(f32).toHaveLength(f32s.length);
+ expect(i32.nullCount).toBe(0);
+ expect(f32.nullCount).toBe(0);
+
+ expect(i32).toEqualVector(Int32Vector.from(i32s));
+ expect(f32).toEqualVector(Float32Vector.from(f32s));
+ });
+
+ test(`creates a new RecordBatch from Vectors with different lengths`, () => {
+
+ const i32s = new Int32Array(arange(new Array<number>(20)));
+ const f32s = new Float32Array(arange(new Array<number>(8)));
+
+ let i32 = Int32Vector.from(i32s);
+ let f32 = Float32Vector.from(f32s);
+
+ expect(i32).toHaveLength(i32s.length);
+ expect(f32).toHaveLength(f32s.length);
+ expect(i32.nullCount).toBe(0);
+ expect(f32.nullCount).toBe(0);
+
+ const batch = RecordBatch.new([i32, f32]);
+ i32 = batch.getChildAt(0) as Int32Vector;
+ f32 = batch.getChildAt(1) as Float32Vector;
+
+ expect(batch.schema.fields[0].name).toBe('0');
+ expect(batch.schema.fields[1].name).toBe('1');
+ expect(i32).toHaveLength(i32s.length);
+ expect(f32).toHaveLength(i32s.length); // new length should be the same as the longest sibling
+ expect(i32.nullCount).toBe(0);
+ expect(f32.nullCount).toBe(i32s.length - f32s.length);
+
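+ // Expected f32 data: length padded to 20 to match i32, null count 12, and
+ // a validity bitmap whose first byte is 0xFF -- only the 8 original floats
+ // are marked valid; the 12 padded trailing slots read as null.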
+ const f32Expected = Data.Float(
+ f32.type, 0, i32s.length,
+ i32s.length - f32s.length,
+ new Uint8Array(8).fill(255, 0, 1), f32s);
+
+ expect(i32).toEqualVector(Int32Vector.from(i32s));
+ expect(f32).toEqualVector(new Float32Vector(f32Expected));
+ });
+ });
+
+ describe(`select()`, () => {
+ test(`can select recordbatch children by name`, () => {
+ const batch = numsRecordBatch(32, 27);
+ const i32sBatch = batch.select('i32');
+ expect(i32sBatch.numCols).toBe(1);
+ expect(i32sBatch).toHaveLength(32);
+ });
+ });
+ describe(`selectAt()`, () => {
+ test(`can select recordbatch children by index`, () => {
+ const batch = numsRecordBatch(32, 45);
+ const f32sBatch = batch.selectAt(1);
+ expect(f32sBatch.numCols).toBe(1);
+ expect(f32sBatch).toHaveLength(45);
+ });
+ });
+});
diff --git a/src/arrow/js/test/unit/table-tests.ts b/src/arrow/js/test/unit/table-tests.ts
new file mode 100644
index 000000000..2f138182b
--- /dev/null
+++ b/src/arrow/js/test/unit/table-tests.ts
@@ -0,0 +1,406 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import '../jest-extensions';
+import {
+ Data, Schema, Field, Table, RecordBatch, Column,
+ Vector, Int32Vector, Float32Vector, Utf8Vector, DictionaryVector,
+ Struct, Float32, Int32, Dictionary, Utf8, Int8
+} from 'apache-arrow';
+import { arange } from './utils';
+
+const NAMES = ['f32', 'i32', 'dictionary'] as (keyof TestDataSchema)[];
+const F32 = 0, I32 = 1, DICT = 2;
+export const test_data = [
+ {
+ name: `single record batch`,
+ table: getSingleRecordBatchTable,
+ // Use Math.fround to coerce to float32
+ values: () => [
+ [Math.fround(-0.3), -1, 'a'],
+ [Math.fround(-0.2), 1, 'b'],
+ [Math.fround(-0.1), -1, 'c'],
+ [Math.fround(0), 1, 'a'],
+ [Math.fround(0.1), -1, 'b'],
+ [Math.fround(0.2), 1, 'c'],
+ [Math.fround(0.3), -1, 'a']
+ ]
+ }, {
+ name: `multiple record batches`,
+ table: getMultipleRecordBatchesTable,
+ values: () => [
+ [Math.fround(-0.3), -1, 'a'],
+ [Math.fround(-0.2), 1, 'b'],
+ [Math.fround(-0.1), -1, 'c'],
+ [Math.fround(0), 1, 'a'],
+ [Math.fround(0.1), -1, 'b'],
+ [Math.fround(0.2), 1, 'c'],
+ [Math.fround(0.3), -1, 'a'],
+ [Math.fround(0.2), 1, 'b'],
+ [Math.fround(0.1), -1, 'c'],
+ ]
+ }, {
+ name: `struct`,
+ table: () => Table.fromStruct(getStructTable().getColumn('struct')!),
+ // Use Math.fround to coerce to float32
+ values: () => [
+ [Math.fround(-0.3), -1, 'a'],
+ [Math.fround(-0.2), 1, 'b'],
+ [Math.fround(-0.1), -1, 'c'],
+ [Math.fround(0), 1, 'a'],
+ [Math.fround(0.1), -1, 'b'],
+ [Math.fround(0.2), 1, 'c'],
+ [Math.fround(0.3), -1, 'a']
+ ]
+ },
+];
+
+function compareBatchAndTable(source: Table, offset: number, batch: RecordBatch, table: Table) {
+ expect(batch).toHaveLength(table.length);
+ expect(table.numCols).toEqual(source.numCols);
+ expect(batch.numCols).toEqual(source.numCols);
+ for (let i = -1, n = source.numCols; ++i < n;) {
+ const v0 = source.getColumnAt(i)!.slice(offset, offset + batch.length);
+ const v1 = batch.getChildAt(i);
+ const v2 = table.getColumnAt(i);
+ const name = source.schema.fields[i].name;
+ expect([v1, `batch`, name]).toEqualVector([v0, `source`]);
+ expect([v2, `table`, name]).toEqualVector([v0, `source`]);
+ }
+}
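+
+// Slices the same window out of each source column and asserts that both the
+// batch's children and the round-tripped table's columns match it; used below
+// as compareBatchAndTable(source, 0, batch1, Table.from(new Table(batch1).serialize())).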
+
+describe(`Table`, () => {
+ test(`can create an empty table`, () => {
+ expect(Table.empty()).toHaveLength(0);
+ });
+ test(`Table.from([]) creates an empty table`, () => {
+ expect(Table.from([])).toHaveLength(0);
+ });
+ test(`Table.from() creates an empty table`, () => {
+ expect(Table.from()).toHaveLength(0);
+ });
+
+ describe(`new()`, () => {
+ test(`creates an empty Table with Columns`, () => {
+ let i32 = Column.new('i32', Data.new(new Int32(), 0, 0));
+ let f32 = Column.new('f32', Data.new(new Float32(), 0, 0));
+ const table = Table.new(i32, f32);
+ i32 = table.getColumn('i32')!;
+ f32 = table.getColumn('f32')!;
+ expect(table).toHaveLength(0);
+ expect(i32).toHaveLength(0);
+ expect(f32).toHaveLength(0);
+ expect(i32.toArray()).toBeInstanceOf(Int32Array);
+ expect(f32.toArray()).toBeInstanceOf(Float32Array);
+ });
+
+ test(`creates a new Table from a Column`, () => {
+
+ const i32s = new Int32Array(arange(new Array<number>(10)));
+
+ let i32 = Column.new('i32', Data.Int(new Int32(), 0, i32s.length, 0, null, i32s));
+ expect(i32.name).toBe('i32');
+ expect(i32).toHaveLength(i32s.length);
+ expect(i32.nullable).toBe(true);
+ expect(i32.nullCount).toBe(0);
+
+ const table = Table.new(i32);
+ i32 = table.getColumnAt(0)!;
+
+ expect(i32.name).toBe('i32');
+ expect(i32).toHaveLength(i32s.length);
+ expect(i32.nullable).toBe(true);
+ expect(i32.nullCount).toBe(0);
+
+ expect(i32).toEqualVector(Int32Vector.from(i32s));
+ });
+
+ test(`creates a new Table from Columns`, () => {
+
+ const i32s = new Int32Array(arange(new Array<number>(10)));
+ const f32s = new Float32Array(arange(new Array<number>(10)));
+
+ let i32 = Column.new('i32', Data.Int(new Int32(), 0, i32s.length, 0, null, i32s));
+ let f32 = Column.new('f32', Data.Float(new Float32(), 0, f32s.length, 0, null, f32s));
+ expect(i32.name).toBe('i32');
+ expect(f32.name).toBe('f32');
+ expect(i32).toHaveLength(i32s.length);
+ expect(f32).toHaveLength(f32s.length);
+ expect(i32.nullable).toBe(true);
+ expect(f32.nullable).toBe(true);
+ expect(i32.nullCount).toBe(0);
+ expect(f32.nullCount).toBe(0);
+
+ const table = Table.new(i32, f32);
+ i32 = table.getColumnAt(0)!;
+ f32 = table.getColumnAt(1)!;
+
+ expect(i32.name).toBe('i32');
+ expect(f32.name).toBe('f32');
+ expect(i32).toHaveLength(i32s.length);
+ expect(f32).toHaveLength(f32s.length);
+ expect(i32.nullable).toBe(true);
+ expect(f32.nullable).toBe(true);
+ expect(i32.nullCount).toBe(0);
+ expect(f32.nullCount).toBe(0);
+
+ expect(i32).toEqualVector(Int32Vector.from(i32s));
+ expect(f32).toEqualVector(Float32Vector.from(f32s));
+ });
+
+ test(`creates a new Table from Columns with different lengths`, () => {
+
+ const i32s = new Int32Array(arange(new Array<number>(20)));
+ const f32s = new Float32Array(arange(new Array<number>(8)));
+
+ let i32 = Column.new('i32', Int32Vector.from(i32s));
+ let f32 = Column.new('f32', Float32Vector.from(f32s));
+
+ expect(i32.name).toBe('i32');
+ expect(f32.name).toBe('f32');
+ expect(i32).toHaveLength(i32s.length);
+ expect(f32).toHaveLength(f32s.length);
+ expect(i32.nullable).toBe(true);
+ expect(f32.nullable).toBe(true);
+ expect(i32.nullCount).toBe(0);
+ expect(f32.nullCount).toBe(0);
+
+ const table = Table.new([i32, f32]);
+ i32 = table.getColumnAt(0)!;
+ f32 = table.getColumnAt(1)!;
+
+ expect(i32.name).toBe('i32');
+ expect(f32.name).toBe('f32');
+ expect(i32).toHaveLength(i32s.length);
+ expect(f32).toHaveLength(i32s.length); // new length should be the same as the longest sibling
+ expect(i32.nullable).toBe(true);
+ expect(f32.nullable).toBe(true); // true, with 12 additional nulls
+ expect(i32.nullCount).toBe(0);
+ expect(f32.nullCount).toBe(i32s.length - f32s.length);
+
+ const f32Expected = Data.Float(
+ f32.type, 0, i32s.length,
+ i32s.length - f32s.length,
+ new Uint8Array(8).fill(255, 0, 1), f32s);
+
+ expect(i32).toEqualVector(Int32Vector.from(i32s));
+ expect(f32).toEqualVector(new Float32Vector(f32Expected));
+ });
+
+ test(`creates a new Table from Columns with different lengths and number of inner chunks`, () => {
+
+ const i32s = new Int32Array(arange(new Array<number>(20)));
+ const f32s = new Float32Array(arange(new Array<number>(16)));
+
+ let i32 = Column.new('i32', Int32Vector.from(i32s));
+ let f32 = Column.new('f32', Float32Vector.from(f32s.slice(0, 8)), Float32Vector.from(f32s.slice(8, 16)));
+
+ expect(i32.name).toBe('i32');
+ expect(f32.name).toBe('f32');
+ expect(i32).toHaveLength(i32s.length);
+ expect(f32).toHaveLength(f32s.length);
+ expect(i32.nullable).toBe(true);
+ expect(f32.nullable).toBe(true);
+ expect(i32.nullCount).toBe(0);
+ expect(f32.nullCount).toBe(0);
+
+ const table = Table.new({ i32Renamed: i32, f32Renamed: f32 });
+ i32 = table.getColumn('i32Renamed');
+ f32 = table.getColumn('f32Renamed');
+
+ expect(i32.name).toBe('i32Renamed');
+ expect(f32.name).toBe('f32Renamed');
+ expect(i32).toHaveLength(i32s.length);
+ expect(f32).toHaveLength(i32s.length); // new length should be the same as the longest sibling
+ expect(i32.nullable).toBe(true);
+ expect(f32.nullable).toBe(true); // true, with 4 additional nulls
+ expect(i32.nullCount).toBe(0);
+ expect(f32.nullCount).toBe(i32s.length - f32s.length);
+
+ const f32Expected = Data.Float(
+ f32.type, 0, i32s.length,
+ i32s.length - f32s.length,
+ new Uint8Array(8).fill(255, 0, 2), f32s);
+
+ expect(i32).toEqualVector(Int32Vector.from(i32s));
+ expect(f32).toEqualVector(new Float32Vector(f32Expected));
+ });
+
+ test(`creates a new Table from Typed Arrays`, () => {
+ let i32s = Int32Array.from({length: 10}, (_, i) => i);
+ let f32s = Float32Array.from({length: 10}, (_, i) => i);
+ const table = Table.new({ i32s, f32s });
+ const i32 = table.getColumn('i32s')!;
+ const f32 = table.getColumn('f32s')!;
+
+ expect(table).toHaveLength(10);
+ expect(i32).toHaveLength(10);
+ expect(f32).toHaveLength(10);
+ expect(i32.toArray()).toBeInstanceOf(Int32Array);
+ expect(f32.toArray()).toBeInstanceOf(Float32Array);
+ expect(i32.toArray()).toEqual(i32s);
+ expect(f32.toArray()).toEqual(f32s);
+ });
+ });
+
+ test(`Table.serialize() serializes sliced RecordBatches`, () => {
+
+ const table = getSingleRecordBatchTable();
+ const batch = table.chunks[0], half = batch.length / 2 | 0;
+
+ // First compare what happens when slicing from the batch level
+ let [batch1, batch2] = [batch.slice(0, half), batch.slice(half)];
+
+ compareBatchAndTable(table, 0, batch1, Table.from(new Table(batch1).serialize()));
+ compareBatchAndTable(table, half, batch2, Table.from(new Table(batch2).serialize()));
+
+ // Then compare what happens when creating a RecordBatch by slicing each child individually
+ batch1 = new RecordBatch(batch1.schema, batch1.length, batch1.schema.fields.map((_, i) => {
+ return batch.getChildAt(i)!.slice(0, half);
+ }));
+
+ batch2 = new RecordBatch(batch2.schema, batch2.length, batch2.schema.fields.map((_, i) => {
+ return batch.getChildAt(i)!.slice(half);
+ }));
+
+ compareBatchAndTable(table, 0, batch1, Table.from(new Table(batch1).serialize()));
+ compareBatchAndTable(table, half, batch2, Table.from(new Table(batch2).serialize()));
+ });
+
+ for (let datum of test_data) {
+ describe(datum.name, () => {
+ test(`has the correct length`, () => {
+ const table = datum.table();
+ const values = datum.values();
+ expect(table).toHaveLength(values.length);
+ });
+ test(`gets expected values`, () => {
+ const table = datum.table();
+ const values = datum.values();
+ for (let i = -1; ++i < values.length;) {
+ const row = table.get(i);
+ const expected = values[i];
+ expect(row.f32).toEqual(expected[F32]);
+ expect(row.i32).toEqual(expected[I32]);
+ expect(row.dictionary).toEqual(expected[DICT]);
+ }
+ });
+ test(`iterates expected values`, () => {
+ let i = 0;
+ const table = datum.table();
+ const values = datum.values();
+ for (let row of table) {
+ const expected = values[i++];
+ expect(row.f32).toEqual(expected[F32]);
+ expect(row.i32).toEqual(expected[I32]);
+ expect(row.dictionary).toEqual(expected[DICT]);
+ }
+ });
+ test(`serialize and de-serialize is a no-op`, () => {
+ const table = datum.table();
+ const clone = Table.from(table.serialize());
+ expect(clone).toEqualTable(table);
+ });
+
+ test(`count() returns the correct length`, () => {
+ const table = datum.table();
+ const values = datum.values();
+ expect(table.count()).toEqual(values.length);
+ });
+ test(`getColumnIndex`, () => {
+ const table = datum.table();
+ expect(table.getColumnIndex('i32')).toEqual(I32);
+ expect(table.getColumnIndex('f32')).toEqual(F32);
+ expect(table.getColumnIndex('dictionary')).toEqual(DICT);
+ });
+
+ const table = datum.table();
+ const values = datum.values();
+
+ test(`table.select() basic tests`, () => {
+ let selected = table.select('f32', 'dictionary');
+ expect(selected.schema.fields).toHaveLength(2);
+ expect(selected.schema.fields[0]).toEqual(table.schema.fields[0]);
+ expect(selected.schema.fields[1]).toEqual(table.schema.fields[2]);
+
+ expect(selected).toHaveLength(values.length);
+ let idx = 0, expected_row;
+ for (let row of selected) {
+ expected_row = values[idx++];
+ expect(row.f32).toEqual(expected_row[F32]);
+ expect(row.dictionary).toEqual(expected_row[DICT]);
+ }
+ });
+ });
+ }
+});
+
+type TestDataSchema = { f32: Float32; i32: Int32; dictionary: Dictionary<Utf8, Int8> };
+
+function getTestVectors(f32Values: number[], i32Values: number[], dictIndices: number[]) {
+
+ const values = Utf8Vector.from(['a', 'b', 'c']);
+ const i32Data = Data.Int(new Int32(), 0, i32Values.length, 0, null, i32Values);
+ const f32Data = Data.Float(new Float32(), 0, f32Values.length, 0, null, f32Values);
+
+ return [Vector.new(f32Data), Vector.new(i32Data), DictionaryVector.from(values, new Int8(), dictIndices)];
+}
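+
+// e.g. getTestVectors([0.1], [1], [2]) yields one Float32 slot, one Int32
+// slot, and one dictionary slot that decodes to 'c' (an index into ['a','b','c']).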
+
+function getSingleRecordBatchTable() {
+ const vectors = getTestVectors(
+ [-0.3, -0.2, -0.1, 0, 0.1, 0.2, 0.3],
+ [-1, 1, -1, 1, -1, 1, -1],
+ [0, 1, 2, 0, 1, 2, 0]
+ );
+
+ return Table.new<TestDataSchema>(vectors, NAMES);
+}
+
+function getMultipleRecordBatchesTable() {
+
+ const types = getTestVectors([], [], []).map((vec) => vec.type);
+ const fields = NAMES.map((name, i) => Field.new(name, types[i]));
+ const schema = new Schema<TestDataSchema>(fields);
+
+ const b1 = new RecordBatch(schema, 3, getTestVectors(
+ [-0.3, -0.2, -0.1],
+ [-1, 1, -1],
+ [0, 1, 2]
+ ));
+
+ const b2 = new RecordBatch(schema, 3, getTestVectors(
+ [0, 0.1, 0.2],
+ [1, -1, 1],
+ [0, 1, 2]
+ ));
+
+ const b3 = new RecordBatch(schema, 3, getTestVectors(
+ [0.3, 0.2, 0.1],
+ [-1, 1, -1],
+ [0, 1, 2]
+ ));
+
+ return new Table<TestDataSchema>([b1, b2, b3]);
+}
+
+function getStructTable() {
+ const table = getSingleRecordBatchTable();
+ const struct = new Struct<TestDataSchema>(table.schema.fields);
+ const children = table.schema.fields.map((_, i) => table.getColumnAt(i)!);
+ const structVec = Vector.new(Data.Struct(struct, 0, table.length, 0, null, children));
+ return Table.new<{ struct: Struct<TestDataSchema> }>([structVec], ['struct']);
+}
diff --git a/src/arrow/js/test/unit/table/assign-tests.ts b/src/arrow/js/test/unit/table/assign-tests.ts
new file mode 100644
index 000000000..fa1dacbc6
--- /dev/null
+++ b/src/arrow/js/test/unit/table/assign-tests.ts
@@ -0,0 +1,80 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+/* eslint-disable jest/no-standalone-expect */
+
+import '../../jest-extensions';
+import { zip } from 'ix/iterable';
+import * as generate from '../../generate-test-data';
+import { validateTable } from '../generated-data-validators';
+import {
+ Schema, Field, DataType, Int32, Float32, Utf8
+} from 'apache-arrow';
+
+const toSchema = (...xs: [string, DataType][]) => new Schema(xs.map((x) => new Field(...x)));
+const schema1 = toSchema(['a', new Int32()], ['b', new Float32()], ['c', new Utf8()]);
+const partialOverlapWith1 = toSchema(['a', new Int32()], ['b', new Float32()], ['f', new Utf8()]);
+const schema2 = toSchema(['d', new Int32()], ['e', new Float32()], ['f', new Utf8()]);
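+
+// How to read the index maps passed to `f` below: f(lhsIndices, rhsIndices)
+// records where each source column lands in the assigned result, e.g.
+// f([0,1,2], [3,4,5]) puts lhs columns a,b,c at 0-2 and rhs d,e,f at 3-5;
+// a sparse hole means that lhs column was overwritten by an rhs column.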
+
+describe('Table.assign()', () => {
+ describe(`should assign non-overlapping fields`, () => {
+ const lhs = generate.table([20], schema1);
+ const rhs = generate.table([20], schema2);
+ const table = lhs.table.assign(rhs.table);
+ const f = assignGeneratedTables(lhs, rhs);
+ expect(table.schema.fields.map((f) => f.name)).toEqual(['a', 'b', 'c', 'd', 'e', 'f']);
+ validateTable({ ...f([0,1,2], [3,4,5]), table }).run();
+ });
+ describe(`should assign partially-overlapping fields`, () => {
+ const lhs = generate.table([20], schema1);
+ const rhs = generate.table([20], partialOverlapWith1);
+ const table = lhs.table.assign(rhs.table);
+ const f = assignGeneratedTables(lhs, rhs);
+ expect(table.schema.fields.map((f) => f.name)).toEqual(['a', 'b', 'c', 'f']);
+ // eslint-disable-next-line no-sparse-arrays
+ validateTable({ ...f([ , , 2], [0,1,3]), table }).run();
+ });
+ describe(`should assign completely-overlapping fields`, () => {
+ const lhs = generate.table([20], schema2);
+ const rhs = generate.table([20], schema2);
+ const table = lhs.table.assign(rhs.table);
+ const f = assignGeneratedTables(lhs, rhs);
+ expect(table.schema.fields.map((f) => f.name)).toEqual(['d', 'e', 'f']);
+ // eslint-disable-next-line no-sparse-arrays
+ validateTable({ ...f([ , , ], [0,1,2]), table }).run();
+ });
+});
+
+function assignGeneratedTables(lhs: generate.GeneratedTable, rhs: generate.GeneratedTable) {
+ return function createAssignedTestData(lhsIndices: any[], rhsIndices: any[]) {
+ const pluckLhs = (xs: any[], ys: any[] = []) => lhsIndices.reduce((ys, i, j) => {
+ if (i !== undefined) { ys[i] = xs ? xs[j] : null; }
+ return ys;
+ }, ys);
+ const pluckRhs = (xs: any[], ys: any[] = []) => rhsIndices.reduce((ys, i, j) => {
+ if (i !== undefined) { ys[i] = xs ? xs[j] : null; }
+ return ys;
+ }, ys);
+ const cols = () => [...pluckLhs(lhs.cols(), pluckRhs(rhs.cols()))];
+ const keys = () => [...pluckLhs(lhs.keys(), pluckRhs(rhs.keys()))];
+ const rows = () => [...zip(lhs.rows(), rhs.rows())].map(([x, y]) => [...pluckLhs(x, pluckRhs(y))]);
+ const colBatches = [...zip(lhs.colBatches, rhs.colBatches)].map(([x, y]) => () => [...pluckLhs(x(), pluckRhs(y()))]);
+ const keyBatches = [...zip(lhs.keyBatches, rhs.keyBatches)].map(([x, y]) => () => [...pluckLhs(x(), pluckRhs(y()))]);
+ const rowBatches = [...zip(lhs.rowBatches, rhs.rowBatches)].map(([x, y]) => () => [...zip(x(), y())].map(([x, y]) => [...pluckLhs(x, pluckRhs(y))]));
+ return { cols, keys, rows, colBatches, keyBatches, rowBatches };
+ };
+}
diff --git a/src/arrow/js/test/unit/table/serialize-tests.ts b/src/arrow/js/test/unit/table/serialize-tests.ts
new file mode 100644
index 000000000..5eb211763
--- /dev/null
+++ b/src/arrow/js/test/unit/table/serialize-tests.ts
@@ -0,0 +1,167 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import '../../jest-extensions';
+import * as generate from '../../generate-test-data';
+import {
+ Table, Schema, Field, DataType, Dictionary, Int32, Float32, Utf8, Null, Int32Vector
+} from 'apache-arrow';
+
+const toSchema = (...xs: [string, DataType][]) => new Schema(xs.map((x) => new Field(...x)));
+const schema1 = toSchema(['a', new Int32()], ['b', new Float32()], ['c', new Dictionary(new Utf8(), new Int32())]);
+const schema2 = toSchema(['d', new Int32()], ['e', new Float32()], ['f', new Utf8()]);
+const nullSchema = new Schema([new Field('null', new Null())]);
+
+schema1.metadata.set('foo', 'bar');
+
+function createTable<T extends { [key: string]: DataType } = any>(schema: Schema<T>, chunkLengths: number[]) {
+ return generate.table(chunkLengths, schema).table;
+}
+
+describe('Table#serialize()', () => {
+
+ test(`doesn't swap the order of buffers that share the same underlying ArrayBuffer but are in a different order`, () => {
+ const values = new Int32Array([0, 1, 2, 3, 4, 5, 6, 7]);
+ const expected = values.slice();
+ const x = Int32Vector.from(values.subarray(4, 8)); // back
+ const y = Int32Vector.from(values.subarray(0, 4)); // front
+ const source = Table.new([x, y], ['x', 'y']);
+ const table = Table.from(source.serialize());
+ expect(table.getColumn('x').toArray()).toEqual(expected.subarray(4, 8));
+ expect(table.getColumn('y').toArray()).toEqual(expected.subarray(0, 4));
+ });
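+
+ // The subarray trick above places x's bytes *after* y's within one shared
+ // ArrayBuffer; a writer that deduplicated buffers by their backing
+ // ArrayBuffer rather than by byte range could emit them swapped, which is
+ // the regression this guards against.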
+
+ test(`Table#empty round-trips through serialization`, () => {
+ const source = Table.empty();
+ source.schema.metadata.set('foo', 'bar');
+ expect(source).toHaveLength(0);
+ expect(source.numCols).toBe(0);
+ const result = Table.from(source.serialize());
+ expect(result).toEqualTable(source);
+ expect(result.schema.metadata.get('foo')).toEqual('bar');
+ });
+
+ test(`Schema metadata round-trips through serialization`, () => {
+ const source = createTable(schema1, [20]);
+ expect(source).toHaveLength(20);
+ expect(source.numCols).toBe(3);
+ const result = Table.from(source.serialize());
+ expect(result).toEqualTable(source);
+ expect(result.schema.metadata.get('foo')).toEqual('bar');
+ });
+
+ test(`Table#assign an empty Table to a Table with a zero-length Null column round-trips through serialization`, () => {
+ const table1 = new Table(nullSchema);
+ const table2 = Table.empty();
+ const source = table1.assign(table2);
+ expect(source).toHaveLength(0);
+ expect(source.numCols).toBe(1);
+ const result = Table.from(source.serialize());
+ expect(result).toEqualTable(source);
+ });
+
+ const chunkLengths = [] as number[];
+ for (let i = -1; ++i < 3;) {
+ chunkLengths[i * 2] = (Math.random() * 100) | 0;
+ chunkLengths[i * 2 + 1] = 0;
+ const table = <T extends { [key: string]: DataType } = any>(schema: Schema<T>) => createTable(schema, chunkLengths);
+ test(`Table#select round-trips through serialization`, () => {
+ const source = table(schema1).select('a', 'c');
+ expect(source.numCols).toBe(2);
+ const result = Table.from(source.serialize());
+ expect(result).toEqualTable(source);
+ });
+ test(`Table#selectAt round-trips through serialization`, () => {
+ const source = table(schema1).selectAt(0, 2);
+ expect(source.numCols).toBe(2);
+ const result = Table.from(source.serialize());
+ expect(result).toEqualTable(source);
+ });
+ test(`Table#assign round-trips through serialization`, () => {
+ const source = table(schema1).assign(table(schema2));
+ expect(source.numCols).toBe(6);
+ const result = Table.from(source.serialize());
+ expect(result).toEqualTable(source);
+ expect(result.schema.metadata.get('foo')).toEqual('bar');
+ });
+ test(`Table#assign with an empty table round-trips through serialization`, () => {
+ const table1 = table(schema1);
+ const source = table1.assign(Table.empty());
+ expect(source.numCols).toBe(table1.numCols);
+ expect(source).toHaveLength(table1.length);
+ const result = Table.from(source.serialize());
+ expect(result).toEqualTable(source);
+ expect(result.schema.metadata.get('foo')).toEqual('bar');
+ });
+ test(`Table#assign with a zero-length Null column round-trips through serialization`, () => {
+ const table1 = new Table(nullSchema);
+ const table2 = table(schema1);
+ const source = table1.assign(table2);
+ expect(source).toHaveLength(table2.length);
+ expect(source.numCols).toBe(4);
+ const result = Table.from(source.serialize());
+ expect(result).toEqualTable(source);
+ expect(result.schema.metadata.get('foo')).toEqual('bar');
+ });
+ test(`Table#assign with different lengths and number of chunks round-trips through serialization`, () => {
+ const table1 = table(schema1);
+ const table2 = createTable(schema2, [102, 4, 10, 97, 10, 2, 4]);
+ const source = table1.assign(table2);
+ expect(source.numCols).toBe(6);
+ expect(source).toHaveLength(Math.max(table1.length, table2.length));
+ const result = Table.from(source.serialize());
+ expect(result).toEqualTable(source);
+ expect(result.schema.metadata.get('foo')).toEqual('bar');
+ });
+ test(`Table#select with Table#assign the result of Table#selectAt round-trips through serialization`, () => {
+ const table1 = table(schema1);
+ const table2 = table(schema2);
+ const source = table1.select('a', 'c').assign(table2.selectAt(2));
+ expect(source.numCols).toBe(3);
+ const result = Table.from(source.serialize());
+ expect(result).toEqualTable(source);
+ expect(result.schema.metadata.get('foo')).toEqual('bar');
+ });
+ test(`Table#slice round-trips through serialization`, () => {
+ const table1 = table(schema1);
+ const length = table1.length;
+ const [begin, end] = [length * .25, length * .75].map((x) => x | 0);
+ const source = table1.slice(begin, end);
+ expect(source.numCols).toBe(3);
+ expect(source).toHaveLength(end - begin);
+ const result = Table.from(source.serialize());
+ expect(result).toEqualTable(source);
+ expect(result.schema.metadata.get('foo')).toEqual('bar');
+ });
+ test(`Table#concat of two slices round-trips through serialization`, () => {
+ const table1 = table(schema1);
+ const length = table1.length;
+ const [begin1, end1] = [length * .10, length * .20].map((x) => x | 0);
+ const [begin2, end2] = [length * .80, length * .90].map((x) => x | 0);
+ const slice1 = table1.slice(begin1, end1);
+ const slice2 = table1.slice(begin2, end2);
+ const source = slice1.concat(slice2);
+ expect(slice1).toHaveLength(end1 - begin1);
+ expect(slice2).toHaveLength(end2 - begin2);
+ expect(source).toHaveLength((end1 - begin1) + (end2 - begin2));
+ [slice1, slice2, source].forEach((x) => expect(x.numCols).toBe(3));
+ const result = Table.from(source.serialize());
+ expect(result).toEqualTable(source);
+ expect(result.schema.metadata.get('foo')).toEqual('bar');
+ });
+ }
+});
diff --git a/src/arrow/js/test/unit/utils-tests.ts b/src/arrow/js/test/unit/utils-tests.ts
new file mode 100644
index 000000000..985bec7aa
--- /dev/null
+++ b/src/arrow/js/test/unit/utils-tests.ts
@@ -0,0 +1,32 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import { isTypedArray } from 'apache-arrow';
+
+describe('isTypedArray', () => {
+ test('works for typed arrays', () => {
+ expect(isTypedArray(new Int8Array())).toBeTruthy();
+ expect(isTypedArray(new Int32Array())).toBeTruthy();
+ expect(isTypedArray(new BigInt64Array())).toBeTruthy();
+ });
+
+ test('does not recognize arrays, buffers, or data views', () => {
+ expect(isTypedArray([1, 2, 3])).toBeFalsy();
+ expect(isTypedArray(new ArrayBuffer(10))).toBeFalsy();
+ expect(isTypedArray(new DataView(new ArrayBuffer(10)))).toBeFalsy();
+ });
+});
diff --git a/src/arrow/js/test/unit/utils.ts b/src/arrow/js/test/unit/utils.ts
new file mode 100644
index 000000000..c57de487f
--- /dev/null
+++ b/src/arrow/js/test/unit/utils.ts
@@ -0,0 +1,21 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
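+// Fills `arr` in place with its own indices and returns it, e.g.
+// arange(new Array(3)) -> [0, 1, 2]; used to build predictable test columns.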
+export function arange<T extends { length: number; [n: number]: number }>(arr: T, n = arr.length) {
+ for (let i = -1; ++i < n; arr[i] = i) { }
+ return arr;
+}
diff --git a/src/arrow/js/test/unit/vector/bool-vector-tests.ts b/src/arrow/js/test/unit/vector/bool-vector-tests.ts
new file mode 100644
index 000000000..41c53da60
--- /dev/null
+++ b/src/arrow/js/test/unit/vector/bool-vector-tests.ts
@@ -0,0 +1,111 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import { Data, Bool, Vector, BoolVector } from 'apache-arrow';
+
+const newBoolVector = (length: number, data: Uint8Array) => Vector.new(Data.Bool(new Bool(), 0, length, 0, null, data));
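+
+// Bool vectors are bit-packed LSB-first, so the byte 27 (0b00011011) used
+// below decodes to [true, true, false, true, true, false, false, false].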
+
+describe(`BoolVector`, () => {
+ const values = [true, true, false, true, true, false, false, false];
+ const n = values.length;
+ const vector = newBoolVector(n, new Uint8Array([27, 0, 0, 0, 0, 0, 0, 0]));
+ test(`gets expected values`, () => {
+ let i = -1;
+ while (++i < n) {
+ expect(vector.get(i)).toEqual(values[i]);
+ }
+ });
+ test(`iterates expected values`, () => {
+ let i = -1;
+ for (let v of vector) {
+ expect(++i).toBeLessThan(n);
+ expect(v).toEqual(values[i]);
+ }
+ });
+ test(`indexOf returns expected values`, () => {
+ for (let test_value of [true, false]) {
+ const expected = values.indexOf(test_value);
+ expect(vector.indexOf(test_value)).toEqual(expected);
+ }
+ });
+ test(`indexOf returns -1 when value not found`, () => {
+ const v = newBoolVector(3, new Uint8Array([0xFF]));
+ expect(v.indexOf(false)).toEqual(-1);
+ });
+ test(`can set values to true and false`, () => {
+ const v = newBoolVector(n, new Uint8Array([27, 0, 0, 0, 0, 0, 0, 0]));
+ const expected1 = [true, true, false, true, true, false, false, false];
+ const expected2 = [true, true, true, true, true, false, false, false];
+ const expected3 = [true, true, false, false, false, false, true, true];
+ function validate(expected: boolean[]) {
+ for (let i = -1; ++i < n;) {
+ expect(v.get(i)).toEqual(expected[i]);
+ }
+ }
+ validate(expected1);
+ v.set(2, true);
+ validate(expected2);
+ v.set(2, false);
+ validate(expected1);
+ v.set(3, false);
+ v.set(4, false);
+ v.set(6, true);
+ v.set(7, true);
+ validate(expected3);
+ v.set(3, true);
+ v.set(4, true);
+ v.set(6, false);
+ v.set(7, false);
+ validate(expected1);
+ });
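+
+ // The packing tests below expect 64-byte buffers because the builder
+ // appears to allocate bitmaps in 64-byte-aligned strides; only the leading
+ // bytes carry packed values (e.g. [true, false, true] packs to 0b101 = 5).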
+ test(`packs 0 values`, () => {
+ const expected = new Uint8Array(64);
+ expect(BoolVector.from([]).values).toEqual(expected);
+ });
+ test(`packs 3 values`, () => {
+ const expected = new Uint8Array(64);
+ expected[0] = 5;
+ expect(BoolVector.from([
+ true, false, true
+ ]).values).toEqual(expected);
+ });
+ test(`packs 8 values`, () => {
+ const expected = new Uint8Array(64);
+ expected[0] = 27;
+ expect(BoolVector.from([
+ true, true, false, true, true, false, false, false
+ ]).values).toEqual(expected);
+ });
+ test(`packs 17 values`, () => {
+ const expected = new Uint8Array(64);
+ expected[0] = 27;
+ expected[1] = 216;
+ expect(BoolVector.from([
+ true, true, false, true, true, false, false, false,
+ false, false, false, true, true, false, true, true,
+ false
+ ]).values).toEqual(expected);
+ });
+ test(`from with boolean Array packs values`, () => {
+ const expected = new Uint8Array(64);
+ expected[0] = 5;
+ expect(BoolVector
+ .from([true, false, true])
+ .slice().values
+ ).toEqual(expected);
+ });
+});
diff --git a/src/arrow/js/test/unit/vector/date-vector-tests.ts b/src/arrow/js/test/unit/vector/date-vector-tests.ts
new file mode 100644
index 000000000..4658633ba
--- /dev/null
+++ b/src/arrow/js/test/unit/vector/date-vector-tests.ts
@@ -0,0 +1,102 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import { Table, DateDay, DateMillisecond } from 'apache-arrow';
+
+describe(`DateVector`, () => {
+ it('returns days since the epoch as correct JS Dates', () => {
+ const table = Table.from(test_data);
+ const expectedMillis = expectedMillis32();
+ const date32 = table.getColumnAt<DateDay>(0)!;
+ for (const date of date32) {
+ const millis = expectedMillis.shift();
+ expect(date).toEqual(millis === null ? null : new Date(millis!));
+ }
+ });
+ it('returns millisecond longs since the epoch as correct JS Dates', () => {
+ const table = Table.from(test_data);
+ const expectedMillis = expectedMillis64();
+ const date64 = table.getColumnAt<DateMillisecond>(1)!;
+ for (const date of date64) {
+ const millis = expectedMillis.shift();
+ expect(date).toEqual(millis === null ? null : new Date(millis!));
+ }
+ });
+});
+
+const expectedMillis32 = () => [
+ 165247430400000, 34582809600000, 232604524800000, null,
+ 199808812800000, 165646771200000, 209557238400000, null
+];
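+
+// DateDay values are int32 days since the UNIX epoch, so each expected value
+// above is DATA[i] * 86400000, e.g. 1912586 * 86400000 === 165247430400000.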
+
+const expectedMillis64 = () => [
+ 27990830234011, -41278585914325, 12694624797111,
+ null, null, 10761360520213, null, 1394015437000
+];
+
+const test_data = {
+ 'schema': {
+ 'fields': [
+ {
+ 'name': 'f0',
+ 'type': {
+ 'name': 'date',
+ 'unit': 'DAY'
+ },
+ 'nullable': true,
+ 'children': []
+ },
+ {
+ 'name': 'f1',
+ 'type': {
+ 'name': 'date',
+ 'unit': 'MILLISECOND'
+ },
+ 'nullable': true,
+ 'children': []
+ }
+ ]
+ },
+ 'batches': [
+ {
+ 'count': 8,
+ 'columns': [
+ {
+ 'name': 'f0',
+ 'count': 8,
+ 'VALIDITY': [1, 1, 1, 0, 1, 1, 1, 0],
+ 'DATA': [1912586, 400264, 2692182, 2163746, 2312602, 1917208, 2425431]
+ },
+ {
+ 'name': 'f1',
+ 'count': 8,
+ 'VALIDITY': [1, 1, 1, 0, 0, 1, 0, 1],
+ 'DATA': [
+ 27990830234011,
+ -41278585914325,
+ 12694624797111,
+ -38604948562547,
+ -37802308043516,
+ 10761360520213,
+ -25129181633384,
+ 1394015437000 // <-- the tricky one
+ ]
+ }
+ ]
+ }
+ ]
+};
diff --git a/src/arrow/js/test/unit/vector/numeric-vector-tests.ts b/src/arrow/js/test/unit/vector/numeric-vector-tests.ts
new file mode 100644
index 000000000..61418c431
--- /dev/null
+++ b/src/arrow/js/test/unit/vector/numeric-vector-tests.ts
@@ -0,0 +1,616 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+/* eslint-disable jest/no-identical-title */
+
+import {
+ util,
+ Data, Vector,
+ Float, Float16, Float32, Float64,
+ Int, Int8, Int16, Int32, Int64, Uint8, Uint16, Uint32, Uint64,
+ FloatVector, Float16Vector, Float32Vector, Float64Vector,
+ IntVector, Int8Vector, Int16Vector, Int32Vector, Int64Vector,
+ Uint8Vector, Uint16Vector, Uint32Vector, Uint64Vector,
+} from 'apache-arrow';
+
+import { VectorType as V } from 'apache-arrow/interfaces';
+import { TypedArray, TypedArrayConstructor } from 'apache-arrow/interfaces';
+import { BigIntArray, BigIntArrayConstructor } from 'apache-arrow/interfaces';
+
+const { float64ToUint16, uint16ToFloat64, joinUint8Arrays, BN } = util;
+const uint16ToFloat64Array = (b: ArrayBuffer) => new Float64Array([...new Uint16Array(b)].map(uint16ToFloat64));
+const randomBytes = (n: number) => new Uint16Array([
+ ...Uint16Array.from([0, 65535]),
+ ...Uint16Array.from({ length: (n / 2) - 2 }, () => (Math.random() * 65536) | 0),
+]).buffer;
+const toBigNumsArray = (values: Int32Array | Uint32Array) => {
+ const array = new Array(values.length * 0.5);
+ for (let i = -1, n = values.length * 0.5; ++i < n;) {
+ array[i] = BN.new(values.subarray(i * 2, i * 2 + 2))[Symbol.toPrimitive]();
+ }
+ return array;
+};
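+
+// 64-bit integer columns are backed by pairs of 32-bit halves (lo, hi), so a
+// length-n Int32Array represents n/2 logical Int64 values; BN.new joins each
+// pair and `[Symbol.toPrimitive]()` converts it to a JS bigint for comparison.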
+
+const testValueBuffers = Array.from({ length: 5 }, () => randomBytes(64));
+const testValuesBuffer = joinUint8Arrays(testValueBuffers.map((b) => new Uint8Array(b)))[0].buffer;
+
+const checkType = <T, R extends T>(Ctor: new (...args: any) => T, inst: R) => expect(inst).toBeInstanceOf(Ctor);
+const valuesArray = <T extends TypedArray>(ArrayType: TypedArrayConstructor<T>) => [...valuesTyped<T>(ArrayType)];
+const valuesArray64 = <T extends TypedArray>(ArrayType: TypedArrayConstructor<T>) => {
+ const typed = valuesTyped<T>(ArrayType);
+ const array = new Array(typed.length * 0.5);
+ for (let i = -1, n = array.length; ++i < n;) {
+ // Interleave regular Arrays and TypedArrays to cover more surface area
+ array[i] = i % 2 === 0
+ ? [...typed.subarray(i * 2, (i + 1) * 2)]
+ : typed.subarray(i * 2, (i + 1) * 2);
+ }
+ return array;
+};
+const valuesTyped = <T extends TypedArray>(ArrayType: TypedArrayConstructor<T>) => new ArrayType(testValuesBuffer);
+const bigIntValuesTyped = <T extends BigIntArray>(ArrayType: BigIntArrayConstructor<T>) => new ArrayType(testValuesBuffer);
+const bigIntValuesArray = <T extends BigIntArray>(ArrayType: BigIntArrayConstructor<T>) => [...bigIntValuesTyped<T>(ArrayType)];
+
+describe(`FloatVector`, () => {
+
+ describe(`FloatVector.from infers the type from the input TypedArray`, () => {
+
+ const u16s = valuesTyped(Uint16Array).map((x) => float64ToUint16(uint16ToFloat64(x)));
+ const f16s = valuesArray(Uint16Array).map(uint16ToFloat64);
+ const f32s = valuesTyped(Float32Array);
+ const f64s = valuesTyped(Float64Array);
+ const f16Vec = FloatVector.from(u16s);
+ const f32Vec = FloatVector.from(valuesTyped(Float32Array));
+ const f64Vec = FloatVector.from(valuesTyped(Float64Array));
+
+ // test strong typing at compile-time
+ test(`return type is correct`, () => checkType(Float16Vector, f16Vec));
+ test(`return type is correct`, () => checkType(Float32Vector, f32Vec));
+ test(`return type is correct`, () => checkType(Float64Vector, f64Vec));
+ test(`throws on bad input`, () => {
+ expect(() => FloatVector.from(<any> {})).toThrow('Unrecognized FloatVector input');
+ });
+
+ testAndValidateVector(f16Vec, u16s, f16s);
+ testAndValidateVector(f32Vec, f32s);
+ testAndValidateVector(f64Vec, f64s);
+ });
+
+ describe(`FloatVector.from casts the input values to the correct float type`, () => {
+
+ const u16s = valuesTyped(Uint16Array).map((x) => float64ToUint16(uint16ToFloat64(x)));
+ const f16s = valuesArray(Uint16Array).map(uint16ToFloat64);
+ const f16Vec_ = FloatVector.from(u16s);
+
+ const f16Vec = Float16Vector.from(f16Vec_);
+ const f32Vec = Float32Vector.from(f16Vec_);
+ const f64Vec = Float64Vector.from(f16Vec_);
+
+ // test strong typing at compile-time
+ test(`return type is correct`, () => checkType(Float16Vector, f16Vec));
+ test(`return type is correct`, () => checkType(Float32Vector, f32Vec));
+ test(`return type is correct`, () => checkType(Float64Vector, f64Vec));
+
+ testAndValidateVector(f16Vec, u16s, f16s);
+ testAndValidateVector(f32Vec, Float32Array.from(f16s));
+ testAndValidateVector(f64Vec, Float64Array.from(f16s));
+ });
+
+ describe(`Float16Vector`, () => {
+ testFloatVector(Float16, valuesArray(Uint16Array).map(uint16ToFloat64));
+ describe(`Float16Vector.from accepts regular Arrays`, () => {
+ const u16s = valuesTyped(Uint16Array).map((x) => float64ToUint16(uint16ToFloat64(x)));
+ const f16s = valuesArray(Uint16Array).map(uint16ToFloat64);
+ const vector = Float16Vector.from(f16s);
+ test(`return type is correct`, () => checkType(Float16Vector, vector));
+ testAndValidateVector(vector, u16s, f16s);
+ });
+ describe(`Float16Vector.from accepts Uint16Arrays`, () => {
+ const u16s = valuesTyped(Uint16Array).map((x) => float64ToUint16(uint16ToFloat64(x)));
+ const f16s = valuesArray(Uint16Array).map(uint16ToFloat64);
+ const vector = Float16Vector.from(u16s);
+ test(`return type is correct`, () => checkType(Float16Vector, vector));
+ testAndValidateVector(vector, u16s, f16s);
+ });
+ });
+ describe(`Float32Vector`, () => {
+ testFloatVector(Float32);
+ describe(`Float32Vector.from accepts regular Arrays`, () => {
+ const values = valuesArray(Float32Array);
+ const vector = Float32Vector.from(values);
+ testAndValidateVector(vector, valuesTyped(Float32Array), values);
+ test(`return type is correct`, () => checkType(Float32Vector, vector));
+ });
+ });
+ describe(`Float64Vector`, () => {
+ testFloatVector(Float64);
+ describe(`Float64Vector.from accepts regular Arrays`, () => {
+ const values = valuesArray(Float64Array);
+ const vector = Float64Vector.from(values);
+ testAndValidateVector(vector, valuesTyped(Float64Array), values);
+ test(`return type is correct`, () => checkType(Float64Vector, vector));
+ });
+ });
+});
+
+describe(`IntVector`, () => {
+
+ describe(`IntVector.from infers the type from the input TypedArray`, () => {
+
+ const i8s = valuesTyped(Int8Array);
+ const i16s = valuesTyped(Int16Array);
+ const i32s = valuesTyped(Int32Array);
+ const i64s = valuesTyped(Int32Array);
+ const u8s = valuesTyped(Uint8Array);
+ const u16s = valuesTyped(Uint16Array);
+ const u32s = valuesTyped(Uint32Array);
+ const u64s = valuesTyped(Uint32Array);
+ const i8Vec = IntVector.from(i8s);
+ const i16Vec = IntVector.from(i16s);
+ const i32Vec = IntVector.from(i32s);
+ const i64Vec = IntVector.from(i64s, true);
+ const u8Vec = IntVector.from(u8s);
+ const u16Vec = IntVector.from(u16s);
+ const u32Vec = IntVector.from(u32s);
+ const u64Vec = IntVector.from(u64s, true);
+
+ // test strong typing at compile-time
+ test(`return type is correct`, () => checkType(Int8Vector, i8Vec));
+ test(`return type is correct`, () => checkType(Int16Vector, i16Vec));
+ test(`return type is correct`, () => checkType(Int32Vector, i32Vec));
+ test(`return type is correct`, () => checkType(Int64Vector, i64Vec));
+ test(`return type is correct`, () => checkType(Uint8Vector, u8Vec));
+ test(`return type is correct`, () => checkType(Uint16Vector, u16Vec));
+ test(`return type is correct`, () => checkType(Uint32Vector, u32Vec));
+ test(`return type is correct`, () => checkType(Uint64Vector, u64Vec));
+ test(`throws on bad input`, () => {
+ expect(() => IntVector.from(<any> {})).toThrow('Unrecognized IntVector input');
+ });
+
+ const bigI64s = BigInt64Array.from(toBigNumsArray(i64s));
+ const bigU64s = BigUint64Array.from(toBigNumsArray(u64s));
+
+ testAndValidateVector(i8Vec, i8s);
+ testAndValidateVector(i16Vec, i16s);
+ testAndValidateVector(i32Vec, i32s);
+ // This tests when values are represented as pairs of lo, hi
+ testAndValidateVector(i64Vec, i64s);
+ // This tests when values are represented as native JS bigints
+ testAndValidateVector(i64Vec, i64s, [...bigI64s]);
+ testAndValidateVector(u8Vec, u8s);
+ testAndValidateVector(u16Vec, u16s);
+ testAndValidateVector(u32Vec, u32s);
+ // This tests when values are represented as pairs of lo, hi
+ testAndValidateVector(u64Vec, u64s);
+ // This tests when values are represented as native JS bigints
+ testAndValidateVector(u64Vec, u64s, [...bigU64s]);
+ });
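+
+ // A hedged aside (our addition): the lo/hi pairing referenced in the comments
+ // above can be observed directly by viewing a BigInt64Array's buffer as
+ // 32-bit words (low word first on the little-endian platforms the tests
+ // assume).
+ test(`64-bit values round-trip through lo/hi 32-bit pairs`, () => {
+ const big = new BigInt64Array([4294967298n]); // 2n ** 32n + 2n
+ const pairs = new Int32Array(big.buffer);
+ expect(pairs[0]).toEqual(2); // low word
+ expect(pairs[1]).toEqual(1); // high word
+ });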
+
+ describe('IntVector.from casts the input values to the correct integer type', () => {
+
+ const i8s = valuesTyped(Int8Array);
+ const i16s = valuesTyped(Int16Array);
+ const i32s = valuesTyped(Int32Array);
+ const i64s = valuesTyped(Int32Array);
+ const u8s = valuesTyped(Uint8Array);
+ const u16s = valuesTyped(Uint16Array);
+ const u32s = valuesTyped(Uint32Array);
+ const u64s = valuesTyped(Uint32Array);
+ const i8Vec_ = IntVector.from(i8s);
+ const i16Vec_ = IntVector.from(i16s);
+ const i32Vec_ = IntVector.from(i32s);
+ const i64Vec_ = IntVector.from(i64s, true);
+ const u8Vec_ = IntVector.from(u8s);
+ const u16Vec_ = IntVector.from(u16s);
+ const u32Vec_ = IntVector.from(u32s);
+ const u64Vec_ = IntVector.from(u64s, true);
+
+ // Convert from a Vector of the opposite sign
+ const i8Vec = Int8Vector.from(u8Vec_);
+ const i16Vec = Int16Vector.from(u16Vec_);
+ const i32Vec = Int32Vector.from(u32Vec_);
+ const i64Vec = Int64Vector.from(u64Vec_);
+ const u8Vec = Uint8Vector.from(i8Vec_);
+ const u16Vec = Uint16Vector.from(i16Vec_);
+ const u32Vec = Uint32Vector.from(i32Vec_);
+ const u64Vec = Uint64Vector.from(i64Vec_);
+
+ // test strong typing at compile-time
+ test(`return type is correct`, () => checkType(Int8Vector, i8Vec));
+ test(`return type is correct`, () => checkType(Int16Vector, i16Vec));
+ test(`return type is correct`, () => checkType(Int32Vector, i32Vec));
+ test(`return type is correct`, () => checkType(Int64Vector, i64Vec));
+ test(`return type is correct`, () => checkType(Uint8Vector, u8Vec));
+ test(`return type is correct`, () => checkType(Uint16Vector, u16Vec));
+ test(`return type is correct`, () => checkType(Uint32Vector, u32Vec));
+ test(`return type is correct`, () => checkType(Uint64Vector, u64Vec));
+
+ const bigI64s = BigInt64Array.from(toBigNumsArray(u64s));
+ const bigU64s = BigUint64Array.from(toBigNumsArray(i64s));
+
+ testAndValidateVector(i8Vec, Int8Array.from(u8s));
+ testAndValidateVector(i16Vec, Int16Array.from(u16s));
+ testAndValidateVector(i32Vec, Int32Array.from(u32s));
+ // This tests when values are represented as pairs of lo, hi
+ testAndValidateVector(i64Vec, new Int32Array(bigI64s.buffer));
+ // This tests when values are represented as native JS bigints
+ testAndValidateVector(i64Vec, new Int32Array(bigI64s.buffer), [...bigI64s]);
+ testAndValidateVector(u8Vec, Uint8Array.from(i8s));
+ testAndValidateVector(u16Vec, Uint16Array.from(i16s));
+ testAndValidateVector(u32Vec, Uint32Array.from(i32s));
+ // This tests when values are represented as pairs of lo, hi
+ testAndValidateVector(u64Vec, new Uint32Array(bigU64s.buffer));
+ // This tests when values are represented as native JS bigints
+ testAndValidateVector(u64Vec, new Uint32Array(bigU64s.buffer), [...bigU64s]);
+ });
+
+ describe(`Int8Vector`, () => {
+ testIntVector(Int8);
+ describe(`Int8Vector.from accepts regular Arrays`, () => {
+ const values = valuesArray(Int8Array);
+ const vector = Int8Vector.from(values);
+ testAndValidateVector(vector, valuesTyped(Int8Array), values);
+ test(`return type is correct`, () => checkType(Int8Vector, vector));
+ });
+ });
+ describe(`Int16Vector`, () => {
+ testIntVector(Int16);
+ describe(`Int16Vector.from accepts regular Arrays`, () => {
+ const values = valuesArray(Int16Array);
+ const vector = Int16Vector.from(values);
+ testAndValidateVector(vector, valuesTyped(Int16Array), values);
+ test(`return type is correct`, () => checkType(Int16Vector, vector));
+ });
+ });
+ describe(`Int32Vector`, () => {
+ testIntVector(Int32);
+ describe(`Int32Vector.from accepts regular Arrays`, () => {
+ const values = valuesArray(Int32Array);
+ const vector = Int32Vector.from(values);
+ testAndValidateVector(vector, valuesTyped(Int32Array), values);
+ test(`return type is correct`, () => checkType(Int32Vector, vector));
+ });
+ });
+ describe(`Int64Vector`, () => {
+ testIntVector(Int64);
+ testIntVector(Int64, bigIntValuesArray(BigInt64Array));
+ describe(`Int64Vector.from accepts regular Arrays`, () => {
+ const values = valuesArray64(Int32Array);
+ const vector = Int64Vector.from(values);
+ testAndValidateVector(vector, valuesTyped(Int32Array), values);
+ testAndValidateVector(vector, valuesTyped(Int32Array), bigIntValuesArray(BigInt64Array));
+ test(`return type is correct`, () => checkType(Int64Vector, vector));
+ });
+ });
+ describe(`Uint8Vector`, () => {
+ testIntVector(Uint8);
+ describe(`Uint8Vector.from accepts regular Arrays`, () => {
+ const values = valuesArray(Uint8Array);
+ const vector = Uint8Vector.from(values);
+ testAndValidateVector(vector, valuesTyped(Uint8Array), values);
+ test(`return type is correct`, () => checkType(Uint8Vector, vector));
+ });
+ });
+ describe(`Uint16Vector`, () => {
+ testIntVector(Uint16);
+ describe(`Uint16Vector.from accepts regular Arrays`, () => {
+ const values = valuesArray(Uint16Array);
+ const vector = Uint16Vector.from(values);
+ testAndValidateVector(vector, valuesTyped(Uint16Array), values);
+ test(`return type is correct`, () => checkType(Uint16Vector, vector));
+ });
+ });
+ describe(`Uint32Vector`, () => {
+ testIntVector(Uint32);
+ describe(`Uint32Vector.from accepts regular Arrays`, () => {
+ const values = valuesArray(Uint32Array);
+ const vector = Uint32Vector.from(values);
+ testAndValidateVector(vector, valuesTyped(Uint32Array), values);
+ test(`return type is correct`, () => checkType(Uint32Vector, vector));
+ });
+ });
+ describe(`Uint64Vector`, () => {
+ testIntVector(Uint64);
+ testIntVector(Uint64, bigIntValuesArray(BigUint64Array));
+ describe(`Uint64Vector.from accepts regular Arrays`, () => {
+ const values = valuesArray64(Uint32Array);
+ const vector = Uint64Vector.from(values);
+ testAndValidateVector(vector, valuesTyped(Uint32Array), values);
+ testAndValidateVector(vector, valuesTyped(Uint32Array), bigIntValuesArray(BigUint64Array));
+ test(`return type is correct`, () => checkType(Uint64Vector, vector));
+ });
+ });
+});
+
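+// Builds contiguous and chunked vectors over the shared test values for the
+// given int type (optionally with explicit JS values, e.g. bigints), then runs
+// the validation battery on whole, sliced, and spliced views of each.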
+function testIntVector<T extends Int>(DataType: new () => T, values?: Array<any>) {
+
+ const type = new DataType();
+ const ArrayType = type.ArrayType;
+ const stride = type.bitWidth < 64 ? 1 : 2;
+
+ const typed = valuesTyped(ArrayType);
+ const jsArray = values || [...typed];
+ const vector = Vector.new(Data.Int(type, 0, typed.length / stride, 0, null, typed));
+ const chunked = testValueBuffers.map((b) => new ArrayType(b))
+ .map((b) => Vector.new(Data.Int(type, 0, b.length / stride, 0, null, b)))
+ .reduce((v: any, v2) => v.concat(v2));
+
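+ // Note (our addition): typed.length / vector.length equals the stride (2 for
+ // the 64-bit types), so the offsets below scale logical slice bounds into the
+ // backing buffer; jsArray scales by its own elements-per-value ratio (the
+ // stride when it mirrors the raw buffer, 1 when it holds one bigint per value).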
+ const vectorBegin = (vector.length * .25) | 0;
+ const vectorEnd = (vector.length * .75) | 0;
+ const typedBegin = vectorBegin * (typed.length / vector.length);
+ const typedEnd = vectorEnd * (typed.length / vector.length);
+ const jsArrayBegin = vectorBegin * (jsArray.length / vector.length);
+ const jsArrayEnd = vectorEnd * (jsArray.length / vector.length);
+
+ const combos = [[`vector`, vector], [`chunked`, chunked]] as [string, V<T>][];
+ combos.forEach(([chunksType, vector]) => {
+ describe(chunksType, () => {
+ // test base case no slicing
+ describe(`base case no slicing`, () => { testAndValidateVector(vector, typed, jsArray); });
+ // test slicing without args
+ describe(`slicing without args`, () => { testAndValidateVector(vector.slice(), typed.slice(), jsArray.slice()); });
+ // test slicing the middle half
+ describe(`slice the middle half`, () => {
+ testAndValidateVector(
+ vector.slice(vectorBegin, vectorEnd),
+ typed.slice(typedBegin, typedEnd),
+ jsArray.slice(jsArrayBegin, jsArrayEnd)
+ );
+ });
+ // test splicing out the middle half
+ describe(`splicing out the middle half`, () => {
+ testAndValidateVector(
+ vector.slice(0, vectorBegin).concat(vector.slice(vectorEnd)),
+ new ArrayType([...typed.slice(0, typedBegin), ...typed.slice(typedEnd)]),
+ [...jsArray.slice(0, jsArrayBegin), ...jsArray.slice(jsArrayEnd)]
+ );
+ });
+ });
+ });
+}
+
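+// Builds contiguous and chunked vectors over the shared test values for the
+// given float type, then runs the validation battery on whole, sliced, and
+// spliced views of each.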
+function testFloatVector<T extends Float>(DataType: new () => T, values?: Array<any>) {
+
+ const type = new DataType();
+ const ArrayType = type.ArrayType;
+
+ const typed = valuesTyped(ArrayType);
+ const jsArray = values || [...typed];
+ const vector = Vector.new(Data.Float(type, 0, typed.length, 0, null, typed));
+ const chunked = testValueBuffers.map((b) => new ArrayType(b))
+ .map((b) => Vector.new(Data.Float(type, 0, b.length, 0, null, b)))
+ .reduce((v: any, v2) => v.concat(v2));
+
+ const begin = (vector.length * .25) | 0;
+ const end = (vector.length * .75) | 0;
+ const combos = [[`vector`, vector], [`chunked`, chunked]] as [string, V<T>][];
+
+ combos.forEach(([chunksType, vector]) => {
+ describe(chunksType, () => {
+ // test base case no slicing
+ describe(`base case no slicing`, () => { testAndValidateVector(vector, typed, jsArray); });
+ // test slicing without args
+ describe(`slicing without args`, () => { testAndValidateVector(vector.slice(), typed.slice(), jsArray.slice()); });
+ // test slicing the middle half
+ describe(`slice the middle half`, () => {
+ testAndValidateVector(
+ vector.slice(begin, end),
+ typed.slice(begin, end),
+ jsArray.slice(begin, end)
+ );
+ });
+ // test splicing out the middle half
+ describe(`splicing out the middle half`, () => {
+ testAndValidateVector(
+ vector.slice(0, begin).concat(vector.slice(end)),
+ new ArrayType([...typed.slice(0, begin), ...typed.slice(end)]),
+ [...jsArray.slice(0, begin), ...jsArray.slice(end)]
+ );
+ });
+ });
+ });
+}
+
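+// Runs the full battery of get/iterate/indexOf/slice checks against a vector,
+// given its backing TypedArray and (optionally) the JS values it should yield.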
+function testAndValidateVector<T extends Int | Float>(vector: Vector<T>, typed: T['TArray'], values: any[] = [...typed]) {
+ gets_expected_values(vector, typed, values);
+ iterates_expected_values(vector, typed, values);
+ indexof_returns_expected_values(vector, typed, values);
+ slice_returns_a_typedarray(vector);
+ slices_the_entire_array(vector, typed);
+ slices_from_minus_20_to_length(vector, typed);
+ slices_from_0_to_minus_20(vector, typed);
+ slices_the_array_from_0_to_length_minus_20(vector, typed);
+ slices_the_array_from_0_to_length_plus_20(vector, typed);
+}
+
+function gets_expected_values<T extends Int | Float>(vector: Vector<T>, typed: T['TArray'], values: any[] = [...typed]) {
+ test(`gets expected values`, () => {
+ expect.hasAssertions();
+ let i = -1, n = vector.length;
+ let stride = vector.stride;
+ try {
+ if (stride === 1) {
+ while (++i < n) {
+ expect(vector.get(i)).toEqual(values[i]);
+ }
+ } else if (typeof values[0] === 'bigint') {
+ while (++i < n) {
+ const x: any = vector.get(i)!;
+ expect(0n + x).toEqual(values[i]);
+ }
+ } else {
+ const vector64 = vector as Vector<Int64 | Uint64>;
+ const i64 = () => typed.subarray(stride * i, stride * (i + 1));
+ while (++i < n) {
+ expect((vector64.get(i) as any).subarray(0, stride)).toEqual(i64());
+ }
+ }
+ } catch (e) { throw new Error(`${i}: ${e}`); }
+ });
+}
+
+function iterates_expected_values<T extends Int | Float>(vector: Vector<T>, typed: T['TArray'], values: any[] = [...typed]) {
+ test(`iterates expected values`, () => {
+ expect.hasAssertions();
+ let i = -1, n = vector.length;
+ let stride = vector.stride;
+ try {
+ if (stride === 1) {
+ for (let v of vector) {
+ expect(++i).toBeLessThan(n);
+ expect(v).toEqual(values[i]);
+ }
+ } else if (typeof values[0] === 'bigint') {
+ let x: any;
+ for (let v of vector) {
+ x = v;
+ expect(++i).toBeLessThan(n);
+ expect(0n + x).toEqual(values[i]);
+ }
+ } else {
+ const vector64 = vector as Vector<Int64 | Uint64>;
+ const i64 = () => typed.subarray(stride * i, stride * (i + 1));
+ for (let v of vector64) {
+ expect(++i).toBeLessThan(n);
+ expect((v as any).subarray(0, stride)).toEqual(i64());
+ }
+ }
+ } catch (e) { throw new Error(`${i}: ${e}`); }
+ });
+}
+
+function indexof_returns_expected_values<T extends Int | Float>(vector: Vector<T>, typed: T['TArray'], values: any[] = [...typed]) {
+ test(`indexOf returns expected values`, () => {
+
+ expect.hasAssertions();
+
+ const stride = vector.stride;
+ const BPE = vector.ArrayType.BYTES_PER_ELEMENT;
+ const isBigInt = typeof values[0] === 'bigint';
+ const isInt64 = util.compareTypes(vector.type, new Int64());
+ const isFloat16 = util.compareTypes(vector.type, new Float16());
+
+ // Create a few random values that (almost certainly) do not appear in the vector
+ let missing: any = new vector.ArrayType(randomBytes(8 * 2 * BPE));
+
+ // Special cases: convert `values` and/or `missing` to the
+ // representations that indexOf() expects to receive
+
+ if (isFloat16) {
+ missing = uint16ToFloat64Array(missing);
+ } else if (isBigInt) {
+ const BigIntArray = isInt64 ? BigInt64Array : BigUint64Array;
+ missing = Array.from({ length: missing.length / stride },
+ (_, i) => new BigIntArray(missing.buffer, BPE * stride * i, 1)[0]);
+ } else if (stride !== 1) {
+ values = Array.from({ length: typed.length / stride },
+ (_, i) => typed.slice(stride * i, stride * (i + 1)));
+ missing = Array.from({ length: missing.length / stride },
+ (_, i) => missing.slice(stride * i, stride * (i + 1)));
+ }
+
+ const original = values.slice();
+ // Mix the missing values into the expected values and shuffle the order
+ const shuffled = shuffle(values.concat([...missing]));
+ let i = -1, j: number, k: number, n = shuffled.length;
+
+ try {
+ if (!isBigInt) {
+ while (++i < n) {
+ const search = shuffled[i];
+ if (typeof search !== 'number' || !isNaN(search)) {
+ expect(vector.indexOf(search)).toEqual(original.indexOf(search));
+ } else {
+ for (j = -1, k = original.length; ++j < k;) {
+ if (isNaN(original[j])) { break; }
+ }
+ expect(vector.indexOf(search)).toEqual(j < k ? j : -1);
+ }
+ }
+ } else {
+ // Distinguish the bigint comparisons to ensure the indexOf type signature accepts bigints
+ let shuffled64 = shuffled as bigint[];
+ if (isInt64) {
+ let vector64 = (<unknown> vector) as Int64Vector;
+ while (++i < n) {
+ expect(vector64.indexOf(shuffled64[i])).toEqual(original.indexOf(shuffled64[i]));
+ }
+ } else {
+ let vector64 = (<unknown> vector) as Uint64Vector;
+ while (++i < n) {
+ expect(vector64.indexOf(shuffled64[i])).toEqual(original.indexOf(shuffled64[i]));
+ }
+ }
+ }
+ } catch (e) { throw new Error(`${i} (${shuffled[i]}): ${e}`); }
+ });
+}
+
+function slice_returns_a_typedarray<T extends Int | Float>(vector: Vector<T>) {
+ test(`slice returns a TypedArray`, () => {
+ expect.hasAssertions();
+ expect(vector.slice().toArray()).toBeInstanceOf(vector.ArrayType);
+ });
+}
+
+function slices_the_entire_array<T extends Int | Float>(vector: Vector<T>, values: T['TArray']) {
+ test(`slices the entire array`, () => {
+ expect.hasAssertions();
+ expect(vector.slice().toArray()).toEqual(values);
+ });
+}
+
+function slices_from_minus_20_to_length<T extends Int | Float>(vector: Vector<T>, values: T['TArray']) {
+ test(`slices from -20 to length`, () => {
+ expect.hasAssertions();
+ expect(vector.slice(-20).toArray()).toEqual(values.slice(-(20 * vector.stride)));
+ });
+}
+
+function slices_from_0_to_minus_20<T extends Int | Float>(vector: Vector<T>, values: T['TArray']) {
+ test(`slices from 0 to -20`, () => {
+ expect.hasAssertions();
+ expect(vector.slice(0, -20).toArray()).toEqual(values.slice(0, -(20 * vector.stride)));
+ });
+}
+
+function slices_the_array_from_0_to_length_minus_20 <T extends Int | Float>(vector: Vector<T>, values: T['TArray']) {
+ test(`slices the array from 0 to length - 20`, () => {
+ expect.hasAssertions();
+ expect(vector.slice(0, vector.length - 20).toArray()).toEqual(values.slice(0, values.length - (20 * vector.stride)));
+ });
+}
+
+function slices_the_array_from_0_to_length_plus_20<T extends Int | Float>(vector: Vector<T>, values: T['TArray']) {
+ test(`slices the array from 0 to length + 20`, () => {
+ expect.hasAssertions();
+ expect(vector.slice(0, vector.length + 20).toArray()).toEqual(values.slice(0, values.length + (20 * vector.stride)));
+ });
+}
+
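+// Fisher-Yates shuffle over a copy of the input (the input is left untouched).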
+function shuffle(input: any[]) {
+ const result = input.slice();
+ let j, tmp, i = result.length;
+ while (--i > 0) {
+ j = (Math.random() * (i + 1)) | 0;
+ tmp = result[i];
+ result[i] = result[j];
+ result[j] = tmp;
+ }
+ return result;
+}
diff --git a/src/arrow/js/test/unit/vector/vector-tests.ts b/src/arrow/js/test/unit/vector/vector-tests.ts
new file mode 100644
index 000000000..60bff94f8
--- /dev/null
+++ b/src/arrow/js/test/unit/vector/vector-tests.ts
@@ -0,0 +1,127 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import {
+ Int32, Dictionary, DateUnit, util,
+ Data, Vector, Utf8Vector, DateVector, DictionaryVector,
+} from 'apache-arrow';
+
+describe(`DateVector`, () => {
+ const extras = [
+ new Date(2000, 0, 1),
+ new Date(1991, 5, 28, 12, 11, 10)
+ ];
+ describe(`unit = MILLISECOND`, () => {
+ const values = [
+ new Date(1989, 5, 22, 1, 2, 3),
+ new Date(1988, 3, 25, 4, 5, 6),
+ new Date(1987, 2, 24, 7, 8, 9),
+ new Date(2018, 4, 12, 17, 30, 0)
+ ];
+ const vector = DateVector.from(values);
+ basicVectorTests(vector, values, extras);
+ });
+ describe(`unit = DAY`, () => {
+ // Use UTC to ensure that dates are always at midnight
+ const values = [
+ new Date(Date.UTC(1989, 5, 22)),
+ new Date(Date.UTC(1988, 3, 25)),
+ new Date(Date.UTC(1987, 2, 24)),
+ new Date(Date.UTC(2018, 4, 12))
+ ];
+ const vector = DateVector.from(values, DateUnit.DAY);
+ basicVectorTests(vector, values, extras);
+ });
+});
+
+describe(`DictionaryVector`, () => {
+
+ const dictionary = ['foo', 'bar', 'baz'];
+ const extras = ['abc', '123']; // values to search for that should NOT be found
+ const dictionary_vec = Utf8Vector.from(dictionary);
+
+ const indices = Array.from({ length: 50 }, () => Math.random() * 3 | 0);
+ const validity = Array.from({ length: indices.length }, () => Math.random() > 0.2);
+
+ describe(`index with nullCount == 0`, () => {
+
+ const values = Array.from(indices).map((d) => dictionary[d]);
+ const vector = DictionaryVector.from(dictionary_vec, new Int32(), indices);
+
+ basicVectorTests(vector, values, extras);
+
+ describe(`sliced`, () => {
+ basicVectorTests(vector.slice(10, 20), values.slice(10, 20), extras);
+ });
+ });
+
+ describe(`index with nullCount > 0`, () => {
+
+ const nullBitmap = util.packBools(validity);
+ const nullCount = validity.reduce((acc, d) => acc + (d ? 0 : 1), 0);
+ const values = Array.from(indices).map((d, i) => validity[i] ? dictionary[d] : null);
+ const type = new Dictionary(dictionary_vec.type, new Int32(), null, null);
+ const vector = Vector.new(Data.Dictionary(type, 0, indices.length, nullCount, nullBitmap, indices, dictionary_vec));
+
+ basicVectorTests(vector, values, extras);
+ describe(`sliced`, () => {
+ basicVectorTests(vector.slice(10, 20), values.slice(10, 20), extras);
+ });
+ });
+});
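+
+// A hedged aside (our addition): util.packBools builds the Arrow null bitmap
+// used above, one bit per value in LSB-first order within each byte. Under
+// that assumption, [true, false, true, true] packs to a first byte of
+// 0b1101 = 13.
+test(`packBools packs validity bits LSB-first (sketch)`, () => {
+ const bitmap = util.packBools([true, false, true, true]);
+ expect(bitmap[0]).toEqual(13);
+});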
+
+describe(`Utf8Vector`, () => {
+ const values = ['foo', 'bar', 'baz', 'foo bar', 'bar'];
+ const vector = Utf8Vector.from(values);
+ basicVectorTests(vector, values, ['abc', '123']);
+ describe(`sliced`, () => {
+ basicVectorTests(vector.slice(1, 3), values.slice(1, 3), ['foo', 'abc']);
+ });
+});
+
+// Creates some basic tests for the given vector.
+// Verifies that:
+// - `get` and the native iterator return the same data as `values`
+// - `indexOf` returns the same indices as `values`
+function basicVectorTests(vector: Vector, values: any[], extras: any[]) {
+
+ const n = values.length;
+
+ test(`gets expected values`, () => {
+ expect.hasAssertions();
+ let i = -1;
+ while (++i < n) {
+ expect(vector.get(i)).toEqual(values[i]);
+ }
+ });
+ test(`iterates expected values`, () => {
+ expect.hasAssertions();
+ let i = -1;
+ for (let v of vector) {
+ expect(++i).toBeLessThan(n);
+ expect(v).toEqual(values[i]);
+ }
+ });
+ test(`indexOf returns expected values`, () => {
+ expect.hasAssertions();
+ const testValues = values.concat(extras);
+
+ for (const value of testValues) {
+ const actual = vector.indexOf(value);
+ const expected = values.indexOf(value);
+ expect(actual).toEqual(expected);
+ }
+ });
+}
diff --git a/src/arrow/js/test/unit/visitor-tests.ts b/src/arrow/js/test/unit/visitor-tests.ts
new file mode 100644
index 000000000..22b3e5ced
--- /dev/null
+++ b/src/arrow/js/test/unit/visitor-tests.ts
@@ -0,0 +1,169 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import { Field } from 'apache-arrow';
+import { Visitor } from 'apache-arrow';
+import {
+ DataType, Dictionary,
+ Bool, Null, Utf8, Binary, Decimal, FixedSizeBinary, List, FixedSizeList, Map_, Struct,
+ Float, Float16, Float32, Float64,
+ Int, Uint8, Uint16, Uint32, Uint64, Int8, Int16, Int32, Int64,
+ Date_, DateDay, DateMillisecond,
+ Interval, IntervalDayTime, IntervalYearMonth,
+ Time, TimeSecond, TimeMillisecond, TimeMicrosecond, TimeNanosecond,
+ Timestamp, TimestampSecond, TimestampMillisecond, TimestampMicrosecond, TimestampNanosecond,
+ Union, DenseUnion, SparseUnion,
+} from 'apache-arrow';
+
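+// Implements only the per-base-type visit methods, to check that Visitor
+// falls back to these when no finer-grained method exists.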
+class BasicVisitor extends Visitor {
+ public type: DataType | undefined;
+ public visitNull <T extends Null> (type: T) { return (this.type = type); }
+ public visitBool <T extends Bool> (type: T) { return (this.type = type); }
+ public visitInt <T extends Int> (type: T) { return (this.type = type); }
+ public visitFloat <T extends Float> (type: T) { return (this.type = type); }
+ public visitUtf8 <T extends Utf8> (type: T) { return (this.type = type); }
+ public visitBinary <T extends Binary> (type: T) { return (this.type = type); }
+ public visitFixedSizeBinary <T extends FixedSizeBinary> (type: T) { return (this.type = type); }
+ public visitDate <T extends Date_> (type: T) { return (this.type = type); }
+ public visitTimestamp <T extends Timestamp> (type: T) { return (this.type = type); }
+ public visitTime <T extends Time> (type: T) { return (this.type = type); }
+ public visitDecimal <T extends Decimal> (type: T) { return (this.type = type); }
+ public visitList <T extends List> (type: T) { return (this.type = type); }
+ public visitStruct <T extends Struct> (type: T) { return (this.type = type); }
+ public visitUnion <T extends Union> (type: T) { return (this.type = type); }
+ public visitDictionary <T extends Dictionary> (type: T) { return (this.type = type); }
+ public visitInterval <T extends Interval> (type: T) { return (this.type = type); }
+ public visitFixedSizeList <T extends FixedSizeList> (type: T) { return (this.type = type); }
+ public visitMap <T extends Map_> (type: T) { return (this.type = type); }
+}
+
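+// Implements the finer-grained per-concrete-type visit methods, which should
+// shadow the base-type fallbacks whenever they are present.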
+class FeatureVisitor extends Visitor {
+ public type: DataType | undefined;
+ public visitNull <T extends Null> (type: T) { return (this.type = type); }
+ public visitBool <T extends Bool> (type: T) { return (this.type = type); }
+ public visitInt8 <T extends Int8> (type: T) { return (this.type = type); }
+ public visitInt16 <T extends Int16> (type: T) { return (this.type = type); }
+ public visitInt32 <T extends Int32> (type: T) { return (this.type = type); }
+ public visitInt64 <T extends Int64> (type: T) { return (this.type = type); }
+ public visitUint8 <T extends Uint8> (type: T) { return (this.type = type); }
+ public visitUint16 <T extends Uint16> (type: T) { return (this.type = type); }
+ public visitUint32 <T extends Uint32> (type: T) { return (this.type = type); }
+ public visitUint64 <T extends Uint64> (type: T) { return (this.type = type); }
+ public visitFloat16 <T extends Float16> (type: T) { return (this.type = type); }
+ public visitFloat32 <T extends Float32> (type: T) { return (this.type = type); }
+ public visitFloat64 <T extends Float64> (type: T) { return (this.type = type); }
+ public visitUtf8 <T extends Utf8> (type: T) { return (this.type = type); }
+ public visitBinary <T extends Binary> (type: T) { return (this.type = type); }
+ public visitFixedSizeBinary <T extends FixedSizeBinary> (type: T) { return (this.type = type); }
+ public visitDateDay <T extends DateDay> (type: T) { return (this.type = type); }
+ public visitDateMillisecond <T extends DateMillisecond> (type: T) { return (this.type = type); }
+ public visitTimestampSecond <T extends TimestampSecond> (type: T) { return (this.type = type); }
+ public visitTimestampMillisecond <T extends TimestampMillisecond> (type: T) { return (this.type = type); }
+ public visitTimestampMicrosecond <T extends TimestampMicrosecond> (type: T) { return (this.type = type); }
+ public visitTimestampNanosecond <T extends TimestampNanosecond> (type: T) { return (this.type = type); }
+ public visitTimeSecond <T extends TimeSecond> (type: T) { return (this.type = type); }
+ public visitTimeMillisecond <T extends TimeMillisecond> (type: T) { return (this.type = type); }
+ public visitTimeMicrosecond <T extends TimeMicrosecond> (type: T) { return (this.type = type); }
+ public visitTimeNanosecond <T extends TimeNanosecond> (type: T) { return (this.type = type); }
+ public visitDecimal <T extends Decimal> (type: T) { return (this.type = type); }
+ public visitList <T extends List> (type: T) { return (this.type = type); }
+ public visitStruct <T extends Struct> (type: T) { return (this.type = type); }
+ public visitDenseUnion <T extends DenseUnion> (type: T) { return (this.type = type); }
+ public visitSparseUnion <T extends SparseUnion> (type: T) { return (this.type = type); }
+ public visitDictionary <T extends Dictionary> (type: T) { return (this.type = type); }
+ public visitIntervalDayTime <T extends IntervalDayTime> (type: T) { return (this.type = type); }
+ public visitIntervalYearMonth <T extends IntervalYearMonth> (type: T) { return (this.type = type); }
+ public visitFixedSizeList <T extends FixedSizeList> (type: T) { return (this.type = type); }
+ public visitMap <T extends Map_> (type: T) { return (this.type = type); }
+}
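+
+// A hedged reading (our addition) of the dispatch rule these suites exercise:
+// Visitor#visit routes each DataType to the most specific visit method the
+// subclass defines, falling back to the broader per-base-type method
+// otherwise, so:
+//
+//     new BasicVisitor().visit(new Int8());   // lands in visitInt
+//     new FeatureVisitor().visit(new Int8()); // lands in visitInt8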
+
+describe('Visitor', () => {
+
+ describe('uses the base methods when no feature methods are implemented', () => {
+ test(`visits Null types`, () => validateBasicVisitor(new Null()));
+ test(`visits Bool types`, () => validateBasicVisitor(new Bool()));
+ test(`visits Int types`, () => validateBasicVisitor(new Int(true, 32)));
+ test(`visits Float types`, () => validateBasicVisitor(new Float(0)));
+ test(`visits Utf8 types`, () => validateBasicVisitor(new Utf8()));
+ test(`visits Binary types`, () => validateBasicVisitor(new Binary()));
+ test(`visits FixedSizeBinary types`, () => validateBasicVisitor(new FixedSizeBinary(128)));
+ test(`visits Date types`, () => validateBasicVisitor(new Date_(0)));
+ test(`visits Timestamp types`, () => validateBasicVisitor(new Timestamp(0, 'UTC')));
+ test(`visits Time types`, () => validateBasicVisitor(new Time(0, 64)));
+ test(`visits Decimal types`, () => validateBasicVisitor(new Decimal(2, 9)));
+ test(`visits List types`, () => validateBasicVisitor(new List(null as any)));
+ test(`visits Struct types`, () => validateBasicVisitor(new Struct([] as any[])));
+ test(`visits Union types`, () => validateBasicVisitor(new Union(0, [] as any[], [] as any[])));
+ test(`visits Dictionary types`, () => validateBasicVisitor(new Dictionary(null as any, null as any)));
+ test(`visits Interval types`, () => validateBasicVisitor(new Interval(0)));
+ test(`visits FixedSizeList types`, () => validateBasicVisitor(new FixedSizeList(2, null as any)));
+ test(`visits Map types`, () => validateBasicVisitor(new Map_(new Field('', new Struct<{ key: Int; value: Int }>([] as any[])))));
+ function validateBasicVisitor<T extends DataType>(type: T) {
+ const visitor = new BasicVisitor();
+ const result = visitor.visit(type);
+ expect(result).toBe(type);
+ expect(visitor.type).toBe(type);
+ }
+ });
+
+ describe(`uses the feature methods instead of the base methods when they're implemented`, () => {
+
+ test(`visits Null types`, () => validateFeatureVisitor(new Null()));
+ test(`visits Bool types`, () => validateFeatureVisitor(new Bool()));
+ test(`visits Int8 types`, () => validateFeatureVisitor(new Int8()));
+ test(`visits Int16 types`, () => validateFeatureVisitor(new Int16()));
+ test(`visits Int32 types`, () => validateFeatureVisitor(new Int32()));
+ test(`visits Int64 types`, () => validateFeatureVisitor(new Int64()));
+ test(`visits Uint8 types`, () => validateFeatureVisitor(new Uint8()));
+ test(`visits Uint16 types`, () => validateFeatureVisitor(new Uint16()));
+ test(`visits Uint32 types`, () => validateFeatureVisitor(new Uint32()));
+ test(`visits Uint64 types`, () => validateFeatureVisitor(new Uint64()));
+ test(`visits Float16 types`, () => validateFeatureVisitor(new Float16()));
+ test(`visits Float32 types`, () => validateFeatureVisitor(new Float32()));
+ test(`visits Float64 types`, () => validateFeatureVisitor(new Float64()));
+ test(`visits Utf8 types`, () => validateFeatureVisitor(new Utf8()));
+ test(`visits Binary types`, () => validateFeatureVisitor(new Binary()));
+ test(`visits FixedSizeBinary types`, () => validateFeatureVisitor(new FixedSizeBinary(128)));
+ test(`visits DateDay types`, () => validateFeatureVisitor(new DateDay()));
+ test(`visits DateMillisecond types`, () => validateFeatureVisitor(new DateMillisecond()));
+ test(`visits TimestampSecond types`, () => validateFeatureVisitor(new TimestampSecond()));
+ test(`visits TimestampMillisecond types`, () => validateFeatureVisitor(new TimestampMillisecond()));
+ test(`visits TimestampMicrosecond types`, () => validateFeatureVisitor(new TimestampMicrosecond()));
+ test(`visits TimestampNanosecond types`, () => validateFeatureVisitor(new TimestampNanosecond()));
+ test(`visits TimeSecond types`, () => validateFeatureVisitor(new TimeSecond()));
+ test(`visits TimeMillisecond types`, () => validateFeatureVisitor(new TimeMillisecond()));
+ test(`visits TimeMicrosecond types`, () => validateFeatureVisitor(new TimeMicrosecond()));
+ test(`visits TimeNanosecond types`, () => validateFeatureVisitor(new TimeNanosecond()));
+ test(`visits Decimal types`, () => validateFeatureVisitor(new Decimal(2, 9)));
+ test(`visits List types`, () => validateFeatureVisitor(new List(null as any)));
+ test(`visits Struct types`, () => validateFeatureVisitor(new Struct([] as any[])));
+ test(`visits DenseUnion types`, () => validateFeatureVisitor(new DenseUnion([] as any[], [] as any[])));
+ test(`visits SparseUnion types`, () => validateFeatureVisitor(new SparseUnion([] as any[], [] as any[])));
+ test(`visits Dictionary types`, () => validateFeatureVisitor(new Dictionary(null as any, null as any)));
+ test(`visits IntervalDayTime types`, () => validateFeatureVisitor(new IntervalDayTime()));
+ test(`visits IntervalYearMonth types`, () => validateFeatureVisitor(new IntervalYearMonth()));
+ test(`visits FixedSizeList types`, () => validateFeatureVisitor(new FixedSizeList(2, null as any)));
+ test(`visits Map types`, () => validateFeatureVisitor(new Map_(new Field('', new Struct<{ key: Int; value: Int }>([] as any[])))));
+
+ function validateFeatureVisitor<T extends DataType>(type: T) {
+ const visitor = new FeatureVisitor();
+ const result = visitor.visit(type);
+ expect(result).toBe(type);
+ expect(visitor.type).toBe(type);
+ }
+ });
+});