path: root/src/arrow/js/src
diff options
Diffstat (limited to 'src/arrow/js/src')
110 files changed, 19516 insertions, 0 deletions
diff --git a/src/arrow/js/src/Arrow.dom.ts b/src/arrow/js/src/Arrow.dom.ts
new file mode 100644
index 000000000..07f0c8b8e
--- /dev/null
+++ b/src/arrow/js/src/Arrow.dom.ts
@@ -0,0 +1,113 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+import streamAdapters from './io/adapters';
+import { Builder } from './builder/index';
+import { RecordBatchReader, RecordBatchFileReader, RecordBatchStreamReader, } from './ipc/reader';
+import { RecordBatchWriter, RecordBatchFileWriter, RecordBatchStreamWriter, } from './ipc/writer';
+import { toDOMStream } from './io/whatwg/iterable';
+import { builderThroughDOMStream } from './io/whatwg/builder';
+import { recordBatchReaderThroughDOMStream } from './io/whatwg/reader';
+import { recordBatchWriterThroughDOMStream } from './io/whatwg/writer';
+streamAdapters.toDOMStream = toDOMStream; // register the WHATWG (DOM) stream adapter on the shared adapters object
+Builder['throughDOM'] = builderThroughDOMStream; // replace the static `throughDOM` member with the DOM-stream implementation
+RecordBatchReader['throughDOM'] = recordBatchReaderThroughDOMStream; // the same reader implementation is installed on all three reader classes
+RecordBatchFileReader['throughDOM'] = recordBatchReaderThroughDOMStream;
+RecordBatchStreamReader['throughDOM'] = recordBatchReaderThroughDOMStream;
+RecordBatchWriter['throughDOM'] = recordBatchWriterThroughDOMStream; // the same writer implementation is installed on all three writer classes
+RecordBatchFileWriter['throughDOM'] = recordBatchWriterThroughDOMStream;
+RecordBatchStreamWriter['throughDOM'] = recordBatchWriterThroughDOMStream;
+export {
+ DateUnit, IntervalUnit, MessageHeader, MetadataVersion, Precision, TimeUnit, Type, UnionMode, BufferType,
+ Data,
+ DataType,
+ Null,
+ Bool,
+ Int, Int8, Int16, Int32, Int64, Uint8, Uint16, Uint32, Uint64,
+ Float, Float16, Float32, Float64,
+ Utf8,
+ Binary,
+ FixedSizeBinary,
+ Date_, DateDay, DateMillisecond,
+ Timestamp, TimestampSecond, TimestampMillisecond, TimestampMicrosecond, TimestampNanosecond,
+ Time, TimeSecond, TimeMillisecond, TimeMicrosecond, TimeNanosecond,
+ Decimal,
+ List,
+ Struct,
+ Union, DenseUnion, SparseUnion,
+ Dictionary,
+ Interval, IntervalDayTime, IntervalYearMonth,
+ FixedSizeList,
+ Map_,
+ Table,
+ Column,
+ Schema, Field,
+ Visitor,
+ Vector,
+ BaseVector,
+ BinaryVector,
+ BoolVector,
+ Chunked,
+ DateVector, DateDayVector, DateMillisecondVector,
+ DecimalVector,
+ DictionaryVector,
+ FixedSizeBinaryVector,
+ FixedSizeListVector,
+ FloatVector, Float16Vector, Float32Vector, Float64Vector,
+ IntervalVector, IntervalDayTimeVector, IntervalYearMonthVector,
+ IntVector, Int8Vector, Int16Vector, Int32Vector, Int64Vector, Uint8Vector, Uint16Vector, Uint32Vector, Uint64Vector,
+ ListVector,
+ MapVector,
+ NullVector,
+ StructVector,
+ TimestampVector, TimestampSecondVector, TimestampMillisecondVector, TimestampMicrosecondVector, TimestampNanosecondVector,
+ TimeVector, TimeSecondVector, TimeMillisecondVector, TimeMicrosecondVector, TimeNanosecondVector,
+ UnionVector, DenseUnionVector, SparseUnionVector,
+ Utf8Vector,
+ ByteStream, AsyncByteStream, AsyncByteQueue, ReadableSource, WritableSink,
+ RecordBatchReader, RecordBatchFileReader, RecordBatchStreamReader, AsyncRecordBatchFileReader, AsyncRecordBatchStreamReader,
+ RecordBatchWriter, RecordBatchFileWriter, RecordBatchStreamWriter, RecordBatchJSONWriter,
+ MessageReader, AsyncMessageReader, JSONMessageReader,
+ Message,
+ RecordBatch,
+ ArrowJSONLike, FileHandle, Readable, Writable, ReadableWritable, ReadableDOMStreamOptions,
+ DataFrame, FilteredDataFrame, CountByResult, BindFunc, NextFunc,
+ predicate,
+ util,
+ Builder,
+ BinaryBuilder,
+ BoolBuilder,
+ DateBuilder, DateDayBuilder, DateMillisecondBuilder,
+ DecimalBuilder,
+ DictionaryBuilder,
+ FixedSizeBinaryBuilder,
+ FixedSizeListBuilder,
+ FloatBuilder, Float16Builder, Float32Builder, Float64Builder,
+ IntervalBuilder, IntervalDayTimeBuilder, IntervalYearMonthBuilder,
+ IntBuilder, Int8Builder, Int16Builder, Int32Builder, Int64Builder, Uint8Builder, Uint16Builder, Uint32Builder, Uint64Builder,
+ ListBuilder,
+ MapBuilder,
+ NullBuilder,
+ StructBuilder,
+ TimestampBuilder, TimestampSecondBuilder, TimestampMillisecondBuilder, TimestampMicrosecondBuilder, TimestampNanosecondBuilder,
+ TimeBuilder, TimeSecondBuilder, TimeMillisecondBuilder, TimeMicrosecondBuilder, TimeNanosecondBuilder,
+ UnionBuilder, DenseUnionBuilder, SparseUnionBuilder,
+ Utf8Builder,
+ isTypedArray,
+} from './Arrow';
diff --git a/src/arrow/js/src/Arrow.node.ts b/src/arrow/js/src/Arrow.node.ts
new file mode 100644
index 000000000..44221f613
--- /dev/null
+++ b/src/arrow/js/src/Arrow.node.ts
@@ -0,0 +1,32 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+import streamAdapters from './io/adapters';
+import { Builder } from './builder/index';
+import { RecordBatchReader } from './ipc/reader';
+import { RecordBatchWriter } from './ipc/writer';
+import { toNodeStream } from './io/node/iterable';
+import { builderThroughNodeStream } from './io/node/builder';
+import { recordBatchReaderThroughNodeStream } from './io/node/reader';
+import { recordBatchWriterThroughNodeStream } from './io/node/writer';
+streamAdapters.toNodeStream = toNodeStream; // register the Node.js stream adapter on the shared adapters object
+Builder['throughNode'] = builderThroughNodeStream; // replace the static `throughNode` member with the Node-stream implementation
+RecordBatchReader['throughNode'] = recordBatchReaderThroughNodeStream; // Node duplex support for record batch readers
+RecordBatchWriter['throughNode'] = recordBatchWriterThroughNodeStream; // Node duplex support for record batch writers
+export * from './Arrow.dom';
diff --git a/src/arrow/js/src/Arrow.ts b/src/arrow/js/src/Arrow.ts
new file mode 100644
index 000000000..8bf296310
--- /dev/null
+++ b/src/arrow/js/src/Arrow.ts
@@ -0,0 +1,136 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+export {
+ DateUnit,
+ TimeUnit,
+ Precision,
+ UnionMode,
+ IntervalUnit,
+ MetadataVersion,
+} from './fb/Schema';
+export { MessageHeader } from './fb/Message';
+export { Type, BufferType } from './enum';
+export { Data } from './data';
+export {
+ DataType,
+ Null,
+ Bool,
+ Int, Int8, Int16, Int32, Int64, Uint8, Uint16, Uint32, Uint64,
+ Float, Float16, Float32, Float64,
+ Utf8,
+ Binary,
+ FixedSizeBinary,
+ Date_, DateDay, DateMillisecond,
+ Timestamp, TimestampSecond, TimestampMillisecond, TimestampMicrosecond, TimestampNanosecond,
+ Time, TimeSecond, TimeMillisecond, TimeMicrosecond, TimeNanosecond,
+ Decimal,
+ List,
+ Struct,
+ Union, DenseUnion, SparseUnion,
+ Dictionary,
+ Interval, IntervalDayTime, IntervalYearMonth,
+ FixedSizeList,
+ Map_,
+} from './type';
+export { Table } from './table';
+export { Column } from './column';
+export { Visitor } from './visitor';
+export { Schema, Field } from './schema';
+export {
+ Vector,
+ BaseVector,
+ BinaryVector,
+ BoolVector,
+ Chunked,
+ DateVector, DateDayVector, DateMillisecondVector,
+ DecimalVector,
+ DictionaryVector,
+ FixedSizeBinaryVector,
+ FixedSizeListVector,
+ FloatVector, Float16Vector, Float32Vector, Float64Vector,
+ IntervalVector, IntervalDayTimeVector, IntervalYearMonthVector,
+ IntVector, Int8Vector, Int16Vector, Int32Vector, Int64Vector, Uint8Vector, Uint16Vector, Uint32Vector, Uint64Vector,
+ ListVector,
+ MapVector,
+ NullVector,
+ StructVector,
+ TimestampVector, TimestampSecondVector, TimestampMillisecondVector, TimestampMicrosecondVector, TimestampNanosecondVector,
+ TimeVector, TimeSecondVector, TimeMillisecondVector, TimeMicrosecondVector, TimeNanosecondVector,
+ UnionVector, DenseUnionVector, SparseUnionVector,
+ Utf8Vector,
+} from './vector/index';
+export {
+ Builder,
+ BinaryBuilder,
+ BoolBuilder,
+ DateBuilder, DateDayBuilder, DateMillisecondBuilder,
+ DecimalBuilder,
+ DictionaryBuilder,
+ FixedSizeBinaryBuilder,
+ FixedSizeListBuilder,
+ FloatBuilder, Float16Builder, Float32Builder, Float64Builder,
+ IntervalBuilder, IntervalDayTimeBuilder, IntervalYearMonthBuilder,
+ IntBuilder, Int8Builder, Int16Builder, Int32Builder, Int64Builder, Uint8Builder, Uint16Builder, Uint32Builder, Uint64Builder,
+ ListBuilder,
+ MapBuilder,
+ NullBuilder,
+ StructBuilder,
+ TimestampBuilder, TimestampSecondBuilder, TimestampMillisecondBuilder, TimestampMicrosecondBuilder, TimestampNanosecondBuilder,
+ TimeBuilder, TimeSecondBuilder, TimeMillisecondBuilder, TimeMicrosecondBuilder, TimeNanosecondBuilder,
+ UnionBuilder, DenseUnionBuilder, SparseUnionBuilder,
+ Utf8Builder,
+} from './builder/index';
+export { ByteStream, AsyncByteStream, AsyncByteQueue, ReadableSource, WritableSink } from './io/stream';
+export { RecordBatchReader, RecordBatchFileReader, RecordBatchStreamReader, AsyncRecordBatchFileReader, AsyncRecordBatchStreamReader } from './ipc/reader';
+export { RecordBatchWriter, RecordBatchFileWriter, RecordBatchStreamWriter, RecordBatchJSONWriter } from './ipc/writer';
+export { MessageReader, AsyncMessageReader, JSONMessageReader } from './ipc/message';
+export { Message } from './ipc/metadata/message';
+export { RecordBatch } from './recordbatch';
+export { ArrowJSONLike, FileHandle, Readable, Writable, ReadableWritable, ReadableDOMStreamOptions } from './io/interfaces';
+export { DataFrame, FilteredDataFrame, CountByResult, BindFunc, NextFunc } from './compute/dataframe';
+import * as util_bn_ from './util/bn';
+import * as util_int_ from './util/int';
+import * as util_bit_ from './util/bit';
+import * as util_math_ from './util/math';
+import * as util_buffer_ from './util/buffer';
+import * as util_vector_ from './util/vector';
+import * as predicate from './compute/predicate';
+import { compareSchemas, compareFields, compareTypes } from './visitor/typecomparator';
+export { predicate };
+/**
+ * Aggregate of low-level helpers: every export of the `util/bn`, `util/int`,
+ * `util/bit`, `util/math`, `util/buffer` and `util/vector` modules, plus the
+ * `compareSchemas`, `compareFields` and `compareTypes` comparison functions.
+ * Later spreads win if two modules export the same name.
+ * @ignore
+ */
+export const util = {
+    ...util_bn_,
+    ...util_int_,
+    ...util_bit_,
+    ...util_math_,
+    ...util_buffer_,
+    ...util_vector_,
+    compareSchemas,
+    compareFields,
+    compareTypes,
+};
+export { isTypedArray } from './util/args';
diff --git a/src/arrow/js/src/bin/arrow2csv.ts b/src/arrow/js/src/bin/arrow2csv.ts
new file mode 100644
index 000000000..d5803cce0
--- /dev/null
+++ b/src/arrow/js/src/bin/arrow2csv.ts
@@ -0,0 +1,334 @@
+#! /usr/bin/env node
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+import * as fs from 'fs';
+import * as stream from 'stream';
+import { valueToString } from '../util/pretty';
+import { Schema, RecordBatch, RecordBatchReader, AsyncByteQueue } from '../Arrow.node';
+/* eslint-disable @typescript-eslint/no-require-imports */
+const padLeft = require('pad-left');
+const bignumJSONParse = require('json-bignum').parse;
+// Parse the command line against the option definitions returned by `cliOpts()`.
+// NOTE(review): `partial: true` presumably collects unrecognized arguments under `argv._unknown` — confirm against command-line-args docs.
+const argv = require(`command-line-args`)(cliOpts(), { partial: true });
+// Input files are the `--file` values plus any unrecognized arguments; nothing is read when `--help` is given.
+const files = argv.help ? [] : [...(argv.file || []), ...(argv._unknown || [])].filter(Boolean);
+// Mutable state shared by the formatting helpers: the parsed options plus output bookkeeping.
+const state = { ...argv, closed: false, maxColWidths: [10] };
+/** Options and bookkeeping consumed by `batchesToString`. */
+type ToStringState = {
+    /** Horizontal border character (`--hr`); no rule is printed when empty. */
+    hr: string;
+    /** Column separator (`--sep`). */
+    sep: string;
+    /** Column names given via `--schema`; when non-empty, batches are narrowed to these columns. */
+    schema: any;
+    /** Set once writing to stdout has failed; stops further output. */
+    closed: boolean;
+    /** Whether to print Schema metadata (`--metadata`). */
+    metadata: boolean;
+    /** Widest rendered value seen so far for each column; index 0 is the row id column. */
+    maxColWidths: number[];
+};
+// Entry point: print every Arrow source (files first, then piped stdin) to stdout,
+// then exit with 0 on success, or with the usage/exit code otherwise.
+(async () => {
+    // Each source is a thunk so that a stream is only opened when its turn comes.
+    const sources = argv.help ? [] : [
+        ...files.map((file) => () => fs.createReadStream(file)),
+        // Only read stdin when something is piped into it.
+        ...(process.stdin.isTTY ? [] : [() => process.stdin])
+    ].filter(Boolean) as (() => NodeJS.ReadableStream)[];
+    let reader: RecordBatchReader | null;
+    let hasReaders = false;
+    for (const source of sources) {
+        if (state.closed) { break; }
+        for await (reader of recordBatchReaders(source)) {
+            hasReaders = true;
+            const transformToString = batchesToString(state, reader.schema);
+            await pipeTo(
+                reader.pipe(transformToString),
+                // Keep stdout open so the next reader can write to it too.
+                process.stdout, { end: false }
+            ).catch(() => state.closed = true); // Handle EPIPE errors
+        }
+        if (state.closed) { break; }
+    }
+    // Nothing readable was found (or --help was given): show the usage guide.
+    return hasReaders ? 0 : print_usage();
+})()
+.then((x) => +x || 0, (err) => {
+    if (err) {
+        console.error(`${err?.stack || err}`);
+    }
+    return process.exitCode || 1;
+}).then((code) => process.exit(code));
+/**
+ * Pipe `source` into `sink` and report completion as a Promise.
+ *
+ * @param source Stream to read from.
+ * @param sink Stream to write to.
+ * @param opts Forwarded to `source.pipe()`, e.g. `{ end: false }` to leave the sink open.
+ * @returns A Promise that resolves when `source` emits `'end'` and rejects
+ * with the error when the piped sink emits `'error'`.
+ */
+function pipeTo(source: NodeJS.ReadableStream, sink: NodeJS.WritableStream, opts?: { end: boolean }) {
+    return new Promise((resolve, reject) => {
+        source.on('end', onEnd).pipe(sink, opts).on('error', onErr);
+        function onEnd() { done(undefined, resolve); }
+        function onErr(err: any) { done(err, reject); }
+        // Detach both listeners before settling so they are not left on the streams.
+        function done(e: any, cb: (e?: any) => void) {
+            source.removeListener('end', onEnd);
+            sink.removeListener('error', onErr);
+            cb(e);
+        }
+    });
+}
+/**
+ * Yield every `RecordBatchReader` found in a source stream.
+ *
+ * The input is first read as Arrow IPC. If that yields no reader (or throws),
+ * the same bytes are parsed as JSON and read as Arrow JSON instead.
+ *
+ * @param createSourceStream Thunk that opens the stream to read.
+ */
+async function *recordBatchReaders(createSourceStream: () => NodeJS.ReadableStream) {
+    const json = new AsyncByteQueue();
+    const stream = new AsyncByteQueue();
+    const source = createSourceStream();
+    let reader: RecordBatchReader | null = null;
+    let readers: AsyncIterable<RecordBatchReader> | null = null;
+    // tee the input source, just in case it's JSON
+    source.on('end', () => [stream, json].forEach((y) => y.close()))
+        .on('data', (x) => [stream, json].forEach((y) => y.write(x)))
+        .on('error', (e) => [stream, json].forEach((y) => y.abort(e)));
+    try {
+        for await (reader of RecordBatchReader.readAll(stream)) {
+            reader && (yield reader);
+        }
+        // At least one binary reader was produced, so skip the JSON fallback.
+        if (reader) return;
+    } catch (e) { readers = null; }
+    // `readers` is never assigned a non-null value, so this branch always runs when reached.
+    if (!readers) {
+        await json.closed;
+        if (source instanceof fs.ReadStream) { source.close(); }
+        // If the data in the `json` ByteQueue parses to JSON, then assume it's Arrow JSON from a file or stdin
+        try {
+            for await (reader of RecordBatchReader.readAll(bignumJSONParse(await json.toString()))) {
+                reader && (yield reader);
+            }
+        } catch (e) { readers = null; }
+    }
+}
+/**
+ * Create a `stream.Transform` that consumes `RecordBatch` objects and emits the
+ * formatted text table: header row, data rows, and optional rules and metadata.
+ *
+ * @param state Shared options and bookkeeping; `state.maxColWidths` is updated as batches are measured.
+ * @param schema Schema of the reader being printed; its fields supply the header labels.
+ * @returns A Transform with an object-mode writable side and a utf8 readable side.
+ */
+function batchesToString(state: ToStringState, schema: Schema) {
+    let rowId = 0;
+    let batchId = -1;
+    let maxColWidths = [10];
+    const { hr, sep } = state;
+    // Header labels: the synthetic row id column followed by each field's string form.
+    const header = ['row_id', ...schema.fields.map((f) => `${f}`)].map(valueToString);
+    state.maxColWidths = header.map((x, i) => Math.max(maxColWidths[i] || 0, x.length));
+    return new stream.Transform({
+        encoding: 'utf8',
+        writableObjectMode: true,
+        readableObjectMode: false,
+        final(cb: (error?: Error | null) => void) {
+            // if there were no batches, then print the Schema, and metadata
+            if (batchId === -1) {
+                hr && this.push(`${horizontalRule(state.maxColWidths, hr, sep)}\n\n`);
+                this.push(`${formatRow(header, maxColWidths, sep)}\n`);
+                if (state.metadata && schema.metadata.size > 0) {
+                    this.push(`metadata:\n${formatMetadata(schema.metadata)}\n`);
+                }
+            }
+            hr && this.push(`${horizontalRule(state.maxColWidths, hr, sep)}\n\n`);
+            cb();
+        },
+        transform(batch: RecordBatch, _enc: string, cb: (error?: Error, data?: any) => void) {
+            // Narrow the batch to the requested columns when `--schema` was given.
+            batch = !state.schema?.length ? batch : batch.select(...state.schema);
+            if (state.closed) { return cb(undefined, null); }
+            // Pass one to convert to strings and count max column widths
+            state.maxColWidths = measureColumnWidths(rowId, batch, header.map((x, i) => Math.max(maxColWidths[i] || 0, x.length)));
+            // If this is the first batch in a stream, print a top horizontal rule, schema metadata,
+            // and (when the batch has no rows or columns) the header row
+            if (++batchId === 0) {
+                hr && this.push(`${horizontalRule(state.maxColWidths, hr, sep)}\n`);
+                if (state.metadata && batch.schema.metadata.size > 0) {
+                    this.push(`metadata:\n${formatMetadata(batch.schema.metadata)}\n`);
+                    hr && this.push(`${horizontalRule(state.maxColWidths, hr, sep)}\n`);
+                }
+                if (batch.length <= 0 || batch.numCols <= 0) {
+                    this.push(`${formatRow(header, maxColWidths = state.maxColWidths, sep)}\n`);
+                }
+            }
+            if (batch.length > 0 && batch.numCols > 0) {
+                // If any of the column widths changed, print the header again
+                if (rowId % 350 !== 0 && JSON.stringify(state.maxColWidths) !== JSON.stringify(maxColWidths)) {
+                    this.push(`${formatRow(header, state.maxColWidths, sep)}\n`);
+                }
+                maxColWidths = state.maxColWidths;
+                for (const row of batch) {
+                    if (state.closed) { break; } else if (!row) { continue; }
+                    // Repeat the header every 350 rows.
+                    if (rowId++ % 350 === 0) {
+                        this.push(`${formatRow(header, maxColWidths, sep)}\n`);
+                    }
+                    this.push(`${formatRow([rowId, ...row.toArray()].map(valueToString), maxColWidths, sep)}\n`);
+                }
+            }
+            cb();
+        }
+    });
+}
+/**
+ * Build a horizontal rule spanning the full table width.
+ *
+ * @param maxColWidths Width of each column.
+ * @param hr Character handed to `padLeft` as the fill.
+ * @param sep Column separator; its length is counted once per column.
+ * @returns The rule string, prefixed with a single space.
+ */
+function horizontalRule(maxColWidths: number[], hr = '', sep = ' | ') {
+    return ` ${padLeft('', maxColWidths.reduce((x, y) => x + y, -2 + maxColWidths.length * sep.length), hr)}`;
+}
+/**
+ * Render one table row: left-pad each cell to its column width, then join with `sep`.
+ *
+ * @param row Stringified cell values.
+ * @param maxColWidths Target width for each column, by index.
+ * @param sep Column separator.
+ * @returns The formatted row, without a trailing newline.
+ */
+function formatRow(row: string[] = [], maxColWidths: number[] = [], sep = ' | ') {
+    return `${row.map((x, j) => padLeft(x, maxColWidths[j])).join(sep)}`;
+}
+/**
+ * Render Schema metadata as `key: value` entries joined by `', \n'`.
+ *
+ * @param metadata The metadata map to print.
+ * @returns The formatted entries; values that parse as JSON are pretty-printed.
+ */
+function formatMetadata(metadata: Map<string, string>) {
+    return [...metadata].map(([key, val]) =>
+        ` ${key}: ${formatMetadataValue(val)}`
+    ).join(', \n');
+    // Pretty-print a value when it is valid JSON; otherwise print it verbatim.
+    function formatMetadataValue(value = '') {
+        let parsed = value;
+        try {
+            parsed = JSON.stringify(JSON.parse(value), null, 2);
+        } catch (e) { parsed = value; }
+        // Indent continuation lines by one space.
+        return valueToString(parsed).split('\n').join('\n ');
+    }
+}
+/**
+ * Widen `maxColWidths` so every value in `batch` fits its column.
+ *
+ * @param rowId Id of the first row in the batch; used to size the row id column (index 0).
+ * @param batch The batch whose rows are measured.
+ * @param maxColWidths Starting widths; mutated in place and returned.
+ * @returns The updated `maxColWidths` array.
+ */
+function measureColumnWidths(rowId: number, batch: RecordBatch, maxColWidths: number[] = []) {
+    let val: any, j = 0;
+    for (const row of batch) {
+        if (!row) { continue; }
+        // Reset the column cursor and account for the width of this row's id.
+        maxColWidths[j = 0] = Math.max(maxColWidths[0] || 0, (`${rowId++}`).length);
+        for (val of row) {
+            if (val && typedArrayElementWidths.has(val.constructor) && (typeof val[Symbol.toPrimitive] !== 'function')) {
+                // If we're printing a column of TypedArrays, ensure the column is wide enough to accommodate
+                // the widest possible element for a given byte size, since JS omits leading zeroes. For example:
+                // 1 | [1137743649,2170567488,244696391,2122556476]
+                // 2 | null
+                // 3 | [637174007,2142281880,961736230,2912449282]
+                // 4 | [1035112265,21832886,412842672,2207710517]
+                // 5 | null
+                // 6 | null
+                // 7 | [2755142991,4192423256,2994359,467878370]
+                const elementWidth = typedArrayElementWidths.get(val.constructor)!;
+                maxColWidths[j + 1] = Math.max(maxColWidths[j + 1] || 0,
+                    2 + // brackets on each end
+                    (val.length - 1) + // commas between elements
+                    (val.length * elementWidth) // width of stringified 2^N-1
+                );
+            } else {
+                maxColWidths[j + 1] = Math.max(maxColWidths[j + 1] || 0, valueToString(val).length);
+            }
+            ++j;
+        }
+    }
+    return maxColWidths;
+}
+// Measure the stringified representation of 2^N-1 for each TypedArray variant.
+// Maps each TypedArray constructor to the string length of a probe element.
+const typedArrayElementWidths = (() => {
+    // The probe element is built from (BYTES_PER_ELEMENT - 1) bytes of 255 followed by one byte of 254.
+    const maxElementWidth = (ArrayType: any) => {
+        const octets = Array.from({ length: ArrayType.BYTES_PER_ELEMENT - 1 }, _ => 255);
+        return `${new ArrayType(new Uint8Array([...octets, 254]).buffer)[0]}`.length;
+    };
+    return new Map<any, number>([
+        [Int8Array, maxElementWidth(Int8Array)],
+        [Int16Array, maxElementWidth(Int16Array)],
+        [Int32Array, maxElementWidth(Int32Array)],
+        [Uint8Array, maxElementWidth(Uint8Array)],
+        [Uint16Array, maxElementWidth(Uint16Array)],
+        [Uint32Array, maxElementWidth(Uint32Array)],
+        [Float32Array, maxElementWidth(Float32Array)],
+        [Float64Array, maxElementWidth(Float64Array)],
+        [Uint8ClampedArray, maxElementWidth(Uint8ClampedArray)]
+    ]);
+})();
+/**
+ * Option definitions for the arrow2csv command line.
+ * The same list is used for argument parsing and for the usage guide's option table.
+ *
+ * @returns A fresh array of option descriptors on every call.
+ */
+function cliOpts() {
+    return [
+        {
+            type: String,
+            name: 'schema', alias: 's',
+            optional: true, multiple: true,
+            typeLabel: '{underline columns}',
+            description: 'A space-delimited list of column names'
+        },
+        {
+            type: String,
+            name: 'file', alias: 'f',
+            optional: true, multiple: true,
+            description: 'The Arrow file to read'
+        },
+        {
+            type: String,
+            name: 'sep', optional: true, default: ' | ',
+            description: 'The column separator character (default: " | ")'
+        },
+        {
+            type: String,
+            name: 'hr', optional: true, default: '',
+            description: 'The horizontal border character (default: "")'
+        },
+        {
+            type: Boolean,
+            name: 'metadata', alias: 'm',
+            optional: true, default: false,
+            description: 'Flag to print Schema metadata (default: false)'
+        },
+        {
+            type: Boolean,
+            name: 'help', optional: true, default: false,
+            description: 'Print this usage guide.'
+        }
+    ];
+}
+/**
+ * Print the usage guide (header, synopsis, option table, example) to stdout.
+ *
+ * @returns Always `1`, which the entry point uses as the exit code.
+ */
+function print_usage() {
+    console.log(require('command-line-usage')([
+        {
+            header: 'arrow2csv',
+            content: 'Print a CSV from an Arrow file'
+        },
+        {
+            header: 'Synopsis',
+            content: [
+                '$ arrow2csv {underline file.arrow} [{bold --schema} column_name ...]',
+                '$ arrow2csv [{bold --schema} column_name ...] [{bold --file} {underline file.arrow}]',
+                '$ arrow2csv {bold -s} column_1 {bold -s} column_2 [{bold -f} {underline file.arrow}]',
+                '$ arrow2csv [{bold --help}]'
+            ]
+        },
+        {
+            header: 'Options',
+            optionList: cliOpts()
+        },
+        {
+            header: 'Example',
+            content: [
+                '$ arrow2csv --schema foo baz --sep " , " -f simple.arrow',
+                '> "row_id", "foo: Int32", "baz: Utf8"',
+                '> 0, 1, "aa"',
+                '> 1, null, null',
+                '> 2, 3, null',
+                '> 3, 4, "bbb"',
+                '> 4, 5, "cccc"',
+            ]
+        }
+    ]));
+    return 1;
+}
diff --git a/src/arrow/js/src/builder.ts b/src/arrow/js/src/builder.ts
new file mode 100644
index 000000000..86db95306
--- /dev/null
+++ b/src/arrow/js/src/builder.ts
@@ -0,0 +1,527 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+import { Vector } from './vector';
+import { BufferType } from './enum';
+import { Data, Buffers } from './data';
+import { createIsValidFunction } from './builder/valid';
+import { BuilderType as B, VectorType as V} from './interfaces';
+import { BufferBuilder, BitmapBufferBuilder, DataBufferBuilder, OffsetsBufferBuilder } from './builder/buffer';
+import {
+ DataType, strideForType,
+ Float, Int, Decimal, FixedSizeBinary,
+ Date_, Time, Timestamp, Interval,
+ Utf8, Binary, List, Map_
+} from './type';
+/**
+ * A set of options required to create a `Builder` instance for a given `DataType`.
+ * @see {@link Builder}
+ */
+export interface BuilderOptions<T extends DataType = any, TNull = any> {
+    /** The `DataType` instance the `Builder` will produce values for. */
+    type: T;
+    /** Values that are treated as null when written to the `Builder`. */
+    nullValues?: TNull[] | ReadonlyArray<TNull> | null;
+    /** Options for child builders, either keyed by name or as a list. */
+    children?: { [key: string]: BuilderOptions } | BuilderOptions[];
+}
+/**
+ * A set of options to create an Iterable or AsyncIterable `Builder` transform function.
+ * @see {@link Builder.throughIterable}
+ * @see {@link Builder.throughAsyncIterable}
+ */
+export interface IterableBuilderOptions<T extends DataType = any, TNull = any> extends BuilderOptions<T, TNull> {
+    /** Threshold at which the `Builder` is flushed, measured according to `queueingStrategy`. */
+    highWaterMark?: number;
+    /** Whether `highWaterMark` is compared against the Builder's `byteLength` (`'bytes'`) or its `length` (`'count'`). */
+    queueingStrategy?: 'bytes' | 'count';
+    /** NOTE(review): presumably the hash used to look up values in a dictionary-encoded Builder — confirm. */
+    dictionaryHashFunction?: (value: any) => string | number;
+    /** NOTE(review): presumably selects the child type id for a value written to a Union Builder — confirm. */
+    valueToChildTypeId?: (builder: Builder<T, TNull>, value: any, offset: number) => number;
+}
+/**
+ * An abstract base class for types that construct Arrow Vectors from arbitrary JavaScript values.
+ *
+ * A `Builder` is responsible for writing arbitrary JavaScript values
+ * to ArrayBuffers and/or child Builders according to the Arrow specification
+ * for each DataType, creating or resizing the underlying ArrayBuffers as necessary.
+ *
+ * The `Builder` for each Arrow `DataType` handles converting and appending
+ * values for a given `DataType`. The high-level {@link Builder.new} convenience
+ * method creates the specific `Builder` subclass for the supplied `DataType`.
+ *
+ * Once created, `Builder` instances support both appending values to the end
+ * of the `Builder`, and random-access writes to specific indices
+ * (`Builder.prototype.append(value)` is a convenience method for
+ * `builder.set(builder.length, value)`). Appending or setting values beyond the
+ * Builder's current length may cause the builder to grow its underlying buffers
+ * or child Builders (if applicable) to accommodate the new values.
+ *
+ * After enough values have been written to a `Builder`, `Builder.prototype.flush()`
+ * will commit the values to the underlying ArrayBuffers (or child Builders). The
+ * internal Builder state will be reset, and an instance of `Data<T>` is returned.
+ * Alternatively, `Builder.prototype.toVector()` will flush the `Builder` and return
+ * an instance of `Vector<T>` instead.
+ *
+ * When there are no more values to write, use `Builder.prototype.finish()` to
+ * finalize the `Builder`. This does not reset the internal state, so it is
+ * necessary to call `Builder.prototype.flush()` or `toVector()` one last time
+ * if there are still values queued to be flushed.
+ *
+ * Note: calling `Builder.prototype.finish()` is required when using a `DictionaryBuilder`,
+ * because this is when it flushes the values that have been enqueued in its internal
+ * dictionary's `Builder`, and creates the `dictionaryVector` for the `Dictionary` `DataType`.
+ *
+ * ```ts
+ * import { Builder, Utf8 } from 'apache-arrow';
+ *
+ * const utf8Builder = Builder.new({
+ * type: new Utf8(),
+ * nullValues: [null, 'n/a']
+ * });
+ *
+ * utf8Builder
+ * .append('hello')
+ * .append('n/a')
+ * .append('world')
+ * .append(null);
+ *
+ * const utf8Vector = utf8Builder.finish().toVector();
+ *
+ * console.log(utf8Vector.toJSON());
+ * // > ["hello", null, "world", null]
+ * ```
+ *
+ * @typeparam T The `DataType` of this `Builder`.
+ * @typeparam TNull The type(s) of values which will be considered null-value sentinels.
+ */
+export abstract class Builder<T extends DataType = any, TNull = any> {
+ /**
+ * Create a `Builder` instance based on the `type` property of the supplied `options` object.
+ * @param {BuilderOptions<T, TNull>} options An object with a required `DataType` instance
+ * and other optional parameters to be passed to the `Builder` subclass for the given `type`.
+ *
+ * @typeparam T The `DataType` of the `Builder` to create.
+ * @typeparam TNull The type(s) of values which will be considered null-value sentinels.
+ * @nocollapse
+ */
+ // @ts-ignore
+ public static new<T extends DataType = any, TNull = any>(options: BuilderOptions<T, TNull>): B<T, TNull> {} // empty stub, hence the @ts-ignore for the missing return. NOTE(review): presumably replaced with the real factory elsewhere — confirm
+ /**
+  * Fallback used when no Node.js stream implementation has been installed
+  * on `Builder`; calling it always throws.
+  * @nocollapse
+  */
+ // @ts-ignore
+ public static throughNode<T extends DataType = any, TNull = any>(options: import('./io/node/builder').BuilderDuplexOptions<T, TNull>): import('stream').Duplex {
+     const reason = `"throughNode" not available in this environment`;
+     throw new Error(reason);
+ }
+ /**
+  * Fallback used when no WHATWG (DOM) stream implementation has been installed
+  * on `Builder`; calling it always throws.
+  * @nocollapse
+  */
+ // @ts-ignore
+ public static throughDOM<T extends DataType = any, TNull = any>(options: import('./io/whatwg/builder').BuilderTransformOptions<T, TNull>): import('./io/whatwg/builder').BuilderTransform<T, TNull> {
+     const reason = `"throughDOM" not available in this environment`;
+     throw new Error(reason);
+ }
+ /**
+ * Transform a synchronous `Iterable` of arbitrary JavaScript values into a
+ * sequence of Arrow Vector<T> following the chunking semantics defined in
+ * the supplied `options` argument.
+ *
+ * This function returns a function that accepts an `Iterable` of values to
+ * transform. When called, this function returns an Iterator of `Vector<T>`.
+ *
+ * The resulting `Iterator<Vector<T>>` yields Vectors based on the
+ * `queueingStrategy` and `highWaterMark` specified in the `options` argument.
+ *
+ * * If `queueingStrategy` is `"count"` (or omitted), The `Iterator<Vector<T>>`
+ * will flush the underlying `Builder` (and yield a new `Vector<T>`) once the
+ * Builder's `length` reaches or exceeds the supplied `highWaterMark`.
+ * * If `queueingStrategy` is `"bytes"`, the `Iterator<Vector<T>>` will flush
+ * the underlying `Builder` (and yield a new `Vector<T>`) once its `byteLength`
+ * reaches or exceeds the supplied `highWaterMark`.
+ *
+ * @param {IterableBuilderOptions<T, TNull>} options An object of properties which determine the `Builder` to create and the chunking semantics to use.
+ * @returns A function which accepts a JavaScript `Iterable` of values to
+ * write, and returns an `Iterator` that yields Vectors according
+ * to the chunking semantics defined in the `options` argument.
+ * @nocollapse
+ */
+ public static throughIterable<T extends DataType = any, TNull = any>(options: IterableBuilderOptions<T, TNull>) {
+     // Delegate to the module-level `throughIterable` helper.
+     const toVectors = throughIterable(options);
+     return toVectors;
+ }
+ /**
+ * Transform an `AsyncIterable` of arbitrary JavaScript values into a
+ * sequence of Arrow Vector<T> following the chunking semantics defined in
+ * the supplied `options` argument.
+ *
+ * This function returns a function that accepts an `AsyncIterable` of values to
+ * transform. When called, this function returns an AsyncIterator of `Vector<T>`.
+ *
+ * The resulting `AsyncIterator<Vector<T>>` yields Vectors based on the
+ * `queueingStrategy` and `highWaterMark` specified in the `options` argument.
+ *
+ * * If `queueingStrategy` is `"count"` (or omitted), The `AsyncIterator<Vector<T>>`
+ * will flush the underlying `Builder` (and yield a new `Vector<T>`) once the
+ * Builder's `length` reaches or exceeds the supplied `highWaterMark`.
+ * * If `queueingStrategy` is `"bytes"`, the `AsyncIterator<Vector<T>>` will flush
+ * the underlying `Builder` (and yield a new `Vector<T>`) once its `byteLength`
+ * reaches or exceeds the supplied `highWaterMark`.
+ *
+ * @param {IterableBuilderOptions<T, TNull>} options An object of properties which determine the `Builder` to create and the chunking semantics to use.
+ * @returns A function which accepts a JavaScript `AsyncIterable` of values
+ * to write, and returns an `AsyncIterator` that yields Vectors
+ * according to the chunking semantics defined in the `options`
+ * argument.
+ * @nocollapse
+ */
+ public static throughAsyncIterable<T extends DataType = any, TNull = any>(options: IterableBuilderOptions<T, TNull>) {
+     // Delegate to the module-level `throughAsyncIterable` helper.
+     const toVectors = throughAsyncIterable(options);
+     return toVectors;
+ }
+ /**
+  * Create a builder for the given Arrow DataType. Any supplied null-value
+  * sentinels are interpreted as "null" when set or appended to the `Builder`.
+  * @param {{ type: T, nullValues?: any[] }} options The `BuilderOptions` describing this `Builder`.
+  */
+ constructor({ 'type': type, 'nullValues': nulls }: BuilderOptions<T, TNull>) {
+     this.type = type;
+     this.stride = strideForType(type);
+     this.nullValues = nulls;
+     this.children = [];
+     this._nulls = new BitmapBufferBuilder();
+     // Install a validity check only when at least one sentinel was supplied.
+     if (nulls != null && nulls.length > 0) {
+         this._isValid = createIsValidFunction(nulls);
+     }
+ }
+ /**
+ * The Builder's `DataType` instance.
+ * @readonly
+ */
+ public type: T;
+ /**
+ * The number of values written to the `Builder` that haven't been flushed yet.
+ * @readonly
+ */
+ public length = 0;
+ /**
+ * A boolean indicating whether `Builder.prototype.finish()` has been called on this `Builder`.
+ * @readonly
+ */
+ public finished = false;
+ /**
+ * The number of elements in the underlying values TypedArray that
+ * represent a single logical element, determined by this Builder's
+ * `DataType`. This is 1 for most types, but is larger when the `DataType`
+ * is `Int64`, `Uint64`, `Decimal`, `DateMillisecond`, certain variants of
+ * `Interval`, `Time`, or `Timestamp`, `FixedSizeBinary`, and `FixedSizeList`.
+ * @readonly
+ */
+ public readonly stride: number;
+ public readonly children: Builder[];
+ /**
+ * The list of null-value sentinels for this `Builder`. When one of these values
+ * is written to the `Builder` (either via `Builder.prototype.set()` or `Builder.prototype.append()`),
+ * a 0-bit (not valid) is written to this Builder's underlying null BitmapBufferBuilder.
+ * @readonly
+ */
+ public readonly nullValues?: TNull[] | ReadonlyArray<TNull> | null;
+ /**
+ * Flush the `Builder` and return a `Vector<T>`.
+ * NOTE(review): the body below is a bare `return;`, so as written this method
+ * returns `undefined` and never calls `flush()`. The expression that wraps
+ * `this.flush()` in a Vector appears to have been lost; restore it from upstream.
+ * @returns {Vector<T>} A `Vector<T>` of the flushed values.
+ */
+ public toVector() { return; }
+ /** The `ArrayType` exposed by this Builder's `DataType`. */
+ public get ArrayType() {
+     return this.type.ArrayType;
+ }
+ /** The number of invalid (null) slots reported by the validity bitmap builder. */
+ public get nullCount() {
+     return this._nulls.numInvalid;
+ }
+ /** The number of child Builders currently attached. */
+ public get numChildren() {
+     return this.children.length;
+ }
+ /**
+ * @returns The aggregate length (in bytes) of the values that have been written.
+ */
+ public get byteLength(): number {
+     // Sum whichever of the four buffer builders exist, then add every child's total.
+     let total = 0;
+     if (this._offsets) { total += this._offsets.byteLength; }
+     if (this._values) { total += this._values.byteLength; }
+     if (this._nulls) { total += this._nulls.byteLength; }
+     if (this._typeIds) { total += this._typeIds.byteLength; }
+     for (const child of this.children) {
+         total += child.byteLength;
+     }
+     return total;
+ }
+ /**
+ * @returns The aggregate number of rows that have been reserved to write new values.
+ */
+ public get reservedLength(): number {
+     // The validity bitmap builder is the source of truth for reserved rows.
+     const validity = this._nulls;
+     return validity.reservedLength;
+ }
+ /**
+ * @returns The aggregate length (in bytes) that has been reserved to write new values.
+ */
+ public get reservedByteLength(): number {
+     // Same shape as `byteLength`, but counting reserved capacity instead of written bytes.
+     let total = 0;
+     if (this._offsets) { total += this._offsets.reservedByteLength; }
+     if (this._values) { total += this._values.reservedByteLength; }
+     if (this._nulls) { total += this._nulls.reservedByteLength; }
+     if (this._typeIds) { total += this._typeIds.reservedByteLength; }
+     for (const child of this.children) {
+         total += child.reservedByteLength;
+     }
+     return total;
+ }
+ protected _offsets!: DataBufferBuilder<Int32Array>;
+ /** The offsets buffer, or null when this Builder has no offsets builder. */
+ public get valueOffsets() {
+     if (this._offsets) { return this._offsets.buffer; }
+     return null;
+ }
+ protected _values!: BufferBuilder<T['TArray'], any>;
+ /** The values buffer, or null when this Builder has no values builder. */
+ public get values() {
+     if (this._values) { return this._values.buffer; }
+     return null;
+ }
+ protected _nulls: BitmapBufferBuilder;
+ /** The validity bitmap buffer, or null when this Builder has no validity builder. */
+ public get nullBitmap() {
+     if (this._nulls) { return this._nulls.buffer; }
+     return null;
+ }
+ protected _typeIds!: DataBufferBuilder<Int8Array>;
+ /** The type-ids buffer, or null when this Builder has no type-ids builder. */
+ public get typeIds() {
+     if (this._typeIds) { return this._typeIds.buffer; }
+     return null;
+ }
+ protected _isValid!: (value: T['TValue'] | TNull) => boolean;
+ protected _setValue!: (inst: Builder<T>, index: number, value: T['TValue']) => void;
+ /**
+ * Appends a value (or null) to this `Builder`.
+ * This is equivalent to `builder.set(builder.length, value)`.
+ * @param {T['TValue'] | TNull } value The value to append.
+ */
+ public append(value: T['TValue'] | TNull) {
+     // Appending is a write at the current end of the Builder.
+     const nextIndex = this.length;
+     return this.set(nextIndex, value);
+ }
+ /**
+ * Reports whether a value is valid (true) or should be treated as null (false).
+ * @param {T['TValue'] | TNull } value The value to compare against the null-value representations.
+ */
+ public isValid(value: T['TValue'] | TNull): boolean {
+     const valid = this._isValid(value);
+     return valid;
+ }
+ /**
+ * Write a value (or null-value sentinel) at the supplied index.
+ * If the value matches one of the null-value representations, a 0-bit is
+ * written to the null `BitmapBufferBuilder` and the value is not stored.
+ * Otherwise, a 1-bit is written to the null `BitmapBufferBuilder`, and the
+ * value is passed to `Builder.prototype.setValue()`.
+ * @param {number} index The index of the value to write.
+ * @param {T['TValue'] | TNull } value The value to write at the supplied index.
+ * @returns {this} The updated `Builder` instance.
+ */
+ public set(index: number, value: T['TValue'] | TNull) {
+     // Record validity first; only valid values are forwarded to `setValue`.
+     const valid = this.isValid(value);
+     const shouldStore = this.setValid(index, valid);
+     if (shouldStore) {
+         this.setValue(index, value);
+     }
+     return this;
+ }
+ /**
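+ * Write a value to the underlying buffers at the supplied index, bypassing
+ * the null-value check. This is a low-level method that forwards to this
+ * Builder's `_setValue` function; prefer `Builder.prototype.set()`, which also
+ * records validity.
+ * @param {number} index The index of the value to write.
+ * @param {T['TValue']} value The value to write at the supplied index.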
+ */
+ public setValue(index: number, value: T['TValue']) {
+     this._setValue(this, index, value);
+ }
+ public setValid(index: number, valid: boolean) {
+     // The bitmap builder grows as needed; mirror its length onto this Builder.
+     const validity = this._nulls.set(index, +valid);
+     this.length = validity.length;
+     return valid;
+ }
+ // @ts-ignore
+ public addChild(child: Builder, name = `${this.numChildren}`) {
+     // The base Builder has no children; nested builders override this method.
+     const message = `Cannot append children to non-nested type "${this.type}"`;
+     throw new Error(message);
+ }
+ /**
+ * Retrieve the child `Builder` at the supplied `index`, or null if no child
+ * exists at that index.
+ * @param {number} index The index of the child `Builder` to retrieve.
+ * @returns {Builder | null} The child Builder at the supplied index or null.
+ */
+ public getChildAt<R extends DataType = any>(index: number): Builder<R> | null {
+     const child = this.children[index];
+     return child || null;
+ }
+ /**
+ * Commit all the values that have been written to their underlying
+ * ArrayBuffers, including any child Builders if applicable, and reset
+ * the internal `Builder` state.
+ * @returns A `Data<T>` of the buffers and childData representing the values written.
+ */
+ public flush() {
+     const buffers: any = [];
+     const values = this._values;
+     const offsets = this._offsets;
+     const typeIds = this._typeIds;
+     // Capture length and nullCount up front: flushing the buffer builders resets them.
+     const { length, nullCount } = this;
+     if (typeIds) { /* Unions */
+         buffers[BufferType.TYPE] = typeIds.flush(length);
+         // DenseUnions
+         offsets && (buffers[BufferType.OFFSET] = offsets.flush(length));
+     } else if (offsets) { /* Variable-width primitives (Binary, Utf8) and Lists */
+         // Binary, Utf8
+         // `offsets.last()` must be read before `offsets.flush()`, which clears the offsets builder.
+         values && (buffers[BufferType.DATA] = values.flush(offsets.last()));
+         buffers[BufferType.OFFSET] = offsets.flush(length);
+     } else if (values) { /* Fixed-width primitives (Int, Float, Decimal, Time, Timestamp, and Interval) */
+         buffers[BufferType.DATA] = values.flush(length);
+     }
+     // The validity bitmap is only emitted when at least one slot is null.
+     nullCount > 0 && (buffers[BufferType.VALIDITY] = this._nulls.flush(length));
+     // Restored: the `Data.new` callee and the `this.children.map(...)` argument were
+     // missing here, which left this statement syntactically invalid.
+     const data = Data.new<T>(
+         this.type, 0, length, nullCount, buffers as Buffers<T>,
+         this.children.map((child) => child.flush())) as Data<T>;
+     this.clear();
+     return data;
+ }
+ /**
+ * Finalize this `Builder`, and child builders if applicable.
+ * @returns {this} The finalized `Builder` instance.
+ */
+ public finish() {
+     this.finished = true;
+     // Finalize children after marking this Builder as finished.
+     for (const child of this.children) {
+         child.finish();
+     }
+     return this;
+ }
+ /**
+ * Clear this Builder's internal state, including child Builders if applicable, and reset the length to 0.
+ * @returns {this} The cleared `Builder` instance.
+ */
+ public clear() {
+     this.length = 0;
+     // Reset whichever buffer builders exist, then every child Builder.
+     if (this._offsets) { this._offsets.clear(); }
+     if (this._values) { this._values.clear(); }
+     if (this._nulls) { this._nulls.clear(); }
+     if (this._typeIds) { this._typeIds.clear(); }
+     for (const child of this.children) {
+         child.clear();
+     }
+     return this;
+ }
+(Builder.prototype as any).length = 1; // prototype-level default. NOTE(review): instances use the `length = 0` field initializer; confirm 1 is intended here
+(Builder.prototype as any).stride = 1; // replaced per instance by `strideForType(type)` in the constructor
+(Builder.prototype as any).children = null; // replaced per instance by `[]` in the constructor
+(Builder.prototype as any).finished = false; // same value as the instance field initializer
+(Builder.prototype as any).nullValues = null; // replaced per instance by the `nullValues` option in the constructor
+(Builder.prototype as any)._isValid = () => true; // default: every value is valid; the constructor overrides it when `nullValues` is non-empty
+/**
+ * Base class for builders whose values are stored in a single
+ * `DataBufferBuilder`, created from this Builder's `ArrayType` and `stride`.
+ * @ignore
+ */
+export abstract class FixedWidthBuilder<T extends Int | Float | FixedSizeBinary | Date_ | Timestamp | Time | Decimal | Interval = any, TNull = any> extends Builder<T, TNull> {
+    constructor(opts: BuilderOptions<T, TNull>) {
+        super(opts);
+        this._values = new DataBufferBuilder(new this.ArrayType(0), this.stride);
+    }
+    /**
+     * Reserve room in the values buffer so `index` is addressable, then
+     * delegate the actual write to `Builder.prototype.setValue()`.
+     * @param {number} index The index of the value to write.
+     * @param {T['TValue']} value The value to write at the supplied index.
+     */
+    public setValue(index: number, value: T['TValue']) {
+        const values = this._values;
+        // `reserve` ignores non-positive amounts, so in-range writes do not grow the buffer.
+        values.reserve(index - values.length + 1);
+        return super.setValue(index, value);
+    }
+}
+/**
+ * Base class for builders of variable-width values. Written values are held
+ * in a pending `Map` keyed by index and only materialized into buffers by the
+ * subclass's `_flushPending()` when `flush()` or `finish()` is called.
+ * @ignore
+ */
+export abstract class VariableWidthBuilder<T extends Binary | Utf8 | List | Map_, TNull = any> extends Builder<T, TNull> {
+    /** Sum of the `length` of every value currently pending. */
+    protected _pendingLength = 0;
+    protected _offsets: OffsetsBufferBuilder;
+    /** Values (or `undefined` for nulls) written since the last flush, keyed by index. */
+    protected _pending: Map<number, any> | undefined;
+    constructor(opts: BuilderOptions<T, TNull>) {
+        super(opts);
+        this._offsets = new OffsetsBufferBuilder();
+    }
+    /**
+     * Queue a value for the supplied index, replacing any value already
+     * pending there and keeping `_pendingLength` in sync.
+     */
+    public setValue(index: number, value: T['TValue']) {
+        const pending = this._pending || (this._pending = new Map());
+        const current = pending.get(index);
+        // Overwriting a pending slot: drop the old value's contribution first.
+        current && (this._pendingLength -= current.length);
+        this._pendingLength += value.length;
+        pending.set(index, value);
+    }
+    /**
+     * Record validity for the supplied index. A null slot is queued as
+     * `undefined` so that it is still visited when pending values are flushed.
+     */
+    public setValid(index: number, isValid: boolean) {
+        if (!super.setValid(index, isValid)) {
+            (this._pending || (this._pending = new Map())).set(index, undefined);
+            return false;
+        }
+        return true;
+    }
+    /** Discard all pending values and reset the underlying Builder state. */
+    public clear() {
+        this._pendingLength = 0;
+        this._pending = undefined;
+        return super.clear();
+    }
+    /** Materialize pending values, then flush as `Builder.prototype.flush()` does. */
+    public flush() {
+        this._flush();
+        return super.flush();
+    }
+    /** Materialize pending values, then finalize as `Builder.prototype.finish()` does. */
+    public finish() {
+        this._flush();
+        return super.finish();
+    }
+    /**
+     * Detach the pending map and hand it to `_flushPending()` if it is non-empty.
+     * The pending state is reset before the subclass hook runs.
+     */
+    protected _flush() {
+        const pending = this._pending;
+        const pendingLength = this._pendingLength;
+        this._pendingLength = 0;
+        this._pending = undefined;
+        if (pending && pending.size > 0) {
+            this._flushPending(pending, pendingLength);
+        }
+        return this;
+    }
+    /**
+     * Write the pending values into the underlying buffers.
+     * @param pending The detached map of index to value (`undefined` for nulls).
+     * @param pendingLength The summed `length` of the pending values.
+     */
+    protected abstract _flushPending(pending: Map<number, any>, pendingLength: number): void;
+}
+/** @ignore */
+type ThroughIterable<T extends DataType = any, TNull = any> = (source: Iterable<T['TValue'] | TNull>) => IterableIterator<V<T>>;
+/**
+ * Create a function that turns a synchronous `Iterable` of values into an
+ * iterator of Vectors. A Vector is yielded each time the builder's `length`
+ * (or `byteLength`, when `queueingStrategy` is `"bytes"`) reaches
+ * `highWaterMark`, which defaults to 1000 for `"count"` and 2 ** 14 for `"bytes"`.
+ * @ignore
+ */
+function throughIterable<T extends DataType = any, TNull = any>(options: IterableBuilderOptions<T, TNull>) {
+    const { ['queueingStrategy']: queueingStrategy = 'count' } = options;
+    const { ['highWaterMark']: highWaterMark = queueingStrategy !== 'bytes' ? 1000 : 2 ** 14 } = options;
+    const sizeProperty: 'length' | 'byteLength' = queueingStrategy !== 'bytes' ? 'length' : 'byteLength';
+    return function*(source: Iterable<T['TValue'] | TNull>) {
+        let numChunks = 0;
+        // Restored: the initializer was missing (`const builder =;`), a syntax error.
+        const builder = Builder.new(options);
+        for (const value of source) {
+            if (builder.append(value)[sizeProperty] >= highWaterMark) {
+                ++numChunks && (yield builder.toVector());
+            }
+        }
+        // Emit the remainder; when nothing was yielded at all, still emit one Vector.
+        if (builder.finish().length > 0 || numChunks === 0) {
+            yield builder.toVector();
+        }
+    } as ThroughIterable<T, TNull>;
+}
+/** @ignore */
+type ThroughAsyncIterable<T extends DataType = any, TNull = any> = (source: Iterable<T['TValue'] | TNull> | AsyncIterable<T['TValue'] | TNull>) => AsyncIterableIterator<V<T>>;
+/**
+ * Create a function that turns an `Iterable` or `AsyncIterable` of values
+ * into an async iterator of Vectors. A Vector is yielded each time the
+ * builder's `length` (or `byteLength`, when `queueingStrategy` is `"bytes"`)
+ * reaches `highWaterMark`, which defaults to 1000 for `"count"` and 2 ** 14
+ * for `"bytes"`.
+ * @ignore
+ */
+function throughAsyncIterable<T extends DataType = any, TNull = any>(options: IterableBuilderOptions<T, TNull>) {
+    const { ['queueingStrategy']: queueingStrategy = 'count' } = options;
+    const { ['highWaterMark']: highWaterMark = queueingStrategy !== 'bytes' ? 1000 : 2 ** 14 } = options;
+    const sizeProperty: 'length' | 'byteLength' = queueingStrategy !== 'bytes' ? 'length' : 'byteLength';
+    return async function* (source: Iterable<T['TValue'] | TNull> | AsyncIterable<T['TValue'] | TNull>) {
+        let numChunks = 0;
+        // Restored: the initializer was missing (`const builder =;`), a syntax error.
+        const builder = Builder.new(options);
+        for await (const value of source) {
+            if (builder.append(value)[sizeProperty] >= highWaterMark) {
+                ++numChunks && (yield builder.toVector());
+            }
+        }
+        // Emit the remainder; when nothing was yielded at all, still emit one Vector.
+        if (builder.finish().length > 0 || numChunks === 0) {
+            yield builder.toVector();
+        }
+    } as ThroughAsyncIterable<T, TNull>;
+}
diff --git a/src/arrow/js/src/builder/binary.ts b/src/arrow/js/src/builder/binary.ts
new file mode 100644
index 000000000..829da5c97
--- /dev/null
+++ b/src/arrow/js/src/builder/binary.ts
@@ -0,0 +1,54 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+import { Binary } from '../type';
+import { toUint8Array } from '../util/buffer';
+import { BufferBuilder } from './buffer';
+import { VariableWidthBuilder, BuilderOptions } from '../builder';
+/**
+ * Builder for `Binary` values. Values are converted with `toUint8Array` and
+ * kept pending until flushed, at which point they are copied back-to-back
+ * into the values buffer and their lengths are written to the offsets builder.
+ * @ignore
+ */
+export class BinaryBuilder<TNull = any> extends VariableWidthBuilder<Binary, TNull> {
+    constructor(opts: BuilderOptions<Binary, TNull>) {
+        super(opts);
+        this._values = new BufferBuilder(new Uint8Array(0));
+    }
+    /**
+     * @returns The flushed buffer sizes plus the bytes still pending, plus
+     * `length * 4` (presumably one 32-bit offset per row; confirm).
+     */
+    public get byteLength(): number {
+        let size = this._pendingLength + (this.length * 4);
+        this._offsets && (size += this._offsets.byteLength);
+        this._values && (size += this._values.byteLength);
+        this._nulls && (size += this._nulls.byteLength);
+        return size;
+    }
+    /** Queue a value after converting it with `toUint8Array`. */
+    public setValue(index: number, value: Uint8Array) {
+        return super.setValue(index, toUint8Array(value));
+    }
+    /**
+     * Copy every pending value into the values buffer and record its length
+     * in the offsets builder; null slots (`undefined`) record a length of 0.
+     */
+    protected _flushPending(pending: Map<number, Uint8Array | undefined>, pendingLength: number) {
+        const offsets = this._offsets;
+        const data = this._values.reserve(pendingLength).buffer;
+        let offset = 0;
+        for (const [index, value] of pending) {
+            if (value === undefined) {
+                offsets.set(index, 0);
+            } else {
+                const length = value.length;
+                data.set(value, offset);
+                offsets.set(index, length);
+                offset += length;
+            }
+        }
+    }
+}
diff --git a/src/arrow/js/src/builder/bool.ts b/src/arrow/js/src/builder/bool.ts
new file mode 100644
index 000000000..5c0e0950e
--- /dev/null
+++ b/src/arrow/js/src/builder/bool.ts
@@ -0,0 +1,31 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+import { Bool } from '../type';
+import { BitmapBufferBuilder } from './buffer';
+import { Builder, BuilderOptions } from '../builder';
+/**
+ * Builder for `Bool` values, which are stored in a `BitmapBufferBuilder`.
+ * @ignore
+ */
+export class BoolBuilder<TNull = any> extends Builder<Bool, TNull> {
+    constructor(options: BuilderOptions<Bool, TNull>) {
+        super(options);
+        this._values = new BitmapBufferBuilder();
+    }
+    /** Write the boolean at `index` as a 0 or 1 bit. */
+    public setValue(index: number, value: boolean) {
+        this._values.set(index, +value);
+    }
+}
diff --git a/src/arrow/js/src/builder/buffer.ts b/src/arrow/js/src/builder/buffer.ts
new file mode 100644
index 000000000..3c20cc001
--- /dev/null
+++ b/src/arrow/js/src/builder/buffer.ts
@@ -0,0 +1,182 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+import { memcpy } from '../util/buffer';
+import { BigIntAvailable, BigInt64Array, BigUint64Array } from '../util/compat';
+import {
+ TypedArray, TypedArrayConstructor,
+ BigIntArray, BigIntArrayConstructor
+} from '../interfaces';
+/** @ignore */ type DataValue<T> = T extends TypedArray ? number : T extends BigIntArray ? WideValue<T> : T;
+/** @ignore */ type WideValue<T extends BigIntArray> = T extends BigIntArray ? bigint | Int32Array | Uint32Array : never;
+/** @ignore */ type ArrayCtor<T extends TypedArray | BigIntArray> =
+ T extends TypedArray ? TypedArrayConstructor<T> :
+ T extends BigIntArray ? BigIntArrayConstructor<T> :
+ any;
+/**
+ * Round `len` elements of `BPE` bytes each up to the next multiple of 64
+ * bytes (with a minimum of 64 bytes), expressed as an element count.
+ * @ignore
+ */
+const roundLengthUpToNearest64Bytes = (len: number, BPE: number) => {
+    const paddedBytes = ((len * BPE) + 63) & ~63;
+    return (paddedBytes || 64) / BPE;
+};
+/**
+ * Return a view of the first `len` elements when `arr` is long enough;
+ * otherwise allocate a new array of `len` elements and copy `arr` into it.
+ * @ignore
+ */
+const sliceOrExtendArray = <T extends TypedArray | BigIntArray>(arr: T, len = 0) => {
+    if (arr.length >= len) {
+        return arr.subarray(0, len) as T;
+    }
+    const grown = new (arr.constructor as any)(len);
+    return memcpy(grown, arr, 0) as T;
+};
+/**
+ * Declaration merged with the `BufferBuilder` class: `offset` is supplied by
+ * a prototype-level default rather than an instance field.
+ * @ignore
+ */
+export interface BufferBuilder<T extends TypedArray | BigIntArray = any, TValue = DataValue<T>> {
+    readonly offset: number;
+}
+/**
+ * A growable wrapper around a TypedArray. `length` counts logical elements;
+ * each logical element occupies `stride` slots of the underlying `buffer`.
+ * @ignore
+ */
+export class BufferBuilder<T extends TypedArray | BigIntArray = any, TValue = DataValue<T>> {
+    /**
+     * @param buffer The initial backing array; its constructor is reused for reallocation.
+     * @param stride The number of buffer slots per logical element (defaults to 1).
+     */
+    constructor(buffer: T, stride = 1) {
+        this.buffer = buffer;
+        this.stride = stride;
+        // Fix: this field was declared and read (byteLength, reserve, flush) but never
+        // assigned, which turned every size computation into NaN or 0. It must be set
+        // before `_resize`, because subclasses read it there.
+        this.BYTES_PER_ELEMENT = buffer.BYTES_PER_ELEMENT;
+        this.ArrayType = buffer.constructor as ArrayCtor<T>;
+        this._resize(this.length = buffer.length / stride | 0);
+    }
+    public buffer: T;
+    public length: number;
+    public readonly stride: number;
+    public readonly ArrayType: ArrayCtor<T>;
+    public readonly BYTES_PER_ELEMENT: number;
+    /** The number of bytes occupied by the `length` elements written so far. */
+    public get byteLength() { return this.length * this.stride * this.BYTES_PER_ELEMENT | 0; }
+    /** The number of logical elements the current buffer can hold. */
+    public get reservedLength() { return this.buffer.length / this.stride; }
+    /** The size in bytes of the current buffer. */
+    public get reservedByteLength() { return this.buffer.byteLength; }
+    /** No-op in the base class; subclasses override it to write `value` at `index`. */
+    // @ts-ignore
+    public set(index: number, value: TValue) { return this; }
+    /** Write `value` at the current end of the builder. */
+    public append(value: TValue) { return this.set(this.length, value); }
+    /**
+     * Extend `length` by `extra` elements (ignored when `extra <= 0`) and
+     * reallocate when the required slot count reaches the allocated size.
+     * The first allocation fits the requested size; later ones double it.
+     * Both are rounded up to a multiple of 64 bytes.
+     */
+    public reserve(extra: number) {
+        if (extra > 0) {
+            this.length += extra;
+            const stride = this.stride;
+            const length = this.length * stride;
+            const reserved = this.buffer.length;
+            if (length >= reserved) {
+                this._resize(reserved === 0
+                    ? roundLengthUpToNearest64Bytes(length * 1, this.BYTES_PER_ELEMENT)
+                    : roundLengthUpToNearest64Bytes(length * 2, this.BYTES_PER_ELEMENT)
+                );
+            }
+        }
+        return this;
+    }
+    /**
+     * Return the first `length` elements, padded to a 64-byte multiple, and
+     * reset the builder.
+     * @param length The number of logical elements to emit (defaults to `this.length`).
+     */
+    public flush(length = this.length) {
+        length = roundLengthUpToNearest64Bytes(length * this.stride, this.BYTES_PER_ELEMENT);
+        const array = sliceOrExtendArray<T>(this.buffer, length);
+        this.clear();
+        return array;
+    }
+    /** Reset `length` to 0 and replace the buffer with an empty one. */
+    public clear() {
+        this.length = 0;
+        this._resize(0);
+        return this;
+    }
+    /** Replace the buffer with a new array of `newLength` slots, copying the old contents via `memcpy`. */
+    protected _resize(newLength: number) {
+        return this.buffer = <T> memcpy(new this.ArrayType(newLength), this.buffer);
+    }
+}
+(BufferBuilder.prototype as any).offset = 0; // prototype-level default for the `offset` member declared on the BufferBuilder interface
+/**
+ * A `BufferBuilder` for numeric values that adds indexed reads and writes.
+ * @ignore
+ */
+export class DataBufferBuilder<T extends TypedArray> extends BufferBuilder<T, number> {
+    /** The value read at `length - 1`. */
+    public last() { return this.get(this.length - 1); }
+    /**
+     * Read the buffer slot at `index`.
+     * NOTE(review): unlike `set`, the index is not multiplied by `stride`; confirm
+     * callers only use this on builders with a stride of 1.
+     */
+    public get(index: number) { return this.buffer[index]; }
+    /** Grow the buffer if needed, then write `value` at slot `index * stride`. */
+    public set(index: number, value: number) {
+        this.reserve(index - this.length + 1);
+        this.buffer[index * this.stride] = value;
+        return this;
+    }
+}
+/**
+ * A bit-packed builder: eight logical elements per byte (stride of 1/8),
+ * with a running count of the bits currently set to 1.
+ * @ignore
+ */
+export class BitmapBufferBuilder extends DataBufferBuilder<Uint8Array> {
+    constructor(data = new Uint8Array(0)) { super(data, 1 / 8); }
+    /** The number of bits currently set to 1. */
+    public numValid = 0;
+    /** The number of bits written that are 0 (`length - numValid`). */
+    public get numInvalid() { return this.length - this.numValid; }
+    /** Read the bit at `idx` as 0 or 1. */
+    public get(idx: number) { return this.buffer[idx >> 3] >> idx % 8 & 1; }
+    /** Grow the bitmap if needed, then set or clear the bit at `idx`, updating `numValid`. */
+    public set(idx: number, val: number) {
+        const { buffer } = this.reserve(idx - this.length + 1);
+        const byte = idx >> 3, bit = idx % 8, cur = buffer[byte] >> bit & 1;
+        // If `val` is truthy and the current bit is 0, flip it to 1 and increment `numValid`.
+        // If `val` is falsey and the current bit is 1, flip it to 0 and decrement `numValid`.
+        val ? cur === 0 && ((buffer[byte] |= (1 << bit)), ++this.numValid)
+            : cur === 1 && ((buffer[byte] &= ~(1 << bit)), --this.numValid);
+        return this;
+    }
+    /** Reset the valid-bit count along with the buffer. */
+    public clear() {
+        this.numValid = 0;
+        return super.clear();
+    }
+}
+/**
+ * Builder for an offsets buffer. Callers supply element lengths; the builder
+ * stores running end offsets. It starts with a single slot, so `length - 1`
+ * is the number of elements written.
+ * @ignore
+ */
+export class OffsetsBufferBuilder extends DataBufferBuilder<Int32Array> {
+    constructor(data = new Int32Array(1)) { super(data, 1); }
+    /** Record the length of the next element. */
+    public append(value: number) {
+        return this.set(this.length - 1, value);
+    }
+    /**
+     * Record `value` as the length of element `index`: slot `index + 1`
+     * becomes the previous offset plus `value`. Slots skipped since the last
+     * write are back-filled with the last known offset.
+     */
+    public set(index: number, value: number) {
+        const offset = this.length - 1;
+        const buffer = this.reserve(index - offset + 1).buffer;
+        if (offset < index++) {
+            buffer.fill(buffer[offset], offset, index);
+        }
+        buffer[index] = buffer[index - 1] + value;
+        return this;
+    }
+    /**
+     * Flush `length + 1` offsets. When more elements are requested than were
+     * written, the missing ones are first recorded with a length of 0.
+     */
+    public flush(length = this.length - 1) {
+        if (length > this.length) {
+            this.set(length - 1, 0);
+        }
+        return super.flush(length + 1);
+    }
+}
+/**
+ * A `BufferBuilder` whose values can be written as bigints, numbers or
+ * typed arrays. When `BigIntAvailable`, a 64-bit view (`buffer64`) over the
+ * same memory is rebuilt on every resize.
+ * @ignore
+ */
+export class WideBufferBuilder<T extends TypedArray, R extends BigIntArray> extends BufferBuilder<T, DataValue<T>> {
+    public buffer64!: R;
+    protected _ArrayType64!: BigIntArrayConstructor<R>;
+    /** `BigInt64Array` when the buffer is an `Int32Array`, otherwise `BigUint64Array`; cached after first use. */
+    public get ArrayType64() {
+        return this._ArrayType64 || (this._ArrayType64 = <BigIntArrayConstructor<R>> (this.buffer instanceof Int32Array ? BigInt64Array : BigUint64Array));
+    }
+    /**
+     * Grow the buffer if needed, then write `value` at `index`: bigints go
+     * through `buffer64`, numbers into slot `index * stride`, and anything
+     * else is copied in as a typed array starting at that slot.
+     */
+    public set(index: number, value: DataValue<T>) {
+        this.reserve(index - this.length + 1);
+        switch (typeof value) {
+            case 'bigint': this.buffer64[index] = value; break;
+            case 'number': this.buffer[index * this.stride] = value; break;
+            default: this.buffer.set(value as TypedArray, index * this.stride);
+        }
+        return this;
+    }
+    /** Resize the buffer and, when `BigIntAvailable`, rebuild the 64-bit view over it. */
+    protected _resize(newLength: number) {
+        const data = super._resize(newLength);
+        const length = data.byteLength / (this.BYTES_PER_ELEMENT * this.stride);
+        if (BigIntAvailable) {
+            this.buffer64 = new this.ArrayType64(data.buffer, data.byteOffset, length);
+        }
+        return data;
+    }
+}
diff --git a/src/arrow/js/src/builder/date.ts b/src/arrow/js/src/builder/date.ts
new file mode 100644
index 000000000..e9748b58c
--- /dev/null
+++ b/src/arrow/js/src/builder/date.ts
@@ -0,0 +1,26 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+import { FixedWidthBuilder } from '../builder';
+import { Date_, DateDay, DateMillisecond } from '../type';
+/**
+ * Builder for `Date_` values; adds nothing to `FixedWidthBuilder`.
+ * @ignore
+ */
+export class DateBuilder<T extends Date_ = Date_, TNull = any> extends FixedWidthBuilder<T, TNull> {
+}
+/**
+ * `DateBuilder` specialized to `DateDay`.
+ * @ignore
+ */
+export class DateDayBuilder<TNull = any> extends DateBuilder<DateDay, TNull> {
+}
+/**
+ * `DateBuilder` specialized to `DateMillisecond`.
+ * @ignore
+ */
+export class DateMillisecondBuilder<TNull = any> extends DateBuilder<DateMillisecond, TNull> {
+}
diff --git a/src/arrow/js/src/builder/decimal.ts b/src/arrow/js/src/builder/decimal.ts
new file mode 100644
index 000000000..5814abd5b
--- /dev/null
+++ b/src/arrow/js/src/builder/decimal.ts
@@ -0,0 +1,22 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+import { Decimal } from '../type';
+import { FixedWidthBuilder } from '../builder';
+/**
+ * Builder for `Decimal` values; adds nothing to `FixedWidthBuilder`.
+ * @ignore
+ */
+export class DecimalBuilder<TNull = any> extends FixedWidthBuilder<Decimal, TNull> {
+}
diff --git a/src/arrow/js/src/builder/dictionary.ts b/src/arrow/js/src/builder/dictionary.ts
new file mode 100644
index 000000000..6602825dd
--- /dev/null
+++ b/src/arrow/js/src/builder/dictionary.ts
@@ -0,0 +1,98 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+import { Vector } from '../vector';
+import { IntBuilder } from './int';
+import { Dictionary, DataType } from '../type';
+import { Builder, BuilderOptions } from '../builder';
+type DictionaryHashFunction = (x: any) => string | number;
+/**
+ * `BuilderOptions` plus an optional function that maps a value to the key
+ * used to detect values already present in the dictionary.
+ */
+export interface DictionaryBuilderOptions<T extends DataType = any, TNull = any> extends BuilderOptions<T, TNull> {
+    dictionaryHashFunction?: DictionaryHashFunction;
+}
+/**
+ * Builder for `Dictionary` values. Each distinct value (as identified by
+ * `valueToKey`) is appended once to the `dictionary` builder, and its position
+ * is what gets written to the `indices` builder.
+ * @ignore
+ */
+export class DictionaryBuilder<T extends Dictionary, TNull = any> extends Builder<T, TNull> {
+    /** Number of dictionary entries already emitted by earlier flushes. */
+    protected _dictionaryOffset: number;
+    /** The dictionary Vector accumulated across flushes. */
+    protected _dictionary?: Vector<T['dictionary']>;
+    /** Maps a value's key to its dictionary index. */
+    protected _keysToIndices: { [key: string]: number };
+    public readonly indices: IntBuilder<T['indices']>;
+    public readonly dictionary: Builder<T['dictionary']>;
+    constructor({ 'type': type, 'nullValues': nulls, 'dictionaryHashFunction': hashFn }: DictionaryBuilderOptions<T, TNull>) {
+        // Restored: the third argument (`type.id`) was missing, leaving `,,` in the call.
+        super({ type: new Dictionary(type.dictionary, type.indices, type.id, type.isOrdered) as T });
+        // Validity is tracked by the `indices` builder, not by this Builder.
+        this._nulls = <any> null;
+        this._dictionaryOffset = 0;
+        this._keysToIndices = Object.create(null);
+        // Restored: the `Builder.new(` callee was missing from both assignments below.
+        this.indices = Builder.new({ 'type': this.type.indices, 'nullValues': nulls }) as IntBuilder<T['indices']>;
+        this.dictionary = Builder.new({ 'type': this.type.dictionary, 'nullValues': null }) as Builder<T['dictionary']>;
+        if (typeof hashFn === 'function') {
+            this.valueToKey = hashFn;
+        }
+    }
+    public get values() { return this.indices.values; }
+    public get nullCount() { return this.indices.nullCount; }
+    public get nullBitmap() { return this.indices.nullBitmap; }
+    public get byteLength() { return this.indices.byteLength + this.dictionary.byteLength; }
+    public get reservedLength() { return this.indices.reservedLength + this.dictionary.reservedLength; }
+    public get reservedByteLength() { return this.indices.reservedByteLength + this.dictionary.reservedByteLength; }
+    /** Null detection is delegated to the `indices` builder, which received the `nullValues`. */
+    public isValid(value: T['TValue'] | TNull) { return this.indices.isValid(value); }
+    /** Record validity on the `indices` builder and mirror its length. */
+    public setValid(index: number, valid: boolean) {
+        const indices = this.indices;
+        valid = indices.setValid(index, valid);
+        this.length = indices.length;
+        return valid;
+    }
+    /**
+     * Write the dictionary index of `value` at `index`, appending `value` to
+     * the dictionary first if its key has not been seen before.
+     */
+    public setValue(index: number, value: T['TValue']) {
+        const keysToIndices = this._keysToIndices;
+        const key = this.valueToKey(value);
+        let idx = keysToIndices[key];
+        if (idx === undefined) {
+            // New entry: its index continues on from entries emitted by earlier flushes.
+            keysToIndices[key] = idx = this._dictionaryOffset + this.dictionary.append(value).length - 1;
+        }
+        return this.indices.setValue(index, idx);
+    }
+    /**
+     * Flush the indices and attach the dictionary, concatenated onto the
+     * dictionary from earlier flushes when there is one.
+     */
+    public flush() {
+        const type = this.type;
+        const prev = this._dictionary;
+        const curr = this.dictionary.toVector();
+        const data = this.indices.flush().clone(type);
+        data.dictionary = prev ? prev.concat(curr) : curr;
+        this.finished || (this._dictionaryOffset += curr.length);
+        this._dictionary = data.dictionary as Vector<T['dictionary']>;
+        this.clear();
+        return data;
+    }
+    /** Finalize both inner builders and reset the key-to-index state. */
+    public finish() {
+        this.indices.finish();
+        this.dictionary.finish();
+        this._dictionaryOffset = 0;
+        this._keysToIndices = Object.create(null);
+        return super.finish();
+    }
+    public clear() {
+        this.indices.clear();
+        this.dictionary.clear();
+        return super.clear();
+    }
+    /** Default key function: strings are used as-is, anything else is stringified. */
+    public valueToKey(val: any): string | number {
+        return typeof val === 'string' ? val : `${val}`;
+    }
+}
diff --git a/src/arrow/js/src/builder/fixedsizebinary.ts b/src/arrow/js/src/builder/fixedsizebinary.ts
new file mode 100644
index 000000000..99aaf46a3
--- /dev/null
+++ b/src/arrow/js/src/builder/fixedsizebinary.ts
@@ -0,0 +1,22 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+import { FixedSizeBinary } from '../type';
+import { FixedWidthBuilder } from '../builder';
+/**
+ * Builder for `FixedSizeBinary` values; adds nothing to `FixedWidthBuilder`.
+ * @ignore
+ */
+export class FixedSizeBinaryBuilder<TNull = any> extends FixedWidthBuilder<FixedSizeBinary, TNull> {
+}
diff --git a/src/arrow/js/src/builder/fixedsizelist.ts b/src/arrow/js/src/builder/fixedsizelist.ts
new file mode 100644
index 000000000..cc20f5ba2
--- /dev/null
+++ b/src/arrow/js/src/builder/fixedsizelist.ts
@@ -0,0 +1,41 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+import { Run } from './run';
+import { Field } from '../schema';
+import { Builder } from '../builder';
+import { DataType, FixedSizeList } from '../type';
+/**
+ * Builder for `FixedSizeList` values. It accepts exactly one child Builder.
+ * @ignore
+ */
+export class FixedSizeListBuilder<T extends DataType = any, TNull = any> extends Builder<FixedSizeList<T>, TNull> {
+    protected _run = new Run<T, TNull>();
+    /** Write `value` after passing it through `this._run.bind()`. */
+    public setValue(index: number, value: T['TValue']) {
+        super.setValue(index, this._run.bind(value));
+    }
+    /**
+     * Attach the single child Builder and rebuild this Builder's type around
+     * a nullable `Field` of the child's type.
+     * @returns The result of `children.push()`, i.e. the new child count.
+     * @throws {Error} If a child has already been added.
+     */
+    public addChild(child: Builder<T>, name = '0') {
+        if (this.numChildren > 0) {
+            throw new Error('FixedSizeListBuilder can only have one child.');
+        }
+        const childIndex = this.children.push(child);
+        this.type = new FixedSizeList(this.type.listSize, new Field(name, child.type, true));
+        return childIndex;
+    }
+    public clear() {
+        this._run.clear();
+        return super.clear();
+    }
+}
diff --git a/src/arrow/js/src/builder/float.ts b/src/arrow/js/src/builder/float.ts
new file mode 100644
index 000000000..dbf4c0d06
--- /dev/null
+++ b/src/arrow/js/src/builder/float.ts
@@ -0,0 +1,45 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+import { float64ToUint16 } from '../util/math';
+import { FixedWidthBuilder } from '../builder';
+import { Float, Float16, Float32, Float64 } from '../type';
+/**
+ * Common base class for the floating-point builders: a `FixedWidthBuilder`
+ * constrained to `Float` types that declares no members of its own.
+ * @ignore
+ */
+export class FloatBuilder<T extends Float = Float, TNull = any> extends FixedWidthBuilder<T, TNull> {}
+/**
+ * `FloatBuilder` specialized to `Float16`.
+ * @ignore
+ */
+export class Float16Builder<TNull = any> extends FloatBuilder<Float16, TNull> {
+ /** Converts `value` with `float64ToUint16` and writes the result into the values buffer at `index`. */
+ public setValue(index: number, value: number) {
+ // convert JS float64 to a uint16
+ this._values.set(index, float64ToUint16(value));
+ }
+}
+/**
+ * `FloatBuilder` specialized to `Float32`.
+ * @ignore
+ */
+export class Float32Builder<TNull = any> extends FloatBuilder<Float32, TNull> {
+ /** Writes `value` into the values buffer at `index` without conversion. */
+ public setValue(index: number, value: number) {
+ this._values.set(index, value);
+ }
+}
+/**
+ * `FloatBuilder` specialized to `Float64`.
+ * @ignore
+ */
+export class Float64Builder<TNull = any> extends FloatBuilder<Float64, TNull> {
+ /** Writes `value` into the values buffer at `index` without conversion. */
+ public setValue(index: number, value: number) {
+ this._values.set(index, value);
+ }
+}
diff --git a/src/arrow/js/src/builder/index.ts b/src/arrow/js/src/builder/index.ts
new file mode 100644
index 000000000..dfd9d54f1
--- /dev/null
+++ b/src/arrow/js/src/builder/index.ts
@@ -0,0 +1,82 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+/** @ignore */
+export { Builder, BuilderOptions } from '../builder';
+export { BoolBuilder } from './bool';
+export { NullBuilder } from './null';
+export { DateBuilder, DateDayBuilder, DateMillisecondBuilder } from './date';
+export { DecimalBuilder } from './decimal';
+export { DictionaryBuilder } from './dictionary';
+export { FixedSizeBinaryBuilder } from './fixedsizebinary';
+export { FloatBuilder, Float16Builder, Float32Builder, Float64Builder } from './float';
+export { IntBuilder, Int8Builder, Int16Builder, Int32Builder, Int64Builder, Uint8Builder, Uint16Builder, Uint32Builder, Uint64Builder } from './int';
+export { TimeBuilder, TimeSecondBuilder, TimeMillisecondBuilder, TimeMicrosecondBuilder, TimeNanosecondBuilder } from './time';
+export { TimestampBuilder, TimestampSecondBuilder, TimestampMillisecondBuilder, TimestampMicrosecondBuilder, TimestampNanosecondBuilder } from './timestamp';
+export { IntervalBuilder, IntervalDayTimeBuilder, IntervalYearMonthBuilder } from './interval';
+export { Utf8Builder } from './utf8';
+export { BinaryBuilder } from './binary';
+export { ListBuilder } from './list';
+export { FixedSizeListBuilder } from './fixedsizelist';
+export { MapBuilder } from './map';
+export { StructBuilder } from './struct';
+export { UnionBuilder, SparseUnionBuilder, DenseUnionBuilder } from './union';
+import { Type } from '../enum';
+import { Field } from '../schema';
+import { DataType } from '../type';
+import { Utf8Builder } from './utf8';
+import { BuilderType as B } from '../interfaces';
+import { Builder, BuilderOptions } from '../builder';
+import { instance as setVisitor } from '../visitor/set';
+import { instance as getBuilderConstructor } from '../visitor/builderctor';
+/** @nocollapse */
+Builder.new = newBuilder;
+/**
+ * Creates a builder for `options.type` and, when that type has child fields,
+ * recursively creates one child builder per field.
+ *
+ * `options.children` may be an array (looked up by field position) or an object
+ * (looked up by field name). A field without an entry falls back to options that
+ * carry only the parent's `nullValues`.
+ *
+ * @param options Builder options; `options.type` selects the builder constructor.
+ * @returns The new builder, with its child builders pushed onto `builder.children`.
+ */
+function newBuilder<T extends DataType = any, TNull = any>(options: BuilderOptions<T, TNull>): B<T, TNull> {
+ const type = options.type;
+ const builder = new (getBuilderConstructor.getVisitFn<T>(type)())(options) as Builder<T, TNull>;
+ if (type.children && type.children.length > 0) {
+ const children = options['children'] || [] as BuilderOptions[];
+ const defaultOptions = { 'nullValues': options['nullValues'] };
+ // Choose the lookup strategy once, based on the shape of `options.children`.
+ const getChildOptions = Array.isArray(children)
+ ? ((_: Field, i: number) => children[i] || defaultOptions)
+ : (({ name }: Field) => children[name] || defaultOptions);
+ type.children.forEach((field, index) => {
+ const { type } = field;
+ const opts = getChildOptions(field, index);
+ builder.children.push(newBuilder({ ...opts, type }));
+ });
+ }
+ return builder as B<T, TNull>;
+}
+// For every numeric `Type` enum member other than `Type.NONE`, look up the Builder
+// constructor for that type id and install the set-visitor's function for the same
+// type id as `_setValue` on the constructor's prototype.
+(Object.keys(Type) as any[])
+ .map((T: any) => Type[T] as any)
+ .filter((T: any): T is Type => typeof T === 'number' && T !== Type.NONE)
+ .forEach((typeId) => {
+ const BuilderCtor = getBuilderConstructor.visit(typeId);
+ BuilderCtor.prototype._setValue = setVisitor.getVisitFn(typeId);
+ });
+// Runs after the loop above so that Utf8Builder ends up with the set-visitor's binary setter.
+(Utf8Builder.prototype as any)._setValue = setVisitor.visitBinary;
diff --git a/src/arrow/js/src/builder/int.ts b/src/arrow/js/src/builder/int.ts
new file mode 100644
index 000000000..5777bd125
--- /dev/null
+++ b/src/arrow/js/src/builder/int.ts
@@ -0,0 +1,80 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+import { bignumToBigInt } from '../util/bn';
+import { WideBufferBuilder } from './buffer';
+import { BigInt64Array } from '../util/compat';
+import { FixedWidthBuilder, BuilderOptions } from '../builder';
+import { Int, Int8, Int16, Int32, Int64, Uint8, Uint16, Uint32, Uint64 } from '../type';
+/**
+ * Common base class for the integer builders.
+ * @ignore
+ */
+export class IntBuilder<T extends Int = Int, TNull = any> extends FixedWidthBuilder<T, TNull> {
+ /** Writes `value` into the values buffer at `index`. */
+ public setValue(index: number, value: T['TValue']) {
+ this._values.set(index, value);
+ }
+}
+/**
+ * `IntBuilder` specialized to `Int8`; declares no members of its own.
+ * @ignore
+ */
+export class Int8Builder<TNull = any> extends IntBuilder<Int8, TNull> {}
+/**
+ * `IntBuilder` specialized to `Int16`; declares no members of its own.
+ * @ignore
+ */
+export class Int16Builder<TNull = any> extends IntBuilder<Int16, TNull> {}
+/**
+ * `IntBuilder` specialized to `Int32`; declares no members of its own.
+ * @ignore
+ */
+export class Int32Builder<TNull = any> extends IntBuilder<Int32, TNull> {}
+/**
+ * `IntBuilder` specialized to `Int64`, storing its values in a `WideBufferBuilder`.
+ * @ignore
+ */
+export class Int64Builder<TNull = any> extends IntBuilder<Int64, TNull> {
+ protected _values: WideBufferBuilder<Int32Array, BigInt64Array>;
+ /**
+ * @param options Builder options. When `nullValues` is present, each entry is mapped
+ * through `toBigInt` and written back onto `options` before the base constructor runs.
+ */
+ constructor(options: BuilderOptions<Int64, TNull>) {
+ if (options['nullValues']) {
+ options['nullValues'] = (options['nullValues'] as TNull[]).map(toBigInt);
+ }
+ super(options);
+ // NOTE(review): the second argument `2` is presumably the stride (two 32-bit words per value) - confirm against WideBufferBuilder.
+ this._values = new WideBufferBuilder(new Int32Array(0), 2);
+ }
+ /** The values as exposed by the underlying builder's `buffer64`. */
+ public get values64() { return this._values.buffer64; }
+ /** Normalizes `value` with `toBigInt` before running the base validity check. */
+ public isValid(value: Int32Array | bigint | TNull) { return super.isValid(toBigInt(value)); }
+}
+/**
+ * `IntBuilder` specialized to `Uint8`; declares no members of its own.
+ * @ignore
+ */
+export class Uint8Builder<TNull = any> extends IntBuilder<Uint8, TNull> {}
+/**
+ * `IntBuilder` specialized to `Uint16`; declares no members of its own.
+ * @ignore
+ */
+export class Uint16Builder<TNull = any> extends IntBuilder<Uint16, TNull> {}
+/**
+ * `IntBuilder` specialized to `Uint32`; declares no members of its own.
+ * @ignore
+ */
+export class Uint32Builder<TNull = any> extends IntBuilder<Uint32, TNull> {}
+/**
+ * `IntBuilder` specialized to `Uint64`, storing its values in a `WideBufferBuilder`.
+ * @ignore
+ */
+export class Uint64Builder<TNull = any> extends IntBuilder<Uint64, TNull> {
+ protected _values: WideBufferBuilder<Uint32Array, BigUint64Array>;
+ /**
+ * @param options Builder options. When `nullValues` is present, each entry is mapped
+ * through `toBigInt` and written back onto `options` before the base constructor runs.
+ */
+ constructor(options: BuilderOptions<Uint64, TNull>) {
+ if (options['nullValues']) {
+ options['nullValues'] = (options['nullValues'] as TNull[]).map(toBigInt);
+ }
+ super(options);
+ // NOTE(review): the second argument `2` is presumably the stride (two 32-bit words per value) - confirm against WideBufferBuilder.
+ this._values = new WideBufferBuilder(new Uint32Array(0), 2);
+ }
+ /** The values as exposed by the underlying builder's `buffer64`. */
+ public get values64() { return this._values.buffer64; }
+ /** Normalizes `value` with `toBigInt` before running the base validity check. */
+ public isValid(value: Uint32Array | bigint | TNull) { return super.isValid(toBigInt(value)); }
+}
+/**
+ * Returns `value` unchanged unless it is an ArrayBuffer view. For a view, its `buffer`,
+ * `byteOffset` and `byteLength` are copied onto a scratch object, the scratch object is
+ * passed to `bignumToBigInt`, and that result is returned instead.
+ *
+ * The scratch object is created once (seeded with `BigIntArray: BigInt64Array`) and reused
+ * by every call; its `buffer` reference is reset to `null` after each conversion.
+ *
+ * NOTE(review): `Uint64Builder` uses this same `BigInt64Array`-seeded scratch object -
+ * confirm `bignumToBigInt` yields the intended unsigned result there.
+ * @ignore
+ */
+const toBigInt = ((memo: any) => (value: any) => {
+ if (ArrayBuffer.isView(value)) {
+ memo.buffer = value.buffer;
+ memo.byteOffset = value.byteOffset;
+ memo.byteLength = value.byteLength;
+ value = bignumToBigInt(memo);
+ // Drop the reference so the scratch object does not keep the caller's buffer alive.
+ memo.buffer = null;
+ }
+ return value;
+})({ 'BigIntArray': BigInt64Array });
diff --git a/src/arrow/js/src/builder/interval.ts b/src/arrow/js/src/builder/interval.ts
new file mode 100644
index 000000000..374228215
--- /dev/null
+++ b/src/arrow/js/src/builder/interval.ts
@@ -0,0 +1,26 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+import { FixedWidthBuilder } from '../builder';
+import { Interval, IntervalDayTime, IntervalYearMonth } from '../type';
+/**
+ * Common base class for the interval builders: a `FixedWidthBuilder`
+ * constrained to `Interval` types that declares no members of its own.
+ * @ignore
+ */
+export class IntervalBuilder<T extends Interval = Interval, TNull = any> extends FixedWidthBuilder<T, TNull> {}
+/**
+ * `IntervalBuilder` specialized to `IntervalDayTime`; declares no members of its own.
+ * @ignore
+ */
+export class IntervalDayTimeBuilder<TNull = any> extends IntervalBuilder<IntervalDayTime, TNull> {}
+/**
+ * `IntervalBuilder` specialized to `IntervalYearMonth`; declares no members of its own.
+ * @ignore
+ */
+export class IntervalYearMonthBuilder<TNull = any> extends IntervalBuilder<IntervalYearMonth, TNull> {}
diff --git a/src/arrow/js/src/builder/list.ts b/src/arrow/js/src/builder/list.ts
new file mode 100644
index 000000000..844681eae
--- /dev/null
+++ b/src/arrow/js/src/builder/list.ts
@@ -0,0 +1,58 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+import { Run } from './run';
+import { Field } from '../schema';
+import { DataType, List } from '../type';
+import { OffsetsBufferBuilder } from './buffer';
+import { Builder, BuilderOptions, VariableWidthBuilder } from '../builder';
+/**
+ * Builder for `List` values. Holds at most one child builder.
+ * @ignore
+ */
+export class ListBuilder<T extends DataType = any, TNull = any> extends VariableWidthBuilder<List<T>, TNull> {
+ /** Shared wrapper that pending values are bound to while they are flushed. */
+ protected _run = new Run<T, TNull>();
+ protected _offsets: OffsetsBufferBuilder;
+ /** Runs the base constructor, then installs a fresh `OffsetsBufferBuilder`. */
+ constructor(opts: BuilderOptions<List<T>, TNull>) {
+ super(opts);
+ this._offsets = new OffsetsBufferBuilder();
+ }
+ /**
+ * Registers the single child builder and rebuilds `this.type` around a `Field`
+ * created from `(name, child.type, true)`.
+ * @param child Builder for the list elements.
+ * @param name Name of the child field; defaults to `'0'`.
+ * @returns `this.numChildren - 1`, evaluated after the child has been stored.
+ * @throws Error if a child has already been added.
+ */
+ public addChild(child: Builder<T>, name = '0') {
+ if (this.numChildren > 0) {
+ throw new Error('ListBuilder can only have one child.');
+ }
+ this.children[this.numChildren] = child;
+ this.type = new List(new Field(name, child.type, true));
+ return this.numChildren - 1;
+ }
+ /** Clears the shared `Run`, then delegates to the base `clear()`. */
+ public clear() {
+ this._run.clear();
+ return super.clear();
+ }
+ /**
+ * Writes every pending entry: an `undefined` value records 0 in the offsets
+ * builder; any other value records its `length` and is handed to `_setValue`
+ * after being bound to the shared `Run`.
+ * @param pending Pending values keyed by row index.
+ */
+ protected _flushPending(pending: Map<number, T['TValue'] | undefined>) {
+ const run = this._run;
+ const offsets = this._offsets;
+ const setValue = this._setValue;
+ let index = 0, value: Uint8Array | undefined;
+ for ([index, value] of pending) {
+ if (value === undefined) {
+ offsets.set(index, 0);
+ } else {
+ offsets.set(index, value.length);
+ // `_setValue` is called as a plain function with the builder as its first argument.
+ setValue(this, index, run.bind(value));
+ }
+ }
+ }
+}
diff --git a/src/arrow/js/src/builder/map.ts b/src/arrow/js/src/builder/map.ts
new file mode 100644
index 000000000..25affef2c
--- /dev/null
+++ b/src/arrow/js/src/builder/map.ts
@@ -0,0 +1,64 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+import { Field } from '../schema';
+import { DataType, Map_, Struct } from '../type';
+import { Builder, VariableWidthBuilder } from '../builder';
+// The value type of a `Map_<K, V>`.
+/** @ignore */ type MapValue<K extends DataType = any, V extends DataType = any> = Map_<K, V>['TValue'];
+// Pending map values keyed by row index; `undefined` marks an entry with no value.
+/** @ignore */ type MapValues<K extends DataType = any, V extends DataType = any> = Map<number, MapValue<K, V> | undefined>;
+// Input accepted by `MapBuilder`: a `MapValue` or a plain object with string or number keys.
+/** @ignore */ type MapValueExt<K extends DataType = any, V extends DataType = any> = MapValue<K, V> | { [key: string]: V } | { [key: number]: V } ;
+/**
+ * Builder for `Map_` values. Holds at most one child builder.
+ * @ignore
+ */
+export class MapBuilder<K extends DataType = any, V extends DataType = any, TNull = any> extends VariableWidthBuilder<Map_<K, V>, TNull> {
+ /** Values waiting to be flushed, keyed by row index. */
+ protected _pending: MapValues<K, V> | undefined;
+ /** Same as the base `set`, but typed to also accept plain objects. */
+ public set(index: number, value: MapValueExt<K, V> | TNull) {
+ return super.set(index, value as MapValue<K, V> | TNull);
+ }
+ /**
+ * Queues `value` for `index`. Values that are not `Map` instances are converted with
+ * `new Map(Object.entries(value))`. `_pendingLength` is reduced by the size of any
+ * value already queued for `index` and increased by the size of the new one.
+ */
+ public setValue(index: number, value: MapValueExt<K, V>) {
+ value = value instanceof Map ? value : new Map(Object.entries(value));
+ const pending = this._pending || (this._pending = new Map() as MapValues<K, V>);
+ const current = pending.get(index);
+ current && (this._pendingLength -= current.size);
+ this._pendingLength += value.size;
+ pending.set(index, value);
+ }
+ /**
+ * Registers the single child builder and rebuilds `this.type` around a `Field`
+ * created from `(name, child.type, true)`, keeping the current `keysSorted` flag.
+ * @param child Builder for the key/value struct entries.
+ * @param name Name of the child field; defaults to the current child count as a string.
+ * @returns `this.numChildren - 1`, evaluated after the child has been stored.
+ * @throws Error if a child has already been added.
+ */
+ public addChild(child: Builder<Struct<{ key: K; value: V }>>, name = `${this.numChildren}`) {
+ if (this.numChildren > 0) {
+ throw new Error('MapBuilder can only have one child.');
+ }
+ this.children[this.numChildren] = child;
+ this.type = new Map_<K, V>(new Field(name, child.type, true), this.type.keysSorted);
+ return this.numChildren - 1;
+ }
+ /**
+ * Writes every pending entry: an `undefined` value records 0 in the offsets
+ * builder; any other value records its `size` and is handed to `_setValue`.
+ * @param pending Pending values keyed by row index.
+ */
+ protected _flushPending(pending: MapValues<K, V>) {
+ const offsets = this._offsets;
+ const setValue = this._setValue;
+ pending.forEach((value, index) => {
+ if (value === undefined) {
+ offsets.set(index, 0);
+ } else {
+ offsets.set(index, value.size);
+ setValue(this, index, value);
+ }
+ });
+ }
+}
diff --git a/src/arrow/js/src/builder/null.ts b/src/arrow/js/src/builder/null.ts
new file mode 100644
index 000000000..4be3f063b
--- /dev/null
+++ b/src/arrow/js/src/builder/null.ts
@@ -0,0 +1,29 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+import { Null } from '../type';
+import { Builder } from '../builder';
+/**
+ * Builder for `Null` values; it stores no value data and only tracks its length.
+ * @ignore
+ */
+export class NullBuilder<TNull = any> extends Builder<Null, TNull> {
+ /** Intentionally a no-op. */
+ // @ts-ignore
+ public setValue(index: number, value: null) {}
+ /**
+ * Grows `length` to at least `index + 1`.
+ * @returns `valid`, unchanged.
+ */
+ public setValid(index: number, valid: boolean) {
+ this.length = Math.max(index + 1, this.length);
+ return valid;
+ }
+}
diff --git a/src/arrow/js/src/builder/run.ts b/src/arrow/js/src/builder/run.ts
new file mode 100644
index 000000000..5239f51f2
--- /dev/null
+++ b/src/arrow/js/src/builder/run.ts
@@ -0,0 +1,34 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+import { Vector } from '../vector';
+import { DataType } from '../type';
+/**
+ * Thin wrapper exposing `length` and `get(index)` over an array-like of values.
+ * @ignore
+ */
+export class Run<T extends DataType = any, TNull = any> {
+ /** The currently bound values; set by `bind` and reset to `null` by `clear`. */
+ protected _values!: ArrayLike<T['TValue'] | TNull>;
+ /** Length of the currently bound values. */
+ public get length() { return this._values.length; }
+ /** Element of the currently bound values at `index`. */
+ public get(index: number) { return this._values[index]; }
+ /** Drops the reference to the bound values and returns this instance. */
+ public clear() { this._values = <any> null; return this; }
+ /**
+ * Binds `values` to this wrapper.
+ * @returns `values` itself when it is a `Vector`; otherwise this instance after storing `values`.
+ */
+ public bind(values: Vector<T> | ArrayLike<T['TValue'] | TNull>) {
+ if (values instanceof Vector) {
+ return values;
+ }
+ this._values = values;
+ return this as any;
+ }
+}
diff --git a/src/arrow/js/src/builder/struct.ts b/src/arrow/js/src/builder/struct.ts
new file mode 100644
index 000000000..4d12336ce
--- /dev/null
+++ b/src/arrow/js/src/builder/struct.ts
@@ -0,0 +1,29 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+import { Field } from '../schema';
+import { Builder } from '../builder';
+import { DataType, Struct } from '../type';
+/**
+ * Builder for `Struct` values; accepts any number of child builders.
+ * @ignore
+ */
+export class StructBuilder<T extends { [key: string]: DataType } = any, TNull = any> extends Builder<Struct<T>, TNull> {
+ /**
+ * Appends a child builder and rebuilds `this.type` with one more `Field`
+ * created from `(name, child.type, true)`.
+ * @param child Builder for the new struct field.
+ * @param name Name of the new field; defaults to the child count before insertion, as a string.
+ * @returns The index of the child within `this.children`.
+ */
+ public addChild(child: Builder, name = `${this.numChildren}`) {
+ // Array#push returns the new length, so subtract one to obtain the child's index.
+ const childIndex = this.children.push(child) - 1;
+ this.type = new Struct([...this.type.children, new Field(name, child.type, true)]);
+ return childIndex;
+ }
+}
diff --git a/src/arrow/js/src/builder/time.ts b/src/arrow/js/src/builder/time.ts
new file mode 100644
index 000000000..bfa71d2b5
--- /dev/null
+++ b/src/arrow/js/src/builder/time.ts
@@ -0,0 +1,30 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+import { FixedWidthBuilder } from '../builder';
+import { Time, TimeSecond, TimeMillisecond, TimeMicrosecond, TimeNanosecond } from '../type';
+/**
+ * Common base class for the time builders: a `FixedWidthBuilder`
+ * constrained to `Time` types that declares no members of its own.
+ * @ignore
+ */
+export class TimeBuilder<T extends Time = Time, TNull = any> extends FixedWidthBuilder<T, TNull> {}
+/**
+ * `TimeBuilder` specialized to `TimeSecond`; declares no members of its own.
+ * @ignore
+ */
+export class TimeSecondBuilder<TNull = any> extends TimeBuilder<TimeSecond, TNull> {}
+/**
+ * `TimeBuilder` specialized to `TimeMillisecond`; declares no members of its own.
+ * @ignore
+ */
+export class TimeMillisecondBuilder<TNull = any> extends TimeBuilder<TimeMillisecond, TNull> {}
+/**
+ * `TimeBuilder` specialized to `TimeMicrosecond`; declares no members of its own.
+ * @ignore
+ */
+export class TimeMicrosecondBuilder<TNull = any> extends TimeBuilder<TimeMicrosecond, TNull> {}
+/**
+ * `TimeBuilder` specialized to `TimeNanosecond`; declares no members of its own.
+ * @ignore
+ */
+export class TimeNanosecondBuilder<TNull = any> extends TimeBuilder<TimeNanosecond, TNull> {}
diff --git a/src/arrow/js/src/builder/timestamp.ts b/src/arrow/js/src/builder/timestamp.ts
new file mode 100644
index 000000000..49741e9ba
--- /dev/null
+++ b/src/arrow/js/src/builder/timestamp.ts
@@ -0,0 +1,30 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+import { FixedWidthBuilder } from '../builder';
+import { Timestamp, TimestampSecond, TimestampMillisecond, TimestampMicrosecond, TimestampNanosecond } from '../type';
+/**
+ * Common base class for the timestamp builders: a `FixedWidthBuilder`
+ * constrained to `Timestamp` types that declares no members of its own.
+ * @ignore
+ */
+export class TimestampBuilder<T extends Timestamp = Timestamp, TNull = any> extends FixedWidthBuilder<T, TNull> {}
+/**
+ * `TimestampBuilder` specialized to `TimestampSecond`; declares no members of its own.
+ * @ignore
+ */
+export class TimestampSecondBuilder<TNull = any> extends TimestampBuilder<TimestampSecond, TNull> {}
+/**
+ * `TimestampBuilder` specialized to `TimestampMillisecond`; declares no members of its own.
+ * @ignore
+ */
+export class TimestampMillisecondBuilder<TNull = any> extends TimestampBuilder<TimestampMillisecond, TNull> {}
+/**
+ * `TimestampBuilder` specialized to `TimestampMicrosecond`; declares no members of its own.
+ * @ignore
+ */
+export class TimestampMicrosecondBuilder<TNull = any> extends TimestampBuilder<TimestampMicrosecond, TNull> {}
+/**
+ * `TimestampBuilder` specialized to `TimestampNanosecond`; declares no members of its own.
+ * @ignore
+ */
+export class TimestampNanosecondBuilder<TNull = any> extends TimestampBuilder<TimestampNanosecond, TNull> {}
diff --git a/src/arrow/js/src/builder/union.ts b/src/arrow/js/src/builder/union.ts
new file mode 100644
index 000000000..18ac05bf6
--- /dev/null
+++ b/src/arrow/js/src/builder/union.ts
@@ -0,0 +1,96 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+import { Field } from '../schema';
+import { DataBufferBuilder } from './buffer';
+import { Builder, BuilderOptions } from '../builder';
+import { Union, SparseUnion, DenseUnion } from '../type';
+/** Options accepted by the union builders, in addition to the base `BuilderOptions`. */
+export interface UnionBuilderOptions<T extends Union = any, TNull = any> extends BuilderOptions<T, TNull> {
+ /**
+ * Optional resolver used by `UnionBuilder#set` to pick the child type id for a value
+ * when the caller does not pass one. Receives the builder, the value and the row index.
+ */
+ valueToChildTypeId?: (builder: UnionBuilder<T, TNull>, value: any, offset: number) => number;
+}
+/**
+ * Base class for the union builders. Records a child type id for every value it stores.
+ * @ignore
+ */
+export abstract class UnionBuilder<T extends Union, TNull = any> extends Builder<T, TNull> {
+ /** Buffer of child type ids, written by `setValue`. */
+ protected _typeIds: DataBufferBuilder<Int8Array>;
+ /**
+ * @param options Builder options. A function-valued `valueToChildTypeId`
+ * replaces the default resolver, which always throws.
+ */
+ constructor(options: UnionBuilderOptions<T, TNull>) {
+ super(options);
+ this._typeIds = new DataBufferBuilder(new Int8Array(0), 1);
+ if (typeof options['valueToChildTypeId'] === 'function') {
+ this._valueToChildTypeId = options['valueToChildTypeId'];
+ }
+ }
+ /** Shortcut for `this.type.typeIdToChildIndex`. */
+ public get typeIdToChildIndex() { return this.type.typeIdToChildIndex; }
+ /** Sets `value` at the current `length`; see `set`. */
+ public append(value: T['TValue'] | TNull, childTypeId?: number) {
+ return this.set(this.length, value, childTypeId);
+ }
+ /**
+ * Stores `value` at `index` when `setValid` reports it as valid.
+ * @param childTypeId Type id of the child that should receive the value. When omitted it
+ * is resolved through `_valueToChildTypeId`; this happens before the validity check, so
+ * the default resolver throws even for values that would be treated as null.
+ * @returns This builder.
+ */
+ public set(index: number, value: T['TValue'] | TNull, childTypeId?: number) {
+ if (childTypeId === undefined) {
+ childTypeId = this._valueToChildTypeId(this, value, index);
+ }
+ if (this.setValid(index, this.isValid(value))) {
+ this.setValue(index, value, childTypeId);
+ }
+ return this;
+ }
+ /** Records `childTypeId` for `index`, then delegates to the base `setValue`. */
+ public setValue(index: number, value: T['TValue'], childTypeId?: number) {
+ this._typeIds.set(index, childTypeId!);
+ super.setValue(index, value);
+ }
+ /**
+ * Appends a child builder and rebuilds `this.type` with one more field and type id.
+ * @param child Builder for the new union member.
+ * @param name Name of the new field; defaults to the child count before insertion, as a string.
+ * @returns The new child's type id, which is the value returned by `children.push`
+ * (the child count after insertion).
+ */
+ public addChild(child: Builder, name = `${this.children.length}`) {
+ const childTypeId = this.children.push(child);
+ const { type: { children, mode, typeIds } } = this;
+ const fields = [...children, new Field(name, child.type)];
+ this.type = <T> new Union(mode, [...typeIds, childTypeId], fields);
+ return childTypeId;
+ }
+ /**
+ * Default resolver used when no `childTypeId` is supplied; always throws.
+ * @ignore
+ */
+ // @ts-ignore
+ protected _valueToChildTypeId(builder: UnionBuilder<T, TNull>, value: any, offset: number): number {
+ throw new Error(`Cannot map UnionBuilder value to child typeId. \
+Pass the \`childTypeId\` as the second argument to unionBuilder.append(), \
+or supply a \`valueToChildTypeId\` function as part of the UnionBuilder constructor options.`);
+ }
+}
+/**
+ * `UnionBuilder` constrained to `SparseUnion` types; declares no members of its own.
+ * @ignore
+ */
+export class SparseUnionBuilder<T extends SparseUnion, TNull = any> extends UnionBuilder<T, TNull> {}
+/**
+ * `UnionBuilder` constrained to `DenseUnion` types; additionally records an offset for every value.
+ * @ignore
+ */
+export class DenseUnionBuilder<T extends DenseUnion, TNull = any> extends UnionBuilder<T, TNull> {
+ /** Buffer of per-value offsets, written by `setValue`. */
+ protected _offsets: DataBufferBuilder<Int32Array>;
+ constructor(options: UnionBuilderOptions<T, TNull>) {
+ super(options);
+ this._offsets = new DataBufferBuilder(new Int32Array(0));
+ }
+ /**
+ * Records the target child's current `length` as the offset for `index`,
+ * then delegates to `UnionBuilder#setValue`.
+ * @ignore
+ */
+ public setValue(index: number, value: T['TValue'], childTypeId?: number) {
+ const childIndex = this.type.typeIdToChildIndex[childTypeId!];
+ this._offsets.set(index, this.getChildAt(childIndex)!.length);
+ return super.setValue(index, value, childTypeId);
+ }
+}
diff --git a/src/arrow/js/src/builder/utf8.ts b/src/arrow/js/src/builder/utf8.ts
new file mode 100644
index 000000000..7564cdad6
--- /dev/null
+++ b/src/arrow/js/src/builder/utf8.ts
@@ -0,0 +1,44 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+import { Utf8 } from '../type';
+import { encodeUtf8 } from '../util/utf8';
+import { BinaryBuilder } from './binary';
+import { BufferBuilder } from './buffer';
+import { VariableWidthBuilder, BuilderOptions } from '../builder';
+/**
+ * Builder for `Utf8` values. Strings are encoded with `encodeUtf8` before being stored.
+ * @ignore
+ */
+export class Utf8Builder<TNull = any> extends VariableWidthBuilder<Utf8, TNull> {
+ /** Runs the base constructor, then installs a `BufferBuilder` over an empty `Uint8Array`. */
+ constructor(opts: BuilderOptions<Utf8, TNull>) {
+ super(opts);
+ this._values = new BufferBuilder(new Uint8Array(0));
+ }
+ /**
+ * Sum of `_pendingLength`, `length * 4`, and the `byteLength` of each of the
+ * offsets, values and nulls buffers that is currently set.
+ */
+ public get byteLength(): number {
+ let size = this._pendingLength + (this.length * 4);
+ this._offsets && (size += this._offsets.byteLength);
+ this._values && (size += this._values.byteLength);
+ this._nulls && (size += this._nulls.byteLength);
+ return size;
+ }
+ /** Encodes `value` with `encodeUtf8` and passes the result to the base `setValue`. */
+ public setValue(index: number, value: string) {
+ return super.setValue(index, encodeUtf8(value) as any);
+ }
+ /** Placeholder body; replaced on the prototype right after the class declaration. */
+ // @ts-ignore
+ protected _flushPending(pending: Map<number, Uint8Array | undefined>, pendingLength: number): void {}
+}
+// Reuse BinaryBuilder's flush implementation for the encoded values.
+(Utf8Builder.prototype as any)._flushPending = (BinaryBuilder.prototype as any)._flushPending;
diff --git a/src/arrow/js/src/builder/valid.ts b/src/arrow/js/src/builder/valid.ts
new file mode 100644
index 000000000..ae5b799fb
--- /dev/null
+++ b/src/arrow/js/src/builder/valid.ts
@@ -0,0 +1,77 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+import { DataType } from '../type';
+import { valueToString } from '../util/pretty';
+import { BigIntAvailable } from '../util/compat';
+/**
+ * Dynamically compile the null values into an `isValid()` function whose
+ * implementation is a switch statement. Microbenchmarks in v8 indicate
+ * this approach is 25% faster than using an ES6 Map.
+ *
+ * When `nullValues` is missing or empty, the returned function always returns `true`.
+ *
+ * NOTE(review): the function source is assembled from `nullValues` through `valueToCase`
+ * and evaluated with `new Function`; confirm `valueToString` always yields a safely quoted
+ * literal before accepting null values from untrusted input.
+ *
+ * @example
+ * console.log(createIsValidFunction([null, 'N/A', NaN]));
+ * `function (x) {
+ * if (x !== x) return false;
+ * switch (x) {
+ * case null:
+ * case "N/A":
+ * return false;
+ * }
+ * return true;
+ * }`
+ *
+ * @ignore
+ * @param nullValues Values that the returned function should report as invalid.
+ * @returns A predicate returning `false` for any of `nullValues` and `true` otherwise.
+ */
+export function createIsValidFunction<T extends DataType = any, TNull = any>(nullValues?: ReadonlyArray<TNull>) {
+ if (!nullValues || nullValues.length <= 0) {
+ // @ts-ignore
+ return function isValid(value: any) { return true; };
+ }
+ let fnBody = '';
+ // `x === x` is false only for NaN, so this keeps every null value except NaN.
+ const noNaNs = nullValues.filter((x) => x === x);
+ if (noNaNs.length > 0) {
+ fnBody = `
+ switch (x) {${noNaNs.map((x) => `
+ case ${valueToCase(x)}:`).join('')}
+ return false;
+ }`;
+ }
+ // NaN doesn't equal anything including itself, so it doesn't work as a
+ // switch case. Instead we must explicitly check for NaN before the switch.
+ if (nullValues.length !== noNaNs.length) {
+ fnBody = `if (x !== x) return false;\n${fnBody}`;
+ }
+ return new Function(`x`, `${fnBody}\nreturn true;`) as (value: T['TValue'] | TNull) => boolean;
+}
+/**
+ * Renders `x` as the source text of a `case` label.
+ * Non-bigint values use `valueToString(x)` as is. Bigint values get an `n` suffix
+ * when `BigIntAvailable` is truthy and are wrapped in double quotes otherwise.
+ * @ignore
+ */
+function valueToCase(x: any) {
+ if (typeof x !== 'bigint') {
+ return valueToString(x);
+ } else if (BigIntAvailable) {
+ return `${valueToString(x)}n`;
+ }
+ return `"${valueToString(x)}"`;
+}
diff --git a/src/arrow/js/src/column.ts b/src/arrow/js/src/column.ts
new file mode 100644
index 000000000..48b40e5a1
--- /dev/null
+++ b/src/arrow/js/src/column.ts
@@ -0,0 +1,136 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+import { Data } from './data';
+import { Field } from './schema';
+import { DataType } from './type';
+import { Vector } from './vector';
+import { Clonable, Sliceable, Applicative } from './vector';
+import { VectorCtorArgs, VectorType as V } from './interfaces';
+import { Chunked, SearchContinuation } from './vector/chunked';
+/**
+ * Declaration merged with the `Column` class: declares `concat`, `slice` and
+ * `clone` as returning `Column<T>`.
+ */
+export interface Column<T extends DataType = any> {
+    concat(...others: Vector<T>[]): Column<T>;
+    slice(begin?: number, end?: number): Column<T>;
+    clone(chunks?: Vector<T>[], offsets?: Uint32Array): Column<T>;
+}
+/**
+ * A `Chunked` vector paired with the `Field` that describes it.
+ */
+export class Column<T extends DataType = any>
+    extends Chunked<T>
+    implements Clonable<Column<T>>,
+               Sliceable<Column<T>>,
+               Applicative<T, Column<T>> {
+
+    public static new<T extends DataType>(data: Data<T>, ...args: VectorCtorArgs<V<T>>): Column<T>;
+    public static new<T extends DataType>(field: string | Field<T>, ...chunks: (Vector<T> | Vector<T>[])[]): Column<T>;
+    public static new<T extends DataType>(field: string | Field<T>, data: Data<T>, ...args: VectorCtorArgs<V<T>>): Column<T>;
+    /**
+     * Build a Column from an optional field (or field name) followed by a
+     * `Data`, a `Vector`, or arrays of them.
+     * @nocollapse
+     */
+    public static new<T extends DataType = any>(...args: any[]) {
+
+        let [field, data, ...rest] = args as [
+            string | Field<T>,
+            Data<T> | Vector<T> | (Data<T> | Vector<T>)[],
+            ...any[]
+        ];
+
+        // No field given: the first argument is the data, and the name defaults to ''.
+        if (typeof field !== 'string' && !(field instanceof Field)) {
+            data = <Data<T> | Vector<T> | (Data<T> | Vector<T>)[]> field;
+            field = '';
+        }
+
+        const chunks = Chunked.flatten<T>(
+            Array.isArray(data) ? [...data, ...rest] :
+            data instanceof Vector ? [data, ...rest] :
+            [Vector.new(data, ...rest)]
+        );
+
+        if (typeof field === 'string') {
+            // Only a name was supplied: take the type from the first chunk.
+            const type = chunks[0].data.type;
+            field = new Field(field, type, true);
+        } else if (!field.nullable && chunks.some(({ nullCount }) => nullCount > 0)) {
+            // A non-nullable field cannot describe chunks that contain nulls.
+            field = field.clone({ nullable: true });
+        }
+        return new Column(field, chunks);
+    }
+
+    /**
+     * @param field Field describing this column.
+     * @param vectors Chunks of the column; flattened with `Chunked.flatten`.
+     * @param offsets Optional chunk offsets forwarded to `Chunked`.
+     */
+    constructor(field: Field<T>, vectors: Vector<T>[] = [], offsets?: Uint32Array) {
+        vectors = Chunked.flatten<T>(...vectors);
+        super(field.type, vectors, offsets);
+        this._field = field;
+        // With exactly one chunk, hand back the SingleChunkColumn specialization instead.
+        if (vectors.length === 1 && !(this instanceof SingleChunkColumn)) {
+            return new SingleChunkColumn(field, vectors[0], this._chunkOffsets);
+        }
+    }
+
+    protected _field: Field<T>;
+    /** Cache of child columns created by `getChildAt`, keyed by child index. */
+    protected _children?: Column[];
+
+    public get field() { return this._field; }
+    public get name() { return this._field.name; }
+    public get nullable() { return this._field.nullable; }
+    public get metadata() { return this._field.metadata; }
+
+    /** Create a Column with the same field over `chunks` (defaults to this column's chunks). */
+    public clone(chunks = this._chunks) {
+        return new Column(this._field, chunks);
+    }
+
+    /**
+     * Return the child column at `index`, built from the matching child of
+     * every chunk and cached for later calls.
+     * @returns The child column, or `null` when `index` is out of range, the
+     * type has no child field there, or no chunk yields a child vector.
+     */
+    public getChildAt<R extends DataType = any>(index: number): Column<R> | null {
+
+        if (index < 0 || index >= this.numChildren) { return null; }
+
+        const columns = this._children || (this._children = []);
+        let column: Column<R>, field: Field<R>, chunks: Vector<R>[];
+
+        if (column = columns[index]) { return column; }
+        if (field = ((this.type.children || [])[index] as Field<R>)) {
+            chunks = this._chunks
+                .map((vector) => vector.getChildAt<R>(index))
+                .filter((vec): vec is Vector<R> => vec != null);
+            if (chunks.length > 0) {
+                return (columns[index] = new Column<R>(field, chunks));
+            }
+        }
+
+        return null;
+    }
+}
+/**
+ * Column backed by exactly one chunk: lookups go straight to that chunk
+ * instead of searching the chunk list.
+ * @ignore
+ */
+class SingleChunkColumn<T extends DataType = any> extends Column<T> {
+    protected _chunk: Vector<T>;
+    constructor(field: Field<T>, vector: Vector<T>, offsets?: Uint32Array) {
+        super(field, [vector], offsets);
+        this._chunk = vector;
+    }
+    public search(index: number): [number, number] | null;
+    public search<N extends SearchContinuation<Chunked<T>>>(index: number, then?: N): ReturnType<N>;
+    public search<N extends SearchContinuation<Chunked<T>>>(index: number, then?: N) {
+        // The only chunk is chunk 0, so `index` is already the index within it.
+        return then ? then(this, 0, index) : [0, index];
+    }
+    public isValid(index: number): boolean {
+        return this._chunk.isValid(index);
+    }
+    public get(index: number): T['TValue'] | null {
+        return this._chunk.get(index);
+    }
+    public set(index: number, value: T['TValue'] | null): void {
+        this._chunk.set(index, value);
+    }
+    public indexOf(element: T['TValue'], offset?: number): number {
+        return this._chunk.indexOf(element, offset);
+    }
+}
diff --git a/src/arrow/js/src/compute/dataframe.ts b/src/arrow/js/src/compute/dataframe.ts
new file mode 100644
index 000000000..e9df37194
--- /dev/null
+++ b/src/arrow/js/src/compute/dataframe.ts
@@ -0,0 +1,288 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+import { Table } from '../table';
+import { Vector } from '../vector';
+import { IntVector } from '../vector/int';
+import { Field, Schema } from '../schema';
+import { Predicate, Col, PredicateFunc } from './predicate';
+import { RecordBatch } from '../recordbatch';
+import { VectorType as V } from '../interfaces';
+import { DataType, Int, Struct, Dictionary } from '../type';
+/**
+ * Callback that receives a record batch; the scan methods call it before
+ * passing row indices of that batch to the `NextFunc`.
+ * @ignore
+ */
+export type BindFunc = (batch: RecordBatch) => void;
+/**
+ * Callback that receives a row index together with the record batch the
+ * index refers to.
+ * @ignore
+ */
+export type NextFunc = (idx: number, batch: RecordBatch) => void;
+/**
+ * `DataFrame` extends {@link Table} with support for predicate filtering.
+ *
+ * You can construct `DataFrames` like tables or convert a `Table` to a `DataFrame`
+ * with the constructor.
+ *
+ * ```ts
+ * const df = new DataFrame(table);
+ * ```
+ */
+export class DataFrame<T extends { [key: string]: DataType } = any> extends Table<T> {
+    /** Return a view over the same batches restricted to rows matching `predicate`. */
+    public filter(predicate: Predicate): FilteredDataFrame<T> {
+        return new FilteredDataFrame<T>(this.chunks, predicate);
+    }
+
+    /**
+     * Visit every row, first batch to last and first row to last.
+     * @param next Called once per row with the row index and its batch.
+     * @param bind Optional; called once per batch before that batch's rows.
+     */
+    public scan(next: NextFunc, bind?: BindFunc) {
+        const batches = this.chunks, numBatches = batches.length;
+        for (let batchIndex = -1; ++batchIndex < numBatches;) {
+            // load batches
+            const batch = batches[batchIndex];
+            if (bind) { bind(batch); }
+            // yield all indices
+            for (let index = -1, numRows = batch.length; ++index < numRows;) {
+                next(index, batch);
+            }
+        }
+    }
+
+    /** Same as `scan`, but visits batches last to first and rows last to first. */
+    public scanReverse(next: NextFunc, bind?: BindFunc) {
+        const batches = this.chunks, numBatches = batches.length;
+        for (let batchIndex = numBatches; --batchIndex >= 0;) {
+            // load batches
+            const batch = batches[batchIndex];
+            if (bind) { bind(batch); }
+            // yield all indices
+            for (let index = batch.length; --index >= 0;) {
+                next(index, batch);
+            }
+        }
+    }
+
+    /**
+     * Count how many rows carry each dictionary key of a dictionary-encoded column.
+     * @param name Column (or column name) to count by.
+     * @returns A `CountByResult` pairing the dictionary values with their counts.
+     * @throws Error when the column is not dictionary-encoded.
+     */
+    public countBy(name: Col | string) {
+        const batches = this.chunks, numBatches = batches.length;
+        const count_by = typeof name === 'string' ? new Col(name) : name as Col;
+        // Assume that all dictionary batches are deltas, which means that the
+        // last record batch has the most complete dictionary
+        count_by.bind(batches[numBatches - 1]);
+        const vector = count_by.vector as V<Dictionary>;
+        if (!DataType.isDictionary(vector.type)) {
+            throw new Error('countBy currently only supports dictionary-encoded columns');
+        }
+
+        // A key is counted at most once per row, so the total row count across
+        // all batches bounds every counter. Size the counters from that bound
+        // so they cannot wrap around.
+        let totalRows = 0;
+        for (let batchIndex = -1; ++batchIndex < numBatches;) {
+            totalRows += batches[batchIndex].length;
+        }
+        const CountsArrayType = totalRows > 0xFFFF ? Uint32Array :
+                                totalRows > 0xFF ? Uint16Array : Uint8Array;
+
+        const counts = new CountsArrayType(vector.dictionary.length);
+        for (let batchIndex = -1; ++batchIndex < numBatches;) {
+            // load batches
+            const batch = batches[batchIndex];
+            // rebind the countBy Col
+            count_by.bind(batch);
+            const keys = (count_by.vector as V<Dictionary>).indices;
+            // yield all indices
+            for (let index = -1, numRows = batch.length; ++index < numRows;) {
+                const key = keys.get(index);
+                if (key !== null) { counts[key]++; }
+            }
+        }
+        return new CountByResult(vector.dictionary, IntVector.from(counts));
+    }
+}
+/**
+ * Two-column table (`values`, `counts`) produced by `countBy`.
+ * @ignore
+ */
+export class CountByResult<T extends DataType = any, TCount extends Int = Int> extends Table<{ values: T; counts: TCount }> {
+    constructor(values: Vector<T>, counts: V<TCount>) {
+        type R = { values: T; counts: TCount };
+        const schema = new Schema<R>([
+            new Field('values', values.type),
+            new Field('counts', counts.type)
+        ]);
+        super(new RecordBatch<R>(schema, counts.length, [values, counts]));
+    }
+    /** Return a plain object mapping each entry of `values` to the matching entry of `counts`. */
+    public toJSON(): Record<string, unknown> {
+        const values = this.getColumnAt(0)!;
+        const counts = this.getColumnAt(1)!;
+        const result = {} as { [k: string]: number | null };
+        for (let i = -1; ++i < this.length;) {
+            result[values.get(i)] = counts.get(i);
+        }
+        return result;
+    }
+}
+/**
+ * Iterator over the rows of a list of record batches that satisfy a predicate.
+ * @ignore
+ */
+class FilteredBatchIterator<T extends { [key: string]: DataType }> implements IterableIterator<Struct<T>['TValue']> {
+    private batchIndex = 0;
+    private batch: RecordBatch<T>;
+    private index = 0;
+    private predicateFunc: PredicateFunc;
+
+    constructor(
+        private batches: RecordBatch<T>[],
+        private predicate: Predicate
+    ) {
+        // TODO: bind batches lazily
+        // If predicate doesn't match anything in the batch we don't need
+        // to bind the callback
+        this.batch = this.batches[this.batchIndex];
+        // With no batches there is nothing to bind to; `next()` then reports
+        // `done` without ever invoking the predicate.
+        this.predicateFunc = this.batch ? this.predicate.bind(this.batch) : () => false;
+    }
+
+    /** Advance to the next row for which the predicate holds, moving across batches as needed. */
+    next(): IteratorResult<Struct<T>['TValue']> {
+        while (this.batchIndex < this.batches.length) {
+            while (this.index < this.batch.length) {
+                if (this.predicateFunc(this.index, this.batch)) {
+                    return {
+                        value: this.batch.get(this.index++) as any,
+                    };
+                }
+                this.index++;
+            }
+
+            // Current batch exhausted: rebind the predicate to the next one, if any.
+            if (++this.batchIndex < this.batches.length) {
+                this.index = 0;
+                this.batch = this.batches[this.batchIndex];
+                this.predicateFunc = this.predicate.bind(this.batch);
+            }
+        }
+
+        return {done: true, value: null};
+    }
+
+    [Symbol.iterator]() {
+        return this;
+    }
+}
+/**
+ * A `DataFrame` whose scans, counts and iteration only see the rows that
+ * satisfy a predicate.
+ * @ignore
+ */
+export class FilteredDataFrame<T extends { [key: string]: DataType } = any> extends DataFrame<T> {
+    private _predicate: Predicate;
+    constructor (batches: RecordBatch<T>[], predicate: Predicate) {
+        super(batches);
+        this._predicate = predicate;
+    }
+
+    /**
+     * Visit every matching row, first batch to last and first row to last.
+     * @param next Called once per matching row with the row index and its batch.
+     * @param bind Optional; called at most once per batch, right before the
+     * first matching row of that batch.
+     */
+    public scan(next: NextFunc, bind?: BindFunc) {
+        // inlined version of this:
+        // this.parent.scan((idx, columns) => {
+        //     if (this.predicate(idx, columns)) next(idx, columns);
+        // });
+        const batches = this._chunks;
+        const numBatches = batches.length;
+        for (let batchIndex = -1; ++batchIndex < numBatches;) {
+            // load batches
+            const batch = batches[batchIndex];
+            const predicate = this._predicate.bind(batch);
+            let isBound = false;
+            // yield all indices
+            for (let index = -1, numRows = batch.length; ++index < numRows;) {
+                if (predicate(index, batch)) {
+                    // bind batches lazily - if predicate doesn't match anything
+                    // in the batch we don't need to call bind on the batch
+                    if (bind && !isBound) {
+                        bind(batch);
+                        isBound = true;
+                    }
+                    next(index, batch);
+                }
+            }
+        }
+    }
+
+    /** Same as `scan`, but visits batches last to first and rows last to first. */
+    public scanReverse(next: NextFunc, bind?: BindFunc) {
+        const batches = this._chunks;
+        const numBatches = batches.length;
+        for (let batchIndex = numBatches; --batchIndex >= 0;) {
+            // load batches
+            const batch = batches[batchIndex];
+            const predicate = this._predicate.bind(batch);
+            let isBound = false;
+            // yield all indices
+            for (let index = batch.length; --index >= 0;) {
+                if (predicate(index, batch)) {
+                    // bind batches lazily - if predicate doesn't match anything
+                    // in the batch we don't need to call bind on the batch
+                    if (bind && !isBound) {
+                        bind(batch);
+                        isBound = true;
+                    }
+                    next(index, batch);
+                }
+            }
+        }
+    }
+
+    /** Number of rows that satisfy the predicate. */
+    public count(): number {
+        // inlined version of this:
+        // let sum = 0;
+        // this.parent.scan((idx, columns) => {
+        //     if (this.predicate(idx, columns)) ++sum;
+        // });
+        // return sum;
+        let sum = 0;
+        const batches = this._chunks;
+        const numBatches = batches.length;
+        for (let batchIndex = -1; ++batchIndex < numBatches;) {
+            // load batches
+            const batch = batches[batchIndex];
+            const predicate = this._predicate.bind(batch);
+            for (let index = -1, numRows = batch.length; ++index < numRows;) {
+                if (predicate(index, batch)) { ++sum; }
+            }
+        }
+        return sum;
+    }
+
+    public [Symbol.iterator](): IterableIterator<Struct<T>['TValue']> {
+        // inlined version of this:
+        // this.parent.scan((idx, columns) => {
+        //     if (this.predicate(idx, columns)) next(idx, columns);
+        // });
+        return new FilteredBatchIterator<T>(this._chunks, this._predicate);
+    }
+
+    /** Narrow further: the result keeps rows matching both the current predicate and `predicate`. */
+    public filter(predicate: Predicate): FilteredDataFrame<T> {
+        return new FilteredDataFrame<T>(
+            this._chunks,
+            this._predicate.and(predicate)
+        );
+    }
+
+    /**
+     * Count how many matching rows carry each dictionary key of a
+     * dictionary-encoded column.
+     * @param name Column (or column name) to count by.
+     * @returns A `CountByResult` pairing the dictionary values with their counts.
+     * @throws Error when the column is not dictionary-encoded.
+     */
+    public countBy(name: Col | string) {
+        const batches = this._chunks, numBatches = batches.length;
+        const count_by = typeof name === 'string' ? new Col(name) : name as Col;
+        // Assume that all dictionary batches are deltas, which means that the
+        // last record batch has the most complete dictionary
+        count_by.bind(batches[numBatches - 1]);
+        const vector = count_by.vector as V<Dictionary>;
+        if (!DataType.isDictionary(vector.type)) {
+            throw new Error('countBy currently only supports dictionary-encoded columns');
+        }
+
+        // A key is counted at most once per row, so the total row count across
+        // all batches bounds every counter. Size the counters from that bound
+        // so they cannot wrap around.
+        let totalRows = 0;
+        for (let batchIndex = -1; ++batchIndex < numBatches;) {
+            totalRows += batches[batchIndex].length;
+        }
+        const CountsArrayType = totalRows > 0xFFFF ? Uint32Array :
+                                totalRows > 0xFF ? Uint16Array : Uint8Array;
+
+        const counts = new CountsArrayType(vector.dictionary.length);
+        for (let batchIndex = -1; ++batchIndex < numBatches;) {
+            // load batches
+            const batch = batches[batchIndex];
+            const predicate = this._predicate.bind(batch);
+            // rebind the countBy Col
+            count_by.bind(batch);
+            const keys = (count_by.vector as V<Dictionary>).indices;
+            // yield all indices
+            for (let index = -1, numRows = batch.length; ++index < numRows;) {
+                const key = keys.get(index);
+                if (key !== null && predicate(index, batch)) { counts[key]++; }
+            }
+        }
+        return new CountByResult(vector.dictionary, IntVector.from(counts));
+    }
+}
diff --git a/src/arrow/js/src/compute/predicate.ts b/src/arrow/js/src/compute/predicate.ts
new file mode 100644
index 000000000..52030763d
--- /dev/null
+++ b/src/arrow/js/src/compute/predicate.ts
@@ -0,0 +1,292 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+import { Vector } from '../vector';
+import { RecordBatch } from '../recordbatch';
+import { DictionaryVector } from '../vector/dictionary';
+/**
+ * Function that, given a row index and a record batch, yields a value or `null`.
+ * @ignore
+ */
+export type ValueFunc<T> = (idx: number, cols: RecordBatch) => T | null;
+/**
+ * Function that, given a row index and a record batch, answers whether the row passes.
+ * @ignore
+ */
+export type PredicateFunc = (idx: number, cols: RecordBatch) => boolean;
+/**
+ * Base class of the operands of a comparison. Each method builds a predicate
+ * comparing this value with `other`; an `other` that is not already a `Value`
+ * is wrapped in a `Literal`.
+ * @ignore
+ */
+export abstract class Value<T> {
+    eq(other: Value<T> | T): Predicate {
+        if (!(other instanceof Value)) { other = new Literal(other); }
+        return new Equals(this, other);
+    }
+    le(other: Value<T> | T): Predicate {
+        if (!(other instanceof Value)) { other = new Literal(other); }
+        return new LTeq(this, other);
+    }
+    ge(other: Value<T> | T): Predicate {
+        if (!(other instanceof Value)) { other = new Literal(other); }
+        return new GTeq(this, other);
+    }
+    // The strict comparisons and `ne` are expressed as negations of the three above.
+    lt(other: Value<T> | T): Predicate {
+        return new Not(this.ge(other));
+    }
+    gt(other: Value<T> | T): Predicate {
+        return new Not(this.le(other));
+    }
+    ne(other: Value<T> | T): Predicate {
+        return new Not(this.eq(other));
+    }
+}
+/**
+ * A constant operand; the wrapped value is exposed as `v`.
+ * @ignore
+ */
+export class Literal<T= any> extends Value<T> {
+    constructor(public v: T) { super(); }
+}
+/**
+ * An operand that refers to a column by name. `bind` resolves the name against
+ * a record batch and records the matching child vector in `vector`.
+ * @ignore
+ */
+export class Col<T= any> extends Value<T> {
+    public vector!: Vector;
+    public colidx!: number;
+
+    constructor(public name: string) { super(); }
+
+    /**
+     * Resolve this column in `batch`.
+     * @returns A function mapping a row index to that row's value in the column.
+     * @throws Error when no field of the batch's schema is named `name`.
+     */
+    bind(batch: RecordBatch): (idx: number, batch?: RecordBatch) => any {
+        // Search only while no valid index is cached. The comparison is false for
+        // `undefined` (never bound) and for -1 (a previous bind failed), and true
+        // for index 0, which a plain truthiness test would needlessly re-resolve.
+        if (!(this.colidx >= 0)) {
+            // Assume column index doesn't change between calls to bind
+            this.colidx = -1;
+            const fields = batch.schema.fields;
+            for (let idx = -1; ++idx < fields.length;) {
+                if (fields[idx].name === this.name) {
+                    this.colidx = idx;
+                    break;
+                }
+            }
+            if (this.colidx < 0) { throw new Error(`Failed to bind Col "${this.name}"`); }
+        }
+
+        const vec = this.vector = batch.getChildAt(this.colidx)!;
+        return (idx: number) => vec.get(idx);
+    }
+}
+/**
+ * Base class of all predicates. `bind` turns the predicate into a row-level
+ * test for one record batch; `and`, `or` and `not` compose predicates.
+ * @ignore
+ */
+export abstract class Predicate {
+    abstract bind(batch: RecordBatch): PredicateFunc;
+    and(...expr: Predicate[]): And { return new And(this, ...expr); }
+    or(...expr: Predicate[]): Or { return new Or(this, ...expr); }
+    not(): Predicate { return new Not(this); }
+}
+/**
+ * Base class of the two-operand comparisons. `bind` dispatches to one of four
+ * hooks depending on whether each operand is a `Literal`; a non-literal
+ * operand is treated as a `Col`.
+ * @ignore
+ */
+export abstract class ComparisonPredicate<T= any> extends Predicate {
+    constructor(public readonly left: Value<T>, public readonly right: Value<T>) {
+        super();
+    }
+
+    bind(batch: RecordBatch) {
+        if (this.left instanceof Literal) {
+            if (this.right instanceof Literal) {
+                return this._bindLitLit(batch, this.left, this.right);
+            } else { // right is a Col
+                return this._bindLitCol(batch, this.left, this.right as Col);
+            }
+        } else { // left is a Col
+            if (this.right instanceof Literal) {
+                return this._bindColLit(batch, this.left as Col, this.right);
+            } else { // right is a Col
+                return this._bindColCol(batch, this.left as Col, this.right as Col);
+            }
+        }
+    }
+
+    protected abstract _bindLitLit(batch: RecordBatch, left: Literal, right: Literal): PredicateFunc;
+    protected abstract _bindColCol(batch: RecordBatch, left: Col, right: Col): PredicateFunc;
+    protected abstract _bindColLit(batch: RecordBatch, col: Col, lit: Literal): PredicateFunc;
+    protected abstract _bindLitCol(batch: RecordBatch, lit: Literal, col: Col): PredicateFunc;
+}
+/**
+ * Base class of predicates that combine a list of child predicates.
+ * @ignore
+ */
+export abstract class CombinationPredicate extends Predicate {
+    readonly children: Predicate[];
+    constructor(...children: Predicate[]) {
+        super();
+        this.children = children;
+    }
+}
+// add children to prototype so it doesn't get mangled in es2015/umd
+(<any> CombinationPredicate.prototype).children = Object.freeze([]); // freeze for safety
+/**
+ * Predicate that holds for a row when every child predicate holds.
+ * @ignore
+ */
+export class And extends CombinationPredicate {
+    constructor(...children: Predicate[]) {
+        // Flatten any Ands
+        children = children.reduce((accum: Predicate[], p: Predicate): Predicate[] => {
+            return accum.concat(p instanceof And ? p.children : p);
+        }, []);
+        super(...children);
+    }
+    bind(batch: RecordBatch) {
+        // Bind every child once per batch, then test them per row.
+        const bound = this.children.map((p) => p.bind(batch));
+        return (idx: number, batch: RecordBatch) => bound.every((p) => p(idx, batch));
+    }
+}
+/**
+ * Predicate that holds for a row when at least one child predicate holds.
+ * @ignore
+ */
+export class Or extends CombinationPredicate {
+    constructor(...children: Predicate[]) {
+        // Flatten any Ors
+        children = children.reduce((accum: Predicate[], p: Predicate): Predicate[] => {
+            return accum.concat(p instanceof Or ? p.children : p);
+        }, []);
+        super(...children);
+    }
+    bind(batch: RecordBatch) {
+        // Bind every child once per batch, then test them per row.
+        const bound = this.children.map((p) => p.bind(batch));
+        return (idx: number, batch: RecordBatch) => bound.some((p) => p(idx, batch));
+    }
+}
+/**
+ * Loose-equality (`==`) comparison of two operands. Comparing a
+ * dictionary-encoded column with a literal compares dictionary keys instead
+ * of decoded values.
+ * @ignore
+ */
+export class Equals extends ComparisonPredicate {
+    // Helpers used to cache dictionary reverse lookups between calls to bind
+    private lastDictionary: Vector|undefined;
+    private lastKey: number|undefined;
+
+    protected _bindLitLit(_batch: RecordBatch, left: Literal, right: Literal): PredicateFunc {
+        // Both sides are constants, so the answer is computed once.
+        const rtrn: boolean = left.v == right.v;
+        return () => rtrn;
+    }
+
+    protected _bindColCol(batch: RecordBatch, left: Col, right: Col): PredicateFunc {
+        const left_func = left.bind(batch);
+        const right_func = right.bind(batch);
+        return (idx: number, batch: RecordBatch) => left_func(idx, batch) == right_func(idx, batch);
+    }
+
+    protected _bindColLit(batch: RecordBatch, col: Col, lit: Literal): PredicateFunc {
+        const col_func = col.bind(batch);
+        if (col.vector instanceof DictionaryVector) {
+            let key: any;
+            const vector = col.vector as DictionaryVector;
+            // Redo the reverse lookup only when the dictionary differs from the
+            // one seen on the previous bind.
+            if (vector.dictionary !== this.lastDictionary) {
+                key = vector.reverseLookup(lit.v);
+                this.lastDictionary = vector.dictionary;
+                this.lastKey = key;
+            } else {
+                key = this.lastKey;
+            }
+
+            if (key === -1) {
+                // the value doesn't exist in the dictionary - always return
+                // false
+                // TODO: special-case of PredicateFunc that encapsulates this
+                // "always false" behavior. That way filtering operations don't
+                // have to bother checking
+                return () => false;
+            } else {
+                return (idx: number) => {
+                    return vector.getKey(idx) === key;
+                };
+            }
+        } else {
+            return (idx: number, cols: RecordBatch) => col_func(idx, cols) == lit.v;
+        }
+    }
+
+    protected _bindLitCol(batch: RecordBatch, lit: Literal, col: Col) {
+        // Equals is commutative
+        return this._bindColLit(batch, col, lit);
+    }
+}
+/**
+ * Less-than-or-equal (`<=`) comparison of the left operand with the right operand.
+ * @ignore
+ */
+export class LTeq extends ComparisonPredicate {
+    protected _bindLitLit(_batch: RecordBatch, left: Literal, right: Literal): PredicateFunc {
+        // Both sides are constants, so the answer is computed once.
+        const rtrn: boolean = left.v <= right.v;
+        return () => rtrn;
+    }
+
+    protected _bindColCol(batch: RecordBatch, left: Col, right: Col): PredicateFunc {
+        const left_func = left.bind(batch);
+        const right_func = right.bind(batch);
+        return (idx: number, cols: RecordBatch) => left_func(idx, cols) <= right_func(idx, cols);
+    }
+
+    protected _bindColLit(batch: RecordBatch, col: Col, lit: Literal): PredicateFunc {
+        const col_func = col.bind(batch);
+        return (idx: number, cols: RecordBatch) => col_func(idx, cols) <= lit.v;
+    }
+
+    protected _bindLitCol(batch: RecordBatch, lit: Literal, col: Col) {
+        const col_func = col.bind(batch);
+        return (idx: number, cols: RecordBatch) => lit.v <= col_func(idx, cols);
+    }
+}
+/**
+ * Greater-than-or-equal (`>=`) comparison of the left operand with the right operand.
+ * @ignore
+ */
+export class GTeq extends ComparisonPredicate {
+    protected _bindLitLit(_batch: RecordBatch, left: Literal, right: Literal): PredicateFunc {
+        // Both sides are constants, so the answer is computed once.
+        const rtrn: boolean = left.v >= right.v;
+        return () => rtrn;
+    }
+
+    protected _bindColCol(batch: RecordBatch, left: Col, right: Col): PredicateFunc {
+        const left_func = left.bind(batch);
+        const right_func = right.bind(batch);
+        return (idx: number, cols: RecordBatch) => left_func(idx, cols) >= right_func(idx, cols);
+    }
+
+    protected _bindColLit(batch: RecordBatch, col: Col, lit: Literal): PredicateFunc {
+        const col_func = col.bind(batch);
+        return (idx: number, cols: RecordBatch) => col_func(idx, cols) >= lit.v;
+    }
+
+    protected _bindLitCol(batch: RecordBatch, lit: Literal, col: Col) {
+        const col_func = col.bind(batch);
+        return (idx: number, cols: RecordBatch) => lit.v >= col_func(idx, cols);
+    }
+}
+/**
+ * Predicate that holds for a row exactly when its child predicate does not.
+ * @ignore
+ */
+export class Not extends Predicate {
+    constructor(public readonly child: Predicate) {
+        super();
+    }
+
+    bind(batch: RecordBatch) {
+        const func = this.child.bind(batch);
+        return (idx: number, batch: RecordBatch) => !func(idx, batch);
+    }
+}
+/**
+ * Predicate backed by caller-supplied functions: `bind_` is invoked with each
+ * batch, and `next` is the row-level test handed back from `bind`.
+ * @ignore
+ */
+export class CustomPredicate extends Predicate {
+    constructor(private next: PredicateFunc, private bind_: (batch: RecordBatch) => void) {
+        super();
+    }
+
+    bind(batch: RecordBatch) {
+        this.bind_(batch);
+        return this.next;
+    }
+}
+/** Wrap `v` in a `Literal`. */
+export function lit(v: any): Value<any> { return new Literal(v); }
+/** Create a `Col` referring to the column named `n`. */
+export function col(n: string): Col<any> { return new Col(n); }
+/** Create an `And` over the given predicates. */
+export function and(...p: Predicate[]): And { return new And(...p); }
+/** Create an `Or` over the given predicates. */
+export function or(...p: Predicate[]): Or { return new Or(...p); }
+/**
+ * Create a `CustomPredicate` from a row-level test and a per-batch bind callback.
+ * @param next Row-level test.
+ * @param bind Callback that receives each record batch the predicate is bound to.
+ */
+export function custom(next: PredicateFunc, bind: (batch: RecordBatch) => void) {
+    return new CustomPredicate(next, bind);
+}
diff --git a/src/arrow/js/src/data.ts b/src/arrow/js/src/data.ts
new file mode 100644
index 000000000..2a549088c
--- /dev/null
+++ b/src/arrow/js/src/data.ts
@@ -0,0 +1,295 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+import { Vector } from './vector';
+import { truncateBitmap } from './util/bit';
+import { popcnt_bit_range } from './util/bit';
+import { BufferType, UnionMode, Type } from './enum';
+import { DataType, SparseUnion, DenseUnion, strideForType } from './type';
+import { toArrayBufferView, toUint8Array, toInt32Array } from './util/buffer';
+import {
+ Dictionary,
+ Null, Int, Float,
+ Binary, Bool, Utf8, Decimal,
+ Date_, Time, Timestamp, Interval,
+ List, Struct, Union, FixedSizeBinary, FixedSizeList, Map_,
+} from './type';
+// When slicing, we do not know the null count of the sliced range without
+// doing some computation. To avoid doing this eagerly, we set the null count
+// to -1 (any negative number will do). When Vector.nullCount is called the
+// first time, the null count will be computed. See ARROW-33
+/** @ignore */ export type kUnknownNullCount = -1;
+/** @ignore */ export const kUnknownNullCount = -1;
+/** @ignore */ export type NullBuffer = Uint8Array | null | undefined;
+/** @ignore */ export type TypeIdsBuffer = Int8Array | ArrayLike<number> | Iterable<number> | undefined;
+/** @ignore */ export type ValueOffsetsBuffer = Int32Array | ArrayLike<number> | Iterable<number> | undefined;
+/** @ignore */ export type DataBuffer<T extends DataType> = T['TArray'] | ArrayLike<number> | Iterable<number> | undefined;
+/**
+ * The buffers a `Data` may hold, keyed by `BufferType`.
+ * @ignore
+ */
+export interface Buffers<T extends DataType> {
+    [BufferType.OFFSET]: Int32Array;
+    [BufferType.DATA]: T['TArray'];
+    [BufferType.VALIDITY]: Uint8Array;
+    [BufferType.TYPE]: T['TArray'];
+}
+/**
+ * Type-level members of `Data`, mirrored from the `DataType` parameter `T`.
+ * @ignore
+ */
+export interface Data<T extends DataType = DataType> {
+    readonly TType: T['TType'];
+    readonly TArray: T['TArray'];
+    readonly TValue: T['TValue'];
+}
+/** @ignore */
+export class Data<T extends DataType = DataType> {
+ public readonly type: T;
+ public readonly length: number;
+ public readonly offset: number;
+ public readonly stride: number;
+ public readonly childData: Data[];
+ /**
+ * The dictionary for this Vector, if any. Only used for Dictionary type.
+ */
+ public dictionary?: Vector;
+ public readonly values!: Buffers<T>[BufferType.DATA];
+ public readonly typeIds!: Buffers<T>[BufferType.TYPE];
+ public readonly nullBitmap!: Buffers<T>[BufferType.VALIDITY];
+ public readonly valueOffsets!: Buffers<T>[BufferType.OFFSET];
+ public get typeId(): T['TType'] { return this.type.typeId; }
+ public get ArrayType(): T['ArrayType'] { return this.type.ArrayType; }
+ /** The four buffers as a freshly created array, ordered valueOffsets, values, nullBitmap, typeIds. */
+ public get buffers() {
+ return [this.valueOffsets, this.values, this.nullBitmap, this.typeIds] as Buffers<T>;
+ }
+    /** Sum of the `byteLength` of each buffer that is set, plus the `byteLength` of every entry in `childData`. */
+    public get byteLength(): number {
+        const ownBuffers = [this.valueOffsets, this.values, this.nullBitmap, this.typeIds];
+        let total = 0;
+        for (const buf of ownBuffers) {
+            if (buf) { total += buf.byteLength; }
+        }
+        for (const child of this.childData) {
+            total += child.byteLength;
+        }
+        return total;
+    }
+ protected _nullCount: number | kUnknownNullCount;
+ /**
+ * Number of nulls. When the stored count is the "unknown" marker (any value
+ * <= kUnknownNullCount) and a null bitmap exists, the count is computed from
+ * the bitmap over [offset, offset + length) and cached in `_nullCount`.
+ */
+ public get nullCount() {
+ let nullCount = this._nullCount;
+ let nullBitmap: Uint8Array | undefined;
+ if (nullCount <= kUnknownNullCount && (nullBitmap = this.nullBitmap)) {
+ // nulls = length minus the number of set bits in the range
+ this._nullCount = nullCount = this.length - popcnt_bit_range(nullBitmap, this.offset, this.offset + this.length);
+ }
+ return nullCount;
+ }
+    /**
+     * @param type Logical type of the data.
+     * @param offset Start offset; floored, and clamped to be non-negative.
+     * @param length Number of elements; floored, and clamped to be non-negative.
+     * @param nullCount Null count; values below -1 are clamped to -1.
+     * @param buffers Either another `Data` whose buffers and stride are reused,
+     * or a `Buffers`-shaped object indexed 0..3 (offsets, values, validity, type ids).
+     * @param childData Children, each a `Data` or a `Vector` whose `data` is taken.
+     * @param dictionary Dictionary vector, if any.
+     */
+    constructor(type: T, offset: number, length: number, nullCount?: number, buffers?: Partial<Buffers<T>> | Data<T>, childData?: (Data | Vector)[], dictionary?: Vector) {
+        this.type = type;
+        this.dictionary = dictionary;
+        this.offset = Math.floor(Math.max(offset || 0, 0));
+        this.length = Math.floor(Math.max(length || 0, 0));
+        this._nullCount = Math.floor(Math.max(nullCount || 0, -1));
+        this.childData = (childData || []).map((x) => x instanceof Data ? x : x.data) as Data[];
+        let buffer: Buffers<T>[keyof Buffers<T>];
+        if (buffers instanceof Data) {
+            this.stride = buffers.stride;
+            this.values = buffers.values;
+            this.typeIds = buffers.typeIds;
+            this.nullBitmap = buffers.nullBitmap;
+            this.valueOffsets = buffers.valueOffsets;
+        } else {
+            this.stride = strideForType(type);
+            if (buffers) {
+                // Assign only the buffers that are present; absent ones stay unset.
+                (buffer = (buffers as Buffers<T>)[0]) && (this.valueOffsets = buffer);
+                (buffer = (buffers as Buffers<T>)[1]) && (this.values = buffer);
+                (buffer = (buffers as Buffers<T>)[2]) && (this.nullBitmap = buffer);
+                (buffer = (buffers as Buffers<T>)[3]) && (this.typeIds = buffer);
+            }
+        }
+    }
+ /**
+ * Create a new Data of `type`. Every other argument defaults to this
+ * instance's value (for `buffers`, this instance itself); the dictionary is
+ * always carried over.
+ */
+ public clone<R extends DataType>(type: R, offset = this.offset, length = this.length, nullCount = this._nullCount, buffers: Buffers<R> = <any> this, childData: (Data | Vector)[] = this.childData) {
+ return new Data(type, offset, length, nullCount, buffers, childData, this.dictionary);
+ }
+ /**
+ * Return a Data covering `length` elements starting `offset` elements into
+ * this one. The new offset is `this.offset + offset`.
+ */
+ public slice(offset: number, length: number): Data<T> {
+ const { stride, typeId, childData } = this;
+ // +true === 1, +false === 0, so this means
+ // we keep nullCount at 0 if it's already 0,
+ // otherwise set to the invalidated flag -1
+ const nullCount = +(this._nullCount === 0) - 1;
+ // children are sliced in units of `stride` only for typeId 16; otherwise one-to-one
+ const childStride = typeId === 16 /* FixedSizeList */ ? stride : 1;
+ const buffers = this._sliceBuffers(offset, length, stride, typeId);
+ return this.clone<T>(this.type, this.offset + offset, length, nullCount, buffers,
+ // Don't slice children if we have value offsets (the variable-width types)
+ (!childData.length || this.valueOffsets) ? childData : this._sliceChildren(childData, childStride * offset, childStride * length));
+ }
+ /**
+ * Return a copy with length `newLength` and offset 0 whose validity bitmap
+ * marks the first `length` slots from the existing data and leaves the rest
+ * as nulls. The null count grows by `newLength - length`.
+ */
+ public _changeLengthAndBackfillNullBitmap(newLength: number): Data<T> {
+ // the Null type is cloned directly, with no bitmap work
+ if (this.typeId === Type.Null) {
+ return this.clone(this.type, 0, newLength, 0);
+ }
+ const { length, nullCount } = this;
+ // start initialized with 0s (nulls), then fill from 0 to length with 1s (not null)
+ const bitmap = new Uint8Array(((newLength + 63) & ~63) >> 3).fill(255, 0, length >> 3);
+ // set all the bits in the last byte (up to bit `length - length % 8`) to 1 (not null)
+ bitmap[length >> 3] = (1 << (length - (length & ~7))) - 1;
+ // if we have a nullBitmap, truncate + slice and set it over the pre-filled 1s
+ if (nullCount > 0) {
+ bitmap.set(truncateBitmap(this.offset, length, this.nullBitmap), 0);
+ }
+ const buffers = this.buffers;
+ buffers[BufferType.VALIDITY] = bitmap;
+ return this.clone(this.type, 0, newLength, nullCount + (newLength - length), buffers);
+ }
+ /**
+ * Build the buffer set for a slice of `length` elements starting at `offset`.
+ * Works on the array returned by the `buffers` getter and replaces entries
+ * with `subarray` views.
+ */
+ protected _sliceBuffers(offset: number, length: number, stride: number, typeId: T['TType']): Buffers<T> {
+ let arr: any;
+ const { buffers } = this;
+ // If typeIds exist, slice the typeIds buffer
+ (arr = buffers[BufferType.TYPE]) && (buffers[BufferType.TYPE] = arr.subarray(offset, offset + length));
+ // If offsets exist, only slice the offsets buffer
+ (arr = buffers[BufferType.OFFSET]) && (buffers[BufferType.OFFSET] = arr.subarray(offset, offset + length + 1)) ||
+ // Otherwise if no offsets, slice the data buffer. Don't slice the data vector for Booleans, since the offset goes by bits not bytes
+ (arr = buffers[BufferType.DATA]) && (buffers[BufferType.DATA] = typeId === 6 ? arr : arr.subarray(stride * offset, stride * (offset + length)));
+ return buffers;
+ }
+ /**
+  * Slices every child Data with the same window.
+  *
+  * @param childData the children to slice
+  * @param offset first argument forwarded to each child's `slice`
+  * @param length second argument forwarded to each child's `slice`
+  * @returns a new array holding the sliced children, in the same order
+  */
+ protected _sliceChildren(childData: Data[], offset: number, length: number): Data[] {
+     return childData.map((child) => child.slice(offset, length));
+ }
+ //
+ // Convenience methods for creating Data instances for each of the Arrow Vector types
+ //
+ /**
+  * Generic factory: dispatches on `type.typeId` to the matching type-specific static factory.
+  *
+  * `buffers` may be a buffers array or another Data (whose `buffers` are then used); when
+  * omitted, an empty array is used. A falsy `nullCount` is treated as 0. Missing data or
+  * offset buffers default to `[]`, and only the first entry of `childData` is used for the
+  * List, FixedSizeList and Map types.
+  *
+  * @throws Error when `type.typeId` matches none of the handled types
+  * @nocollapse
+  */
+ public static new<T extends DataType>(type: T, offset: number, length: number, nullCount?: number, buffers?: Partial<Buffers<T>> | Data<T>, childData?: (Data | Vector)[], dictionary?: Vector): Data<T> {
+     if (buffers instanceof Data) {
+         buffers = buffers.buffers;
+     } else if (!buffers) {
+         buffers = [] as Partial<Buffers<T>>;
+     }
+     const nulls = nullCount || 0;
+     const validity = buffers[BufferType.VALIDITY];
+     const firstChild = (childData || [])[0];
+     switch (type.typeId) {
+         case Type.Null:
+             return <unknown> Data.Null(<unknown> type as Null, offset, length) as Data<T>;
+         case Type.Int:
+             return <unknown> Data.Int(<unknown> type as Int, offset, length, nulls, validity, buffers[BufferType.DATA] || []) as Data<T>;
+         case Type.Dictionary:
+             return <unknown> Data.Dictionary(<unknown> type as Dictionary, offset, length, nulls, validity, buffers[BufferType.DATA] || [], dictionary!) as Data<T>;
+         case Type.Float:
+             return <unknown> Data.Float(<unknown> type as Float, offset, length, nulls, validity, buffers[BufferType.DATA] || []) as Data<T>;
+         case Type.Bool:
+             return <unknown> Data.Bool(<unknown> type as Bool, offset, length, nulls, validity, buffers[BufferType.DATA] || []) as Data<T>;
+         case Type.Decimal:
+             return <unknown> Data.Decimal(<unknown> type as Decimal, offset, length, nulls, validity, buffers[BufferType.DATA] || []) as Data<T>;
+         case Type.Date:
+             return <unknown> Data.Date(<unknown> type as Date_, offset, length, nulls, validity, buffers[BufferType.DATA] || []) as Data<T>;
+         case Type.Time:
+             return <unknown> Data.Time(<unknown> type as Time, offset, length, nulls, validity, buffers[BufferType.DATA] || []) as Data<T>;
+         case Type.Timestamp:
+             return <unknown> Data.Timestamp(<unknown> type as Timestamp, offset, length, nulls, validity, buffers[BufferType.DATA] || []) as Data<T>;
+         case Type.Interval:
+             return <unknown> Data.Interval(<unknown> type as Interval, offset, length, nulls, validity, buffers[BufferType.DATA] || []) as Data<T>;
+         case Type.FixedSizeBinary:
+             return <unknown> Data.FixedSizeBinary(<unknown> type as FixedSizeBinary, offset, length, nulls, validity, buffers[BufferType.DATA] || []) as Data<T>;
+         case Type.Binary:
+             return <unknown> Data.Binary(<unknown> type as Binary, offset, length, nulls, validity, buffers[BufferType.OFFSET] || [], buffers[BufferType.DATA] || []) as Data<T>;
+         case Type.Utf8:
+             return <unknown> Data.Utf8(<unknown> type as Utf8, offset, length, nulls, validity, buffers[BufferType.OFFSET] || [], buffers[BufferType.DATA] || []) as Data<T>;
+         case Type.List:
+             return <unknown> Data.List(<unknown> type as List, offset, length, nulls, validity, buffers[BufferType.OFFSET] || [], firstChild) as Data<T>;
+         case Type.FixedSizeList:
+             return <unknown> Data.FixedSizeList(<unknown> type as FixedSizeList, offset, length, nulls, validity, firstChild) as Data<T>;
+         case Type.Struct:
+             return <unknown> Data.Struct(<unknown> type as Struct, offset, length, nulls, validity, childData || []) as Data<T>;
+         case Type.Map:
+             return <unknown> Data.Map(<unknown> type as Map_, offset, length, nulls, validity, buffers[BufferType.OFFSET] || [], firstChild) as Data<T>;
+         case Type.Union:
+             // With no offsets buffer, childData is passed in the offsets-or-children position.
+             return <unknown> Data.Union(<unknown> type as Union, offset, length, nulls, validity, buffers[BufferType.TYPE] || [], buffers[BufferType.OFFSET] || childData, childData) as Data<T>;
+     }
+     throw new Error(`Unrecognized typeId ${type.typeId}`);
+ }
+ /**
+  * Creates Data for the Null type: only `offset` and `length` are supplied, no buffers are
+  * passed, and the null count handed to the constructor is 0.
+  * @nocollapse
+  */
+ public static Null<T extends Null>(type: T, offset: number, length: number) {
+     const nullCount = 0;
+     return new Data(type, offset, length, nullCount);
+ }
+ /**
+  * Creates Data for an Int column. `data` is converted with `toArrayBufferView` using
+  * `type.ArrayType` and `nullBitmap` with `toUint8Array`; they occupy the second and third
+  * buffer slots, and the first slot is left undefined.
+  * @nocollapse
+  */
+ public static Int<T extends Int>(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, data: DataBuffer<T>) {
+     const values = toArrayBufferView(type.ArrayType, data);
+     const validity = toUint8Array(nullBitmap);
+     return new Data(type, offset, length, nullCount, [undefined, values, validity]);
+ }
+ /**
+  * Creates Data for a Dictionary column. `data` is converted with `toArrayBufferView` using
+  * the array type of `type.indices`; the Data is built with an empty child list and the
+  * supplied `dictionary` vector.
+  * @nocollapse
+  */
+ public static Dictionary<T extends Dictionary>(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, data: DataBuffer<T>, dictionary: Vector<T['dictionary']>) {
+     const indices = toArrayBufferView<T['TArray']>(type.indices.ArrayType, data);
+     const validity = toUint8Array(nullBitmap);
+     return new Data(type, offset, length, nullCount, [undefined, indices, validity], [], dictionary);
+ }
+ /**
+  * Creates Data for a Float column from a values buffer (converted with `toArrayBufferView`
+  * using `type.ArrayType`) and a null bitmap (converted with `toUint8Array`).
+  * @nocollapse
+  */
+ public static Float<T extends Float>(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, data: DataBuffer<T>) {
+     const values = toArrayBufferView(type.ArrayType, data);
+     const validity = toUint8Array(nullBitmap);
+     return new Data(type, offset, length, nullCount, [undefined, values, validity]);
+ }
+ /**
+  * Creates Data for a Bool column from a values buffer (converted with `toArrayBufferView`
+  * using `type.ArrayType`) and a null bitmap (converted with `toUint8Array`).
+  * @nocollapse
+  */
+ public static Bool<T extends Bool>(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, data: DataBuffer<T>) {
+     const values = toArrayBufferView(type.ArrayType, data);
+     const validity = toUint8Array(nullBitmap);
+     return new Data(type, offset, length, nullCount, [undefined, values, validity]);
+ }
+ /**
+  * Creates Data for a Decimal column from a values buffer (converted with `toArrayBufferView`
+  * using `type.ArrayType`) and a null bitmap (converted with `toUint8Array`).
+  * @nocollapse
+  */
+ public static Decimal<T extends Decimal>(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, data: DataBuffer<T>) {
+     const values = toArrayBufferView(type.ArrayType, data);
+     const validity = toUint8Array(nullBitmap);
+     return new Data(type, offset, length, nullCount, [undefined, values, validity]);
+ }
+ /**
+  * Creates Data for a Date column from a values buffer (converted with `toArrayBufferView`
+  * using `type.ArrayType`) and a null bitmap (converted with `toUint8Array`).
+  * @nocollapse
+  */
+ public static Date<T extends Date_>(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, data: DataBuffer<T>) {
+     const values = toArrayBufferView(type.ArrayType, data);
+     const validity = toUint8Array(nullBitmap);
+     return new Data(type, offset, length, nullCount, [undefined, values, validity]);
+ }
+ /**
+  * Creates Data for a Time column from a values buffer (converted with `toArrayBufferView`
+  * using `type.ArrayType`) and a null bitmap (converted with `toUint8Array`).
+  * @nocollapse
+  */
+ public static Time<T extends Time>(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, data: DataBuffer<T>) {
+     const values = toArrayBufferView(type.ArrayType, data);
+     const validity = toUint8Array(nullBitmap);
+     return new Data(type, offset, length, nullCount, [undefined, values, validity]);
+ }
+ /**
+  * Creates Data for a Timestamp column from a values buffer (converted with
+  * `toArrayBufferView` using `type.ArrayType`) and a null bitmap (converted with `toUint8Array`).
+  * @nocollapse
+  */
+ public static Timestamp<T extends Timestamp>(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, data: DataBuffer<T>) {
+     const values = toArrayBufferView(type.ArrayType, data);
+     const validity = toUint8Array(nullBitmap);
+     return new Data(type, offset, length, nullCount, [undefined, values, validity]);
+ }
+ /**
+  * Creates Data for an Interval column from a values buffer (converted with
+  * `toArrayBufferView` using `type.ArrayType`) and a null bitmap (converted with `toUint8Array`).
+  * @nocollapse
+  */
+ public static Interval<T extends Interval>(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, data: DataBuffer<T>) {
+     const values = toArrayBufferView(type.ArrayType, data);
+     const validity = toUint8Array(nullBitmap);
+     return new Data(type, offset, length, nullCount, [undefined, values, validity]);
+ }
+ /**
+  * Creates Data for a FixedSizeBinary column from a values buffer (converted with
+  * `toArrayBufferView` using `type.ArrayType`) and a null bitmap (converted with `toUint8Array`).
+  * @nocollapse
+  */
+ public static FixedSizeBinary<T extends FixedSizeBinary>(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, data: DataBuffer<T>) {
+     const values = toArrayBufferView(type.ArrayType, data);
+     const validity = toUint8Array(nullBitmap);
+     return new Data(type, offset, length, nullCount, [undefined, values, validity]);
+ }
+ /**
+  * Creates Data for a Binary column. `valueOffsets` is converted with `toInt32Array`, and
+  * both `data` and `nullBitmap` with `toUint8Array`; they fill the first, second and third
+  * buffer slots respectively.
+  * @nocollapse
+  */
+ public static Binary<T extends Binary>(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, valueOffsets: ValueOffsetsBuffer, data: DataBuffer<T>) {
+     const offsets = toInt32Array(valueOffsets);
+     const bytes = toUint8Array(data);
+     const validity = toUint8Array(nullBitmap);
+     return new Data(type, offset, length, nullCount, [offsets, bytes, validity]);
+ }
+ /**
+  * Creates Data for a Utf8 column. `valueOffsets` is converted with `toInt32Array`, and both
+  * `data` and `nullBitmap` with `toUint8Array`; they fill the first, second and third buffer
+  * slots respectively.
+  * @nocollapse
+  */
+ public static Utf8<T extends Utf8>(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, valueOffsets: ValueOffsetsBuffer, data: DataBuffer<T>) {
+     const offsets = toInt32Array(valueOffsets);
+     const bytes = toUint8Array(data);
+     const validity = toUint8Array(nullBitmap);
+     return new Data(type, offset, length, nullCount, [offsets, bytes, validity]);
+ }
+ /**
+  * Creates Data for a List column. It has value offsets and a null bitmap but no data buffer
+  * of its own (the second slot is undefined); `child`, when truthy, becomes the only child,
+  * otherwise the child list is empty.
+  * @nocollapse
+  */
+ public static List<T extends List>(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, valueOffsets: ValueOffsetsBuffer, child: Data<T['valueType']> | Vector<T['valueType']>) {
+     const offsets = toInt32Array(valueOffsets);
+     const validity = toUint8Array(nullBitmap);
+     const children = child ? [child] : [];
+     return new Data(type, offset, length, nullCount, [offsets, undefined, validity], children);
+ }
+ /**
+  * Creates Data for a FixedSizeList column. Only the null bitmap buffer is supplied (first and
+  * second slots are undefined); `child`, when truthy, becomes the only child, otherwise the
+  * child list is empty.
+  * @nocollapse
+  */
+ public static FixedSizeList<T extends FixedSizeList>(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, child: Data<T['valueType']> | Vector<T['valueType']>) {
+     const validity = toUint8Array(nullBitmap);
+     const children = child ? [child] : [];
+     return new Data(type, offset, length, nullCount, [undefined, undefined, validity], children);
+ }
+ /**
+  * Creates Data for a Struct column. Only the null bitmap buffer is supplied (first and second
+  * slots are undefined); `children` is passed through unchanged as the child list.
+  * @nocollapse
+  */
+ public static Struct<T extends Struct>(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, children: (Data | Vector)[]) {
+     const validity = toUint8Array(nullBitmap);
+     return new Data(type, offset, length, nullCount, [undefined, undefined, validity], children);
+ }
+ /**
+  * Creates Data for a Map column. It has value offsets and a null bitmap but no data buffer
+  * of its own (the second slot is undefined); `child`, when truthy, becomes the only child,
+  * otherwise the child list is empty.
+  * @nocollapse
+  */
+ public static Map<T extends Map_>(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, valueOffsets: ValueOffsetsBuffer, child: (Data | Vector)) {
+     const offsets = toInt32Array(valueOffsets);
+     const validity = toUint8Array(nullBitmap);
+     const children = child ? [child] : [];
+     return new Data(type, offset, length, nullCount, [offsets, undefined, validity], children);
+ }
+ // Overloads: a sparse union takes its children in the seventh position; a dense union takes
+ // value offsets there and its children in the eighth.
+ public static Union<T extends SparseUnion>(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, typeIds: TypeIdsBuffer, children: (Data | Vector)[], _?: any): Data<T>;
+ public static Union<T extends DenseUnion>(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, typeIds: TypeIdsBuffer, valueOffsets: ValueOffsetsBuffer, children: (Data | Vector)[]): Data<T>;
+ public static Union<T extends Union>(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, typeIds: TypeIdsBuffer, valueOffsetsOrChildren: ValueOffsetsBuffer | (Data | Vector)[], children?: (Data | Vector)[]): Data<T>;
+ /**
+  * Creates Data for a Union column. The null bitmap and type ids fill the third and fourth
+  * buffer slots. When `type.mode` is `UnionMode.Sparse`, `valueOffsetsOrChildren` is used as
+  * the child list; otherwise it is converted with `toInt32Array` into the offsets buffer and
+  * `children` is used as the child list.
+  * @nocollapse
+  */
+ public static Union<T extends Union>(type: T, offset: number, length: number, nullCount: number, nullBitmap: NullBuffer, typeIds: TypeIdsBuffer, valueOffsetsOrChildren: ValueOffsetsBuffer | (Data | Vector)[], children?: (Data | Vector)[]) {
+     const validity = toUint8Array(nullBitmap);
+     const ids = toArrayBufferView(type.ArrayType, typeIds);
+     const buffers = <unknown> [undefined, undefined, validity, ids] as Partial<Buffers<T>>;
+     if (type.mode !== UnionMode.Sparse) {
+         // Non-sparse: the seventh argument carries the value offsets.
+         buffers[BufferType.OFFSET] = toInt32Array(<ValueOffsetsBuffer> valueOffsetsOrChildren);
+         return new Data(type, offset, length, nullCount, buffers, children);
+     }
+     // Sparse: there is no offsets buffer, so the seventh argument is the child list.
+     return new Data(type, offset, length, nullCount, buffers, valueOffsetsOrChildren as (Data | Vector)[]);
+ }
+// Install a single frozen empty array on the prototype as the `childData` value, so it is
+// what gets read whenever an instance has no `childData` property of its own.
+(Data.prototype as any).childData = Object.freeze([]);
diff --git a/src/arrow/js/src/enum.ts b/src/arrow/js/src/enum.ts
new file mode 100644
index 000000000..517aa27e8
--- /dev/null
+++ b/src/arrow/js/src/enum.ts
@@ -0,0 +1,142 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+export {
+ DateUnit,
+ TimeUnit,
+ Precision,
+ UnionMode,
+ IntervalUnit,
+ MetadataVersion,
+} from './fb/Schema';
+export { MessageHeader } from './fb/Message';
+/**
+ * Main data type enumeration.
+ *
+ * Data types in this library are all *logical*. They can be expressed as
+ * either a primitive physical type (bytes or bits of some fixed size), a
+ * nested type consisting of other data types, or another data type (e.g. a
+ * timestamp encoded as an int64).
+ *
+ * **Note**: Only enum values 0-17 (NONE through Map) are written to an Arrow
+ * IPC payload.
+ *
+ * The rest of the values are specified here so TypeScript can narrow the type
+ * signatures further beyond the base Arrow Types. The Arrow DataTypes include
+ * metadata like `bitWidth` that impact the type signatures of the values we
+ * accept and return.
+ *
+ * For example, the `Int8Vector` reads 1-byte numbers from an `Int8Array`, an
+ * `Int32Vector` reads a 4-byte number from an `Int32Array`, and an `Int64Vector`
+ * reads a pair of 4-byte lo, hi 32-bit integers as a zero-copy slice from the
+ * underlying `Int32Array`.
+ *
+ * Library consumers benefit by knowing the narrowest type, since we can ensure
+ * the types across all public methods are propagated, and never bail to `any`.
+ * These values are _never_ used at runtime, and they will _never_ be written
+ * to the flatbuffers metadata of serialized Arrow IPC payloads.
+ */
+export enum Type {
+    /** The default placeholder type */
+    NONE = 0,
+    /** A NULL type having no physical storage */
+    Null = 1,
+    /** Signed or unsigned 8, 16, 32, or 64-bit little-endian integer */
+    Int = 2,
+    /** 2, 4, or 8-byte floating point value */
+    Float = 3,
+    /** Variable-length bytes (no guarantee of UTF8-ness) */
+    Binary = 4,
+    /** UTF8 variable-length string as List<Char> */
+    Utf8 = 5,
+    /** Boolean as 1 bit, LSB bit-packed ordering */
+    Bool = 6,
+    /** Precision-and-scale-based decimal type. Storage type depends on the parameters. */
+    Decimal = 7,
+    /** int32_t days or int64_t milliseconds since the UNIX epoch */
+    Date = 8,
+    /** Time as signed 32 or 64-bit integer, representing either seconds, milliseconds, microseconds, or nanoseconds since midnight */
+    Time = 9,
+    /** Exact timestamp encoded with int64 since UNIX epoch (Default unit millisecond) */
+    Timestamp = 10,
+    /** YEAR_MONTH or DAY_TIME interval in SQL style */
+    Interval = 11,
+    /** A list of some logical data type */
+    List = 12,
+    /** Struct of logical types */
+    Struct = 13,
+    /** Union of logical types */
+    Union = 14,
+    /** Fixed-size binary. Each value occupies the same number of bytes */
+    FixedSizeBinary = 15,
+    /** Fixed-size list. Each value occupies the same number of bytes */
+    FixedSizeList = 16,
+    /** Map of named logical types */
+    Map = 17,
+
+    // Negative values: the narrowed variants described in the enum documentation; they fall
+    // outside the 0-17 range that is written to an IPC payload.
+    /** Dictionary aka Category type */
+    Dictionary = -1,
+    Int8 = -2,
+    Int16 = -3,
+    Int32 = -4,
+    Int64 = -5,
+    Uint8 = -6,
+    Uint16 = -7,
+    Uint32 = -8,
+    Uint64 = -9,
+    Float16 = -10,
+    Float32 = -11,
+    Float64 = -12,
+    DateDay = -13,
+    DateMillisecond = -14,
+    TimestampSecond = -15,
+    TimestampMillisecond = -16,
+    TimestampMicrosecond = -17,
+    TimestampNanosecond = -18,
+    TimeSecond = -19,
+    TimeMillisecond = -20,
+    TimeMicrosecond = -21,
+    TimeNanosecond = -22,
+    DenseUnion = -23,
+    SparseUnion = -24,
+    IntervalDayTime = -25,
+    IntervalYearMonth = -26,
+}
+/**
+ * Index of each kind of buffer within a buffers array.
+ */
+export enum BufferType {
+    /**
+     * used in List type, Dense Union and variable length primitive types (String, Binary)
+     */
+    OFFSET = 0,
+    /**
+     * actual data, either fixed width primitive types in slots or variable width delimited by an OFFSET vector
+     */
+    DATA = 1,
+    /**
+     * Bit vector indicating if each value is null
+     */
+    VALIDITY = 2,
+    /**
+     * Type vector used in Union type
+     */
+    TYPE = 3
+}
diff --git a/src/arrow/js/src/fb/.eslintrc.js b/src/arrow/js/src/fb/.eslintrc.js
new file mode 100644
index 000000000..d448540e4
--- /dev/null
+++ b/src/arrow/js/src/fb/.eslintrc.js
@@ -0,0 +1,23 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// ESLint overrides for this directory: switches off two @typescript-eslint rules.
+// NOTE(review): presumably needed because the sources here are emitted by the FlatBuffers
+// compiler and are not hand-edited — confirm.
+module.exports = {
+ rules: {
+ // Do not report require-style imports.
+ "@typescript-eslint/no-require-imports": "off",
+ // Do not report explicit annotations on trivially inferred types (e.g. `x: number = 0`).
+ "@typescript-eslint/no-inferrable-types": "off"
+ },
+}; \ No newline at end of file
diff --git a/src/arrow/js/src/fb/File.ts b/src/arrow/js/src/fb/File.ts
new file mode 100644
index 000000000..5746dd183
--- /dev/null
+++ b/src/arrow/js/src/fb/File.ts
@@ -0,0 +1,300 @@
+// automatically generated by the FlatBuffers compiler, do not modify
+import { flatbuffers } from 'flatbuffers';
+import * as NS13596923344997147894 from './Schema';
+/**
+ * ----------------------------------------------------------------------
+ * Arrow File metadata
+ *
+ * Accessor over a Footer table stored in a flatbuffers ByteBuffer, plus the static
+ * builder helpers used to write one.
+ *
+ * @constructor
+ */
+export class Footer {
+    /** Buffer this accessor reads from; null until `__init` is called. */
+    bb: flatbuffers.ByteBuffer | null = null;
+    /** Position of this table inside `bb`. */
+    bb_pos: number = 0;
+    /**
+     * Points this accessor at position `i` of `bb`.
+     *
+     * @param number i
+     * @param flatbuffers.ByteBuffer bb
+     * @returns Footer
+     */
+    __init(i: number, bb: flatbuffers.ByteBuffer): Footer {
+        this.bb_pos = i;
+        this.bb = bb;
+        return this;
+    }
+    /**
+     * @param flatbuffers.ByteBuffer bb
+     * @param Footer= obj
+     * @returns Footer
+     */
+    static getRootAsFooter(bb: flatbuffers.ByteBuffer, obj?: Footer): Footer {
+        return (obj || new Footer()).__init(bb.readInt32(bb.position()) + bb.position(), bb);
+    }
+    /**
+     * Same as `getRootAsFooter`, after first advancing `bb` past the size prefix.
+     *
+     * @param flatbuffers.ByteBuffer bb
+     * @param Footer= obj
+     * @returns Footer
+     */
+    static getSizePrefixedRootAsFooter(bb: flatbuffers.ByteBuffer, obj?: Footer): Footer {
+        bb.setPosition(bb.position() + flatbuffers.SIZE_PREFIX_LENGTH);
+        return (obj || new Footer()).__init(bb.readInt32(bb.position()) + bb.position(), bb);
+    }
+    /**
+     * Falls back to `MetadataVersion.V1` when no offset is found for the field.
+     *
+     * @returns MetadataVersion
+     */
+    version(): NS13596923344997147894.MetadataVersion {
+        const offset = this.bb!.__offset(this.bb_pos, 4);
+        return offset ? /** */ (this.bb!.readInt16(this.bb_pos + offset)) : NS13596923344997147894.MetadataVersion.V1;
+    }
+    /**
+     * @param Schema= obj
+     * @returns Schema|null
+     */
+    schema(obj?: NS13596923344997147894.Schema): NS13596923344997147894.Schema | null {
+        const offset = this.bb!.__offset(this.bb_pos, 6);
+        return offset ? (obj || new NS13596923344997147894.Schema()).__init(this.bb!.__indirect(this.bb_pos + offset), this.bb!) : null;
+    }
+    /**
+     * Entries are addressed with a 24-byte stride.
+     *
+     * @param number index
+     * @param Block= obj
+     * @returns Block
+     */
+    dictionaries(index: number, obj?: Block): Block | null {
+        const offset = this.bb!.__offset(this.bb_pos, 8);
+        return offset ? (obj || new Block()).__init(this.bb!.__vector(this.bb_pos + offset) + index * 24, this.bb!) : null;
+    }
+    /**
+     * @returns number
+     */
+    dictionariesLength(): number {
+        const offset = this.bb!.__offset(this.bb_pos, 8);
+        return offset ? this.bb!.__vector_len(this.bb_pos + offset) : 0;
+    }
+    /**
+     * Entries are addressed with a 24-byte stride.
+     *
+     * @param number index
+     * @param Block= obj
+     * @returns Block
+     */
+    recordBatches(index: number, obj?: Block): Block | null {
+        const offset = this.bb!.__offset(this.bb_pos, 10);
+        return offset ? (obj || new Block()).__init(this.bb!.__vector(this.bb_pos + offset) + index * 24, this.bb!) : null;
+    }
+    /**
+     * @returns number
+     */
+    recordBatchesLength(): number {
+        const offset = this.bb!.__offset(this.bb_pos, 10);
+        return offset ? this.bb!.__vector_len(this.bb_pos + offset) : 0;
+    }
+    /**
+     * User-defined metadata
+     *
+     * @param number index
+     * @param KeyValue= obj
+     * @returns KeyValue
+     */
+    customMetadata(index: number, obj?: NS13596923344997147894.KeyValue): NS13596923344997147894.KeyValue | null {
+        const offset = this.bb!.__offset(this.bb_pos, 12);
+        return offset ? (obj || new NS13596923344997147894.KeyValue()).__init(this.bb!.__indirect(this.bb!.__vector(this.bb_pos + offset) + index * 4), this.bb!) : null;
+    }
+    /**
+     * @returns number
+     */
+    customMetadataLength(): number {
+        const offset = this.bb!.__offset(this.bb_pos, 12);
+        return offset ? this.bb!.__vector_len(this.bb_pos + offset) : 0;
+    }
+    /**
+     * @param flatbuffers.Builder builder
+     */
+    static startFooter(builder: flatbuffers.Builder) {
+        builder.startObject(5);
+    }
+    /**
+     * @param flatbuffers.Builder builder
+     * @param MetadataVersion version
+     */
+    static addVersion(builder: flatbuffers.Builder, version: NS13596923344997147894.MetadataVersion) {
+        builder.addFieldInt16(0, version, NS13596923344997147894.MetadataVersion.V1);
+    }
+    /**
+     * @param flatbuffers.Builder builder
+     * @param flatbuffers.Offset schemaOffset
+     */
+    static addSchema(builder: flatbuffers.Builder, schemaOffset: flatbuffers.Offset) {
+        builder.addFieldOffset(1, schemaOffset, 0);
+    }
+    /**
+     * @param flatbuffers.Builder builder
+     * @param flatbuffers.Offset dictionariesOffset
+     */
+    static addDictionaries(builder: flatbuffers.Builder, dictionariesOffset: flatbuffers.Offset) {
+        builder.addFieldOffset(2, dictionariesOffset, 0);
+    }
+    /**
+     * @param flatbuffers.Builder builder
+     * @param number numElems
+     */
+    static startDictionariesVector(builder: flatbuffers.Builder, numElems: number) {
+        builder.startVector(24, numElems, 8);
+    }
+    /**
+     * @param flatbuffers.Builder builder
+     * @param flatbuffers.Offset recordBatchesOffset
+     */
+    static addRecordBatches(builder: flatbuffers.Builder, recordBatchesOffset: flatbuffers.Offset) {
+        builder.addFieldOffset(3, recordBatchesOffset, 0);
+    }
+    /**
+     * @param flatbuffers.Builder builder
+     * @param number numElems
+     */
+    static startRecordBatchesVector(builder: flatbuffers.Builder, numElems: number) {
+        builder.startVector(24, numElems, 8);
+    }
+    /**
+     * @param flatbuffers.Builder builder
+     * @param flatbuffers.Offset customMetadataOffset
+     */
+    static addCustomMetadata(builder: flatbuffers.Builder, customMetadataOffset: flatbuffers.Offset) {
+        builder.addFieldOffset(4, customMetadataOffset, 0);
+    }
+    /**
+     * Adds the offsets from last to first.
+     *
+     * @param flatbuffers.Builder builder
+     * @param Array.<flatbuffers.Offset> data
+     * @returns flatbuffers.Offset
+     */
+    static createCustomMetadataVector(builder: flatbuffers.Builder, data: flatbuffers.Offset[]): flatbuffers.Offset {
+        builder.startVector(4, data.length, 4);
+        for (let i = data.length - 1; i >= 0; i--) {
+            builder.addOffset(data[i]);
+        }
+        return builder.endVector();
+    }
+    /**
+     * @param flatbuffers.Builder builder
+     * @param number numElems
+     */
+    static startCustomMetadataVector(builder: flatbuffers.Builder, numElems: number) {
+        builder.startVector(4, numElems, 4);
+    }
+    /**
+     * @param flatbuffers.Builder builder
+     * @returns flatbuffers.Offset
+     */
+    static endFooter(builder: flatbuffers.Builder): flatbuffers.Offset {
+        const offset = builder.endObject();
+        return offset;
+    }
+    /**
+     * @param flatbuffers.Builder builder
+     * @param flatbuffers.Offset offset
+     */
+    static finishFooterBuffer(builder: flatbuffers.Builder, offset: flatbuffers.Offset) {
+        builder.finish(offset);
+    }
+    /**
+     * @param flatbuffers.Builder builder
+     * @param flatbuffers.Offset offset
+     */
+    static finishSizePrefixedFooterBuffer(builder: flatbuffers.Builder, offset: flatbuffers.Offset) {
+        builder.finish(offset, undefined, true);
+    }
+    /**
+     * Convenience wrapper: starts a Footer, adds all five fields in order, and ends it.
+     *
+     * @returns flatbuffers.Offset
+     */
+    static createFooter(builder: flatbuffers.Builder, version: NS13596923344997147894.MetadataVersion, schemaOffset: flatbuffers.Offset, dictionariesOffset: flatbuffers.Offset, recordBatchesOffset: flatbuffers.Offset, customMetadataOffset: flatbuffers.Offset): flatbuffers.Offset {
+        Footer.startFooter(builder);
+        Footer.addVersion(builder, version);
+        Footer.addSchema(builder, schemaOffset);
+        Footer.addDictionaries(builder, dictionariesOffset);
+        Footer.addRecordBatches(builder, recordBatchesOffset);
+        Footer.addCustomMetadata(builder, customMetadataOffset);
+        return Footer.endFooter(builder);
+    }
+}
+/**
+ * Accessor over a fixed 24-byte Block struct: a 64-bit offset at byte 0, a 32-bit metadata
+ * length at byte 8, and a 64-bit body length at byte 16.
+ *
+ * @constructor
+ */
+export class Block {
+    /** Buffer this accessor reads from; null until `__init` is called. */
+    bb: flatbuffers.ByteBuffer | null = null;
+    /** Position of this struct inside `bb`. */
+    bb_pos: number = 0;
+    /**
+     * Points this accessor at position `i` of `bb`.
+     *
+     * @param number i
+     * @param flatbuffers.ByteBuffer bb
+     * @returns Block
+     */
+    __init(i: number, bb: flatbuffers.ByteBuffer): Block {
+        this.bb_pos = i;
+        this.bb = bb;
+        return this;
+    }
+    /**
+     * Index to the start of the RecordBlock (note this is past the Message header)
+     *
+     * @returns flatbuffers.Long
+     */
+    offset(): flatbuffers.Long {
+        return this.bb!.readInt64(this.bb_pos);
+    }
+    /**
+     * Length of the metadata
+     *
+     * @returns number
+     */
+    metaDataLength(): number {
+        return this.bb!.readInt32(this.bb_pos + 8);
+    }
+    /**
+     * Length of the data (this is aligned so there can be a gap between this and
+     * the metadata).
+     *
+     * @returns flatbuffers.Long
+     */
+    bodyLength(): flatbuffers.Long {
+        return this.bb!.readInt64(this.bb_pos + 16);
+    }
+    /**
+     * Writes a Block. The fields are written in the reverse of their read order: body length,
+     * 4 bytes of padding, metadata length, then offset.
+     *
+     * @param flatbuffers.Builder builder
+     * @param flatbuffers.Long offset
+     * @param number metaDataLength
+     * @param flatbuffers.Long bodyLength
+     * @returns flatbuffers.Offset
+     */
+    static createBlock(builder: flatbuffers.Builder, offset: flatbuffers.Long, metaDataLength: number, bodyLength: flatbuffers.Long): flatbuffers.Offset {
+        builder.prep(8, 24);
+        builder.writeInt64(bodyLength);
+        builder.pad(4);
+        builder.writeInt32(metaDataLength);
+        builder.writeInt64(offset);
+        return builder.offset();
+    }
+}
diff --git a/src/arrow/js/src/fb/Message.ts b/src/arrow/js/src/fb/Message.ts
new file mode 100644
index 000000000..973eb0425
--- /dev/null
+++ b/src/arrow/js/src/fb/Message.ts
@@ -0,0 +1,709 @@
+// automatically generated by the FlatBuffers compiler, do not modify
+import { flatbuffers } from 'flatbuffers';
+import * as NS13596923344997147894 from './Schema';
+/**
+ * Compressor identifiers; used as the type of `BodyCompression.codec()`.
+ *
+ * @enum {number}
+ */
+export enum CompressionType {
+    LZ4_FRAME = 0,
+    ZSTD = 1
+}
+/**
+ * Provided for forward compatibility in case we need to support different
+ * strategies for compressing the IPC message body (like whole-body
+ * compression rather than buffer-level) in the future
+ *
+ * @enum {number}
+ */
+export enum BodyCompressionMethod {
+    /**
+     * Each constituent buffer is first compressed with the indicated
+     * compressor, and then written with the uncompressed length in the first 8
+     * bytes as a 64-bit little-endian signed integer followed by the compressed
+     * buffer bytes (and then padding as required by the protocol). The
+     * uncompressed length may be set to -1 to indicate that the data that
+     * follows is not compressed, which can be useful for cases where
+     * compression does not yield appreciable savings.
+     */
+    BUFFER = 0
+}
+/**
+ * ----------------------------------------------------------------------
+ * The root Message type
+ * This union enables us to easily send different message types without
+ * redundant storage, and in the future we can easily add new message types.
+ *
+ * Arrow implementations do not need to implement all of the message types,
+ * which may include experimental metadata types. For maximum compatibility,
+ * it is best to send data using RecordBatch
+ *
+ * @enum {number}
+ */
+export enum MessageHeader {
+    NONE = 0,
+    Schema = 1,
+    DictionaryBatch = 2,
+    RecordBatch = 3,
+    Tensor = 4,
+    SparseTensor = 5
+}
+/**
+ * ----------------------------------------------------------------------
+ * Data structures for describing a table row batch (a collection of
+ * equal-length Arrow arrays)
+ * Metadata about a field at some level of a nested type tree (but not
+ * its children).
+ *
+ * For example, a List<Int16> with values [[1, 2, 3], null, [4], [5, 6], null]
+ * would have {length: 5, null_count: 2} for its List node, and {length: 6,
+ * null_count: 0} for its Int16 node, as separate FieldNode structs
+ *
+ * @constructor
+ */
+export class FieldNode {
+    /** Buffer this accessor reads from; null until `__init` is called. */
+    bb: flatbuffers.ByteBuffer | null = null;
+    /** Position of this struct inside `bb`. */
+    bb_pos: number = 0;
+    /**
+     * Points this accessor at position `i` of `bb`.
+     *
+     * @param number i
+     * @param flatbuffers.ByteBuffer bb
+     * @returns FieldNode
+     */
+    __init(i: number, bb: flatbuffers.ByteBuffer): FieldNode {
+        this.bb_pos = i;
+        this.bb = bb;
+        return this;
+    }
+    /**
+     * The number of value slots in the Arrow array at this level of a nested
+     * tree
+     *
+     * @returns flatbuffers.Long
+     */
+    length(): flatbuffers.Long {
+        return this.bb!.readInt64(this.bb_pos);
+    }
+    /**
+     * The number of observed nulls. Fields with null_count == 0 may choose not
+     * to write their physical validity bitmap out as a materialized buffer,
+     * instead setting the length of the bitmap buffer to 0.
+     *
+     * @returns flatbuffers.Long
+     */
+    nullCount(): flatbuffers.Long {
+        return this.bb!.readInt64(this.bb_pos + 8);
+    }
+    /**
+     * Writes a FieldNode. The fields are written in the reverse of their read order:
+     * null count first, then length.
+     *
+     * @param flatbuffers.Builder builder
+     * @param flatbuffers.Long length
+     * @param flatbuffers.Long null_count
+     * @returns flatbuffers.Offset
+     */
+    static createFieldNode(builder: flatbuffers.Builder, length: flatbuffers.Long, null_count: flatbuffers.Long): flatbuffers.Offset {
+        builder.prep(8, 16);
+        builder.writeInt64(null_count);
+        builder.writeInt64(length);
+        return builder.offset();
+    }
+}
+/**
+ * Optional compression for the memory buffers constituting IPC message
+ * bodies. Intended for use with RecordBatch but could be used for other
+ * message types
+ *
+ * @constructor
+ */
+export class BodyCompression {
+    /** Buffer this accessor reads from; null until `__init` is called. */
+    bb: flatbuffers.ByteBuffer | null = null;
+    /** Position of this table inside `bb`. */
+    bb_pos: number = 0;
+    /**
+     * Points this accessor at position `i` of `bb`.
+     *
+     * @param number i
+     * @param flatbuffers.ByteBuffer bb
+     * @returns BodyCompression
+     */
+    __init(i: number, bb: flatbuffers.ByteBuffer): BodyCompression {
+        this.bb_pos = i;
+        this.bb = bb;
+        return this;
+    }
+    /**
+     * @param flatbuffers.ByteBuffer bb
+     * @param BodyCompression= obj
+     * @returns BodyCompression
+     */
+    static getRootAsBodyCompression(bb: flatbuffers.ByteBuffer, obj?: BodyCompression): BodyCompression {
+        return (obj || new BodyCompression()).__init(bb.readInt32(bb.position()) + bb.position(), bb);
+    }
+    /**
+     * Same as `getRootAsBodyCompression`, after first advancing `bb` past the size prefix.
+     *
+     * @param flatbuffers.ByteBuffer bb
+     * @param BodyCompression= obj
+     * @returns BodyCompression
+     */
+    static getSizePrefixedRootAsBodyCompression(bb: flatbuffers.ByteBuffer, obj?: BodyCompression): BodyCompression {
+        bb.setPosition(bb.position() + flatbuffers.SIZE_PREFIX_LENGTH);
+        return (obj || new BodyCompression()).__init(bb.readInt32(bb.position()) + bb.position(), bb);
+    }
+    /**
+     * Compressor library. Falls back to `CompressionType.LZ4_FRAME` when no offset is found
+     * for the field.
+     *
+     * @returns CompressionType
+     */
+    codec(): CompressionType {
+        const offset = this.bb!.__offset(this.bb_pos, 4);
+        return offset ? /** */ (this.bb!.readInt8(this.bb_pos + offset)) : CompressionType.LZ4_FRAME;
+    }
+    /**
+     * Indicates the way the record batch body was compressed. Falls back to
+     * `BodyCompressionMethod.BUFFER` when no offset is found for the field.
+     *
+     * @returns BodyCompressionMethod
+     */
+    method(): BodyCompressionMethod {
+        const offset = this.bb!.__offset(this.bb_pos, 6);
+        return offset ? /** */ (this.bb!.readInt8(this.bb_pos + offset)) : BodyCompressionMethod.BUFFER;
+    }
+    /**
+     * @param flatbuffers.Builder builder
+     */
+    static startBodyCompression(builder: flatbuffers.Builder) {
+        builder.startObject(2);
+    }
+    /**
+     * @param flatbuffers.Builder builder
+     * @param CompressionType codec
+     */
+    static addCodec(builder: flatbuffers.Builder, codec: CompressionType) {
+        builder.addFieldInt8(0, codec, CompressionType.LZ4_FRAME);
+    }
+    /**
+     * @param flatbuffers.Builder builder
+     * @param BodyCompressionMethod method
+     */
+    static addMethod(builder: flatbuffers.Builder, method: BodyCompressionMethod) {
+        builder.addFieldInt8(1, method, BodyCompressionMethod.BUFFER);
+    }
+    /**
+     * @param flatbuffers.Builder builder
+     * @returns flatbuffers.Offset
+     */
+    static endBodyCompression(builder: flatbuffers.Builder): flatbuffers.Offset {
+        const offset = builder.endObject();
+        return offset;
+    }
+    /**
+     * Convenience wrapper: starts a BodyCompression, adds both fields, and ends it.
+     *
+     * @returns flatbuffers.Offset
+     */
+    static createBodyCompression(builder: flatbuffers.Builder, codec: CompressionType, method: BodyCompressionMethod): flatbuffers.Offset {
+        BodyCompression.startBodyCompression(builder);
+        BodyCompression.addCodec(builder, codec);
+        BodyCompression.addMethod(builder, method);
+        return BodyCompression.endBodyCompression(builder);
+    }
+}
+/**
+ * A data header describing the shared memory layout of a "record" or "row"
+ * batch. Some systems call this a "row batch" internally and others a "record
+ * batch".
+ *
+ * @constructor
+ */
+export class RecordBatch {
+ bb: flatbuffers.ByteBuffer | null = null;
+ bb_pos: number = 0;
+ /**
+ * @param number i
+ * @param flatbuffers.ByteBuffer bb
+ * @returns RecordBatch
+ */
+ __init(i: number, bb: flatbuffers.ByteBuffer): RecordBatch {
+ this.bb_pos = i;
+ = bb;
+ return this;
+ }
+ /**
+ * @param flatbuffers.ByteBuffer bb
+ * @param RecordBatch= obj
+ * @returns RecordBatch
+ */
+ static getRootAsRecordBatch(bb: flatbuffers.ByteBuffer, obj?: RecordBatch): RecordBatch {
+ return (obj || new RecordBatch()).__init(bb.readInt32(bb.position()) + bb.position(), bb);
+ }
+ /**
+ * @param flatbuffers.ByteBuffer bb
+ * @param RecordBatch= obj
+ * @returns RecordBatch
+ */
+ static getSizePrefixedRootAsRecordBatch(bb: flatbuffers.ByteBuffer, obj?: RecordBatch): RecordBatch {
+ bb.setPosition(bb.position() + flatbuffers.SIZE_PREFIX_LENGTH);
+ return (obj || new RecordBatch()).__init(bb.readInt32(bb.position()) + bb.position(), bb);
+ }
+ /**
+ * number of records / rows. The arrays in the batch should all have this
+ * length
+ *
+ * @returns flatbuffers.Long
+ */
+ length(): flatbuffers.Long {
+ const offset =!.__offset(this.bb_pos, 4);
+ return offset ?!.readInt64(this.bb_pos + offset) :!.createLong(0, 0);
+ }
+ /**
+ * Nodes correspond to the pre-ordered flattened logical schema
+ *
+ * @param number index
+ * @param FieldNode= obj
+ * @returns FieldNode
+ */
+ nodes(index: number, obj?: FieldNode): FieldNode | null {
+ const offset =!.__offset(this.bb_pos, 6);
+ return offset ? (obj || new FieldNode()).__init(!.__vector(this.bb_pos + offset) + index * 16,!) : null;
+ }
+ /**
+ * @returns number
+ */
+ nodesLength(): number {
+ const offset =!.__offset(this.bb_pos, 6);
+ return offset ?!.__vector_len(this.bb_pos + offset) : 0;
+ }
+ /**
+ * Buffers correspond to the pre-ordered flattened buffer tree
+ *
+ * The number of buffers appended to this list depends on the schema. For
+ * example, most primitive arrays will have 2 buffers, 1 for the validity
+ * bitmap and 1 for the values. For struct arrays, there will only be a
+ * single buffer for the validity (nulls) bitmap
+ *
+ * @param number index
+ * @param Buffer= obj
+ * @returns Buffer
+ */
+ buffers(index: number, obj?: NS13596923344997147894.Buffer): NS13596923344997147894.Buffer | null {
+ const offset =!.__offset(this.bb_pos, 8);
+ return offset ? (obj || new NS13596923344997147894.Buffer()).__init(!.__vector(this.bb_pos + offset) + index * 16,!) : null;
+ }
+ /**
+ * @returns number
+ */
+ buffersLength(): number {
+ const offset =!.__offset(this.bb_pos, 8);
+ return offset ?!.__vector_len(this.bb_pos + offset) : 0;
+ }
+ /**
+ * Optional compression of the message body
+ *
+ * @param BodyCompression= obj
+ * @returns BodyCompression|null
+ */
+ compression(obj?: BodyCompression): BodyCompression | null {
+ const offset =!.__offset(this.bb_pos, 10);
+ return offset ? (obj || new BodyCompression()).__init(!.__indirect(this.bb_pos + offset),!) : null;
+ }
+ /**
+ * @param flatbuffers.Builder builder
+ */
+ static startRecordBatch(builder: flatbuffers.Builder) {
+ builder.startObject(4);
+ }
+ /**
+ * @param flatbuffers.Builder builder
+ * @param flatbuffers.Long length
+ */
+ static addLength(builder: flatbuffers.Builder, length: flatbuffers.Long) {
+ builder.addFieldInt64(0, length, builder.createLong(0, 0));
+ }
+ /**
+ * @param flatbuffers.Builder builder
+ * @param flatbuffers.Offset nodesOffset
+ */
+ static addNodes(builder: flatbuffers.Builder, nodesOffset: flatbuffers.Offset) {
+ builder.addFieldOffset(1, nodesOffset, 0);
+ }
+ /**
+ * @param flatbuffers.Builder builder
+ * @param number numElems
+ */
+ static startNodesVector(builder: flatbuffers.Builder, numElems: number) {
+ builder.startVector(16, numElems, 8);
+ }
+ /**
+ * @param flatbuffers.Builder builder
+ * @param flatbuffers.Offset buffersOffset
+ */
+ static addBuffers(builder: flatbuffers.Builder, buffersOffset: flatbuffers.Offset) {
+ builder.addFieldOffset(2, buffersOffset, 0);
+ }
+ /**
+ * @param flatbuffers.Builder builder
+ * @param number numElems
+ */
+ static startBuffersVector(builder: flatbuffers.Builder, numElems: number) {
+ builder.startVector(16, numElems, 8);
+ }
+ /**
+ * @param flatbuffers.Builder builder
+ * @param flatbuffers.Offset compressionOffset
+ */
+ static addCompression(builder: flatbuffers.Builder, compressionOffset: flatbuffers.Offset) {
+ builder.addFieldOffset(3, compressionOffset, 0);
+ }
+ /**
+ * @param flatbuffers.Builder builder
+ * @returns flatbuffers.Offset
+ */
+ static endRecordBatch(builder: flatbuffers.Builder): flatbuffers.Offset {
+ const offset = builder.endObject();
+ return offset;
+ }
+ static createRecordBatch(builder: flatbuffers.Builder, length: flatbuffers.Long, nodesOffset: flatbuffers.Offset, buffersOffset: flatbuffers.Offset, compressionOffset: flatbuffers.Offset): flatbuffers.Offset {
+ RecordBatch.startRecordBatch(builder);
+ RecordBatch.addLength(builder, length);
+ RecordBatch.addNodes(builder, nodesOffset);
+ RecordBatch.addBuffers(builder, buffersOffset);
+ RecordBatch.addCompression(builder, compressionOffset);
+ return RecordBatch.endRecordBatch(builder);
+ }
+/**
+ * For sending dictionary encoding information. Any Field can be
+ * dictionary-encoded, but in this case none of its children may be
+ * dictionary-encoded.
+ * There is one vector / column per dictionary, but that vector / column
+ * may be spread across multiple dictionary batches by using the isDelta
+ * flag
+ *
+ * @constructor
+ */
+export class DictionaryBatch {
+ bb: flatbuffers.ByteBuffer | null = null;
+ bb_pos: number = 0;
+ /**
+ * Binds this accessor to position `i` inside buffer `bb`.
+ *
+ * @param number i
+ * @param flatbuffers.ByteBuffer bb
+ * @returns DictionaryBatch
+ */
+ __init(i: number, bb: flatbuffers.ByteBuffer): DictionaryBatch {
+ this.bb_pos = i;
+ this.bb = bb;
+ return this;
+ }
+ /**
+ * Reads the root offset at the buffer's current position and binds `obj`
+ * (or a new DictionaryBatch) to the table it points at.
+ *
+ * @param flatbuffers.ByteBuffer bb
+ * @param DictionaryBatch= obj
+ * @returns DictionaryBatch
+ */
+ static getRootAsDictionaryBatch(bb: flatbuffers.ByteBuffer, obj?: DictionaryBatch): DictionaryBatch {
+ return (obj || new DictionaryBatch()).__init(bb.readInt32(bb.position()) + bb.position(), bb);
+ }
+ /**
+ * Same as getRootAsDictionaryBatch, but first advances the buffer position
+ * past the size prefix (this mutates `bb`'s position).
+ *
+ * @param flatbuffers.ByteBuffer bb
+ * @param DictionaryBatch= obj
+ * @returns DictionaryBatch
+ */
+ static getSizePrefixedRootAsDictionaryBatch(bb: flatbuffers.ByteBuffer, obj?: DictionaryBatch): DictionaryBatch {
+ bb.setPosition(bb.position() + flatbuffers.SIZE_PREFIX_LENGTH);
+ return (obj || new DictionaryBatch()).__init(bb.readInt32(bb.position()) + bb.position(), bb);
+ }
+ /**
+ * The `id` field; a zero Long when the field is absent.
+ *
+ * @returns flatbuffers.Long
+ */
+ id(): flatbuffers.Long {
+ const offset = this.bb!.__offset(this.bb_pos, 4);
+ return offset ? this.bb!.readInt64(this.bb_pos + offset) : this.bb!.createLong(0, 0);
+ }
+ /**
+ * The nested RecordBatch table; null when the field is absent.
+ *
+ * @param RecordBatch= obj
+ * @returns RecordBatch|null
+ */
+ data(obj?: RecordBatch): RecordBatch | null {
+ const offset = this.bb!.__offset(this.bb_pos, 6);
+ return offset ? (obj || new RecordBatch()).__init(this.bb!.__indirect(this.bb_pos + offset), this.bb!) : null;
+ }
+ /**
+ * If isDelta is true the values in the dictionary are to be appended to a
+ * dictionary with the indicated id. If isDelta is false this dictionary
+ * should replace the existing dictionary.
+ *
+ * @returns boolean
+ */
+ isDelta(): boolean {
+ const offset = this.bb!.__offset(this.bb_pos, 8);
+ // The flag is stored as an int8; `!!` converts it to a boolean.
+ return offset ? !!this.bb!.readInt8(this.bb_pos + offset) : false;
+ }
+ /**
+ * Begins a DictionaryBatch table (started with a field count of 3).
+ *
+ * @param flatbuffers.Builder builder
+ */
+ static startDictionaryBatch(builder: flatbuffers.Builder) {
+ builder.startObject(3);
+ }
+ /**
+ * Writes `id` into field slot 0; a zero Long is passed as the default.
+ *
+ * @param flatbuffers.Builder builder
+ * @param flatbuffers.Long id
+ */
+ static addId(builder: flatbuffers.Builder, id: flatbuffers.Long) {
+ builder.addFieldInt64(0, id, builder.createLong(0, 0));
+ }
+ /**
+ * Writes the data table offset into field slot 1.
+ *
+ * @param flatbuffers.Builder builder
+ * @param flatbuffers.Offset dataOffset
+ */
+ static addData(builder: flatbuffers.Builder, dataOffset: flatbuffers.Offset) {
+ builder.addFieldOffset(1, dataOffset, 0);
+ }
+ /**
+ * Writes `isDelta` into field slot 2 as an int8 (unary `+` converts the
+ * boolean to 0 or 1).
+ *
+ * @param flatbuffers.Builder builder
+ * @param boolean isDelta
+ */
+ static addIsDelta(builder: flatbuffers.Builder, isDelta: boolean) {
+ builder.addFieldInt8(2, +isDelta, +false);
+ }
+ /**
+ * Finishes the DictionaryBatch table and returns the offset reported by the builder.
+ *
+ * @param flatbuffers.Builder builder
+ * @returns flatbuffers.Offset
+ */
+ static endDictionaryBatch(builder: flatbuffers.Builder): flatbuffers.Offset {
+ const offset = builder.endObject();
+ return offset;
+ }
+ /**
+ * Convenience wrapper that starts a DictionaryBatch table, adds all three
+ * fields, and ends the table in one call.
+ *
+ * @param flatbuffers.Builder builder
+ * @param flatbuffers.Long id
+ * @param flatbuffers.Offset dataOffset
+ * @param boolean isDelta
+ * @returns flatbuffers.Offset
+ */
+ static createDictionaryBatch(builder: flatbuffers.Builder, id: flatbuffers.Long, dataOffset: flatbuffers.Offset, isDelta: boolean): flatbuffers.Offset {
+ DictionaryBatch.startDictionaryBatch(builder);
+ DictionaryBatch.addId(builder, id);
+ DictionaryBatch.addData(builder, dataOffset);
+ DictionaryBatch.addIsDelta(builder, isDelta);
+ return DictionaryBatch.endDictionaryBatch(builder);
+ }
+}
+/**
+ * @constructor
+ */
+export class Message {
+ bb: flatbuffers.ByteBuffer | null = null;
+ bb_pos: number = 0;
+ /**
+ * Binds this accessor to position `i` inside buffer `bb`.
+ *
+ * @param number i
+ * @param flatbuffers.ByteBuffer bb
+ * @returns Message
+ */
+ __init(i: number, bb: flatbuffers.ByteBuffer): Message {
+ this.bb_pos = i;
+ this.bb = bb;
+ return this;
+ }
+ /**
+ * Reads the root offset at the buffer's current position and binds `obj`
+ * (or a new Message) to the table it points at.
+ *
+ * @param flatbuffers.ByteBuffer bb
+ * @param Message= obj
+ * @returns Message
+ */
+ static getRootAsMessage(bb: flatbuffers.ByteBuffer, obj?: Message): Message {
+ return (obj || new Message()).__init(bb.readInt32(bb.position()) + bb.position(), bb);
+ }
+ /**
+ * Same as getRootAsMessage, but first advances the buffer position past
+ * the size prefix (this mutates `bb`'s position).
+ *
+ * @param flatbuffers.ByteBuffer bb
+ * @param Message= obj
+ * @returns Message
+ */
+ static getSizePrefixedRootAsMessage(bb: flatbuffers.ByteBuffer, obj?: Message): Message {
+ bb.setPosition(bb.position() + flatbuffers.SIZE_PREFIX_LENGTH);
+ return (obj || new Message()).__init(bb.readInt32(bb.position()) + bb.position(), bb);
+ }
+ /**
+ * The `version` field (read as an int16); `MetadataVersion.V1` when absent.
+ *
+ * @returns MetadataVersion
+ */
+ version(): NS13596923344997147894.MetadataVersion {
+ const offset = this.bb!.__offset(this.bb_pos, 4);
+ return offset ? /** */ (this.bb!.readInt16(this.bb_pos + offset)) : NS13596923344997147894.MetadataVersion.V1;
+ }
+ /**
+ * The union discriminator for `header` (read as a uint8);
+ * `MessageHeader.NONE` when absent.
+ *
+ * @returns MessageHeader
+ */
+ headerType(): MessageHeader {
+ const offset = this.bb!.__offset(this.bb_pos, 6);
+ return offset ? /** */ (this.bb!.readUint8(this.bb_pos + offset)) : MessageHeader.NONE;
+ }
+ /**
+ * Resolves the `header` union into the caller-supplied table `obj`;
+ * null when the field is absent.
+ *
+ * @param flatbuffers.Table obj
+ * @returns ?flatbuffers.Table
+ */
+ header<T extends flatbuffers.Table>(obj: T): T | null {
+ const offset = this.bb!.__offset(this.bb_pos, 8);
+ return offset ? this.bb!.__union(obj, this.bb_pos + offset) : null;
+ }
+ /**
+ * The `bodyLength` field; a zero Long when absent.
+ *
+ * @returns flatbuffers.Long
+ */
+ bodyLength(): flatbuffers.Long {
+ const offset = this.bb!.__offset(this.bb_pos, 10);
+ return offset ? this.bb!.readInt64(this.bb_pos + offset) : this.bb!.createLong(0, 0);
+ }
+ /**
+ * The `index`-th custom metadata entry; null when the vector is absent.
+ *
+ * @param number index
+ * @param KeyValue= obj
+ * @returns KeyValue
+ */
+ customMetadata(index: number, obj?: NS13596923344997147894.KeyValue): NS13596923344997147894.KeyValue | null {
+ const offset = this.bb!.__offset(this.bb_pos, 12);
+ // The vector holds 4-byte offsets; each one is followed with __indirect.
+ return offset ? (obj || new NS13596923344997147894.KeyValue()).__init(this.bb!.__indirect(this.bb!.__vector(this.bb_pos + offset) + index * 4), this.bb!) : null;
+ }
+ /**
+ * Number of custom metadata entries; 0 when the vector is absent.
+ *
+ * @returns number
+ */
+ customMetadataLength(): number {
+ const offset = this.bb!.__offset(this.bb_pos, 12);
+ return offset ? this.bb!.__vector_len(this.bb_pos + offset) : 0;
+ }
+ /**
+ * Begins a Message table (started with a field count of 5).
+ *
+ * @param flatbuffers.Builder builder
+ */
+ static startMessage(builder: flatbuffers.Builder) {
+ builder.startObject(5);
+ }
+ /**
+ * Writes `version` into field slot 0; `MetadataVersion.V1` is passed as the default.
+ *
+ * @param flatbuffers.Builder builder
+ * @param MetadataVersion version
+ */
+ static addVersion(builder: flatbuffers.Builder, version: NS13596923344997147894.MetadataVersion) {
+ builder.addFieldInt16(0, version, NS13596923344997147894.MetadataVersion.V1);
+ }
+ /**
+ * Writes `headerType` into field slot 1; `MessageHeader.NONE` is passed as the default.
+ *
+ * @param flatbuffers.Builder builder
+ * @param MessageHeader headerType
+ */
+ static addHeaderType(builder: flatbuffers.Builder, headerType: MessageHeader) {
+ builder.addFieldInt8(1, headerType, MessageHeader.NONE);
+ }
+ /**
+ * Writes the header table offset into field slot 2.
+ *
+ * @param flatbuffers.Builder builder
+ * @param flatbuffers.Offset headerOffset
+ */
+ static addHeader(builder: flatbuffers.Builder, headerOffset: flatbuffers.Offset) {
+ builder.addFieldOffset(2, headerOffset, 0);
+ }
+ /**
+ * Writes `bodyLength` into field slot 3; a zero Long is passed as the default.
+ *
+ * @param flatbuffers.Builder builder
+ * @param flatbuffers.Long bodyLength
+ */
+ static addBodyLength(builder: flatbuffers.Builder, bodyLength: flatbuffers.Long) {
+ builder.addFieldInt64(3, bodyLength, builder.createLong(0, 0));
+ }
+ /**
+ * Writes the custom metadata vector offset into field slot 4.
+ *
+ * @param flatbuffers.Builder builder
+ * @param flatbuffers.Offset customMetadataOffset
+ */
+ static addCustomMetadata(builder: flatbuffers.Builder, customMetadataOffset: flatbuffers.Offset) {
+ builder.addFieldOffset(4, customMetadataOffset, 0);
+ }
+ /**
+ * Builds the custom metadata vector from already-written table offsets.
+ *
+ * @param flatbuffers.Builder builder
+ * @param Array.<flatbuffers.Offset> data
+ * @returns flatbuffers.Offset
+ */
+ static createCustomMetadataVector(builder: flatbuffers.Builder, data: flatbuffers.Offset[]): flatbuffers.Offset {
+ builder.startVector(4, data.length, 4);
+ // Elements are added last-to-first.
+ for (let i = data.length - 1; i >= 0; i--) {
+ builder.addOffset(data[i]);
+ }
+ return builder.endVector();
+ }
+ /**
+ * Starts the custom metadata vector: 4-byte elements, alignment 4.
+ *
+ * @param flatbuffers.Builder builder
+ * @param number numElems
+ */
+ static startCustomMetadataVector(builder: flatbuffers.Builder, numElems: number) {
+ builder.startVector(4, numElems, 4);
+ }
+ /**
+ * Finishes the Message table and returns the offset reported by the builder.
+ *
+ * @param flatbuffers.Builder builder
+ * @returns flatbuffers.Offset
+ */
+ static endMessage(builder: flatbuffers.Builder): flatbuffers.Offset {
+ const offset = builder.endObject();
+ return offset;
+ }
+ /**
+ * Finalizes the buffer with `offset` as its root.
+ *
+ * @param flatbuffers.Builder builder
+ * @param flatbuffers.Offset offset
+ */
+ static finishMessageBuffer(builder: flatbuffers.Builder, offset: flatbuffers.Offset) {
+ builder.finish(offset);
+ }
+ /**
+ * Finalizes the buffer with `offset` as its root, passing `true` as the
+ * third argument of `builder.finish` (the size-prefixed variant).
+ *
+ * @param flatbuffers.Builder builder
+ * @param flatbuffers.Offset offset
+ */
+ static finishSizePrefixedMessageBuffer(builder: flatbuffers.Builder, offset: flatbuffers.Offset) {
+ builder.finish(offset, undefined, true);
+ }
+ /**
+ * Convenience wrapper that starts a Message table, adds all five fields,
+ * and ends the table in one call.
+ *
+ * @param flatbuffers.Builder builder
+ * @param MetadataVersion version
+ * @param MessageHeader headerType
+ * @param flatbuffers.Offset headerOffset
+ * @param flatbuffers.Long bodyLength
+ * @param flatbuffers.Offset customMetadataOffset
+ * @returns flatbuffers.Offset
+ */
+ static createMessage(builder: flatbuffers.Builder, version: NS13596923344997147894.MetadataVersion, headerType: MessageHeader, headerOffset: flatbuffers.Offset, bodyLength: flatbuffers.Long, customMetadataOffset: flatbuffers.Offset): flatbuffers.Offset {
+ Message.startMessage(builder);
+ Message.addVersion(builder, version);
+ Message.addHeaderType(builder, headerType);
+ Message.addHeader(builder, headerOffset);
+ Message.addBodyLength(builder, bodyLength);
+ Message.addCustomMetadata(builder, customMetadataOffset);
+ return Message.endMessage(builder);
+ }
+}
diff --git a/src/arrow/js/src/fb/Schema.ts b/src/arrow/js/src/fb/Schema.ts
new file mode 100644
index 000000000..f675bc2a0
--- /dev/null
+++ b/src/arrow/js/src/fb/Schema.ts
@@ -0,0 +1,2658 @@
+// automatically generated by the FlatBuffers compiler, do not modify
+import { flatbuffers } from 'flatbuffers';
+/**
+ * Logical types, vector layouts, and schemas
+ *
+ * @enum {number}
+ */
+export enum MetadataVersion {
+ /**
+ * 0.1.0 (October 2016).
+ */
+ V1 = 0,
+ /**
+ * 0.2.0 (February 2017). Non-backwards compatible with V1.
+ */
+ V2 = 1,
+ /**
+ * 0.3.0 -> 0.7.1 (May - December 2017). Non-backwards compatible with V2.
+ */
+ V3 = 2,
+ /**
+ * >= 0.8.0 (December 2017). Non-backwards compatible with V3.
+ */
+ V4 = 3,
+ /**
+ * >= 1.0.0 (July 2020). Backwards compatible with V4 (V5 readers can read V4
+ * metadata and IPC messages). Implementations are recommended to provide a
+ * V4 compatibility mode with V5 format changes disabled.
+ *
+ * Incompatible changes between V4 and V5:
+ * - Union buffer layout has changed. In V5, Unions don't have a validity
+ * bitmap buffer.
+ */
+ V5 = 4
+}
+/**
+ * Represents Arrow Features that might not have full support
+ * within implementations. This is intended to be used in
+ * two scenarios:
+ * 1. A mechanism for readers of Arrow Streams
+ * and files to understand that the stream or file makes
+ * use of a feature that isn't supported or unknown to
+ * the implementation (and therefore can meet the Arrow
+ * forward compatibility guarantees).
+ * 2. A means of negotiating between a client and server
+ * what features a stream is allowed to use. The enums
+ * values here are intended to represent higher level
+ * features, additional details may be negotiated
+ * with key-value pairs specific to the protocol.
+ *
+ * Enums added to this list should be assigned power-of-two values
+ * to facilitate exchanging and comparing bitmaps for supported
+ * features.
+ *
+ * @enum {number}
+ */
+export enum Feature {
+ /**
+ * Needed to make flatbuffers happy.
+ */
+ UNUSED = 0,
+ /**
+ * The stream makes use of multiple full dictionaries with the
+ * same ID and assumes clients implement dictionary replacement
+ * correctly.
+ */
+ DICTIONARY_REPLACEMENT = 1,
+ /**
+ * The stream makes use of compressed bodies as described
+ * in Message.fbs.
+ */
+ COMPRESSED_BODY = 2
+}
+/**
+ * Layout mode of a Union type; `Sparse` (0) is the default that
+ * `Union.mode()` returns when the field is absent.
+ *
+ * @enum {number}
+ */
+export enum UnionMode {
+ Sparse = 0,
+ Dense = 1
+}
+/**
+ * Precision levels: half, single, double.
+ *
+ * @enum {number}
+ */
+export enum Precision {
+ HALF = 0,
+ SINGLE = 1,
+ DOUBLE = 2
+}
+/**
+ * Unit of a Date value.
+ *
+ * @enum {number}
+ */
+export enum DateUnit {
+ DAY = 0,
+ MILLISECOND = 1
+}
+/**
+ * Unit of a time-based value.
+ *
+ * @enum {number}
+ */
+export enum TimeUnit {
+ SECOND = 0,
+ MILLISECOND = 1,
+ MICROSECOND = 2,
+ NANOSECOND = 3
+}
+/**
+ * Unit of an Interval value.
+ *
+ * @enum {number}
+ */
+export enum IntervalUnit {
+ YEAR_MONTH = 0,
+ DAY_TIME = 1
+}
+/**
+ * ----------------------------------------------------------------------
+ * Top-level Type value, enabling extensible type-specific metadata. We can
+ * add new logical types to Type without breaking backwards compatibility
+ *
+ * @enum {number}
+ */
+export enum Type {
+ NONE = 0,
+ Null = 1,
+ Int = 2,
+ FloatingPoint = 3,
+ Binary = 4,
+ Utf8 = 5,
+ Bool = 6,
+ Decimal = 7,
+ Date = 8,
+ Time = 9,
+ Timestamp = 10,
+ Interval = 11,
+ List = 12,
+ Struct_ = 13,
+ Union = 14,
+ FixedSizeBinary = 15,
+ FixedSizeList = 16,
+ Map = 17,
+ Duration = 18,
+ LargeBinary = 19,
+ LargeUtf8 = 20,
+ LargeList = 21
+}
+/**
+ * ----------------------------------------------------------------------
+ * Dictionary encoding metadata
+ * Maintained for forwards compatibility, in the future
+ * Dictionaries might be explicit maps between integers and values
+ * allowing for non-contiguous index values
+ *
+ * @enum {number}
+ */
+export enum DictionaryKind {
+ DenseArray = 0
+}
+/**
+ * ----------------------------------------------------------------------
+ * Endianness of the platform producing the data
+ *
+ * @enum {number}
+ */
+export enum Endianness {
+ Little = 0,
+ Big = 1
+}
+/**
+ * These are stored in the flatbuffer in the Type union below
+ *
+ * @constructor
+ */
+export class Null {
+ bb: flatbuffers.ByteBuffer | null = null;
+ bb_pos: number = 0;
+ /**
+ * Binds this accessor to position `i` inside buffer `bb`.
+ *
+ * @param number i
+ * @param flatbuffers.ByteBuffer bb
+ * @returns Null
+ */
+ __init(i: number, bb: flatbuffers.ByteBuffer): Null {
+ this.bb_pos = i;
+ this.bb = bb;
+ return this;
+ }
+ /**
+ * Reads the root offset at the buffer's current position and binds `obj`
+ * (or a new Null) to the table it points at.
+ *
+ * @param flatbuffers.ByteBuffer bb
+ * @param Null= obj
+ * @returns Null
+ */
+ static getRootAsNull(bb: flatbuffers.ByteBuffer, obj?: Null): Null {
+ return (obj || new Null()).__init(bb.readInt32(bb.position()) + bb.position(), bb);
+ }
+ /**
+ * Same as getRootAsNull, but first advances the buffer position past the
+ * size prefix (this mutates `bb`'s position).
+ *
+ * @param flatbuffers.ByteBuffer bb
+ * @param Null= obj
+ * @returns Null
+ */
+ static getSizePrefixedRootAsNull(bb: flatbuffers.ByteBuffer, obj?: Null): Null {
+ bb.setPosition(bb.position() + flatbuffers.SIZE_PREFIX_LENGTH);
+ return (obj || new Null()).__init(bb.readInt32(bb.position()) + bb.position(), bb);
+ }
+ /**
+ * Begins a Null table (started with a field count of 0).
+ *
+ * @param flatbuffers.Builder builder
+ */
+ static startNull(builder: flatbuffers.Builder) {
+ builder.startObject(0);
+ }
+ /**
+ * Finishes the Null table and returns the offset reported by the builder.
+ *
+ * @param flatbuffers.Builder builder
+ * @returns flatbuffers.Offset
+ */
+ static endNull(builder: flatbuffers.Builder): flatbuffers.Offset {
+ const offset = builder.endObject();
+ return offset;
+ }
+ /**
+ * Convenience wrapper that starts and immediately ends a Null table.
+ *
+ * @param flatbuffers.Builder builder
+ * @returns flatbuffers.Offset
+ */
+ static createNull(builder: flatbuffers.Builder): flatbuffers.Offset {
+ Null.startNull(builder);
+ return Null.endNull(builder);
+ }
+}
+/**
+ * A Struct_ in the flatbuffer metadata is the same as an Arrow Struct
+ * (according to the physical memory layout). We used Struct_ here as
+ * Struct is a reserved word in Flatbuffers
+ *
+ * @constructor
+ */
+export class Struct_ {
+ bb: flatbuffers.ByteBuffer | null = null;
+ bb_pos: number = 0;
+ /**
+ * Binds this accessor to position `i` inside buffer `bb`.
+ *
+ * @param number i
+ * @param flatbuffers.ByteBuffer bb
+ * @returns Struct_
+ */
+ __init(i: number, bb: flatbuffers.ByteBuffer): Struct_ {
+ this.bb_pos = i;
+ this.bb = bb;
+ return this;
+ }
+ /**
+ * Reads the root offset at the buffer's current position and binds `obj`
+ * (or a new Struct_) to the table it points at.
+ *
+ * @param flatbuffers.ByteBuffer bb
+ * @param Struct_= obj
+ * @returns Struct_
+ */
+ static getRootAsStruct_(bb: flatbuffers.ByteBuffer, obj?: Struct_): Struct_ {
+ return (obj || new Struct_()).__init(bb.readInt32(bb.position()) + bb.position(), bb);
+ }
+ /**
+ * Same as getRootAsStruct_, but first advances the buffer position past
+ * the size prefix (this mutates `bb`'s position).
+ *
+ * @param flatbuffers.ByteBuffer bb
+ * @param Struct_= obj
+ * @returns Struct_
+ */
+ static getSizePrefixedRootAsStruct_(bb: flatbuffers.ByteBuffer, obj?: Struct_): Struct_ {
+ bb.setPosition(bb.position() + flatbuffers.SIZE_PREFIX_LENGTH);
+ return (obj || new Struct_()).__init(bb.readInt32(bb.position()) + bb.position(), bb);
+ }
+ /**
+ * Begins a Struct_ table (started with a field count of 0).
+ *
+ * @param flatbuffers.Builder builder
+ */
+ static startStruct_(builder: flatbuffers.Builder) {
+ builder.startObject(0);
+ }
+ /**
+ * Finishes the Struct_ table and returns the offset reported by the builder.
+ *
+ * @param flatbuffers.Builder builder
+ * @returns flatbuffers.Offset
+ */
+ static endStruct_(builder: flatbuffers.Builder): flatbuffers.Offset {
+ const offset = builder.endObject();
+ return offset;
+ }
+ /**
+ * Convenience wrapper that starts and immediately ends a Struct_ table.
+ *
+ * @param flatbuffers.Builder builder
+ * @returns flatbuffers.Offset
+ */
+ static createStruct_(builder: flatbuffers.Builder): flatbuffers.Offset {
+ Struct_.startStruct_(builder);
+ return Struct_.endStruct_(builder);
+ }
+}
+/**
+ * @constructor
+ */
+export class List {
+ bb: flatbuffers.ByteBuffer | null = null;
+ bb_pos: number = 0;
+ /**
+ * Binds this accessor to position `i` inside buffer `bb`.
+ *
+ * @param number i
+ * @param flatbuffers.ByteBuffer bb
+ * @returns List
+ */
+ __init(i: number, bb: flatbuffers.ByteBuffer): List {
+ this.bb_pos = i;
+ this.bb = bb;
+ return this;
+ }
+ /**
+ * Reads the root offset at the buffer's current position and binds `obj`
+ * (or a new List) to the table it points at.
+ *
+ * @param flatbuffers.ByteBuffer bb
+ * @param List= obj
+ * @returns List
+ */
+ static getRootAsList(bb: flatbuffers.ByteBuffer, obj?: List): List {
+ return (obj || new List()).__init(bb.readInt32(bb.position()) + bb.position(), bb);
+ }
+ /**
+ * Same as getRootAsList, but first advances the buffer position past the
+ * size prefix (this mutates `bb`'s position).
+ *
+ * @param flatbuffers.ByteBuffer bb
+ * @param List= obj
+ * @returns List
+ */
+ static getSizePrefixedRootAsList(bb: flatbuffers.ByteBuffer, obj?: List): List {
+ bb.setPosition(bb.position() + flatbuffers.SIZE_PREFIX_LENGTH);
+ return (obj || new List()).__init(bb.readInt32(bb.position()) + bb.position(), bb);
+ }
+ /**
+ * Begins a List table (started with a field count of 0).
+ *
+ * @param flatbuffers.Builder builder
+ */
+ static startList(builder: flatbuffers.Builder) {
+ builder.startObject(0);
+ }
+ /**
+ * Finishes the List table and returns the offset reported by the builder.
+ *
+ * @param flatbuffers.Builder builder
+ * @returns flatbuffers.Offset
+ */
+ static endList(builder: flatbuffers.Builder): flatbuffers.Offset {
+ const offset = builder.endObject();
+ return offset;
+ }
+ /**
+ * Convenience wrapper that starts and immediately ends a List table.
+ *
+ * @param flatbuffers.Builder builder
+ * @returns flatbuffers.Offset
+ */
+ static createList(builder: flatbuffers.Builder): flatbuffers.Offset {
+ List.startList(builder);
+ return List.endList(builder);
+ }
+}
+/**
+ * Same as List, but with 64-bit offsets, allowing to represent
+ * extremely large data values.
+ *
+ * @constructor
+ */
+export class LargeList {
+ bb: flatbuffers.ByteBuffer | null = null;
+ bb_pos: number = 0;
+ /**
+ * Binds this accessor to position `i` inside buffer `bb`.
+ *
+ * @param number i
+ * @param flatbuffers.ByteBuffer bb
+ * @returns LargeList
+ */
+ __init(i: number, bb: flatbuffers.ByteBuffer): LargeList {
+ this.bb_pos = i;
+ this.bb = bb;
+ return this;
+ }
+ /**
+ * Reads the root offset at the buffer's current position and binds `obj`
+ * (or a new LargeList) to the table it points at.
+ *
+ * @param flatbuffers.ByteBuffer bb
+ * @param LargeList= obj
+ * @returns LargeList
+ */
+ static getRootAsLargeList(bb: flatbuffers.ByteBuffer, obj?: LargeList): LargeList {
+ return (obj || new LargeList()).__init(bb.readInt32(bb.position()) + bb.position(), bb);
+ }
+ /**
+ * Same as getRootAsLargeList, but first advances the buffer position past
+ * the size prefix (this mutates `bb`'s position).
+ *
+ * @param flatbuffers.ByteBuffer bb
+ * @param LargeList= obj
+ * @returns LargeList
+ */
+ static getSizePrefixedRootAsLargeList(bb: flatbuffers.ByteBuffer, obj?: LargeList): LargeList {
+ bb.setPosition(bb.position() + flatbuffers.SIZE_PREFIX_LENGTH);
+ return (obj || new LargeList()).__init(bb.readInt32(bb.position()) + bb.position(), bb);
+ }
+ /**
+ * Begins a LargeList table (started with a field count of 0).
+ *
+ * @param flatbuffers.Builder builder
+ */
+ static startLargeList(builder: flatbuffers.Builder) {
+ builder.startObject(0);
+ }
+ /**
+ * Finishes the LargeList table and returns the offset reported by the builder.
+ *
+ * @param flatbuffers.Builder builder
+ * @returns flatbuffers.Offset
+ */
+ static endLargeList(builder: flatbuffers.Builder): flatbuffers.Offset {
+ const offset = builder.endObject();
+ return offset;
+ }
+ /**
+ * Convenience wrapper that starts and immediately ends a LargeList table.
+ *
+ * @param flatbuffers.Builder builder
+ * @returns flatbuffers.Offset
+ */
+ static createLargeList(builder: flatbuffers.Builder): flatbuffers.Offset {
+ LargeList.startLargeList(builder);
+ return LargeList.endLargeList(builder);
+ }
+}
+/**
+ * @constructor
+ */
+export class FixedSizeList {
+ bb: flatbuffers.ByteBuffer | null = null;
+ bb_pos: number = 0;
+ /**
+ * Binds this accessor to position `i` inside buffer `bb`.
+ *
+ * @param number i
+ * @param flatbuffers.ByteBuffer bb
+ * @returns FixedSizeList
+ */
+ __init(i: number, bb: flatbuffers.ByteBuffer): FixedSizeList {
+ this.bb_pos = i;
+ this.bb = bb;
+ return this;
+ }
+ /**
+ * Reads the root offset at the buffer's current position and binds `obj`
+ * (or a new FixedSizeList) to the table it points at.
+ *
+ * @param flatbuffers.ByteBuffer bb
+ * @param FixedSizeList= obj
+ * @returns FixedSizeList
+ */
+ static getRootAsFixedSizeList(bb: flatbuffers.ByteBuffer, obj?: FixedSizeList): FixedSizeList {
+ return (obj || new FixedSizeList()).__init(bb.readInt32(bb.position()) + bb.position(), bb);
+ }
+ /**
+ * Same as getRootAsFixedSizeList, but first advances the buffer position
+ * past the size prefix (this mutates `bb`'s position).
+ *
+ * @param flatbuffers.ByteBuffer bb
+ * @param FixedSizeList= obj
+ * @returns FixedSizeList
+ */
+ static getSizePrefixedRootAsFixedSizeList(bb: flatbuffers.ByteBuffer, obj?: FixedSizeList): FixedSizeList {
+ bb.setPosition(bb.position() + flatbuffers.SIZE_PREFIX_LENGTH);
+ return (obj || new FixedSizeList()).__init(bb.readInt32(bb.position()) + bb.position(), bb);
+ }
+ /**
+ * Number of list items per value
+ *
+ * @returns number
+ */
+ listSize(): number {
+ const offset = this.bb!.__offset(this.bb_pos, 4);
+ // Absent field: default to 0.
+ return offset ? this.bb!.readInt32(this.bb_pos + offset) : 0;
+ }
+ /**
+ * Begins a FixedSizeList table (started with a field count of 1).
+ *
+ * @param flatbuffers.Builder builder
+ */
+ static startFixedSizeList(builder: flatbuffers.Builder) {
+ builder.startObject(1);
+ }
+ /**
+ * Writes `listSize` into field slot 0; 0 is passed as the default.
+ *
+ * @param flatbuffers.Builder builder
+ * @param number listSize
+ */
+ static addListSize(builder: flatbuffers.Builder, listSize: number) {
+ builder.addFieldInt32(0, listSize, 0);
+ }
+ /**
+ * Finishes the FixedSizeList table and returns the offset reported by the builder.
+ *
+ * @param flatbuffers.Builder builder
+ * @returns flatbuffers.Offset
+ */
+ static endFixedSizeList(builder: flatbuffers.Builder): flatbuffers.Offset {
+ const offset = builder.endObject();
+ return offset;
+ }
+ /**
+ * Convenience wrapper that starts a FixedSizeList table, adds its single
+ * field, and ends the table in one call.
+ *
+ * @param flatbuffers.Builder builder
+ * @param number listSize
+ * @returns flatbuffers.Offset
+ */
+ static createFixedSizeList(builder: flatbuffers.Builder, listSize: number): flatbuffers.Offset {
+ FixedSizeList.startFixedSizeList(builder);
+ FixedSizeList.addListSize(builder, listSize);
+ return FixedSizeList.endFixedSizeList(builder);
+ }
+}
+/**
+ * A Map is a logical nested type that is represented as
+ *
+ * List<entries: Struct<key: K, value: V>>
+ *
+ * In this layout, the keys and values are each respectively contiguous. We do
+ * not constrain the key and value types, so the application is responsible
+ * for ensuring that the keys are hashable and unique. Whether the keys are sorted
+ * may be set in the metadata for this field.
+ *
+ * In a field with Map type, the field has a child Struct field, which then
+ * has two children: key type and the second the value type. The names of the
+ * child fields may be respectively "entries", "key", and "value", but this is
+ * not enforced.
+ *
+ * Map
+ * - child[0] entries: Struct
+ * - child[0] key: K
+ * - child[1] value: V
+ *
+ * Neither the "entries" field nor the "key" field may be nullable.
+ *
+ * The metadata is structured so that Arrow systems without special handling
+ * for Map can make Map an alias for List. The "layout" attribute for the Map
+ * field must have the same contents as a List.
+ *
+ * @constructor
+ */
+export class Map {
+ bb: flatbuffers.ByteBuffer | null = null;
+ bb_pos: number = 0;
+ /**
+ * Binds this accessor to position `i` inside buffer `bb`.
+ *
+ * @param number i
+ * @param flatbuffers.ByteBuffer bb
+ * @returns Map
+ */
+ __init(i: number, bb: flatbuffers.ByteBuffer): Map {
+ this.bb_pos = i;
+ this.bb = bb;
+ return this;
+ }
+ /**
+ * Reads the root offset at the buffer's current position and binds `obj`
+ * (or a new Map) to the table it points at.
+ *
+ * @param flatbuffers.ByteBuffer bb
+ * @param Map= obj
+ * @returns Map
+ */
+ static getRootAsMap(bb: flatbuffers.ByteBuffer, obj?: Map): Map {
+ return (obj || new Map()).__init(bb.readInt32(bb.position()) + bb.position(), bb);
+ }
+ /**
+ * Same as getRootAsMap, but first advances the buffer position past the
+ * size prefix (this mutates `bb`'s position).
+ *
+ * @param flatbuffers.ByteBuffer bb
+ * @param Map= obj
+ * @returns Map
+ */
+ static getSizePrefixedRootAsMap(bb: flatbuffers.ByteBuffer, obj?: Map): Map {
+ bb.setPosition(bb.position() + flatbuffers.SIZE_PREFIX_LENGTH);
+ return (obj || new Map()).__init(bb.readInt32(bb.position()) + bb.position(), bb);
+ }
+ /**
+ * Set to true if the keys within each value are sorted
+ *
+ * @returns boolean
+ */
+ keysSorted(): boolean {
+ const offset = this.bb!.__offset(this.bb_pos, 4);
+ // The flag is stored as an int8; `!!` converts it to a boolean.
+ return offset ? !!this.bb!.readInt8(this.bb_pos + offset) : false;
+ }
+ /**
+ * Begins a Map table (started with a field count of 1).
+ *
+ * @param flatbuffers.Builder builder
+ */
+ static startMap(builder: flatbuffers.Builder) {
+ builder.startObject(1);
+ }
+ /**
+ * Writes `keysSorted` into field slot 0 as an int8 (unary `+` converts
+ * the boolean to 0 or 1).
+ *
+ * @param flatbuffers.Builder builder
+ * @param boolean keysSorted
+ */
+ static addKeysSorted(builder: flatbuffers.Builder, keysSorted: boolean) {
+ builder.addFieldInt8(0, +keysSorted, +false);
+ }
+ /**
+ * Finishes the Map table and returns the offset reported by the builder.
+ *
+ * @param flatbuffers.Builder builder
+ * @returns flatbuffers.Offset
+ */
+ static endMap(builder: flatbuffers.Builder): flatbuffers.Offset {
+ const offset = builder.endObject();
+ return offset;
+ }
+ /**
+ * Convenience wrapper that starts a Map table, adds its single field,
+ * and ends the table in one call.
+ *
+ * @param flatbuffers.Builder builder
+ * @param boolean keysSorted
+ * @returns flatbuffers.Offset
+ */
+ static createMap(builder: flatbuffers.Builder, keysSorted: boolean): flatbuffers.Offset {
+ Map.startMap(builder);
+ Map.addKeysSorted(builder, keysSorted);
+ return Map.endMap(builder);
+ }
+}
+/**
+ * A union is a complex type with children in Field
+ * By default ids in the type vector refer to the offsets in the children
+ * optionally typeIds provides an indirection between the child offset and the type id
+ * for each child typeIds[offset] is the id used in the type vector
+ *
+ * @constructor
+ */
+export class Union {
+ bb: flatbuffers.ByteBuffer | null = null;
+ bb_pos: number = 0;
+ /**
+ * Binds this accessor to position `i` inside buffer `bb`.
+ *
+ * @param number i
+ * @param flatbuffers.ByteBuffer bb
+ * @returns Union
+ */
+ __init(i: number, bb: flatbuffers.ByteBuffer): Union {
+ this.bb_pos = i;
+ this.bb = bb;
+ return this;
+ }
+ /**
+ * Reads the root offset at the buffer's current position and binds `obj`
+ * (or a new Union) to the table it points at.
+ *
+ * @param flatbuffers.ByteBuffer bb
+ * @param Union= obj
+ * @returns Union
+ */
+ static getRootAsUnion(bb: flatbuffers.ByteBuffer, obj?: Union): Union {
+ return (obj || new Union()).__init(bb.readInt32(bb.position()) + bb.position(), bb);
+ }
+ /**
+ * Same as getRootAsUnion, but first advances the buffer position past the
+ * size prefix (this mutates `bb`'s position).
+ *
+ * @param flatbuffers.ByteBuffer bb
+ * @param Union= obj
+ * @returns Union
+ */
+ static getSizePrefixedRootAsUnion(bb: flatbuffers.ByteBuffer, obj?: Union): Union {
+ bb.setPosition(bb.position() + flatbuffers.SIZE_PREFIX_LENGTH);
+ return (obj || new Union()).__init(bb.readInt32(bb.position()) + bb.position(), bb);
+ }
+ /**
+ * The `mode` field (read as an int16); `UnionMode.Sparse` when absent.
+ *
+ * @returns UnionMode
+ */
+ mode(): UnionMode {
+ const offset = this.bb!.__offset(this.bb_pos, 4);
+ return offset ? /** */ (this.bb!.readInt16(this.bb_pos + offset)) : UnionMode.Sparse;
+ }
+ /**
+ * The `index`-th type id. Note that when the vector is absent this
+ * returns 0, not null, despite the nullable return type.
+ *
+ * @param number index
+ * @returns number
+ */
+ typeIds(index: number): number | null {
+ const offset = this.bb!.__offset(this.bb_pos, 6);
+ return offset ? this.bb!.readInt32(this.bb!.__vector(this.bb_pos + offset) + index * 4) : 0;
+ }
+ /**
+ * Number of entries in the typeIds vector; 0 when the field is absent.
+ *
+ * @returns number
+ */
+ typeIdsLength(): number {
+ const offset = this.bb!.__offset(this.bb_pos, 6);
+ return offset ? this.bb!.__vector_len(this.bb_pos + offset) : 0;
+ }
+ /**
+ * An Int32Array view over the typeIds vector, created on the byte
+ * buffer's underlying ArrayBuffer (no copy); null when the field is absent.
+ *
+ * @returns Int32Array
+ */
+ typeIdsArray(): Int32Array | null {
+ const offset = this.bb!.__offset(this.bb_pos, 6);
+ return offset ? new Int32Array(this.bb!.bytes().buffer, this.bb!.bytes().byteOffset + this.bb!.__vector(this.bb_pos + offset), this.bb!.__vector_len(this.bb_pos + offset)) : null;
+ }
+ /**
+ * Begins a Union table (started with a field count of 2).
+ *
+ * @param flatbuffers.Builder builder
+ */
+ static startUnion(builder: flatbuffers.Builder) {
+ builder.startObject(2);
+ }
+ /**
+ * Writes `mode` into field slot 0; `UnionMode.Sparse` is passed as the default.
+ *
+ * @param flatbuffers.Builder builder
+ * @param UnionMode mode
+ */
+ static addMode(builder: flatbuffers.Builder, mode: UnionMode) {
+ builder.addFieldInt16(0, mode, UnionMode.Sparse);
+ }
+ /**
+ * Writes the typeIds vector offset into field slot 1.
+ *
+ * @param flatbuffers.Builder builder
+ * @param flatbuffers.Offset typeIdsOffset
+ */
+ static addTypeIds(builder: flatbuffers.Builder, typeIdsOffset: flatbuffers.Offset) {
+ builder.addFieldOffset(1, typeIdsOffset, 0);
+ }
+ /**
+ * Builds the typeIds vector from `data`.
+ *
+ * @param flatbuffers.Builder builder
+ * @param Array.<number> data
+ * @returns flatbuffers.Offset
+ */
+ static createTypeIdsVector(builder: flatbuffers.Builder, data: number[] | Int32Array): flatbuffers.Offset {
+ builder.startVector(4, data.length, 4);
+ // Elements are added last-to-first.
+ for (let i = data.length - 1; i >= 0; i--) {
+ builder.addInt32(data[i]);
+ }
+ return builder.endVector();
+ }
+ /**
+ * Starts the typeIds vector: 4-byte elements, alignment 4.
+ *
+ * @param flatbuffers.Builder builder
+ * @param number numElems
+ */
+ static startTypeIdsVector(builder: flatbuffers.Builder, numElems: number) {
+ builder.startVector(4, numElems, 4);
+ }
+ /**
+ * Finishes the Union table and returns the offset reported by the builder.
+ *
+ * @param flatbuffers.Builder builder
+ * @returns flatbuffers.Offset
+ */
+ static endUnion(builder: flatbuffers.Builder): flatbuffers.Offset {
+ const offset = builder.endObject();
+ return offset;
+ }
+ /**
+ * Convenience wrapper that starts a Union table, adds both fields, and
+ * ends the table in one call.
+ *
+ * @param flatbuffers.Builder builder
+ * @param UnionMode mode
+ * @param flatbuffers.Offset typeIdsOffset
+ * @returns flatbuffers.Offset
+ */
+ static createUnion(builder: flatbuffers.Builder, mode: UnionMode, typeIdsOffset: flatbuffers.Offset): flatbuffers.Offset {
+ Union.startUnion(builder);
+ Union.addMode(builder, mode);
+ Union.addTypeIds(builder, typeIdsOffset);
+ return Union.endUnion(builder);
+ }
+}
/**
 * Accessor and builder helpers for the `Int` table, which carries a
 * `bitWidth` (slot 0) and an `isSigned` flag (slot 1).
 *
 * @constructor
 */
export class Int {
    /** Buffer this accessor reads from; null until `__init` is called. */
    bb: flatbuffers.ByteBuffer | null = null;
    /** Position of the table inside `bb`. */
    bb_pos: number = 0;

    /**
     * Binds this accessor to a position inside a buffer.
     *
     * @param i position stored in `bb_pos`
     * @param bb buffer stored in `bb`
     * @returns this instance
     */
    __init(i: number, bb: flatbuffers.ByteBuffer): Int {
        this.bb_pos = i;
        this.bb = bb;
        return this;
    }

    /**
     * Returns an accessor located at `readInt32(position) + position` of `bb`.
     *
     * @param bb buffer to read from
     * @param obj optional instance to re-initialise instead of allocating
     * @returns the initialised accessor
     */
    static getRootAsInt(bb: flatbuffers.ByteBuffer, obj?: Int): Int {
        return (obj || new Int()).__init(bb.readInt32(bb.position()) + bb.position(), bb);
    }

    /**
     * Same as `getRootAsInt`, but first advances the position of `bb` by
     * `flatbuffers.SIZE_PREFIX_LENGTH`. This mutates `bb`.
     *
     * @param bb buffer to read from
     * @param obj optional instance to re-initialise instead of allocating
     * @returns the initialised accessor
     */
    static getSizePrefixedRootAsInt(bb: flatbuffers.ByteBuffer, obj?: Int): Int {
        bb.setPosition(bb.position() + flatbuffers.SIZE_PREFIX_LENGTH);
        return (obj || new Int()).__init(bb.readInt32(bb.position()) + bb.position(), bb);
    }

    /**
     * @returns the int32 stored for vtable offset 4, or 0 when the field is not present
     */
    bitWidth(): number {
        const offset = this.bb!.__offset(this.bb_pos, 4);
        return offset ? this.bb!.readInt32(this.bb_pos + offset) : 0;
    }

    /**
     * @returns true when the int8 stored for vtable offset 6 is non-zero; false when the field is not present
     */
    isSigned(): boolean {
        const offset = this.bb!.__offset(this.bb_pos, 6);
        return offset ? !!this.bb!.readInt8(this.bb_pos + offset) : false;
    }

    /**
     * Begins an `Int` table with two field slots.
     *
     * @param builder builder that receives the table
     */
    static startInt(builder: flatbuffers.Builder) {
        builder.startObject(2);
    }

    /**
     * Writes `bitWidth` into slot 0 (default value 0).
     *
     * @param builder builder holding the table under construction
     * @param bitWidth value to store
     */
    static addBitWidth(builder: flatbuffers.Builder, bitWidth: number) {
        builder.addFieldInt32(0, bitWidth, 0);
    }

    /**
     * Writes `isSigned` into slot 1 as an int8 (the boolean is coerced to 0/1; default 0).
     *
     * @param builder builder holding the table under construction
     * @param isSigned flag to store
     */
    static addIsSigned(builder: flatbuffers.Builder, isSigned: boolean) {
        builder.addFieldInt8(1, +isSigned, +false);
    }

    /**
     * Finishes the table under construction.
     *
     * @param builder builder holding the table
     * @returns the value returned by `builder.endObject()`
     */
    static endInt(builder: flatbuffers.Builder): flatbuffers.Offset {
        const offset = builder.endObject();
        return offset;
    }

    /**
     * Convenience wrapper: starts the table, adds both fields, and finishes it.
     *
     * @param builder builder that receives the table
     * @param bitWidth value for slot 0
     * @param isSigned value for slot 1
     * @returns the offset returned by `Int.endInt`
     */
    static createInt(builder: flatbuffers.Builder, bitWidth: number, isSigned: boolean): flatbuffers.Offset {
        Int.startInt(builder);
        Int.addBitWidth(builder, bitWidth);
        Int.addIsSigned(builder, isSigned);
        return Int.endInt(builder);
    }
}
/**
 * Accessor and builder helpers for the `FloatingPoint` table, which carries
 * a single `precision` field (slot 0).
 *
 * @constructor
 */
export class FloatingPoint {
    /** Buffer this accessor reads from; null until `__init` is called. */
    bb: flatbuffers.ByteBuffer | null = null;
    /** Position of the table inside `bb`. */
    bb_pos: number = 0;

    /**
     * Binds this accessor to a position inside a buffer.
     *
     * @param i position stored in `bb_pos`
     * @param bb buffer stored in `bb`
     * @returns this instance
     */
    __init(i: number, bb: flatbuffers.ByteBuffer): FloatingPoint {
        this.bb_pos = i;
        this.bb = bb;
        return this;
    }

    /**
     * Returns an accessor located at `readInt32(position) + position` of `bb`.
     *
     * @param bb buffer to read from
     * @param obj optional instance to re-initialise instead of allocating
     * @returns the initialised accessor
     */
    static getRootAsFloatingPoint(bb: flatbuffers.ByteBuffer, obj?: FloatingPoint): FloatingPoint {
        return (obj || new FloatingPoint()).__init(bb.readInt32(bb.position()) + bb.position(), bb);
    }

    /**
     * Same as `getRootAsFloatingPoint`, but first advances the position of
     * `bb` by `flatbuffers.SIZE_PREFIX_LENGTH`. This mutates `bb`.
     *
     * @param bb buffer to read from
     * @param obj optional instance to re-initialise instead of allocating
     * @returns the initialised accessor
     */
    static getSizePrefixedRootAsFloatingPoint(bb: flatbuffers.ByteBuffer, obj?: FloatingPoint): FloatingPoint {
        bb.setPosition(bb.position() + flatbuffers.SIZE_PREFIX_LENGTH);
        return (obj || new FloatingPoint()).__init(bb.readInt32(bb.position()) + bb.position(), bb);
    }

    /**
     * @returns the int16 stored for vtable offset 4, or `Precision.HALF` when the field is not present
     */
    precision(): Precision {
        const offset = this.bb!.__offset(this.bb_pos, 4);
        return offset ? (this.bb!.readInt16(this.bb_pos + offset)) : Precision.HALF;
    }

    /**
     * Begins a `FloatingPoint` table with one field slot.
     *
     * @param builder builder that receives the table
     */
    static startFloatingPoint(builder: flatbuffers.Builder) {
        builder.startObject(1);
    }

    /**
     * Writes `precision` into slot 0 as an int16 (default `Precision.HALF`).
     *
     * @param builder builder holding the table under construction
     * @param precision value to store
     */
    static addPrecision(builder: flatbuffers.Builder, precision: Precision) {
        builder.addFieldInt16(0, precision, Precision.HALF);
    }

    /**
     * Finishes the table under construction.
     *
     * @param builder builder holding the table
     * @returns the value returned by `builder.endObject()`
     */
    static endFloatingPoint(builder: flatbuffers.Builder): flatbuffers.Offset {
        const offset = builder.endObject();
        return offset;
    }

    /**
     * Convenience wrapper: starts the table, adds `precision`, and finishes it.
     *
     * @param builder builder that receives the table
     * @param precision value for slot 0
     * @returns the offset returned by `FloatingPoint.endFloatingPoint`
     */
    static createFloatingPoint(builder: flatbuffers.Builder, precision: Precision): flatbuffers.Offset {
        FloatingPoint.startFloatingPoint(builder);
        FloatingPoint.addPrecision(builder, precision);
        return FloatingPoint.endFloatingPoint(builder);
    }
}
/**
 * Unicode with UTF-8 encoding
 *
 * The table has no fields; this class only provides the accessor plumbing
 * and builder helpers.
 *
 * @constructor
 */
export class Utf8 {
    /** Buffer this accessor reads from; null until `__init` is called. */
    bb: flatbuffers.ByteBuffer | null = null;
    /** Position of the table inside `bb`. */
    bb_pos: number = 0;

    /**
     * Binds this accessor to a position inside a buffer.
     *
     * @param i position stored in `bb_pos`
     * @param bb buffer stored in `bb`
     * @returns this instance
     */
    __init(i: number, bb: flatbuffers.ByteBuffer): Utf8 {
        this.bb_pos = i;
        this.bb = bb;
        return this;
    }

    /**
     * Returns an accessor located at `readInt32(position) + position` of `bb`.
     *
     * @param bb buffer to read from
     * @param obj optional instance to re-initialise instead of allocating
     * @returns the initialised accessor
     */
    static getRootAsUtf8(bb: flatbuffers.ByteBuffer, obj?: Utf8): Utf8 {
        return (obj || new Utf8()).__init(bb.readInt32(bb.position()) + bb.position(), bb);
    }

    /**
     * Same as `getRootAsUtf8`, but first advances the position of `bb` by
     * `flatbuffers.SIZE_PREFIX_LENGTH`. This mutates `bb`.
     *
     * @param bb buffer to read from
     * @param obj optional instance to re-initialise instead of allocating
     * @returns the initialised accessor
     */
    static getSizePrefixedRootAsUtf8(bb: flatbuffers.ByteBuffer, obj?: Utf8): Utf8 {
        bb.setPosition(bb.position() + flatbuffers.SIZE_PREFIX_LENGTH);
        return (obj || new Utf8()).__init(bb.readInt32(bb.position()) + bb.position(), bb);
    }

    /**
     * Begins a `Utf8` table with zero field slots.
     *
     * @param builder builder that receives the table
     */
    static startUtf8(builder: flatbuffers.Builder) {
        builder.startObject(0);
    }

    /**
     * Finishes the table under construction.
     *
     * @param builder builder holding the table
     * @returns the value returned by `builder.endObject()`
     */
    static endUtf8(builder: flatbuffers.Builder): flatbuffers.Offset {
        const offset = builder.endObject();
        return offset;
    }

    /**
     * Convenience wrapper: starts and immediately finishes an empty table.
     *
     * @param builder builder that receives the table
     * @returns the offset returned by `Utf8.endUtf8`
     */
    static createUtf8(builder: flatbuffers.Builder): flatbuffers.Offset {
        Utf8.startUtf8(builder);
        return Utf8.endUtf8(builder);
    }
}
/**
 * Opaque binary data
 *
 * The table has no fields; this class only provides the accessor plumbing
 * and builder helpers.
 *
 * @constructor
 */
export class Binary {
    /** Buffer this accessor reads from; null until `__init` is called. */
    bb: flatbuffers.ByteBuffer | null = null;
    /** Position of the table inside `bb`. */
    bb_pos: number = 0;

    /**
     * Binds this accessor to a position inside a buffer.
     *
     * @param i position stored in `bb_pos`
     * @param bb buffer stored in `bb`
     * @returns this instance
     */
    __init(i: number, bb: flatbuffers.ByteBuffer): Binary {
        this.bb_pos = i;
        this.bb = bb;
        return this;
    }

    /**
     * Returns an accessor located at `readInt32(position) + position` of `bb`.
     *
     * @param bb buffer to read from
     * @param obj optional instance to re-initialise instead of allocating
     * @returns the initialised accessor
     */
    static getRootAsBinary(bb: flatbuffers.ByteBuffer, obj?: Binary): Binary {
        return (obj || new Binary()).__init(bb.readInt32(bb.position()) + bb.position(), bb);
    }

    /**
     * Same as `getRootAsBinary`, but first advances the position of `bb` by
     * `flatbuffers.SIZE_PREFIX_LENGTH`. This mutates `bb`.
     *
     * @param bb buffer to read from
     * @param obj optional instance to re-initialise instead of allocating
     * @returns the initialised accessor
     */
    static getSizePrefixedRootAsBinary(bb: flatbuffers.ByteBuffer, obj?: Binary): Binary {
        bb.setPosition(bb.position() + flatbuffers.SIZE_PREFIX_LENGTH);
        return (obj || new Binary()).__init(bb.readInt32(bb.position()) + bb.position(), bb);
    }

    /**
     * Begins a `Binary` table with zero field slots.
     *
     * @param builder builder that receives the table
     */
    static startBinary(builder: flatbuffers.Builder) {
        builder.startObject(0);
    }

    /**
     * Finishes the table under construction.
     *
     * @param builder builder holding the table
     * @returns the value returned by `builder.endObject()`
     */
    static endBinary(builder: flatbuffers.Builder): flatbuffers.Offset {
        const offset = builder.endObject();
        return offset;
    }

    /**
     * Convenience wrapper: starts and immediately finishes an empty table.
     *
     * @param builder builder that receives the table
     * @returns the offset returned by `Binary.endBinary`
     */
    static createBinary(builder: flatbuffers.Builder): flatbuffers.Offset {
        Binary.startBinary(builder);
        return Binary.endBinary(builder);
    }
}
/**
 * Same as Utf8, but with 64-bit offsets, allowing to represent
 * extremely large data values.
 *
 * The table has no fields; this class only provides the accessor plumbing
 * and builder helpers.
 *
 * @constructor
 */
export class LargeUtf8 {
    /** Buffer this accessor reads from; null until `__init` is called. */
    bb: flatbuffers.ByteBuffer | null = null;
    /** Position of the table inside `bb`. */
    bb_pos: number = 0;

    /**
     * Binds this accessor to a position inside a buffer.
     *
     * @param i position stored in `bb_pos`
     * @param bb buffer stored in `bb`
     * @returns this instance
     */
    __init(i: number, bb: flatbuffers.ByteBuffer): LargeUtf8 {
        this.bb_pos = i;
        this.bb = bb;
        return this;
    }

    /**
     * Returns an accessor located at `readInt32(position) + position` of `bb`.
     *
     * @param bb buffer to read from
     * @param obj optional instance to re-initialise instead of allocating
     * @returns the initialised accessor
     */
    static getRootAsLargeUtf8(bb: flatbuffers.ByteBuffer, obj?: LargeUtf8): LargeUtf8 {
        return (obj || new LargeUtf8()).__init(bb.readInt32(bb.position()) + bb.position(), bb);
    }

    /**
     * Same as `getRootAsLargeUtf8`, but first advances the position of `bb`
     * by `flatbuffers.SIZE_PREFIX_LENGTH`. This mutates `bb`.
     *
     * @param bb buffer to read from
     * @param obj optional instance to re-initialise instead of allocating
     * @returns the initialised accessor
     */
    static getSizePrefixedRootAsLargeUtf8(bb: flatbuffers.ByteBuffer, obj?: LargeUtf8): LargeUtf8 {
        bb.setPosition(bb.position() + flatbuffers.SIZE_PREFIX_LENGTH);
        return (obj || new LargeUtf8()).__init(bb.readInt32(bb.position()) + bb.position(), bb);
    }

    /**
     * Begins a `LargeUtf8` table with zero field slots.
     *
     * @param builder builder that receives the table
     */
    static startLargeUtf8(builder: flatbuffers.Builder) {
        builder.startObject(0);
    }

    /**
     * Finishes the table under construction.
     *
     * @param builder builder holding the table
     * @returns the value returned by `builder.endObject()`
     */
    static endLargeUtf8(builder: flatbuffers.Builder): flatbuffers.Offset {
        const offset = builder.endObject();
        return offset;
    }

    /**
     * Convenience wrapper: starts and immediately finishes an empty table.
     *
     * @param builder builder that receives the table
     * @returns the offset returned by `LargeUtf8.endLargeUtf8`
     */
    static createLargeUtf8(builder: flatbuffers.Builder): flatbuffers.Offset {
        LargeUtf8.startLargeUtf8(builder);
        return LargeUtf8.endLargeUtf8(builder);
    }
}
/**
 * Same as Binary, but with 64-bit offsets, allowing to represent
 * extremely large data values.
 *
 * The table has no fields; this class only provides the accessor plumbing
 * and builder helpers.
 *
 * @constructor
 */
export class LargeBinary {
    /** Buffer this accessor reads from; null until `__init` is called. */
    bb: flatbuffers.ByteBuffer | null = null;
    /** Position of the table inside `bb`. */
    bb_pos: number = 0;

    /**
     * Binds this accessor to a position inside a buffer.
     *
     * @param i position stored in `bb_pos`
     * @param bb buffer stored in `bb`
     * @returns this instance
     */
    __init(i: number, bb: flatbuffers.ByteBuffer): LargeBinary {
        this.bb_pos = i;
        this.bb = bb;
        return this;
    }

    /**
     * Returns an accessor located at `readInt32(position) + position` of `bb`.
     *
     * @param bb buffer to read from
     * @param obj optional instance to re-initialise instead of allocating
     * @returns the initialised accessor
     */
    static getRootAsLargeBinary(bb: flatbuffers.ByteBuffer, obj?: LargeBinary): LargeBinary {
        return (obj || new LargeBinary()).__init(bb.readInt32(bb.position()) + bb.position(), bb);
    }

    /**
     * Same as `getRootAsLargeBinary`, but first advances the position of
     * `bb` by `flatbuffers.SIZE_PREFIX_LENGTH`. This mutates `bb`.
     *
     * @param bb buffer to read from
     * @param obj optional instance to re-initialise instead of allocating
     * @returns the initialised accessor
     */
    static getSizePrefixedRootAsLargeBinary(bb: flatbuffers.ByteBuffer, obj?: LargeBinary): LargeBinary {
        bb.setPosition(bb.position() + flatbuffers.SIZE_PREFIX_LENGTH);
        return (obj || new LargeBinary()).__init(bb.readInt32(bb.position()) + bb.position(), bb);
    }

    /**
     * Begins a `LargeBinary` table with zero field slots.
     *
     * @param builder builder that receives the table
     */
    static startLargeBinary(builder: flatbuffers.Builder) {
        builder.startObject(0);
    }

    /**
     * Finishes the table under construction.
     *
     * @param builder builder holding the table
     * @returns the value returned by `builder.endObject()`
     */
    static endLargeBinary(builder: flatbuffers.Builder): flatbuffers.Offset {
        const offset = builder.endObject();
        return offset;
    }

    /**
     * Convenience wrapper: starts and immediately finishes an empty table.
     *
     * @param builder builder that receives the table
     * @returns the offset returned by `LargeBinary.endLargeBinary`
     */
    static createLargeBinary(builder: flatbuffers.Builder): flatbuffers.Offset {
        LargeBinary.startLargeBinary(builder);
        return LargeBinary.endLargeBinary(builder);
    }
}
/**
 * Accessor and builder helpers for the `FixedSizeBinary` table, which
 * carries a single `byteWidth` field (slot 0).
 *
 * @constructor
 */
export class FixedSizeBinary {
    /** Buffer this accessor reads from; null until `__init` is called. */
    bb: flatbuffers.ByteBuffer | null = null;
    /** Position of the table inside `bb`. */
    bb_pos: number = 0;

    /**
     * Binds this accessor to a position inside a buffer.
     *
     * @param i position stored in `bb_pos`
     * @param bb buffer stored in `bb`
     * @returns this instance
     */
    __init(i: number, bb: flatbuffers.ByteBuffer): FixedSizeBinary {
        this.bb_pos = i;
        this.bb = bb;
        return this;
    }

    /**
     * Returns an accessor located at `readInt32(position) + position` of `bb`.
     *
     * @param bb buffer to read from
     * @param obj optional instance to re-initialise instead of allocating
     * @returns the initialised accessor
     */
    static getRootAsFixedSizeBinary(bb: flatbuffers.ByteBuffer, obj?: FixedSizeBinary): FixedSizeBinary {
        return (obj || new FixedSizeBinary()).__init(bb.readInt32(bb.position()) + bb.position(), bb);
    }

    /**
     * Same as `getRootAsFixedSizeBinary`, but first advances the position of
     * `bb` by `flatbuffers.SIZE_PREFIX_LENGTH`. This mutates `bb`.
     *
     * @param bb buffer to read from
     * @param obj optional instance to re-initialise instead of allocating
     * @returns the initialised accessor
     */
    static getSizePrefixedRootAsFixedSizeBinary(bb: flatbuffers.ByteBuffer, obj?: FixedSizeBinary): FixedSizeBinary {
        bb.setPosition(bb.position() + flatbuffers.SIZE_PREFIX_LENGTH);
        return (obj || new FixedSizeBinary()).__init(bb.readInt32(bb.position()) + bb.position(), bb);
    }

    /**
     * Number of bytes per value
     *
     * @returns the int32 stored for vtable offset 4, or 0 when the field is not present
     */
    byteWidth(): number {
        const offset = this.bb!.__offset(this.bb_pos, 4);
        return offset ? this.bb!.readInt32(this.bb_pos + offset) : 0;
    }

    /**
     * Begins a `FixedSizeBinary` table with one field slot.
     *
     * @param builder builder that receives the table
     */
    static startFixedSizeBinary(builder: flatbuffers.Builder) {
        builder.startObject(1);
    }

    /**
     * Writes `byteWidth` into slot 0 (default value 0).
     *
     * @param builder builder holding the table under construction
     * @param byteWidth value to store
     */
    static addByteWidth(builder: flatbuffers.Builder, byteWidth: number) {
        builder.addFieldInt32(0, byteWidth, 0);
    }

    /**
     * Finishes the table under construction.
     *
     * @param builder builder holding the table
     * @returns the value returned by `builder.endObject()`
     */
    static endFixedSizeBinary(builder: flatbuffers.Builder): flatbuffers.Offset {
        const offset = builder.endObject();
        return offset;
    }

    /**
     * Convenience wrapper: starts the table, adds `byteWidth`, and finishes it.
     *
     * @param builder builder that receives the table
     * @param byteWidth value for slot 0
     * @returns the offset returned by `FixedSizeBinary.endFixedSizeBinary`
     */
    static createFixedSizeBinary(builder: flatbuffers.Builder, byteWidth: number): flatbuffers.Offset {
        FixedSizeBinary.startFixedSizeBinary(builder);
        FixedSizeBinary.addByteWidth(builder, byteWidth);
        return FixedSizeBinary.endFixedSizeBinary(builder);
    }
}
/**
 * Accessor and builder helpers for the `Bool` table. The table has no
 * fields.
 *
 * @constructor
 */
export class Bool {
    /** Buffer this accessor reads from; null until `__init` is called. */
    bb: flatbuffers.ByteBuffer | null = null;
    /** Position of the table inside `bb`. */
    bb_pos: number = 0;

    /**
     * Binds this accessor to a position inside a buffer.
     *
     * @param i position stored in `bb_pos`
     * @param bb buffer stored in `bb`
     * @returns this instance
     */
    __init(i: number, bb: flatbuffers.ByteBuffer): Bool {
        this.bb_pos = i;
        this.bb = bb;
        return this;
    }

    /**
     * Returns an accessor located at `readInt32(position) + position` of `bb`.
     *
     * @param bb buffer to read from
     * @param obj optional instance to re-initialise instead of allocating
     * @returns the initialised accessor
     */
    static getRootAsBool(bb: flatbuffers.ByteBuffer, obj?: Bool): Bool {
        return (obj || new Bool()).__init(bb.readInt32(bb.position()) + bb.position(), bb);
    }

    /**
     * Same as `getRootAsBool`, but first advances the position of `bb` by
     * `flatbuffers.SIZE_PREFIX_LENGTH`. This mutates `bb`.
     *
     * @param bb buffer to read from
     * @param obj optional instance to re-initialise instead of allocating
     * @returns the initialised accessor
     */
    static getSizePrefixedRootAsBool(bb: flatbuffers.ByteBuffer, obj?: Bool): Bool {
        bb.setPosition(bb.position() + flatbuffers.SIZE_PREFIX_LENGTH);
        return (obj || new Bool()).__init(bb.readInt32(bb.position()) + bb.position(), bb);
    }

    /**
     * Begins a `Bool` table with zero field slots.
     *
     * @param builder builder that receives the table
     */
    static startBool(builder: flatbuffers.Builder) {
        builder.startObject(0);
    }

    /**
     * Finishes the table under construction.
     *
     * @param builder builder holding the table
     * @returns the value returned by `builder.endObject()`
     */
    static endBool(builder: flatbuffers.Builder): flatbuffers.Offset {
        const offset = builder.endObject();
        return offset;
    }

    /**
     * Convenience wrapper: starts and immediately finishes an empty table.
     *
     * @param builder builder that receives the table
     * @returns the offset returned by `Bool.endBool`
     */
    static createBool(builder: flatbuffers.Builder): flatbuffers.Offset {
        Bool.startBool(builder);
        return Bool.endBool(builder);
    }
}
/**
 * Exact decimal value represented as an integer value in two's
 * complement. Currently only 128-bit (16-byte) and 256-bit (32-byte) integers
 * are used. The representation uses the endianness indicated
 * in the Schema.
 *
 * @constructor
 */
export class Decimal {
    /** Buffer this accessor reads from; null until `__init` is called. */
    bb: flatbuffers.ByteBuffer | null = null;
    /** Position of the table inside `bb`. */
    bb_pos: number = 0;

    /**
     * Binds this accessor to a position inside a buffer.
     *
     * @param i position stored in `bb_pos`
     * @param bb buffer stored in `bb`
     * @returns this instance
     */
    __init(i: number, bb: flatbuffers.ByteBuffer): Decimal {
        this.bb_pos = i;
        this.bb = bb;
        return this;
    }

    /**
     * Returns an accessor located at `readInt32(position) + position` of `bb`.
     *
     * @param bb buffer to read from
     * @param obj optional instance to re-initialise instead of allocating
     * @returns the initialised accessor
     */
    static getRootAsDecimal(bb: flatbuffers.ByteBuffer, obj?: Decimal): Decimal {
        return (obj || new Decimal()).__init(bb.readInt32(bb.position()) + bb.position(), bb);
    }

    /**
     * Same as `getRootAsDecimal`, but first advances the position of `bb` by
     * `flatbuffers.SIZE_PREFIX_LENGTH`. This mutates `bb`.
     *
     * @param bb buffer to read from
     * @param obj optional instance to re-initialise instead of allocating
     * @returns the initialised accessor
     */
    static getSizePrefixedRootAsDecimal(bb: flatbuffers.ByteBuffer, obj?: Decimal): Decimal {
        bb.setPosition(bb.position() + flatbuffers.SIZE_PREFIX_LENGTH);
        return (obj || new Decimal()).__init(bb.readInt32(bb.position()) + bb.position(), bb);
    }

    /**
     * Total number of decimal digits
     *
     * @returns the int32 stored for vtable offset 4, or 0 when the field is not present
     */
    precision(): number {
        const offset = this.bb!.__offset(this.bb_pos, 4);
        return offset ? this.bb!.readInt32(this.bb_pos + offset) : 0;
    }

    /**
     * Number of digits after the decimal point "."
     *
     * @returns the int32 stored for vtable offset 6, or 0 when the field is not present
     */
    scale(): number {
        const offset = this.bb!.__offset(this.bb_pos, 6);
        return offset ? this.bb!.readInt32(this.bb_pos + offset) : 0;
    }

    /**
     * Number of bits per value. The only accepted widths are 128 and 256.
     * We use bitWidth for consistency with Int::bitWidth.
     *
     * @returns the int32 stored for vtable offset 8, or 128 when the field is not present
     */
    bitWidth(): number {
        const offset = this.bb!.__offset(this.bb_pos, 8);
        return offset ? this.bb!.readInt32(this.bb_pos + offset) : 128;
    }

    /**
     * Begins a `Decimal` table with three field slots.
     *
     * @param builder builder that receives the table
     */
    static startDecimal(builder: flatbuffers.Builder) {
        builder.startObject(3);
    }

    /**
     * Writes `precision` into slot 0 (default value 0).
     *
     * @param builder builder holding the table under construction
     * @param precision value to store
     */
    static addPrecision(builder: flatbuffers.Builder, precision: number) {
        builder.addFieldInt32(0, precision, 0);
    }

    /**
     * Writes `scale` into slot 1 (default value 0).
     *
     * @param builder builder holding the table under construction
     * @param scale value to store
     */
    static addScale(builder: flatbuffers.Builder, scale: number) {
        builder.addFieldInt32(1, scale, 0);
    }

    /**
     * Writes `bitWidth` into slot 2 (default value 128).
     *
     * @param builder builder holding the table under construction
     * @param bitWidth value to store
     */
    static addBitWidth(builder: flatbuffers.Builder, bitWidth: number) {
        builder.addFieldInt32(2, bitWidth, 128);
    }

    /**
     * Finishes the table under construction.
     *
     * @param builder builder holding the table
     * @returns the value returned by `builder.endObject()`
     */
    static endDecimal(builder: flatbuffers.Builder): flatbuffers.Offset {
        const offset = builder.endObject();
        return offset;
    }

    /**
     * Convenience wrapper: starts the table, adds all three fields, and finishes it.
     *
     * @param builder builder that receives the table
     * @param precision value for slot 0
     * @param scale value for slot 1
     * @param bitWidth value for slot 2
     * @returns the offset returned by `Decimal.endDecimal`
     */
    static createDecimal(builder: flatbuffers.Builder, precision: number, scale: number, bitWidth: number): flatbuffers.Offset {
        Decimal.startDecimal(builder);
        Decimal.addPrecision(builder, precision);
        Decimal.addScale(builder, scale);
        Decimal.addBitWidth(builder, bitWidth);
        return Decimal.endDecimal(builder);
    }
}
/**
 * Date is either a 32-bit or 64-bit type representing elapsed time since UNIX
 * epoch (1970-01-01), stored in either of two units:
 *
 * * Milliseconds (64 bits) indicating UNIX time elapsed since the epoch (no
 *   leap seconds), where the values are evenly divisible by 86400000
 * * Days (32 bits) since the UNIX epoch
 *
 * Note: within this module the name `Date` refers to this class, not the
 * built-in JavaScript `Date`.
 *
 * @constructor
 */
export class Date {
    /** Buffer this accessor reads from; null until `__init` is called. */
    bb: flatbuffers.ByteBuffer | null = null;
    /** Position of the table inside `bb`. */
    bb_pos: number = 0;

    /**
     * Binds this accessor to a position inside a buffer.
     *
     * @param i position stored in `bb_pos`
     * @param bb buffer stored in `bb`
     * @returns this instance
     */
    __init(i: number, bb: flatbuffers.ByteBuffer): Date {
        this.bb_pos = i;
        this.bb = bb;
        return this;
    }

    /**
     * Returns an accessor located at `readInt32(position) + position` of `bb`.
     *
     * @param bb buffer to read from
     * @param obj optional instance to re-initialise instead of allocating
     * @returns the initialised accessor
     */
    static getRootAsDate(bb: flatbuffers.ByteBuffer, obj?: Date): Date {
        return (obj || new Date()).__init(bb.readInt32(bb.position()) + bb.position(), bb);
    }

    /**
     * Same as `getRootAsDate`, but first advances the position of `bb` by
     * `flatbuffers.SIZE_PREFIX_LENGTH`. This mutates `bb`.
     *
     * @param bb buffer to read from
     * @param obj optional instance to re-initialise instead of allocating
     * @returns the initialised accessor
     */
    static getSizePrefixedRootAsDate(bb: flatbuffers.ByteBuffer, obj?: Date): Date {
        bb.setPosition(bb.position() + flatbuffers.SIZE_PREFIX_LENGTH);
        return (obj || new Date()).__init(bb.readInt32(bb.position()) + bb.position(), bb);
    }

    /**
     * @returns the int16 stored for vtable offset 4, or `DateUnit.MILLISECOND` when the field is not present
     */
    unit(): DateUnit {
        const offset = this.bb!.__offset(this.bb_pos, 4);
        return offset ? (this.bb!.readInt16(this.bb_pos + offset)) : DateUnit.MILLISECOND;
    }

    /**
     * Begins a `Date` table with one field slot.
     *
     * @param builder builder that receives the table
     */
    static startDate(builder: flatbuffers.Builder) {
        builder.startObject(1);
    }

    /**
     * Writes `unit` into slot 0 as an int16 (default `DateUnit.MILLISECOND`).
     *
     * @param builder builder holding the table under construction
     * @param unit value to store
     */
    static addUnit(builder: flatbuffers.Builder, unit: DateUnit) {
        builder.addFieldInt16(0, unit, DateUnit.MILLISECOND);
    }

    /**
     * Finishes the table under construction.
     *
     * @param builder builder holding the table
     * @returns the value returned by `builder.endObject()`
     */
    static endDate(builder: flatbuffers.Builder): flatbuffers.Offset {
        const offset = builder.endObject();
        return offset;
    }

    /**
     * Convenience wrapper: starts the table, adds `unit`, and finishes it.
     *
     * @param builder builder that receives the table
     * @param unit value for slot 0
     * @returns the offset returned by `Date.endDate`
     */
    static createDate(builder: flatbuffers.Builder, unit: DateUnit): flatbuffers.Offset {
        Date.startDate(builder);
        Date.addUnit(builder, unit);
        return Date.endDate(builder);
    }
}
/**
 * Time type. The physical storage type depends on the unit
 * - SECOND and MILLISECOND: 32 bits
 *
 * @constructor
 */
export class Time {
    /** Buffer this accessor reads from; null until `__init` is called. */
    bb: flatbuffers.ByteBuffer | null = null;
    /** Position of the table inside `bb`. */
    bb_pos: number = 0;

    /**
     * Binds this accessor to a position inside a buffer.
     *
     * @param i position stored in `bb_pos`
     * @param bb buffer stored in `bb`
     * @returns this instance
     */
    __init(i: number, bb: flatbuffers.ByteBuffer): Time {
        this.bb_pos = i;
        this.bb = bb;
        return this;
    }

    /**
     * Returns an accessor located at `readInt32(position) + position` of `bb`.
     *
     * @param bb buffer to read from
     * @param obj optional instance to re-initialise instead of allocating
     * @returns the initialised accessor
     */
    static getRootAsTime(bb: flatbuffers.ByteBuffer, obj?: Time): Time {
        return (obj || new Time()).__init(bb.readInt32(bb.position()) + bb.position(), bb);
    }

    /**
     * Same as `getRootAsTime`, but first advances the position of `bb` by
     * `flatbuffers.SIZE_PREFIX_LENGTH`. This mutates `bb`.
     *
     * @param bb buffer to read from
     * @param obj optional instance to re-initialise instead of allocating
     * @returns the initialised accessor
     */
    static getSizePrefixedRootAsTime(bb: flatbuffers.ByteBuffer, obj?: Time): Time {
        bb.setPosition(bb.position() + flatbuffers.SIZE_PREFIX_LENGTH);
        return (obj || new Time()).__init(bb.readInt32(bb.position()) + bb.position(), bb);
    }

    /**
     * @returns the int16 stored for vtable offset 4, or `TimeUnit.MILLISECOND` when the field is not present
     */
    unit(): TimeUnit {
        const offset = this.bb!.__offset(this.bb_pos, 4);
        return offset ? (this.bb!.readInt16(this.bb_pos + offset)) : TimeUnit.MILLISECOND;
    }

    /**
     * @returns the int32 stored for vtable offset 6, or 32 when the field is not present
     */
    bitWidth(): number {
        const offset = this.bb!.__offset(this.bb_pos, 6);
        return offset ? this.bb!.readInt32(this.bb_pos + offset) : 32;
    }

    /**
     * Begins a `Time` table with two field slots.
     *
     * @param builder builder that receives the table
     */
    static startTime(builder: flatbuffers.Builder) {
        builder.startObject(2);
    }

    /**
     * Writes `unit` into slot 0 as an int16 (default `TimeUnit.MILLISECOND`).
     *
     * @param builder builder holding the table under construction
     * @param unit value to store
     */
    static addUnit(builder: flatbuffers.Builder, unit: TimeUnit) {
        builder.addFieldInt16(0, unit, TimeUnit.MILLISECOND);
    }

    /**
     * Writes `bitWidth` into slot 1 (default value 32).
     *
     * @param builder builder holding the table under construction
     * @param bitWidth value to store
     */
    static addBitWidth(builder: flatbuffers.Builder, bitWidth: number) {
        builder.addFieldInt32(1, bitWidth, 32);
    }

    /**
     * Finishes the table under construction.
     *
     * @param builder builder holding the table
     * @returns the value returned by `builder.endObject()`
     */
    static endTime(builder: flatbuffers.Builder): flatbuffers.Offset {
        const offset = builder.endObject();
        return offset;
    }

    /**
     * Convenience wrapper: starts the table, adds both fields, and finishes it.
     *
     * @param builder builder that receives the table
     * @param unit value for slot 0
     * @param bitWidth value for slot 1
     * @returns the offset returned by `Time.endTime`
     */
    static createTime(builder: flatbuffers.Builder, unit: TimeUnit, bitWidth: number): flatbuffers.Offset {
        Time.startTime(builder);
        Time.addUnit(builder, unit);
        Time.addBitWidth(builder, bitWidth);
        return Time.endTime(builder);
    }
}
/**
 * Time elapsed from the Unix epoch, 00:00:00.000 on 1 January 1970, excluding
 * leap seconds, as a 64-bit integer. Note that UNIX time does not include
 * leap seconds.
 *
 * The Timestamp metadata supports both "time zone naive" and "time zone
 * aware" timestamps. Read about the timezone attribute for more detail
 *
 * @constructor
 */
export class Timestamp {
    /** Buffer this accessor reads from; null until `__init` is called. */
    bb: flatbuffers.ByteBuffer | null = null;
    /** Position of the table inside `bb`. */
    bb_pos: number = 0;

    /**
     * Binds this accessor to a position inside a buffer.
     *
     * @param i position stored in `bb_pos`
     * @param bb buffer stored in `bb`
     * @returns this instance
     */
    __init(i: number, bb: flatbuffers.ByteBuffer): Timestamp {
        this.bb_pos = i;
        this.bb = bb;
        return this;
    }

    /**
     * Returns an accessor located at `readInt32(position) + position` of `bb`.
     *
     * @param bb buffer to read from
     * @param obj optional instance to re-initialise instead of allocating
     * @returns the initialised accessor
     */
    static getRootAsTimestamp(bb: flatbuffers.ByteBuffer, obj?: Timestamp): Timestamp {
        return (obj || new Timestamp()).__init(bb.readInt32(bb.position()) + bb.position(), bb);
    }

    /**
     * Same as `getRootAsTimestamp`, but first advances the position of `bb`
     * by `flatbuffers.SIZE_PREFIX_LENGTH`. This mutates `bb`.
     *
     * @param bb buffer to read from
     * @param obj optional instance to re-initialise instead of allocating
     * @returns the initialised accessor
     */
    static getSizePrefixedRootAsTimestamp(bb: flatbuffers.ByteBuffer, obj?: Timestamp): Timestamp {
        bb.setPosition(bb.position() + flatbuffers.SIZE_PREFIX_LENGTH);
        return (obj || new Timestamp()).__init(bb.readInt32(bb.position()) + bb.position(), bb);
    }

    /**
     * @returns the int16 stored for vtable offset 4, or `TimeUnit.SECOND` when the field is not present
     */
    unit(): TimeUnit {
        const offset = this.bb!.__offset(this.bb_pos, 4);
        return offset ? (this.bb!.readInt16(this.bb_pos + offset)) : TimeUnit.SECOND;
    }

    /**
     * The time zone is a string indicating the name of a time zone, one of:
     *
     * * As used in the Olson time zone database (the "tz database" or
     *   "tzdata"), such as "America/New_York"
     * * An absolute time zone offset of the form +XX:XX or -XX:XX, such as +07:30
     *
     * Whether a timezone string is present indicates different semantics about
     * the data:
     *
     * * If the time zone is null or equal to an empty string, the data is "time
     *   zone naive" and shall be displayed *as is* to the user, not localized
     *   to the locale of the user. This data can be thought of as UTC but
     *   without having "UTC" as the time zone, it is not considered to be
     *   localized to any time zone
     *
     * * If the time zone is set to a valid value, values can be displayed as
     *   "localized" to that time zone, even though the underlying 64-bit
     *   integers are identical to the same data stored in UTC. Converting
     *   between time zones is a metadata-only operation and does not change the
     *   underlying values
     *
     * @param optionalEncoding encoding forwarded to the buffer's `__string`
     * @returns the string stored for vtable offset 6, or null when the field is not present
     */
    timezone(): string | null;
    timezone(optionalEncoding: flatbuffers.Encoding): string | Uint8Array | null;
    timezone(optionalEncoding?: any): string | Uint8Array | null {
        const offset = this.bb!.__offset(this.bb_pos, 6);
        return offset ? this.bb!.__string(this.bb_pos + offset, optionalEncoding) : null;
    }

    /**
     * Begins a `Timestamp` table with two field slots.
     *
     * @param builder builder that receives the table
     */
    static startTimestamp(builder: flatbuffers.Builder) {
        builder.startObject(2);
    }

    /**
     * Writes `unit` into slot 0 as an int16 (default `TimeUnit.SECOND`).
     *
     * @param builder builder holding the table under construction
     * @param unit value to store
     */
    static addUnit(builder: flatbuffers.Builder, unit: TimeUnit) {
        builder.addFieldInt16(0, unit, TimeUnit.SECOND);
    }

    /**
     * Writes the offset of the timezone string into slot 1 (default 0).
     *
     * @param builder builder holding the table under construction
     * @param timezoneOffset offset of a previously written string
     */
    static addTimezone(builder: flatbuffers.Builder, timezoneOffset: flatbuffers.Offset) {
        builder.addFieldOffset(1, timezoneOffset, 0);
    }

    /**
     * Finishes the table under construction.
     *
     * @param builder builder holding the table
     * @returns the value returned by `builder.endObject()`
     */
    static endTimestamp(builder: flatbuffers.Builder): flatbuffers.Offset {
        const offset = builder.endObject();
        return offset;
    }

    /**
     * Convenience wrapper: starts the table, adds both fields, and finishes it.
     *
     * @param builder builder that receives the table
     * @param unit value for slot 0
     * @param timezoneOffset offset of a previously written string, for slot 1
     * @returns the offset returned by `Timestamp.endTimestamp`
     */
    static createTimestamp(builder: flatbuffers.Builder, unit: TimeUnit, timezoneOffset: flatbuffers.Offset): flatbuffers.Offset {
        Timestamp.startTimestamp(builder);
        Timestamp.addUnit(builder, unit);
        Timestamp.addTimezone(builder, timezoneOffset);
        return Timestamp.endTimestamp(builder);
    }
}
/**
 * Accessor and builder helpers for the `Interval` table, which carries a
 * single `unit` field (slot 0).
 *
 * @constructor
 */
export class Interval {
    /** Buffer this accessor reads from; null until `__init` is called. */
    bb: flatbuffers.ByteBuffer | null = null;
    /** Position of the table inside `bb`. */
    bb_pos: number = 0;

    /**
     * Binds this accessor to a position inside a buffer.
     *
     * @param i position stored in `bb_pos`
     * @param bb buffer stored in `bb`
     * @returns this instance
     */
    __init(i: number, bb: flatbuffers.ByteBuffer): Interval {
        this.bb_pos = i;
        this.bb = bb;
        return this;
    }

    /**
     * Returns an accessor located at `readInt32(position) + position` of `bb`.
     *
     * @param bb buffer to read from
     * @param obj optional instance to re-initialise instead of allocating
     * @returns the initialised accessor
     */
    static getRootAsInterval(bb: flatbuffers.ByteBuffer, obj?: Interval): Interval {
        return (obj || new Interval()).__init(bb.readInt32(bb.position()) + bb.position(), bb);
    }

    /**
     * Same as `getRootAsInterval`, but first advances the position of `bb`
     * by `flatbuffers.SIZE_PREFIX_LENGTH`. This mutates `bb`.
     *
     * @param bb buffer to read from
     * @param obj optional instance to re-initialise instead of allocating
     * @returns the initialised accessor
     */
    static getSizePrefixedRootAsInterval(bb: flatbuffers.ByteBuffer, obj?: Interval): Interval {
        bb.setPosition(bb.position() + flatbuffers.SIZE_PREFIX_LENGTH);
        return (obj || new Interval()).__init(bb.readInt32(bb.position()) + bb.position(), bb);
    }

    /**
     * @returns the int16 stored for vtable offset 4, or `IntervalUnit.YEAR_MONTH` when the field is not present
     */
    unit(): IntervalUnit {
        const offset = this.bb!.__offset(this.bb_pos, 4);
        return offset ? (this.bb!.readInt16(this.bb_pos + offset)) : IntervalUnit.YEAR_MONTH;
    }

    /**
     * Begins an `Interval` table with one field slot.
     *
     * @param builder builder that receives the table
     */
    static startInterval(builder: flatbuffers.Builder) {
        builder.startObject(1);
    }

    /**
     * Writes `unit` into slot 0 as an int16 (default `IntervalUnit.YEAR_MONTH`).
     *
     * @param builder builder holding the table under construction
     * @param unit value to store
     */
    static addUnit(builder: flatbuffers.Builder, unit: IntervalUnit) {
        builder.addFieldInt16(0, unit, IntervalUnit.YEAR_MONTH);
    }

    /**
     * Finishes the table under construction.
     *
     * @param builder builder holding the table
     * @returns the value returned by `builder.endObject()`
     */
    static endInterval(builder: flatbuffers.Builder): flatbuffers.Offset {
        const offset = builder.endObject();
        return offset;
    }

    /**
     * Convenience wrapper: starts the table, adds `unit`, and finishes it.
     *
     * @param builder builder that receives the table
     * @param unit value for slot 0
     * @returns the offset returned by `Interval.endInterval`
     */
    static createInterval(builder: flatbuffers.Builder, unit: IntervalUnit): flatbuffers.Offset {
        Interval.startInterval(builder);
        Interval.addUnit(builder, unit);
        return Interval.endInterval(builder);
    }
}
/**
 * Accessor and builder helpers for the `Duration` table, which carries a
 * single `unit` field (slot 0).
 *
 * @constructor
 */
export class Duration {
    /** Buffer this accessor reads from; null until `__init` is called. */
    bb: flatbuffers.ByteBuffer | null = null;
    /** Position of the table inside `bb`. */
    bb_pos: number = 0;

    /**
     * Binds this accessor to a position inside a buffer.
     *
     * @param i position stored in `bb_pos`
     * @param bb buffer stored in `bb`
     * @returns this instance
     */
    __init(i: number, bb: flatbuffers.ByteBuffer): Duration {
        this.bb_pos = i;
        this.bb = bb;
        return this;
    }

    /**
     * Returns an accessor located at `readInt32(position) + position` of `bb`.
     *
     * @param bb buffer to read from
     * @param obj optional instance to re-initialise instead of allocating
     * @returns the initialised accessor
     */
    static getRootAsDuration(bb: flatbuffers.ByteBuffer, obj?: Duration): Duration {
        return (obj || new Duration()).__init(bb.readInt32(bb.position()) + bb.position(), bb);
    }

    /**
     * Same as `getRootAsDuration`, but first advances the position of `bb`
     * by `flatbuffers.SIZE_PREFIX_LENGTH`. This mutates `bb`.
     *
     * @param bb buffer to read from
     * @param obj optional instance to re-initialise instead of allocating
     * @returns the initialised accessor
     */
    static getSizePrefixedRootAsDuration(bb: flatbuffers.ByteBuffer, obj?: Duration): Duration {
        bb.setPosition(bb.position() + flatbuffers.SIZE_PREFIX_LENGTH);
        return (obj || new Duration()).__init(bb.readInt32(bb.position()) + bb.position(), bb);
    }

    /**
     * @returns the int16 stored for vtable offset 4, or `TimeUnit.MILLISECOND` when the field is not present
     */
    unit(): TimeUnit {
        const offset = this.bb!.__offset(this.bb_pos, 4);
        return offset ? (this.bb!.readInt16(this.bb_pos + offset)) : TimeUnit.MILLISECOND;
    }

    /**
     * Begins a `Duration` table with one field slot.
     *
     * @param builder builder that receives the table
     */
    static startDuration(builder: flatbuffers.Builder) {
        builder.startObject(1);
    }

    /**
     * Writes `unit` into slot 0 as an int16 (default `TimeUnit.MILLISECOND`).
     *
     * @param builder builder holding the table under construction
     * @param unit value to store
     */
    static addUnit(builder: flatbuffers.Builder, unit: TimeUnit) {
        builder.addFieldInt16(0, unit, TimeUnit.MILLISECOND);
    }

    /**
     * Finishes the table under construction.
     *
     * @param builder builder holding the table
     * @returns the value returned by `builder.endObject()`
     */
    static endDuration(builder: flatbuffers.Builder): flatbuffers.Offset {
        const offset = builder.endObject();
        return offset;
    }

    /**
     * Convenience wrapper: starts the table, adds `unit`, and finishes it.
     *
     * @param builder builder that receives the table
     * @param unit value for slot 0
     * @returns the offset returned by `Duration.endDuration`
     */
    static createDuration(builder: flatbuffers.Builder, unit: TimeUnit): flatbuffers.Offset {
        Duration.startDuration(builder);
        Duration.addUnit(builder, unit);
        return Duration.endDuration(builder);
    }
}
/**
 * ----------------------------------------------------------------------
 * user defined key value pairs to add custom metadata to arrow
 * key namespacing is the responsibility of the user
 *
 * @constructor
 */
export class KeyValue {
    /** Buffer this accessor reads from; null until `__init` is called. */
    bb: flatbuffers.ByteBuffer | null = null;
    /** Position of the table inside `bb`. */
    bb_pos: number = 0;

    /**
     * Binds this accessor to a position inside a buffer.
     *
     * @param i position stored in `bb_pos`
     * @param bb buffer stored in `bb`
     * @returns this instance
     */
    __init(i: number, bb: flatbuffers.ByteBuffer): KeyValue {
        this.bb_pos = i;
        this.bb = bb;
        return this;
    }

    /**
     * Returns an accessor located at `readInt32(position) + position` of `bb`.
     *
     * @param bb buffer to read from
     * @param obj optional instance to re-initialise instead of allocating
     * @returns the initialised accessor
     */
    static getRootAsKeyValue(bb: flatbuffers.ByteBuffer, obj?: KeyValue): KeyValue {
        return (obj || new KeyValue()).__init(bb.readInt32(bb.position()) + bb.position(), bb);
    }

    /**
     * Same as `getRootAsKeyValue`, but first advances the position of `bb`
     * by `flatbuffers.SIZE_PREFIX_LENGTH`. This mutates `bb`.
     *
     * @param bb buffer to read from
     * @param obj optional instance to re-initialise instead of allocating
     * @returns the initialised accessor
     */
    static getSizePrefixedRootAsKeyValue(bb: flatbuffers.ByteBuffer, obj?: KeyValue): KeyValue {
        bb.setPosition(bb.position() + flatbuffers.SIZE_PREFIX_LENGTH);
        return (obj || new KeyValue()).__init(bb.readInt32(bb.position()) + bb.position(), bb);
    }

    /**
     * Reads the `key` string (vtable offset 4).
     *
     * @param optionalEncoding encoding forwarded to the buffer's `__string`
     * @returns the stored string, or null when the field is not present
     */
    key(): string | null;
    key(optionalEncoding: flatbuffers.Encoding): string | Uint8Array | null;
    key(optionalEncoding?: any): string | Uint8Array | null {
        const offset = this.bb!.__offset(this.bb_pos, 4);
        return offset ? this.bb!.__string(this.bb_pos + offset, optionalEncoding) : null;
    }

    /**
     * Reads the `value` string (vtable offset 6).
     *
     * @param optionalEncoding encoding forwarded to the buffer's `__string`
     * @returns the stored string, or null when the field is not present
     */
    value(): string | null;
    value(optionalEncoding: flatbuffers.Encoding): string | Uint8Array | null;
    value(optionalEncoding?: any): string | Uint8Array | null {
        const offset = this.bb!.__offset(this.bb_pos, 6);
        return offset ? this.bb!.__string(this.bb_pos + offset, optionalEncoding) : null;
    }

    /**
     * Begins a `KeyValue` table with two field slots.
     *
     * @param builder builder that receives the table
     */
    static startKeyValue(builder: flatbuffers.Builder) {
        builder.startObject(2);
    }

    /**
     * Writes the offset of the key string into slot 0 (default 0).
     *
     * @param builder builder holding the table under construction
     * @param keyOffset offset of a previously written string
     */
    static addKey(builder: flatbuffers.Builder, keyOffset: flatbuffers.Offset) {
        builder.addFieldOffset(0, keyOffset, 0);
    }

    /**
     * Writes the offset of the value string into slot 1 (default 0).
     *
     * @param builder builder holding the table under construction
     * @param valueOffset offset of a previously written string
     */
    static addValue(builder: flatbuffers.Builder, valueOffset: flatbuffers.Offset) {
        builder.addFieldOffset(1, valueOffset, 0);
    }

    /**
     * Finishes the table under construction.
     *
     * @param builder builder holding the table
     * @returns the value returned by `builder.endObject()`
     */
    static endKeyValue(builder: flatbuffers.Builder): flatbuffers.Offset {
        const offset = builder.endObject();
        return offset;
    }

    /**
     * Convenience wrapper: starts the table, adds both string offsets, and finishes it.
     *
     * @param builder builder that receives the table
     * @param keyOffset offset of the key string, for slot 0
     * @param valueOffset offset of the value string, for slot 1
     * @returns the offset returned by `KeyValue.endKeyValue`
     */
    static createKeyValue(builder: flatbuffers.Builder, keyOffset: flatbuffers.Offset, valueOffset: flatbuffers.Offset): flatbuffers.Offset {
        KeyValue.startKeyValue(builder);
        KeyValue.addKey(builder, keyOffset);
        KeyValue.addValue(builder, valueOffset);
        return KeyValue.endKeyValue(builder);
    }
}
+/**
+ * Accessor and builder helpers for the flatbuffers `DictionaryEncoding` table.
+ *
+ * @constructor
+ */
+export class DictionaryEncoding {
+    bb: flatbuffers.ByteBuffer | null = null;
+    bb_pos: number = 0;
+    /**
+     * Binds this accessor to a position inside a byte buffer.
+     *
+     * @param number i position stored in `bb_pos`
+     * @param flatbuffers.ByteBuffer bb buffer stored in `bb`
+     * @returns DictionaryEncoding this instance
+     */
+    __init(i: number, bb: flatbuffers.ByteBuffer): DictionaryEncoding {
+        this.bb_pos = i;
+        this.bb = bb;
+        return this;
+    }
+    /**
+     * Reads the 32-bit root offset at the buffer's current position and
+     * returns an accessor bound to the table it points at.
+     *
+     * @param flatbuffers.ByteBuffer bb
+     * @param DictionaryEncoding= obj accessor to reuse instead of allocating a new one
+     * @returns DictionaryEncoding
+     */
+    static getRootAsDictionaryEncoding(bb: flatbuffers.ByteBuffer, obj?: DictionaryEncoding): DictionaryEncoding {
+        return (obj || new DictionaryEncoding()).__init(bb.readInt32(bb.position()) + bb.position(), bb);
+    }
+    /**
+     * Same as `getRootAsDictionaryEncoding`, but first advances the buffer
+     * position by `flatbuffers.SIZE_PREFIX_LENGTH`.
+     *
+     * @param flatbuffers.ByteBuffer bb
+     * @param DictionaryEncoding= obj accessor to reuse instead of allocating a new one
+     * @returns DictionaryEncoding
+     */
+    static getSizePrefixedRootAsDictionaryEncoding(bb: flatbuffers.ByteBuffer, obj?: DictionaryEncoding): DictionaryEncoding {
+        bb.setPosition(bb.position() + flatbuffers.SIZE_PREFIX_LENGTH);
+        return (obj || new DictionaryEncoding()).__init(bb.readInt32(bb.position()) + bb.position(), bb);
+    }
+    /**
+     * The known dictionary id in the application where this data is used. In
+     * the file or streaming formats, the dictionary ids are found in the
+     * DictionaryBatch messages
+     *
+     * @returns flatbuffers.Long a zero Long when the field is absent
+     */
+    id(): flatbuffers.Long {
+        const offset = this.bb!.__offset(this.bb_pos, 4);
+        return offset ? this.bb!.readInt64(this.bb_pos + offset) : this.bb!.createLong(0, 0);
+    }
+    /**
+     * The dictionary indices are constrained to be non-negative integers. If
+     * this field is null, the indices must be signed int32. To maximize
+     * cross-language compatibility and performance, implementations are
+     * recommended to prefer signed integer types over unsigned integer types
+     * and to avoid uint64 indices unless they are required by an application.
+     *
+     * @param Int= obj accessor to reuse instead of allocating a new one
+     * @returns Int|null
+     */
+    indexType(obj?: Int): Int | null {
+        const offset = this.bb!.__offset(this.bb_pos, 6);
+        return offset ? (obj || new Int()).__init(this.bb!.__indirect(this.bb_pos + offset), this.bb!) : null;
+    }
+    /**
+     * By default, dictionaries are not ordered, or the order does not have
+     * semantic meaning. In some statistical applications, dictionary-encoding
+     * is used to represent ordered categorical data, and we provide a way to
+     * preserve that metadata here
+     *
+     * @returns boolean false when the field is absent
+     */
+    isOrdered(): boolean {
+        const offset = this.bb!.__offset(this.bb_pos, 8);
+        // The flag is stored as an int8; `!!` converts it to a boolean.
+        return offset ? !!this.bb!.readInt8(this.bb_pos + offset) : false;
+    }
+    /**
+     * @returns DictionaryKind `DictionaryKind.DenseArray` when the field is absent
+     */
+    dictionaryKind(): DictionaryKind {
+        const offset = this.bb!.__offset(this.bb_pos, 10);
+        return offset ? this.bb!.readInt16(this.bb_pos + offset) : DictionaryKind.DenseArray;
+    }
+    /**
+     * Begins a DictionaryEncoding object on the builder via `startObject(4)`.
+     *
+     * @param flatbuffers.Builder builder
+     */
+    static startDictionaryEncoding(builder: flatbuffers.Builder) {
+        builder.startObject(4);
+    }
+    /**
+     * Adds `id` as field 0, with a zero Long passed as the default.
+     *
+     * @param flatbuffers.Builder builder
+     * @param flatbuffers.Long id
+     */
+    static addId(builder: flatbuffers.Builder, id: flatbuffers.Long) {
+        builder.addFieldInt64(0, id, builder.createLong(0, 0));
+    }
+    /**
+     * Adds `indexTypeOffset` as field 1.
+     *
+     * @param flatbuffers.Builder builder
+     * @param flatbuffers.Offset indexTypeOffset
+     */
+    static addIndexType(builder: flatbuffers.Builder, indexTypeOffset: flatbuffers.Offset) {
+        builder.addFieldOffset(1, indexTypeOffset, 0);
+    }
+    /**
+     * Adds `isOrdered` as field 2; the boolean is converted to a number with
+     * unary `+` before being written as an int8.
+     *
+     * @param flatbuffers.Builder builder
+     * @param boolean isOrdered
+     */
+    static addIsOrdered(builder: flatbuffers.Builder, isOrdered: boolean) {
+        builder.addFieldInt8(2, +isOrdered, +false);
+    }
+    /**
+     * Adds `dictionaryKind` as field 3, with `DictionaryKind.DenseArray`
+     * passed as the default.
+     *
+     * @param flatbuffers.Builder builder
+     * @param DictionaryKind dictionaryKind
+     */
+    static addDictionaryKind(builder: flatbuffers.Builder, dictionaryKind: DictionaryKind) {
+        builder.addFieldInt16(3, dictionaryKind, DictionaryKind.DenseArray);
+    }
+    /**
+     * Completes the DictionaryEncoding object currently under construction.
+     *
+     * @param flatbuffers.Builder builder
+     * @returns flatbuffers.Offset the value returned by `builder.endObject()`
+     */
+    static endDictionaryEncoding(builder: flatbuffers.Builder): flatbuffers.Offset {
+        return builder.endObject();
+    }
+    /**
+     * Builds a complete DictionaryEncoding object in one call: start, add all
+     * four fields, then end.
+     *
+     * @param flatbuffers.Builder builder
+     * @param flatbuffers.Long id
+     * @param flatbuffers.Offset indexTypeOffset
+     * @param boolean isOrdered
+     * @param DictionaryKind dictionaryKind
+     * @returns flatbuffers.Offset the offset returned by `endDictionaryEncoding`
+     */
+    static createDictionaryEncoding(builder: flatbuffers.Builder, id: flatbuffers.Long, indexTypeOffset: flatbuffers.Offset, isOrdered: boolean, dictionaryKind: DictionaryKind): flatbuffers.Offset {
+        DictionaryEncoding.startDictionaryEncoding(builder);
+        DictionaryEncoding.addId(builder, id);
+        DictionaryEncoding.addIndexType(builder, indexTypeOffset);
+        DictionaryEncoding.addIsOrdered(builder, isOrdered);
+        DictionaryEncoding.addDictionaryKind(builder, dictionaryKind);
+        return DictionaryEncoding.endDictionaryEncoding(builder);
+    }
+}
+/**
+ * ----------------------------------------------------------------------
+ * A field represents a named column in a record / row batch or child of a
+ * nested type.
+ *
+ * @constructor
+ */
+export class Field {
+    bb: flatbuffers.ByteBuffer | null = null;
+    bb_pos: number = 0;
+    /**
+     * Binds this accessor to a position inside a byte buffer.
+     *
+     * @param number i position stored in `bb_pos`
+     * @param flatbuffers.ByteBuffer bb buffer stored in `bb`
+     * @returns Field this instance
+     */
+    __init(i: number, bb: flatbuffers.ByteBuffer): Field {
+        this.bb_pos = i;
+        this.bb = bb;
+        return this;
+    }
+    /**
+     * Reads the 32-bit root offset at the buffer's current position and
+     * returns an accessor bound to the table it points at.
+     *
+     * @param flatbuffers.ByteBuffer bb
+     * @param Field= obj accessor to reuse instead of allocating a new one
+     * @returns Field
+     */
+    static getRootAsField(bb: flatbuffers.ByteBuffer, obj?: Field): Field {
+        return (obj || new Field()).__init(bb.readInt32(bb.position()) + bb.position(), bb);
+    }
+    /**
+     * Same as `getRootAsField`, but first advances the buffer position by
+     * `flatbuffers.SIZE_PREFIX_LENGTH`.
+     *
+     * @param flatbuffers.ByteBuffer bb
+     * @param Field= obj accessor to reuse instead of allocating a new one
+     * @returns Field
+     */
+    static getSizePrefixedRootAsField(bb: flatbuffers.ByteBuffer, obj?: Field): Field {
+        bb.setPosition(bb.position() + flatbuffers.SIZE_PREFIX_LENGTH);
+        return (obj || new Field()).__init(bb.readInt32(bb.position()) + bb.position(), bb);
+    }
+    /**
+     * Name is not required, e.g. in a List
+     *
+     * @param flatbuffers.Encoding= optionalEncoding passed through unchanged to the buffer's `__string` reader
+     * @returns string|Uint8Array|null null when the field is absent
+     */
+    name(): string | null;
+    name(optionalEncoding: flatbuffers.Encoding): string | Uint8Array | null;
+    name(optionalEncoding?: any): string | Uint8Array | null {
+        const offset = this.bb!.__offset(this.bb_pos, 4);
+        return offset ? this.bb!.__string(this.bb_pos + offset, optionalEncoding) : null;
+    }
+    /**
+     * Whether or not this field can contain nulls. Should be true in general.
+     *
+     * @returns boolean false when the field is absent
+     */
+    nullable(): boolean {
+        const offset = this.bb!.__offset(this.bb_pos, 6);
+        // The flag is stored as an int8; `!!` converts it to a boolean.
+        return offset ? !!this.bb!.readInt8(this.bb_pos + offset) : false;
+    }
+    /**
+     * @returns Type `Type.NONE` when the field is absent
+     */
+    typeType(): Type {
+        const offset = this.bb!.__offset(this.bb_pos, 8);
+        return offset ? this.bb!.readUint8(this.bb_pos + offset) : Type.NONE;
+    }
+    /**
+     * This is the type of the decoded value if the field is dictionary encoded.
+     *
+     * @param flatbuffers.Table obj accessor handed to the buffer's `__union` reader
+     * @returns ?flatbuffers.Table null when the field is absent
+     */
+    type<T extends flatbuffers.Table>(obj: T): T | null {
+        const offset = this.bb!.__offset(this.bb_pos, 10);
+        return offset ? this.bb!.__union(obj, this.bb_pos + offset) : null;
+    }
+    /**
+     * Present only if the field is dictionary encoded.
+     *
+     * @param DictionaryEncoding= obj accessor to reuse instead of allocating a new one
+     * @returns DictionaryEncoding|null
+     */
+    dictionary(obj?: DictionaryEncoding): DictionaryEncoding | null {
+        const offset = this.bb!.__offset(this.bb_pos, 12);
+        return offset ? (obj || new DictionaryEncoding()).__init(this.bb!.__indirect(this.bb_pos + offset), this.bb!) : null;
+    }
+    /**
+     * children apply only to nested data types like Struct, List and Union. For
+     * primitive types children will have length 0.
+     *
+     * @param number index element index; each element occupies 4 bytes in the vector
+     * @param Field= obj accessor to reuse instead of allocating a new one
+     * @returns Field|null null when the vector field is absent
+     */
+    children(index: number, obj?: Field): Field | null {
+        const offset = this.bb!.__offset(this.bb_pos, 14);
+        return offset ? (obj || new Field()).__init(this.bb!.__indirect(this.bb!.__vector(this.bb_pos + offset) + index * 4), this.bb!) : null;
+    }
+    /**
+     * @returns number 0 when the vector field is absent
+     */
+    childrenLength(): number {
+        const offset = this.bb!.__offset(this.bb_pos, 14);
+        return offset ? this.bb!.__vector_len(this.bb_pos + offset) : 0;
+    }
+    /**
+     * User-defined metadata
+     *
+     * @param number index element index; each element occupies 4 bytes in the vector
+     * @param KeyValue= obj accessor to reuse instead of allocating a new one
+     * @returns KeyValue|null null when the vector field is absent
+     */
+    customMetadata(index: number, obj?: KeyValue): KeyValue | null {
+        const offset = this.bb!.__offset(this.bb_pos, 16);
+        return offset ? (obj || new KeyValue()).__init(this.bb!.__indirect(this.bb!.__vector(this.bb_pos + offset) + index * 4), this.bb!) : null;
+    }
+    /**
+     * @returns number 0 when the vector field is absent
+     */
+    customMetadataLength(): number {
+        const offset = this.bb!.__offset(this.bb_pos, 16);
+        return offset ? this.bb!.__vector_len(this.bb_pos + offset) : 0;
+    }
+    /**
+     * Begins a Field object on the builder via `startObject(7)`.
+     *
+     * @param flatbuffers.Builder builder
+     */
+    static startField(builder: flatbuffers.Builder) {
+        builder.startObject(7);
+    }
+    /**
+     * Adds `nameOffset` as field 0.
+     *
+     * @param flatbuffers.Builder builder
+     * @param flatbuffers.Offset nameOffset
+     */
+    static addName(builder: flatbuffers.Builder, nameOffset: flatbuffers.Offset) {
+        builder.addFieldOffset(0, nameOffset, 0);
+    }
+    /**
+     * Adds `nullable` as field 1; the boolean is converted to a number with
+     * unary `+` before being written as an int8.
+     *
+     * @param flatbuffers.Builder builder
+     * @param boolean nullable
+     */
+    static addNullable(builder: flatbuffers.Builder, nullable: boolean) {
+        builder.addFieldInt8(1, +nullable, +false);
+    }
+    /**
+     * Adds `typeType` as field 2, with `Type.NONE` passed as the default.
+     *
+     * @param flatbuffers.Builder builder
+     * @param Type typeType
+     */
+    static addTypeType(builder: flatbuffers.Builder, typeType: Type) {
+        builder.addFieldInt8(2, typeType, Type.NONE);
+    }
+    /**
+     * Adds `typeOffset` as field 3.
+     *
+     * @param flatbuffers.Builder builder
+     * @param flatbuffers.Offset typeOffset
+     */
+    static addType(builder: flatbuffers.Builder, typeOffset: flatbuffers.Offset) {
+        builder.addFieldOffset(3, typeOffset, 0);
+    }
+    /**
+     * Adds `dictionaryOffset` as field 4.
+     *
+     * @param flatbuffers.Builder builder
+     * @param flatbuffers.Offset dictionaryOffset
+     */
+    static addDictionary(builder: flatbuffers.Builder, dictionaryOffset: flatbuffers.Offset) {
+        builder.addFieldOffset(4, dictionaryOffset, 0);
+    }
+    /**
+     * Adds `childrenOffset` as field 5.
+     *
+     * @param flatbuffers.Builder builder
+     * @param flatbuffers.Offset childrenOffset
+     */
+    static addChildren(builder: flatbuffers.Builder, childrenOffset: flatbuffers.Offset) {
+        builder.addFieldOffset(5, childrenOffset, 0);
+    }
+    /**
+     * Writes `data` as a vector of 4-byte offsets.
+     *
+     * @param flatbuffers.Builder builder
+     * @param Array.<flatbuffers.Offset> data
+     * @returns flatbuffers.Offset the value returned by `builder.endVector()`
+     */
+    static createChildrenVector(builder: flatbuffers.Builder, data: flatbuffers.Offset[]): flatbuffers.Offset {
+        builder.startVector(4, data.length, 4);
+        // Elements are added from the last to the first.
+        for (let i = data.length - 1; i >= 0; i--) {
+            builder.addOffset(data[i]);
+        }
+        return builder.endVector();
+    }
+    /**
+     * Starts a vector of `numElems` 4-byte elements for the children field.
+     *
+     * @param flatbuffers.Builder builder
+     * @param number numElems
+     */
+    static startChildrenVector(builder: flatbuffers.Builder, numElems: number) {
+        builder.startVector(4, numElems, 4);
+    }
+    /**
+     * Adds `customMetadataOffset` as field 6.
+     *
+     * @param flatbuffers.Builder builder
+     * @param flatbuffers.Offset customMetadataOffset
+     */
+    static addCustomMetadata(builder: flatbuffers.Builder, customMetadataOffset: flatbuffers.Offset) {
+        builder.addFieldOffset(6, customMetadataOffset, 0);
+    }
+    /**
+     * Writes `data` as a vector of 4-byte offsets.
+     *
+     * @param flatbuffers.Builder builder
+     * @param Array.<flatbuffers.Offset> data
+     * @returns flatbuffers.Offset the value returned by `builder.endVector()`
+     */
+    static createCustomMetadataVector(builder: flatbuffers.Builder, data: flatbuffers.Offset[]): flatbuffers.Offset {
+        builder.startVector(4, data.length, 4);
+        // Elements are added from the last to the first.
+        for (let i = data.length - 1; i >= 0; i--) {
+            builder.addOffset(data[i]);
+        }
+        return builder.endVector();
+    }
+    /**
+     * Starts a vector of `numElems` 4-byte elements for the custom metadata field.
+     *
+     * @param flatbuffers.Builder builder
+     * @param number numElems
+     */
+    static startCustomMetadataVector(builder: flatbuffers.Builder, numElems: number) {
+        builder.startVector(4, numElems, 4);
+    }
+    /**
+     * Completes the Field object currently under construction.
+     *
+     * @param flatbuffers.Builder builder
+     * @returns flatbuffers.Offset the value returned by `builder.endObject()`
+     */
+    static endField(builder: flatbuffers.Builder): flatbuffers.Offset {
+        return builder.endObject();
+    }
+    /**
+     * Builds a complete Field object in one call: start, add all seven
+     * fields, then end.
+     *
+     * @param flatbuffers.Builder builder
+     * @param flatbuffers.Offset nameOffset
+     * @param boolean nullable
+     * @param Type typeType
+     * @param flatbuffers.Offset typeOffset
+     * @param flatbuffers.Offset dictionaryOffset
+     * @param flatbuffers.Offset childrenOffset
+     * @param flatbuffers.Offset customMetadataOffset
+     * @returns flatbuffers.Offset the offset returned by `endField`
+     */
+    static createField(builder: flatbuffers.Builder, nameOffset: flatbuffers.Offset, nullable: boolean, typeType: Type, typeOffset: flatbuffers.Offset, dictionaryOffset: flatbuffers.Offset, childrenOffset: flatbuffers.Offset, customMetadataOffset: flatbuffers.Offset): flatbuffers.Offset {
+        Field.startField(builder);
+        Field.addName(builder, nameOffset);
+        Field.addNullable(builder, nullable);
+        Field.addTypeType(builder, typeType);
+        Field.addType(builder, typeOffset);
+        Field.addDictionary(builder, dictionaryOffset);
+        Field.addChildren(builder, childrenOffset);
+        Field.addCustomMetadata(builder, customMetadataOffset);
+        return Field.endField(builder);
+    }
+}
+/**
+ * ----------------------------------------------------------------------
+ * A Buffer represents a single contiguous memory segment
+ *
+ * @constructor
+ */
+export class Buffer {
+    bb: flatbuffers.ByteBuffer | null = null;
+    bb_pos: number = 0;
+    /**
+     * Binds this accessor to a position inside a byte buffer.
+     *
+     * @param number i position stored in `bb_pos`
+     * @param flatbuffers.ByteBuffer bb buffer stored in `bb`
+     * @returns Buffer this instance
+     */
+    __init(i: number, bb: flatbuffers.ByteBuffer): Buffer {
+        this.bb_pos = i;
+        this.bb = bb;
+        return this;
+    }
+    /**
+     * The relative offset into the shared memory page where the bytes for this
+     * buffer starts
+     *
+     * @returns flatbuffers.Long the 64-bit value read at `bb_pos`
+     */
+    offset(): flatbuffers.Long {
+        return this.bb!.readInt64(this.bb_pos);
+    }
+    /**
+     * The absolute length (in bytes) of the memory buffer. The memory is found
+     * from offset (inclusive) to offset + length (non-inclusive). When building
+     * messages using the encapsulated IPC message, padding bytes may be written
+     * after a buffer, but such padding bytes do not need to be accounted for in
+     * the size here.
+     *
+     * @returns flatbuffers.Long the 64-bit value read at `bb_pos + 8`
+     */
+    length(): flatbuffers.Long {
+        return this.bb!.readInt64(this.bb_pos + 8);
+    }
+    /**
+     * Writes a Buffer value (two 64-bit fields) through the builder.
+     *
+     * @param flatbuffers.Builder builder
+     * @param flatbuffers.Long offset
+     * @param flatbuffers.Long length
+     * @returns flatbuffers.Offset the value returned by `builder.offset()`
+     */
+    static createBuffer(builder: flatbuffers.Builder, offset: flatbuffers.Long, length: flatbuffers.Long): flatbuffers.Offset {
+        builder.prep(8, 16);
+        // `length` is written before `offset`: the reverse of the order in
+        // which the accessors read them (offset at +0, length at +8).
+        builder.writeInt64(length);
+        builder.writeInt64(offset);
+        return builder.offset();
+    }
+}
+/**
+ * ----------------------------------------------------------------------
+ * A Schema describes the columns in a row batch
+ *
+ * @constructor
+ */
+export class Schema {
+    bb: flatbuffers.ByteBuffer | null = null;
+    bb_pos: number = 0;
+    /**
+     * Binds this accessor to a position inside a byte buffer.
+     *
+     * @param number i position stored in `bb_pos`
+     * @param flatbuffers.ByteBuffer bb buffer stored in `bb`
+     * @returns Schema this instance
+     */
+    __init(i: number, bb: flatbuffers.ByteBuffer): Schema {
+        this.bb_pos = i;
+        this.bb = bb;
+        return this;
+    }
+    /**
+     * Reads the 32-bit root offset at the buffer's current position and
+     * returns an accessor bound to the table it points at.
+     *
+     * @param flatbuffers.ByteBuffer bb
+     * @param Schema= obj accessor to reuse instead of allocating a new one
+     * @returns Schema
+     */
+    static getRootAsSchema(bb: flatbuffers.ByteBuffer, obj?: Schema): Schema {
+        return (obj || new Schema()).__init(bb.readInt32(bb.position()) + bb.position(), bb);
+    }
+    /**
+     * Same as `getRootAsSchema`, but first advances the buffer position by
+     * `flatbuffers.SIZE_PREFIX_LENGTH`.
+     *
+     * @param flatbuffers.ByteBuffer bb
+     * @param Schema= obj accessor to reuse instead of allocating a new one
+     * @returns Schema
+     */
+    static getSizePrefixedRootAsSchema(bb: flatbuffers.ByteBuffer, obj?: Schema): Schema {
+        bb.setPosition(bb.position() + flatbuffers.SIZE_PREFIX_LENGTH);
+        return (obj || new Schema()).__init(bb.readInt32(bb.position()) + bb.position(), bb);
+    }
+    /**
+     * endianness of the buffer
+     * it is Little Endian by default
+     * if endianness doesn't match the underlying system then the vectors need to be converted
+     *
+     * @returns Endianness `Endianness.Little` when the field is absent
+     */
+    endianness(): Endianness {
+        const offset = this.bb!.__offset(this.bb_pos, 4);
+        return offset ? this.bb!.readInt16(this.bb_pos + offset) : Endianness.Little;
+    }
+    /**
+     * @param number index element index; each element occupies 4 bytes in the vector
+     * @param Field= obj accessor to reuse instead of allocating a new one
+     * @returns Field|null null when the vector field is absent
+     */
+    fields(index: number, obj?: Field): Field | null {
+        const offset = this.bb!.__offset(this.bb_pos, 6);
+        return offset ? (obj || new Field()).__init(this.bb!.__indirect(this.bb!.__vector(this.bb_pos + offset) + index * 4), this.bb!) : null;
+    }
+    /**
+     * @returns number 0 when the vector field is absent
+     */
+    fieldsLength(): number {
+        const offset = this.bb!.__offset(this.bb_pos, 6);
+        return offset ? this.bb!.__vector_len(this.bb_pos + offset) : 0;
+    }
+    /**
+     * @param number index element index; each element occupies 4 bytes in the vector
+     * @param KeyValue= obj accessor to reuse instead of allocating a new one
+     * @returns KeyValue|null null when the vector field is absent
+     */
+    customMetadata(index: number, obj?: KeyValue): KeyValue | null {
+        const offset = this.bb!.__offset(this.bb_pos, 8);
+        return offset ? (obj || new KeyValue()).__init(this.bb!.__indirect(this.bb!.__vector(this.bb_pos + offset) + index * 4), this.bb!) : null;
+    }
+    /**
+     * @returns number 0 when the vector field is absent
+     */
+    customMetadataLength(): number {
+        const offset = this.bb!.__offset(this.bb_pos, 8);
+        return offset ? this.bb!.__vector_len(this.bb_pos + offset) : 0;
+    }
+    /**
+     * Features used in the stream/file.
+     *
+     * @param number index element index; each element occupies 8 bytes in the vector
+     * @returns flatbuffers.Long a zero Long when the vector field is absent
+     */
+    features(index: number): flatbuffers.Long | null {
+        const offset = this.bb!.__offset(this.bb_pos, 10);
+        return offset ? this.bb!.readInt64(this.bb!.__vector(this.bb_pos + offset) + index * 8) : this.bb!.createLong(0, 0);
+    }
+    /**
+     * @returns number 0 when the vector field is absent
+     */
+    featuresLength(): number {
+        const offset = this.bb!.__offset(this.bb_pos, 10);
+        return offset ? this.bb!.__vector_len(this.bb_pos + offset) : 0;
+    }
+    /**
+     * Begins a Schema object on the builder via `startObject(4)`.
+     *
+     * @param flatbuffers.Builder builder
+     */
+    static startSchema(builder: flatbuffers.Builder) {
+        builder.startObject(4);
+    }
+    /**
+     * Adds `endianness` as field 0, with `Endianness.Little` passed as the default.
+     *
+     * @param flatbuffers.Builder builder
+     * @param Endianness endianness
+     */
+    static addEndianness(builder: flatbuffers.Builder, endianness: Endianness) {
+        builder.addFieldInt16(0, endianness, Endianness.Little);
+    }
+    /**
+     * Adds `fieldsOffset` as field 1.
+     *
+     * @param flatbuffers.Builder builder
+     * @param flatbuffers.Offset fieldsOffset
+     */
+    static addFields(builder: flatbuffers.Builder, fieldsOffset: flatbuffers.Offset) {
+        builder.addFieldOffset(1, fieldsOffset, 0);
+    }
+    /**
+     * Writes `data` as a vector of 4-byte offsets.
+     *
+     * @param flatbuffers.Builder builder
+     * @param Array.<flatbuffers.Offset> data
+     * @returns flatbuffers.Offset the value returned by `builder.endVector()`
+     */
+    static createFieldsVector(builder: flatbuffers.Builder, data: flatbuffers.Offset[]): flatbuffers.Offset {
+        builder.startVector(4, data.length, 4);
+        // Elements are added from the last to the first.
+        for (let i = data.length - 1; i >= 0; i--) {
+            builder.addOffset(data[i]);
+        }
+        return builder.endVector();
+    }
+    /**
+     * Starts a vector of `numElems` 4-byte elements for the fields vector.
+     *
+     * @param flatbuffers.Builder builder
+     * @param number numElems
+     */
+    static startFieldsVector(builder: flatbuffers.Builder, numElems: number) {
+        builder.startVector(4, numElems, 4);
+    }
+    /**
+     * Adds `customMetadataOffset` as field 2.
+     *
+     * @param flatbuffers.Builder builder
+     * @param flatbuffers.Offset customMetadataOffset
+     */
+    static addCustomMetadata(builder: flatbuffers.Builder, customMetadataOffset: flatbuffers.Offset) {
+        builder.addFieldOffset(2, customMetadataOffset, 0);
+    }
+    /**
+     * Writes `data` as a vector of 4-byte offsets.
+     *
+     * @param flatbuffers.Builder builder
+     * @param Array.<flatbuffers.Offset> data
+     * @returns flatbuffers.Offset the value returned by `builder.endVector()`
+     */
+    static createCustomMetadataVector(builder: flatbuffers.Builder, data: flatbuffers.Offset[]): flatbuffers.Offset {
+        builder.startVector(4, data.length, 4);
+        // Elements are added from the last to the first.
+        for (let i = data.length - 1; i >= 0; i--) {
+            builder.addOffset(data[i]);
+        }
+        return builder.endVector();
+    }
+    /**
+     * Starts a vector of `numElems` 4-byte elements for the custom metadata vector.
+     *
+     * @param flatbuffers.Builder builder
+     * @param number numElems
+     */
+    static startCustomMetadataVector(builder: flatbuffers.Builder, numElems: number) {
+        builder.startVector(4, numElems, 4);
+    }
+    /**
+     * Adds `featuresOffset` as field 3.
+     *
+     * @param flatbuffers.Builder builder
+     * @param flatbuffers.Offset featuresOffset
+     */
+    static addFeatures(builder: flatbuffers.Builder, featuresOffset: flatbuffers.Offset) {
+        builder.addFieldOffset(3, featuresOffset, 0);
+    }
+    /**
+     * Writes `data` as a vector of 8-byte integers.
+     *
+     * @param flatbuffers.Builder builder
+     * @param Array.<flatbuffers.Long> data
+     * @returns flatbuffers.Offset the value returned by `builder.endVector()`
+     */
+    static createFeaturesVector(builder: flatbuffers.Builder, data: flatbuffers.Long[]): flatbuffers.Offset {
+        builder.startVector(8, data.length, 8);
+        // Elements are added from the last to the first.
+        for (let i = data.length - 1; i >= 0; i--) {
+            builder.addInt64(data[i]);
+        }
+        return builder.endVector();
+    }
+    /**
+     * Starts a vector of `numElems` 8-byte elements for the features vector.
+     *
+     * @param flatbuffers.Builder builder
+     * @param number numElems
+     */
+    static startFeaturesVector(builder: flatbuffers.Builder, numElems: number) {
+        builder.startVector(8, numElems, 8);
+    }
+    /**
+     * Completes the Schema object currently under construction.
+     *
+     * @param flatbuffers.Builder builder
+     * @returns flatbuffers.Offset the value returned by `builder.endObject()`
+     */
+    static endSchema(builder: flatbuffers.Builder): flatbuffers.Offset {
+        return builder.endObject();
+    }
+    /**
+     * Finishes the buffer with `offset` as its root.
+     *
+     * @param flatbuffers.Builder builder
+     * @param flatbuffers.Offset offset
+     */
+    static finishSchemaBuffer(builder: flatbuffers.Builder, offset: flatbuffers.Offset) {
+        builder.finish(offset);
+    }
+    /**
+     * Finishes the buffer with `offset` as its root, passing `true` as the
+     * third argument of `builder.finish` (the size-prefixed variant).
+     *
+     * @param flatbuffers.Builder builder
+     * @param flatbuffers.Offset offset
+     */
+    static finishSizePrefixedSchemaBuffer(builder: flatbuffers.Builder, offset: flatbuffers.Offset) {
+        builder.finish(offset, undefined, true);
+    }
+    /**
+     * Builds a complete Schema object in one call: start, add all four
+     * fields, then end.
+     *
+     * @param flatbuffers.Builder builder
+     * @param Endianness endianness
+     * @param flatbuffers.Offset fieldsOffset
+     * @param flatbuffers.Offset customMetadataOffset
+     * @param flatbuffers.Offset featuresOffset
+     * @returns flatbuffers.Offset the offset returned by `endSchema`
+     */
+    static createSchema(builder: flatbuffers.Builder, endianness: Endianness, fieldsOffset: flatbuffers.Offset, customMetadataOffset: flatbuffers.Offset, featuresOffset: flatbuffers.Offset): flatbuffers.Offset {
+        Schema.startSchema(builder);
+        Schema.addEndianness(builder, endianness);
+        Schema.addFields(builder, fieldsOffset);
+        Schema.addCustomMetadata(builder, customMetadataOffset);
+        Schema.addFeatures(builder, featuresOffset);
+        return Schema.endSchema(builder);
+    }
+}
diff --git a/src/arrow/js/src/interfaces.ts b/src/arrow/js/src/interfaces.ts
new file mode 100644
index 000000000..43977ca7a
--- /dev/null
+++ b/src/arrow/js/src/interfaces.ts
@@ -0,0 +1,417 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+import { Data } from './data';
+import { Type } from './enum';
+import * as type from './type';
+import { DataType } from './type';
+import * as vecs from './vector/index';
+import * as builders from './builder/index';
+import { BuilderOptions } from './builder/index';
+/** The floating-point typed arrays. @ignore */ type FloatArray = Float32Array | Float64Array;
+/** The signed-integer typed arrays of 8, 16 and 32 bits. @ignore */ type IntArray = Int8Array | Int16Array | Int32Array;
+/** The unsigned-integer typed arrays of 8, 16 and 32 bits, plus the clamped 8-bit variant. @ignore */ type UintArray = Uint8Array | Uint16Array | Uint32Array | Uint8ClampedArray;
+/** Union of the float, signed and unsigned typed arrays declared above. @ignore */
+export type TypedArray = FloatArray | IntArray | UintArray;
+/** The two 64-bit typed arrays, kept separate from `TypedArray`. @ignore */
+export type BigIntArray = BigInt64Array | BigUint64Array;
+/**
+ * Describes the constructor (static side) of a typed-array class whose
+ * instances are of type `T` and whose elements are `number`s.
+ * @ignore
+ */
+export interface TypedArrayConstructor<T extends TypedArray> {
+    readonly prototype: T;
+    new(length?: number): T;
+    new(array: Iterable<number>): T;
+    new(buffer: ArrayBufferLike, byteOffset?: number, length?: number): T;
+    /**
+     * The size in bytes of each element in the array.
+     */
+    readonly BYTES_PER_ELEMENT: number;
+    /**
+     * Returns a new array from a set of elements.
+     * @param items A set of elements to include in the new array object.
+     */
+    of(...items: number[]): T;
+    /**
+     * Creates an array from an array-like or iterable object.
+     * @param arrayLike An array-like or iterable object to convert to an array.
+     * @param mapfn A mapping function to call on every element of the array.
+     * @param thisArg Value of 'this' used to invoke the mapfn.
+     */
+    from(arrayLike: ArrayLike<number>, mapfn?: (v: number, k: number) => number, thisArg?: any): T;
+    from<U>(arrayLike: ArrayLike<U>, mapfn: (v: U, k: number) => number, thisArg?: any): T;
+}
+/**
+ * Describes the constructor (static side) of a typed-array class whose
+ * instances are of type `T` and whose elements are `bigint`s.
+ * @ignore
+ */
+export interface BigIntArrayConstructor<T extends BigIntArray> {
+    readonly prototype: T;
+    new(length?: number): T;
+    new(array: Iterable<bigint>): T;
+    new(buffer: ArrayBufferLike, byteOffset?: number, length?: number): T;
+    /**
+     * The size in bytes of each element in the array.
+     */
+    readonly BYTES_PER_ELEMENT: number;
+    /**
+     * Returns a new array from a set of elements.
+     * @param items A set of elements to include in the new array object.
+     */
+    of(...items: bigint[]): T;
+    /**
+     * Creates an array from an array-like or iterable object.
+     * @param arrayLike An array-like or iterable object to convert to an array.
+     * @param mapfn A mapping function to call on every element of the array.
+     * @param thisArg Value of 'this' used to invoke the mapfn.
+     */
+    from(arrayLike: ArrayLike<bigint>, mapfn?: (v: bigint, k: number) => bigint, thisArg?: any): T;
+    from<U>(arrayLike: ArrayLike<U>, mapfn: (v: U, k: number) => bigint, thisArg?: any): T;
+}
+/**
+ * Resolves to the tuple of constructor arguments that follow the leading
+ * `data: Data<R>` parameter of `TCtor`, or to `never` when `TCtor` is not a
+ * constructor of that shape producing `T`.
+ * @ignore
+ */
+export type VectorCtorArgs<
+ T extends VectorType<R>,
+ R extends DataType = any,
+ TArgs extends any[] = any[],
+ TCtor extends new (data: Data<R>, ...args: TArgs) => T =
+ new (data: Data<R>, ...args: TArgs) => T
+> = TCtor extends new (data: Data<R>, ...args: infer TArgs) => T ? TArgs : never;
+/**
+ * Resolves to the tuple of constructor arguments that follow the leading
+ * `type: R` parameter of `TCtor`, or to `never` when `TCtor` is not a
+ * constructor of that shape producing `T`.
+ * @ignore
+ */
+export type BuilderCtorArgs<
+ T extends BuilderType<R, any>,
+ R extends DataType = any,
+ TArgs extends any[] = any[],
+ TCtor extends new (type: R, ...args: TArgs) => T =
+ new (type: R, ...args: TArgs) => T
+> = TCtor extends new (type: R, ...args: infer TArgs) => T ? TArgs : never;
+/**
+ * Obtain the constructor function of an instance type.
+ *
+ * Resolves to `TCtor` when it is constructible with any arguments and
+ * produces `T`; otherwise resolves to `never`.
+ * @ignore
+ */
+export type ConstructorType<
+    T,
+    TCtor extends new (...args: any[]) => T =
+        new (...args: any[]) => T
+> = TCtor extends new (...args: any[]) => T ? TCtor : never;
+/**
+ * Resolves to `TCtor` when it is constructible with
+ * `(type: R, data?: Data<R>[], offsets?: Uint32Array)` and produces `T`;
+ * otherwise resolves to `never`.
+ * @ignore
+ */
+export type VectorCtorType<
+ T extends VectorType<R>,
+ R extends DataType = any,
+ TCtor extends new (type: R, data?: Data<R>[], offsets?: Uint32Array) => T =
+ new (type: R, data?: Data<R>[], offsets?: Uint32Array) => T
+> = TCtor extends new (type: R, data?: Data<R>[], offsets?: Uint32Array) => T ? TCtor : never;
+/**
+ * Resolves to `TCtor` when it is constructible with a single
+ * `BuilderOptions<R, any>` argument and produces `T`; otherwise resolves to
+ * `never`.
+ * @ignore
+ */
+export type BuilderCtorType<
+ T extends BuilderType<R, any>,
+ R extends DataType = any,
+ TCtor extends new (options: BuilderOptions<R, any>) => T =
+ new (options: BuilderOptions<R, any>) => T
+> = TCtor extends new (options: BuilderOptions<R, any>) => T ? TCtor : never;
+/**
+ * Selects a vector type for `T`: a `Type` enum member is looked up through
+ * `TypeToVector`, a `DataType` through `DataTypeToVector`, and anything else
+ * falls back to `vecs.BaseVector<any>`.
+ * @ignore
+ */
+export type VectorType<T extends Type | DataType = any> =
+ T extends Type ? TypeToVector<T> :
+ T extends DataType ? DataTypeToVector<T> :
+ vecs.BaseVector<any>
+ ;
+/**
+ * Selects a builder type for `T`: a `Type` enum member is looked up through
+ * `TypeToBuilder`, a `DataType` through `DataTypeToBuilder`, and anything
+ * else falls back to `builders.Builder<any, TNull>`. `TNull` is passed
+ * through to whichever branch is chosen.
+ * @ignore
+ */
+export type BuilderType<T extends Type | DataType = any, TNull = any> =
+ T extends Type ? TypeToBuilder<T, TNull> :
+ T extends DataType ? DataTypeToBuilder<T, TNull> :
+ builders.Builder<any, TNull>
+ ;
+/**
+ * Constructor type of the vector associated with `T`. The branches are
+ * tested in order (vector type, `Type` enum member, `DataType`), each
+ * wrapping the selected vector type in `VectorCtorType`; the fallback is the
+ * constructor type of `vecs.BaseVector<any>`.
+ * @ignore
+ */
+export type VectorCtor<T extends Type | DataType | VectorType> =
+ T extends VectorType ? VectorCtorType<VectorType<T['TType']>> :
+ T extends Type ? VectorCtorType<VectorType<T>> :
+ T extends DataType ? VectorCtorType<VectorType<T['TType']>> :
+ VectorCtorType<vecs.BaseVector<any>>
+ ;
+/**
+ * Constructor type of the builder associated with `T`: both the `Type` and
+ * the `DataType` branch resolve to `BuilderCtorType<BuilderType<T>>`, and the
+ * fallback is the constructor type of `builders.Builder`.
+ * @ignore
+ */
+export type BuilderCtor<T extends Type | DataType = any> =
+ T extends Type ? BuilderCtorType<BuilderType<T>> :
+ T extends DataType ? BuilderCtorType<BuilderType<T>> :
+ BuilderCtorType<builders.Builder>
+ ;
+/**
+ * Constructor type of the data type associated with `T`: a `DataType` maps
+ * to its own constructor, a vector type to the constructor of its `type`
+ * member, and a `Type` enum member to the constructor of
+ * `TypeToDataType<T>`. Anything else resolves to `never`.
+ * @ignore
+ */
+export type DataTypeCtor<T extends Type | DataType | VectorType = any> =
+ T extends DataType ? ConstructorType<T> :
+ T extends VectorType ? ConstructorType<T['type']> :
+ T extends Type ? ConstructorType<TypeToDataType<T>> :
+ never
+ ;
+/**
+ * Maps a typed-array type to the data type of the same name (for example
+ * `Int8Array` to `type.Int8`, `BigUint64Array` to `type.Uint64`,
+ * `Float64Array` to `type.Float64`). `Uint8ClampedArray` is excluded by the
+ * constraint on `T`, and any type that matches no branch resolves to `never`.
+ * @ignore
+ */
+export type TypedArrayDataType<T extends Exclude<TypedArray, Uint8ClampedArray> | BigIntArray> =
+ T extends Int8Array ? type.Int8 :
+ T extends Int16Array ? type.Int16 :
+ T extends Int32Array ? type.Int32 :
+ T extends BigInt64Array ? type.Int64 :
+ T extends Uint8Array ? type.Uint8 :
+ T extends Uint16Array ? type.Uint16 :
+ T extends Uint32Array ? type.Uint32 :
+ T extends BigUint64Array ? type.Uint64 :
+ T extends Float32Array ? type.Float32 :
+ T extends Float64Array ? type.Float64 :
+ never;
+/**
+ * Lookup table from a `Type` enum member to its vector class. The object
+ * type is indexed by `T`, so `TypeToVector<Type.Int8>` resolves to
+ * `vecs.Int8Vector`; keys not listed fall back to `vecs.Vector<any>` through
+ * the numeric index signature.
+ * @ignore
+ */
+type TypeToVector<T extends Type> = {
+    [key: number]: vecs.Vector<any>;
+    [Type.Null]: vecs.NullVector;
+    [Type.Bool]: vecs.BoolVector;
+    [Type.Int8]: vecs.Int8Vector;
+    [Type.Int16]: vecs.Int16Vector;
+    [Type.Int32]: vecs.Int32Vector;
+    [Type.Int64]: vecs.Int64Vector;
+    [Type.Uint8]: vecs.Uint8Vector;
+    [Type.Uint16]: vecs.Uint16Vector;
+    [Type.Uint32]: vecs.Uint32Vector;
+    [Type.Uint64]: vecs.Uint64Vector;
+    [Type.Int]: vecs.IntVector;
+    [Type.Float16]: vecs.Float16Vector;
+    [Type.Float32]: vecs.Float32Vector;
+    [Type.Float64]: vecs.Float64Vector;
+    [Type.Float]: vecs.FloatVector;
+    [Type.Utf8]: vecs.Utf8Vector;
+    [Type.Binary]: vecs.BinaryVector;
+    [Type.FixedSizeBinary]: vecs.FixedSizeBinaryVector;
+    [Type.Date]: vecs.DateVector;
+    [Type.DateDay]: vecs.DateDayVector;
+    [Type.DateMillisecond]: vecs.DateMillisecondVector;
+    [Type.Timestamp]: vecs.TimestampVector;
+    [Type.TimestampSecond]: vecs.TimestampSecondVector;
+    [Type.TimestampMillisecond]: vecs.TimestampMillisecondVector;
+    [Type.TimestampMicrosecond]: vecs.TimestampMicrosecondVector;
+    [Type.TimestampNanosecond]: vecs.TimestampNanosecondVector;
+    [Type.Time]: vecs.TimeVector;
+    [Type.TimeSecond]: vecs.TimeSecondVector;
+    [Type.TimeMillisecond]: vecs.TimeMillisecondVector;
+    [Type.TimeMicrosecond]: vecs.TimeMicrosecondVector;
+    [Type.TimeNanosecond]: vecs.TimeNanosecondVector;
+    [Type.Decimal]: vecs.DecimalVector;
+    [Type.Union]: vecs.UnionVector;
+    [Type.DenseUnion]: vecs.DenseUnionVector;
+    [Type.SparseUnion]: vecs.SparseUnionVector;
+    [Type.Interval]: vecs.IntervalVector;
+    [Type.IntervalDayTime]: vecs.IntervalDayTimeVector;
+    [Type.IntervalYearMonth]: vecs.IntervalYearMonthVector;
+    [Type.Map]: vecs.MapVector;
+    [Type.List]: vecs.ListVector;
+    [Type.Struct]: vecs.StructVector;
+    [Type.Dictionary]: vecs.DictionaryVector;
+    [Type.FixedSizeList]: vecs.FixedSizeListVector;
+}[T];
+/**
+ * Lookup table from a `DataType` to its vector class. The object type is
+ * indexed by `T['TType']`; each entry additionally checks that `T` extends
+ * the matching data type and resolves to `never` otherwise. Nested types
+ * (Map, List, Struct, Dictionary, FixedSizeList) forward their child type
+ * members to the vector's type arguments.
+ * @ignore
+ */
+type DataTypeToVector<T extends DataType = any> = {
+    [key: number]: vecs.Vector<any>;
+    [Type.Null]: T extends type.Null ? vecs.NullVector : never;
+    [Type.Bool]: T extends type.Bool ? vecs.BoolVector : never;
+    [Type.Int8]: T extends type.Int8 ? vecs.Int8Vector : never;
+    [Type.Int16]: T extends type.Int16 ? vecs.Int16Vector : never;
+    [Type.Int32]: T extends type.Int32 ? vecs.Int32Vector : never;
+    [Type.Int64]: T extends type.Int64 ? vecs.Int64Vector : never;
+    [Type.Uint8]: T extends type.Uint8 ? vecs.Uint8Vector : never;
+    [Type.Uint16]: T extends type.Uint16 ? vecs.Uint16Vector : never;
+    [Type.Uint32]: T extends type.Uint32 ? vecs.Uint32Vector : never;
+    [Type.Uint64]: T extends type.Uint64 ? vecs.Uint64Vector : never;
+    [Type.Int]: T extends type.Int ? vecs.IntVector : never;
+    [Type.Float16]: T extends type.Float16 ? vecs.Float16Vector : never;
+    [Type.Float32]: T extends type.Float32 ? vecs.Float32Vector : never;
+    [Type.Float64]: T extends type.Float64 ? vecs.Float64Vector : never;
+    [Type.Float]: T extends type.Float ? vecs.FloatVector : never;
+    [Type.Utf8]: T extends type.Utf8 ? vecs.Utf8Vector : never;
+    [Type.Binary]: T extends type.Binary ? vecs.BinaryVector : never;
+    [Type.FixedSizeBinary]: T extends type.FixedSizeBinary ? vecs.FixedSizeBinaryVector : never;
+    [Type.Date]: T extends type.Date_ ? vecs.DateVector : never;
+    [Type.DateDay]: T extends type.DateDay ? vecs.DateDayVector : never;
+    [Type.DateMillisecond]: T extends type.DateMillisecond ? vecs.DateMillisecondVector : never;
+    [Type.Timestamp]: T extends type.Timestamp ? vecs.TimestampVector : never;
+    [Type.TimestampSecond]: T extends type.TimestampSecond ? vecs.TimestampSecondVector : never;
+    [Type.TimestampMillisecond]: T extends type.TimestampMillisecond ? vecs.TimestampMillisecondVector : never;
+    [Type.TimestampMicrosecond]: T extends type.TimestampMicrosecond ? vecs.TimestampMicrosecondVector : never;
+    [Type.TimestampNanosecond]: T extends type.TimestampNanosecond ? vecs.TimestampNanosecondVector : never;
+    [Type.Time]: T extends type.Time ? vecs.TimeVector : never;
+    [Type.TimeSecond]: T extends type.TimeSecond ? vecs.TimeSecondVector : never;
+    [Type.TimeMillisecond]: T extends type.TimeMillisecond ? vecs.TimeMillisecondVector : never;
+    [Type.TimeMicrosecond]: T extends type.TimeMicrosecond ? vecs.TimeMicrosecondVector : never;
+    [Type.TimeNanosecond]: T extends type.TimeNanosecond ? vecs.TimeNanosecondVector : never;
+    [Type.Decimal]: T extends type.Decimal ? vecs.DecimalVector : never;
+    [Type.Union]: T extends type.Union ? vecs.UnionVector : never;
+    [Type.DenseUnion]: T extends type.DenseUnion ? vecs.DenseUnionVector : never;
+    [Type.SparseUnion]: T extends type.SparseUnion ? vecs.SparseUnionVector : never;
+    [Type.Interval]: T extends type.Interval ? vecs.IntervalVector : never;
+    [Type.IntervalDayTime]: T extends type.IntervalDayTime ? vecs.IntervalDayTimeVector : never;
+    [Type.IntervalYearMonth]: T extends type.IntervalYearMonth ? vecs.IntervalYearMonthVector : never;
+    [Type.Map]: T extends type.Map_ ? vecs.MapVector<T['keyType'], T['valueType']> : never;
+    [Type.List]: T extends type.List ? vecs.ListVector<T['valueType']> : never;
+    [Type.Struct]: T extends type.Struct ? vecs.StructVector<T['dataTypes']> : never;
+    [Type.Dictionary]: T extends type.Dictionary ? vecs.DictionaryVector<T['valueType'], T['indices']> : never;
+    [Type.FixedSizeList]: T extends type.FixedSizeList ? vecs.FixedSizeListVector<T['valueType']> : never;
+}[T['TType']];
+/**
+ * Lookup table from a `Type` enum member to the `DataType` class registered
+ * for it. The object type is indexed by `T`, so `TypeToDataType<Type.Int32>`
+ * resolves to `type.Int32`; the numeric index signature supplies
+ * `type.DataType` for any key without an explicit row.
+ * @ignore
+ */
+export type TypeToDataType<T extends Type> = {
+    [key: number]: type.DataType;
+    [Type.Null]: type.Null;
+    [Type.Bool]: type.Bool;
+    [Type.Int]: type.Int;
+    [Type.Int16]: type.Int16;
+    [Type.Int32]: type.Int32;
+    [Type.Int64]: type.Int64;
+    [Type.Uint8]: type.Uint8;
+    [Type.Uint16]: type.Uint16;
+    [Type.Uint32]: type.Uint32;
+    [Type.Uint64]: type.Uint64;
+    [Type.Int8]: type.Int8;
+    [Type.Float16]: type.Float16;
+    [Type.Float32]: type.Float32;
+    [Type.Float64]: type.Float64;
+    [Type.Float]: type.Float;
+    [Type.Utf8]: type.Utf8;
+    [Type.Binary]: type.Binary;
+    [Type.FixedSizeBinary]: type.FixedSizeBinary;
+    [Type.Date]: type.Date_;
+    [Type.DateDay]: type.DateDay;
+    [Type.DateMillisecond]: type.DateMillisecond;
+    [Type.Timestamp]: type.Timestamp;
+    [Type.TimestampSecond]: type.TimestampSecond;
+    [Type.TimestampMillisecond]: type.TimestampMillisecond;
+    [Type.TimestampMicrosecond]: type.TimestampMicrosecond;
+    [Type.TimestampNanosecond]: type.TimestampNanosecond;
+    [Type.Time]: type.Time;
+    [Type.TimeSecond]: type.TimeSecond;
+    [Type.TimeMillisecond]: type.TimeMillisecond;
+    [Type.TimeMicrosecond]: type.TimeMicrosecond;
+    [Type.TimeNanosecond]: type.TimeNanosecond;
+    [Type.Decimal]: type.Decimal;
+    [Type.Union]: type.Union;
+    [Type.DenseUnion]: type.DenseUnion;
+    [Type.SparseUnion]: type.SparseUnion;
+    [Type.Interval]: type.Interval;
+    [Type.IntervalDayTime]: type.IntervalDayTime;
+    [Type.IntervalYearMonth]: type.IntervalYearMonth;
+    [Type.Map]: type.Map_;
+    [Type.List]: type.List;
+    [Type.Struct]: type.Struct;
+    [Type.Dictionary]: type.Dictionary;
+    [Type.FixedSizeList]: type.FixedSizeList;
+}[T];
+/**
+ * Lookup table from a `Type` enum member to the builder class for that type,
+ * parameterised by the null-sentinel type `TNull`. Generic builders that need
+ * a concrete `DataType` argument are instantiated with `any` here because
+ * only the enum value is known. The object type is indexed by `T`.
+ * @ignore
+ */
+type TypeToBuilder<T extends Type = any, TNull = any> = {
+    [key: number]: builders.Builder;
+    [Type.Null]: builders.NullBuilder<TNull>;
+    [Type.Bool]: builders.BoolBuilder<TNull>;
+    [Type.Int8]: builders.Int8Builder<TNull>;
+    [Type.Int16]: builders.Int16Builder<TNull>;
+    [Type.Int32]: builders.Int32Builder<TNull>;
+    [Type.Int64]: builders.Int64Builder<TNull>;
+    [Type.Uint8]: builders.Uint8Builder<TNull>;
+    [Type.Uint16]: builders.Uint16Builder<TNull>;
+    [Type.Uint32]: builders.Uint32Builder<TNull>;
+    [Type.Uint64]: builders.Uint64Builder<TNull>;
+    [Type.Int]: builders.IntBuilder<any, TNull>;
+    [Type.Float16]: builders.Float16Builder<TNull>;
+    [Type.Float32]: builders.Float32Builder<TNull>;
+    [Type.Float64]: builders.Float64Builder<TNull>;
+    [Type.Float]: builders.FloatBuilder<any, TNull>;
+    [Type.Utf8]: builders.Utf8Builder<TNull>;
+    [Type.Binary]: builders.BinaryBuilder<TNull>;
+    [Type.FixedSizeBinary]: builders.FixedSizeBinaryBuilder<TNull>;
+    [Type.Date]: builders.DateBuilder<any, TNull>;
+    [Type.DateDay]: builders.DateDayBuilder<TNull>;
+    [Type.DateMillisecond]: builders.DateMillisecondBuilder<TNull>;
+    [Type.Timestamp]: builders.TimestampBuilder<any, TNull>;
+    [Type.TimestampSecond]: builders.TimestampSecondBuilder<TNull>;
+    [Type.TimestampMillisecond]: builders.TimestampMillisecondBuilder<TNull>;
+    [Type.TimestampMicrosecond]: builders.TimestampMicrosecondBuilder<TNull>;
+    [Type.TimestampNanosecond]: builders.TimestampNanosecondBuilder<TNull>;
+    [Type.Time]: builders.TimeBuilder<any, TNull>;
+    [Type.TimeSecond]: builders.TimeSecondBuilder<TNull>;
+    [Type.TimeMillisecond]: builders.TimeMillisecondBuilder<TNull>;
+    [Type.TimeMicrosecond]: builders.TimeMicrosecondBuilder<TNull>;
+    [Type.TimeNanosecond]: builders.TimeNanosecondBuilder<TNull>;
+    [Type.Decimal]: builders.DecimalBuilder<TNull>;
+    [Type.Union]: builders.UnionBuilder<any, TNull>;
+    [Type.DenseUnion]: builders.DenseUnionBuilder<any, TNull>;
+    [Type.SparseUnion]: builders.SparseUnionBuilder<any, TNull>;
+    [Type.Interval]: builders.IntervalBuilder<any, TNull>;
+    [Type.IntervalDayTime]: builders.IntervalDayTimeBuilder<TNull>;
+    [Type.IntervalYearMonth]: builders.IntervalYearMonthBuilder<TNull>;
+    [Type.Map]: builders.MapBuilder<any, any, TNull>;
+    [Type.List]: builders.ListBuilder<any, TNull>;
+    [Type.Struct]: builders.StructBuilder<any, TNull>;
+    [Type.Dictionary]: builders.DictionaryBuilder<any, TNull>;
+    [Type.FixedSizeList]: builders.FixedSizeListBuilder<any, TNull>;
+}[T];
+/**
+ * Lookup table from a concrete `DataType` to its builder class. Each row is a
+ * conditional type that yields the builder only when `T` is assignable to the
+ * row's data type (and `never` otherwise), which lets nested type parameters
+ * such as `T['valueType']` flow into the builder. The object type is indexed
+ * by the data type's `TType` enum value.
+ * @ignore
+ */
+type DataTypeToBuilder<T extends DataType = any, TNull = any> = {
+    [key: number]: builders.Builder<any, TNull>;
+    [Type.Null]: T extends type.Null ? builders.NullBuilder<TNull> : never;
+    [Type.Bool]: T extends type.Bool ? builders.BoolBuilder<TNull> : never;
+    [Type.Int8]: T extends type.Int8 ? builders.Int8Builder<TNull> : never;
+    [Type.Int16]: T extends type.Int16 ? builders.Int16Builder<TNull> : never;
+    [Type.Int32]: T extends type.Int32 ? builders.Int32Builder<TNull> : never;
+    [Type.Int64]: T extends type.Int64 ? builders.Int64Builder<TNull> : never;
+    [Type.Uint8]: T extends type.Uint8 ? builders.Uint8Builder<TNull> : never;
+    [Type.Uint16]: T extends type.Uint16 ? builders.Uint16Builder<TNull> : never;
+    [Type.Uint32]: T extends type.Uint32 ? builders.Uint32Builder<TNull> : never;
+    [Type.Uint64]: T extends type.Uint64 ? builders.Uint64Builder<TNull> : never;
+    [Type.Int]: T extends type.Int ? builders.IntBuilder<T, TNull> : never;
+    [Type.Float16]: T extends type.Float16 ? builders.Float16Builder<TNull> : never;
+    [Type.Float32]: T extends type.Float32 ? builders.Float32Builder<TNull> : never;
+    [Type.Float64]: T extends type.Float64 ? builders.Float64Builder<TNull> : never;
+    [Type.Float]: T extends type.Float ? builders.FloatBuilder<T, TNull> : never;
+    [Type.Utf8]: T extends type.Utf8 ? builders.Utf8Builder<TNull> : never;
+    [Type.Binary]: T extends type.Binary ? builders.BinaryBuilder<TNull> : never;
+    [Type.FixedSizeBinary]: T extends type.FixedSizeBinary ? builders.FixedSizeBinaryBuilder<TNull> : never;
+    [Type.Date]: T extends type.Date_ ? builders.DateBuilder<T, TNull> : never;
+    [Type.DateDay]: T extends type.DateDay ? builders.DateDayBuilder<TNull> : never;
+    [Type.DateMillisecond]: T extends type.DateMillisecond ? builders.DateMillisecondBuilder<TNull> : never;
+    [Type.Timestamp]: T extends type.Timestamp ? builders.TimestampBuilder<T, TNull> : never;
+    [Type.TimestampSecond]: T extends type.TimestampSecond ? builders.TimestampSecondBuilder<TNull> : never;
+    [Type.TimestampMillisecond]: T extends type.TimestampMillisecond ? builders.TimestampMillisecondBuilder<TNull> : never;
+    [Type.TimestampMicrosecond]: T extends type.TimestampMicrosecond ? builders.TimestampMicrosecondBuilder<TNull> : never;
+    [Type.TimestampNanosecond]: T extends type.TimestampNanosecond ? builders.TimestampNanosecondBuilder<TNull> : never;
+    [Type.Time]: T extends type.Time ? builders.TimeBuilder<T, TNull> : never;
+    [Type.TimeSecond]: T extends type.TimeSecond ? builders.TimeSecondBuilder<TNull> : never;
+    [Type.TimeMillisecond]: T extends type.TimeMillisecond ? builders.TimeMillisecondBuilder<TNull> : never;
+    [Type.TimeMicrosecond]: T extends type.TimeMicrosecond ? builders.TimeMicrosecondBuilder<TNull> : never;
+    [Type.TimeNanosecond]: T extends type.TimeNanosecond ? builders.TimeNanosecondBuilder<TNull> : never;
+    [Type.Decimal]: T extends type.Decimal ? builders.DecimalBuilder<TNull> : never;
+    [Type.Union]: T extends type.Union ? builders.UnionBuilder<T, TNull> : never;
+    [Type.DenseUnion]: T extends type.DenseUnion ? builders.DenseUnionBuilder<T, TNull> : never;
+    [Type.SparseUnion]: T extends type.SparseUnion ? builders.SparseUnionBuilder<T, TNull> : never;
+    [Type.Interval]: T extends type.Interval ? builders.IntervalBuilder<T, TNull> : never;
+    [Type.IntervalDayTime]: T extends type.IntervalDayTime ? builders.IntervalDayTimeBuilder<TNull> : never;
+    [Type.IntervalYearMonth]: T extends type.IntervalYearMonth ? builders.IntervalYearMonthBuilder<TNull> : never;
+    [Type.Map]: T extends type.Map_ ? builders.MapBuilder<T['keyType'], T['valueType'], TNull> : never;
+    [Type.List]: T extends type.List ? builders.ListBuilder<T['valueType'], TNull> : never;
+    [Type.Struct]: T extends type.Struct ? builders.StructBuilder<T['dataTypes'], TNull> : never;
+    [Type.Dictionary]: T extends type.Dictionary ? builders.DictionaryBuilder<T, TNull> : never;
+    [Type.FixedSizeList]: T extends type.FixedSizeList ? builders.FixedSizeListBuilder<T['valueType'], TNull> : never;
+}[T['TType']];
diff --git a/src/arrow/js/src/io/adapters.ts b/src/arrow/js/src/io/adapters.ts
new file mode 100644
index 000000000..a83346ef7
--- /dev/null
+++ b/src/arrow/js/src/io/adapters.ts
@@ -0,0 +1,398 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+import {
+ toUint8Array,
+ joinUint8Arrays,
+ ArrayBufferViewInput,
+ toUint8ArrayIterator,
+ toUint8ArrayAsyncIterator
+} from '../util/buffer';
+import { ReadableDOMStreamOptions } from './interfaces';
+// Shape of one reader `read()` result: a completion flag plus the chunk that was read.
+interface ReadableStreamReadResult<T> { done: boolean; value: T }
+// Sync generator that yields Uint8Array chunks, returns null, and accepts a
+// `{ cmd, size }` command ('peek' or 'read' of `size` bytes) through `next()`.
+type Uint8ArrayGenerator = Generator<Uint8Array, null, { cmd: 'peek' | 'read'; size: number }>;
+// Async counterpart of Uint8ArrayGenerator with the same yield/return/command types.
+type AsyncUint8ArrayGenerator = AsyncGenerator<Uint8Array, null, { cmd: 'peek' | 'read'; size: number }>;
+/**
+ * Stream adapters: each `from*` method wraps a byte source in a command-driven
+ * (`{ cmd, size }`) generator and primes it with `pump`, so the first
+ * `next(command)` from the caller is received by the generator body.
+ *
+ * `toDOMStream` and `toNodeStream` are placeholders that throw; an
+ * environment-specific entry point is expected to overwrite them (for example
+ * by assigning `streamAdapters.toDOMStream`).
+ * @ignore
+ */
+export default {
+    fromIterable<T extends ArrayBufferViewInput>(source: Iterable<T> | T): Uint8ArrayGenerator {
+        return pump(fromIterable<T>(source));
+    },
+    fromAsyncIterable<T extends ArrayBufferViewInput>(source: AsyncIterable<T> | PromiseLike<T>): AsyncUint8ArrayGenerator {
+        return pump(fromAsyncIterable<T>(source));
+    },
+    fromDOMStream<T extends ArrayBufferViewInput>(source: ReadableStream<T>): AsyncUint8ArrayGenerator {
+        return pump(fromDOMStream<T>(source));
+    },
+    fromNodeStream(stream: NodeJS.ReadableStream): AsyncUint8ArrayGenerator {
+        return pump(fromNodeStream(stream));
+    },
+    // @ts-ignore
+    toDOMStream<T>(source: Iterable<T> | AsyncIterable<T>, options?: ReadableDOMStreamOptions): ReadableStream<T> {
+        throw new Error(`"toDOMStream" not available in this environment`);
+    },
+    // @ts-ignore
+    toNodeStream<T>(source: Iterable<T> | AsyncIterable<T>, options?: import('stream').ReadableOptions): import('stream').Readable {
+        throw new Error(`"toNodeStream" not available in this environment`);
+    },
+};
+/**
+ * Primes a freshly created generator by advancing it to its first `yield`.
+ * The adapters below begin with a bare `yield` whose only purpose is to
+ * receive the first `{ cmd, size }` command; without this initial `next()`
+ * the caller's first command would be discarded.
+ * @returns the same iterator, now suspended at its first `yield`.
+ * @ignore
+ */
+const pump = <T extends Uint8ArrayGenerator | AsyncUint8ArrayGenerator>(iterator: T) => {
+    iterator.next();
+    return iterator;
+};
+/**
+ * Adapts a synchronous source of byte chunks into a command-driven generator.
+ *
+ * The caller sends `{ cmd, size }` through `next()`. Chunks pulled from the
+ * source are queued until at least `size` bytes are buffered (or the source
+ * is exhausted); the generator then yields a joined range of the queue.
+ * A 'peek' command yields the range without consuming it, any other command
+ * consumes it.
+ *
+ * @param source an iterable of buffer-like values, or a single such value.
+ * @returns `null` once the source is exhausted.
+ * @ignore
+ */
+function* fromIterable<T extends ArrayBufferViewInput>(source: Iterable<T> | T): Uint8ArrayGenerator {
+    let done: boolean | undefined, threw = false;
+    let buffers: Uint8Array[] = [], buffer: Uint8Array;
+    let cmd: 'peek' | 'read', size: number, bufferLength = 0;
+    // Joins the queued chunks up to `size` bytes; only a non-peek command
+    // replaces the queue with the leftover bytes.
+    function byteRange() {
+        if (cmd === 'peek') {
+            return joinUint8Arrays(buffers, size)[0];
+        }
+        [buffer, buffers, bufferLength] = joinUint8Arrays(buffers, size);
+        return buffer;
+    }
+    // Yield so the caller can inject the read command before creating the source Iterator
+    ({ cmd, size } = yield <any> null);
+    // initialize the iterator
+    const it = toUint8ArrayIterator(source)[Symbol.iterator]();
+    try {
+        do {
+            // read the next value; a NaN size means "whatever the source produces next"
+            ({ done, value: buffer } = isNaN(size - bufferLength) ?
+                it.next(undefined) : it.next(size - bufferLength));
+            // if chunk is not null or empty, push it onto the queue
+            if (!done && buffer.byteLength > 0) {
+                buffers.push(buffer);
+                bufferLength += buffer.byteLength;
+            }
+            // If we have enough bytes in our buffer, yield chunks until we don't
+            if (done || size <= bufferLength) {
+                do {
+                    ({ cmd, size } = yield byteRange());
+                } while (size < bufferLength);
+            }
+        } while (!done);
+    } catch (e) {
+        // forward the failure to the source iterator if it supports `throw`
+        (threw = true) && (typeof it.throw === 'function') && (it.throw(e));
+    } finally {
+        // on a clean exit (including early `return()` by the caller) close the source
+        (threw === false) && (typeof it.return === 'function') && (it.return(null!));
+    }
+    return null;
+}
+/**
+ * Async counterpart of `fromIterable`: adapts an async source of byte chunks
+ * into a command-driven async generator.
+ *
+ * The caller sends `{ cmd, size }` through `next()`. Chunks are awaited from
+ * the source and queued until at least `size` bytes are buffered (or the
+ * source ends); a joined range is then yielded. 'peek' leaves the queue
+ * untouched, any other command consumes the yielded bytes.
+ *
+ * @param source an async iterable of buffer-like values, or a promise for one value.
+ * @returns `null` once the source is exhausted.
+ * @ignore
+ */
+async function* fromAsyncIterable<T extends ArrayBufferViewInput>(source: AsyncIterable<T> | PromiseLike<T>): AsyncUint8ArrayGenerator {
+    let done: boolean | undefined, threw = false;
+    let buffers: Uint8Array[] = [], buffer: Uint8Array;
+    let cmd: 'peek' | 'read', size: number, bufferLength = 0;
+    // Joins the queued chunks up to `size` bytes; only a non-peek command
+    // replaces the queue with the leftover bytes.
+    function byteRange() {
+        if (cmd === 'peek') {
+            return joinUint8Arrays(buffers, size)[0];
+        }
+        [buffer, buffers, bufferLength] = joinUint8Arrays(buffers, size);
+        return buffer;
+    }
+    // Yield so the caller can inject the read command before creating the source AsyncIterator
+    ({ cmd, size } = (yield <any> null)!);
+    // initialize the iterator
+    const it = toUint8ArrayAsyncIterator(source)[Symbol.asyncIterator]();
+    try {
+        do {
+            // read the next value; a NaN size means "whatever the source produces next"
+            ({ done, value: buffer } = isNaN(size - bufferLength)
+                ? await it.next(undefined)
+                : await it.next(size - bufferLength));
+            // if chunk is not null or empty, push it onto the queue
+            if (!done && buffer.byteLength > 0) {
+                buffers.push(buffer);
+                bufferLength += buffer.byteLength;
+            }
+            // If we have enough bytes in our buffer, yield chunks until we don't
+            if (done || size <= bufferLength) {
+                do {
+                    ({ cmd, size } = yield byteRange());
+                } while (size < bufferLength);
+            }
+        } while (!done);
+    } catch (e) {
+        // forward the failure to the source iterator if it supports `throw`
+        (threw = true) && (typeof it.throw === 'function') && (await it.throw(e));
+    } finally {
+        // on a clean exit (including early `return()` by the caller) close the source
+        (threw === false) && (typeof it.return === 'function') && (await it.return(new Uint8Array(0)));
+    }
+    return null;
+}
+// All this manual Uint8Array chunk management can be avoided if/when engines
+// add support for ArrayBuffer.transfer() or ArrayBuffer.prototype.realloc().
+/**
+ * Adapts a WHATWG `ReadableStream` of byte chunks into a command-driven async
+ * generator.
+ *
+ * The stream is not locked until the first command arrives. Chunks are read
+ * through an `AdaptiveByteReader` and queued until at least `size` bytes are
+ * buffered (or the stream ends); a joined range is then yielded. 'peek'
+ * leaves the queue untouched, any other command consumes the yielded bytes.
+ *
+ * @param source the stream to read from.
+ * @returns `null` once the stream is exhausted.
+ * @ignore
+ */
+async function* fromDOMStream<T extends ArrayBufferViewInput>(source: ReadableStream<T>): AsyncUint8ArrayGenerator {
+    let done = false, threw = false;
+    let buffers: Uint8Array[] = [], buffer: Uint8Array;
+    let cmd: 'peek' | 'read', size: number, bufferLength = 0;
+    // Joins the queued chunks up to `size` bytes; only a non-peek command
+    // replaces the queue with the leftover bytes.
+    function byteRange() {
+        if (cmd === 'peek') {
+            return joinUint8Arrays(buffers, size)[0];
+        }
+        [buffer, buffers, bufferLength] = joinUint8Arrays(buffers, size);
+        return buffer;
+    }
+    // Yield so the caller can inject the read command before we establish the ReadableStream lock
+    ({ cmd, size } = yield <any> null);
+    // initialize the reader and lock the stream
+    const it = new AdaptiveByteReader(source);
+    try {
+        do {
+            // read the next value; a NaN size means "whatever the stream produces next"
+            ({ done, value: buffer } = isNaN(size - bufferLength)
+                ? await it['read'](undefined)
+                : await it['read'](size - bufferLength));
+            // if chunk is not null or empty, push it onto the queue
+            if (!done && buffer.byteLength > 0) {
+                buffers.push(toUint8Array(buffer));
+                bufferLength += buffer.byteLength;
+            }
+            // If we have enough bytes in our buffer, yield chunks until we don't
+            if (done || size <= bufferLength) {
+                do {
+                    ({ cmd, size } = yield byteRange());
+                } while (size < bufferLength);
+            }
+        } while (!done);
+    } catch (e) {
+        // cancel the underlying stream with the failure as the reason
+        (threw = true) && (await it['cancel'](e));
+    } finally {
+        // clean exit: cancel (which also releases the lock); after a failure
+        // the stream was already cancelled, so only make sure the lock is gone
+        (threw === false) ? (await it['cancel']())
+            : source['locked'] && it.releaseLock();
+    }
+    return null;
+}
+/**
+ * Reader over a `ReadableStream` that uses a BYOB ("bring your own buffer")
+ * reader when the stream can provide one and falls back to the default
+ * reader otherwise. At most one of the two readers holds the stream lock at
+ * any time; switching between them releases the current lock first.
+ * @ignore
+ */
+class AdaptiveByteReader<T extends ArrayBufferViewInput> {
+    private supportsBYOB: boolean;
+    private byobReader: ReadableStreamBYOBReader | null = null;
+    private defaultReader: ReadableStreamDefaultReader<T> | null = null;
+    private reader: ReadableStreamBYOBReader | ReadableStreamDefaultReader<T> | null;
+    constructor(private source: ReadableStream<T>) {
+        try {
+            // acquiring a BYOB reader throws for streams that don't support it
+            this.supportsBYOB = !!(this.reader = this.getBYOBReader());
+        } catch (e) {
+            // the default reader is always truthy, so this evaluates to `false`
+            this.supportsBYOB = !(this.reader = this.getDefaultReader());
+        }
+    }
+    /** Resolves when the active reader closes; errors are swallowed. */
+    get closed(): Promise<void> {
+        return this.reader ? this.reader['closed'].catch(() => {}) : Promise.resolve();
+    }
+    /** Releases the stream lock (if held) and forgets both readers. */
+    releaseLock(): void {
+        if (this.reader) {
+            this.reader.releaseLock();
+        }
+        this.reader = this.byobReader = this.defaultReader = null;
+    }
+    /** Cancels the stream through the active reader (ignoring failures), then releases the lock. */
+    async cancel(reason?: any): Promise<void> {
+        const { reader, source } = this;
+        reader && (await reader['cancel'](reason).catch(() => {}));
+        source && (source['locked'] && this.releaseLock());
+    }
+    /**
+     * Reads the next chunk. With a numeric `size` and BYOB support, up to
+     * `size` bytes are read into a new buffer; otherwise the default reader
+     * returns whatever chunk the stream produces next. A `size` of 0 returns
+     * an empty chunk without touching the stream.
+     */
+    async read(size?: number): Promise<ReadableStreamReadResult<Uint8Array>> {
+        if (size === 0) {
+            return { done: this.reader == null, value: new Uint8Array(0) };
+        }
+        const result = !this.supportsBYOB || typeof size !== 'number'
+            ? await this.getDefaultReader().read()
+            : await this.readFromBYOBReader(size);
+        // normalize the chunk to a Uint8Array (the whole result object is handed to toUint8Array)
+        !result.done && (result.value = toUint8Array(result as ReadableStreamReadResult<Uint8Array>));
+        return result as ReadableStreamReadResult<Uint8Array>;
+    }
+    private getDefaultReader() {
+        if (this.byobReader) { this.releaseLock(); }
+        if (!this.defaultReader) {
+            this.defaultReader = this.source['getReader']();
+            // We have to catch and swallow errors here to avoid uncaught promise rejection exceptions
+            // that seem to be raised when we call `releaseLock()` on this reader. I'm still mystified
+            // about why these errors are raised, but I'm sure there's some important spec reason that
+            // I haven't considered. I hate to employ such an anti-pattern here, but it seems like the
+            // only solution in this case :/
+            this.defaultReader['closed'].catch(() => {});
+        }
+        return (this.reader = this.defaultReader);
+    }
+    private getBYOBReader() {
+        if (this.defaultReader) { this.releaseLock(); }
+        if (!this.byobReader) {
+            this.byobReader = this.source['getReader']({ mode: 'byob' });
+            // We have to catch and swallow errors here to avoid uncaught promise rejection exceptions
+            // that seem to be raised when we call `releaseLock()` on this reader. I'm still mystified
+            // about why these errors are raised, but I'm sure there's some important spec reason that
+            // I haven't considered. I hate to employ such an anti-pattern here, but it seems like the
+            // only solution in this case :/
+            this.byobReader['closed'].catch(() => {});
+        }
+        return (this.reader = this.byobReader);
+    }
+    // This strategy plucked from the example in the streams spec:
+    // https://streams.spec.whatwg.org/#example-manual-read-bytes
+    private async readFromBYOBReader(size: number) {
+        return await readInto(this.getBYOBReader(), new ArrayBuffer(size), 0, size);
+    }
+}
+/**
+ * Fills `buffer` from a BYOB reader until `size` bytes have been read or the
+ * stream reports `done`, recursing with the buffer handed back by each read
+ * (a BYOB read transfers the buffer, so the returned one must be reused).
+ *
+ * @param reader the BYOB reader to pull from.
+ * @param buffer destination buffer, at least `size` bytes long.
+ * @param offset number of bytes already written into `buffer`.
+ * @param size total number of bytes wanted.
+ * @returns the read result, whose `value` views the bytes read so far.
+ * @ignore
+ */
+async function readInto(reader: ReadableStreamBYOBReader, buffer: ArrayBufferLike, offset: number, size: number): Promise<ReadableStreamReadResult<Uint8Array>> {
+    if (offset >= size) {
+        return { done: false, value: new Uint8Array(buffer, 0, size) };
+    }
+    // ask for the remaining bytes, written directly after what we already have
+    const { done, value } = await reader.read(new Uint8Array(buffer, offset, size - offset));
+    if (((offset += value!.byteLength) < size) && !done) {
+        return await readInto(reader, value!.buffer, offset, size);
+    }
+    return { done, value: new Uint8Array(value!.buffer, 0, offset) };
+}
+/**
+ * Names of the Node stream events the adapter listens for.
+ * @ignore
+ */
+type EventName = 'end' | 'error' | 'readable';
+/**
+ * A registered listener: the event name, the handler that was attached, and a
+ * promise resolving to `[eventName, payload]` once the event fires.
+ * @ignore
+ */
+type Event = [EventName, (_: any) => void, Promise<[EventName, Error | null]>];
+/**
+ * Attaches a one-shot listener for `event` on `stream`.
+ *
+ * @returns a tuple of the event name, the attached handler (so the caller can
+ * detach it again), and a promise that resolves to `[event, payload]` when
+ * the event fires.
+ * @ignore
+ */
+const onEvent = <T extends string>(stream: NodeJS.ReadableStream, event: T) => {
+    // `resolve` is assigned inside the Promise executor below, before the handler can run
+    let resolve: (value?: [T, any] | PromiseLike<[T, any]>) => void;
+    const handler = (_: any) => resolve([event, _]);
+    return [event, handler, new Promise<[T, any]>(
+        (r) => (resolve = r) && stream['once'](event, handler)
+    )] as Event;
+};
+/**
+ * Adapts a Node.js readable stream into a command-driven async generator.
+ *
+ * The caller sends `{ cmd, size }` through `next()`. The generator waits for
+ * the stream's 'readable', 'end' or 'error' event, pulls bytes with
+ * `stream.read()`, and queues them until at least `size` bytes are buffered
+ * (or the stream ends); a joined range is then yielded. 'peek' leaves the
+ * queue untouched, any other command consumes the yielded bytes. Listeners
+ * are removed and the stream is destroyed when the generator finishes; if the
+ * stream emitted 'error', the generator rejects with that error.
+ *
+ * @param stream the stream to read from; a TTY stream yields one empty chunk and ends.
+ * @returns `null` once the stream has ended.
+ * @ignore
+ */
+async function* fromNodeStream(stream: NodeJS.ReadableStream): AsyncUint8ArrayGenerator {
+    const events: Event[] = [];
+    let event: EventName = 'error';
+    let done = false, err: Error | null = null;
+    let cmd: 'peek' | 'read', size: number, bufferLength = 0;
+    let buffers: Uint8Array[] = [], buffer: Uint8Array | Buffer | string;
+    // Joins the queued chunks up to `size` bytes; only a non-peek command
+    // replaces the queue with the leftover bytes.
+    function byteRange() {
+        if (cmd === 'peek') {
+            return joinUint8Arrays(buffers, size)[0];
+        }
+        [buffer, buffers, bufferLength] = joinUint8Arrays(buffers, size);
+        return buffer;
+    }
+    // Yield so the caller can inject the read command before we
+    // add the listener for the source stream's 'readable' event.
+    ({ cmd, size } = yield <any> null);
+    // ignore stdin if it's a TTY
+    if ((stream as any)['isTTY']) {
+        yield new Uint8Array(0);
+        return null;
+    }
+    try {
+        // initialize the stream event handlers
+        events[0] = onEvent(stream, 'end');
+        events[1] = onEvent(stream, 'error');
+        do {
+            events[2] = onEvent(stream, 'readable');
+            // wait on the first message event from the stream
+            [event, err] = await Promise.race(events.map((x) => x[2]));
+            // if the stream emitted an Error, stop reading; `cleanup` below rejects with it
+            if (event === 'error') { break; }
+            if (!(done = event === 'end')) {
+                // If the size is NaN, request to read everything in the stream's internal buffer
+                if (!isFinite(size - bufferLength)) {
+                    buffer = toUint8Array(stream['read'](undefined));
+                } else {
+                    buffer = toUint8Array(stream['read'](size - bufferLength));
+                    // If the byteLength is 0, then the requested amount is more than the stream has
+                    // in its internal buffer. In this case the stream needs a "kick" to tell it to
+                    // continue emitting readable events, so request to read everything the stream
+                    // has in its internal buffer right now.
+                    if (buffer.byteLength < (size - bufferLength)) {
+                        buffer = toUint8Array(stream['read'](undefined));
+                    }
+                }
+                // if chunk is not null or empty, push it onto the queue
+                if (buffer.byteLength > 0) {
+                    buffers.push(buffer);
+                    bufferLength += buffer.byteLength;
+                }
+            }
+            // If we have enough bytes in our buffer, yield chunks until we don't
+            if (done || size <= bufferLength) {
+                do {
+                    ({ cmd, size } = yield byteRange());
+                } while (size < bufferLength);
+            }
+        } while (!done);
+    } finally {
+        await cleanup(events, event === 'error' ? err : null);
+    }
+    return null;
+    // Drops the buffered chunks, detaches every listener and destroys the
+    // stream. Resolves when `destroy` succeeds (or is absent); rejects with
+    // whatever `destroy` throws, falling back to the stream's own error.
+    function cleanup<T extends Error | null | void>(events: Event[], err?: T) {
+        buffer = buffers = <any> null;
+        return new Promise<T>((resolve, reject) => {
+            for (const [evt, fn] of events) {
+                stream['off'](evt, fn);
+            }
+            try {
+                // Some stream implementations don't call the destroy callback,
+                // because it's really a node-internal API. Just calling `destroy`
+                // here should be enough to conform to the ReadableStream contract
+                const destroy = (stream as any)['destroy'];
+                destroy && destroy.call(stream, err);
+                err = undefined;
+            } catch (e) { err = e || err; } finally {
+                err != null ? reject(err) : resolve();
+            }
+        });
+    }
+}
diff --git a/src/arrow/js/src/io/file.ts b/src/arrow/js/src/io/file.ts
new file mode 100644
index 000000000..20b7dbf02
--- /dev/null
+++ b/src/arrow/js/src/io/file.ts
@@ -0,0 +1,115 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+import { FileHandle } from './interfaces';
+import { ByteStream, AsyncByteStream } from './stream';
+import { ArrayBufferViewInput, toUint8Array } from '../util/buffer';
+/**
+ * Synchronous random-access view over an in-memory buffer, with a cursor for
+ * sequential `read` calls and `readAt` for positional reads. Reads return
+ * subarray views of the buffer rather than copies.
+ * @ignore
+ */
+export class RandomAccessFile extends ByteStream {
+    public size: number;
+    public position = 0;
+    protected buffer: Uint8Array | null;
+    /**
+     * @param buffer the bytes to expose.
+     * @param byteLength logical size of the file; defaults to the buffer's byte length.
+     */
+    constructor(buffer: ArrayBufferViewInput, byteLength?: number) {
+        super();
+        this.buffer = toUint8Array(buffer);
+        this.size = typeof byteLength === 'undefined' ? this.buffer.byteLength : byteLength;
+    }
+    /** Reads a little-endian 32-bit signed integer at `position`. */
+    public readInt32(position: number) {
+        const { buffer, byteOffset } = this.readAt(position, 4);
+        return new DataView(buffer, byteOffset).getInt32(0, true);
+    }
+    /** Moves the cursor to `position` (clamped to `size`); returns whether `position` was inside the file. */
+    public seek(position: number) {
+        this.position = Math.min(position, this.size);
+        return position < this.size;
+    }
+    /**
+     * Reads up to `nBytes` from the cursor and advances it. A non-numeric
+     * `nBytes` reads to the end. Returns `null` at end of file or after `close()`.
+     */
+    public read(nBytes?: number | null) {
+        const { buffer, size, position } = this;
+        if (buffer && position < size) {
+            if (typeof nBytes !== 'number') { nBytes = Infinity; }
+            this.position = Math.min(size,
+                position + Math.min(size - position, nBytes));
+            return buffer.subarray(position, this.position);
+        }
+        return null;
+    }
+    /**
+     * Reads up to `nBytes` starting at `position` without moving the cursor.
+     * The range is clamped to `size`; after `close()` a zero-filled array of
+     * `nBytes` is returned instead.
+     */
+    public readAt(position: number, nBytes: number) {
+        const buf = this.buffer;
+        const end = Math.min(this.size, position + nBytes);
+        return buf ? buf.subarray(position, end) : new Uint8Array(nBytes);
+    }
+    /** Drops the reference to the underlying buffer. */
+    public close() { this.buffer && (this.buffer = null); }
+    public throw(value?: any) { this.close(); return { done: true, value }; }
+    public return(value?: any) { this.close(); return { done: true, value }; }
+}
+/**
+ * Asynchronous random-access reader over a Node.js `FileHandle`, with a
+ * cursor for sequential `read` calls and `readAt` for positional reads.
+ * When no byte length is given, the size is obtained with `stat()`; every
+ * method waits for that lookup before touching `size`.
+ * @ignore
+ */
+export class AsyncRandomAccessFile extends AsyncByteStream {
+    public size!: number;
+    public position = 0;
+    public _pending?: Promise<void>;
+    protected _handle: FileHandle | null;
+    /**
+     * @param file an open file handle.
+     * @param byteLength logical size of the file; looked up with `stat()` when omitted.
+     */
+    constructor(file: FileHandle, byteLength?: number) {
+        super();
+        this._handle = file;
+        if (typeof byteLength === 'number') {
+            this.size = byteLength;
+        } else {
+            this._pending = (async () => {
+                this.size = (await file.stat()).size;
+                delete this._pending;
+            })();
+        }
+    }
+    /** Reads a little-endian 32-bit signed integer at `position`. */
+    public async readInt32(position: number) {
+        const { buffer, byteOffset } = await this.readAt(position, 4);
+        return new DataView(buffer, byteOffset).getInt32(0, true);
+    }
+    /** Moves the cursor to `position` (clamped to `size`); resolves to whether `position` was inside the file. */
+    public async seek(position: number) {
+        this._pending && await this._pending;
+        this.position = Math.min(position, this.size);
+        return position < this.size;
+    }
+    /**
+     * Reads up to `nBytes` from the cursor and advances it. A non-numeric
+     * `nBytes` reads to the end. Resolves to `null` at end of file or after `close()`.
+     */
+    public async read(nBytes?: number | null) {
+        this._pending && await this._pending;
+        const { _handle: file, size, position } = this;
+        if (file && position < size) {
+            if (typeof nBytes !== 'number') { nBytes = Infinity; }
+            let pos = position, offset = 0;
+            const end = Math.min(size, pos + Math.min(size - pos, nBytes));
+            const buffer = new Uint8Array(Math.max(0, (this.position = end) - pos));
+            // a single read may return fewer bytes than requested, so loop until full
+            while (pos < end && offset < buffer.byteLength) {
+                const { bytesRead } = await file.read(buffer, offset, buffer.byteLength - offset, pos);
+                // zero bytes means the file ended early; stop instead of spinning forever
+                if (bytesRead <= 0) { break; }
+                pos += bytesRead;
+                offset += bytesRead;
+            }
+            return buffer;
+        }
+        return null;
+    }
+    /**
+     * Reads `nBytes` starting at `position` without moving the cursor. If the
+     * range does not fit inside the file (or the file is closed) a
+     * zero-filled array of `nBytes` is returned instead.
+     */
+    public async readAt(position: number, nBytes: number) {
+        this._pending && await this._pending;
+        const { _handle: file, size } = this;
+        // `<=` (not `<`): a range that ends exactly at end-of-file is still inside the file
+        if (file && (position + nBytes) <= size) {
+            const buffer = new Uint8Array(nBytes);
+            return (await file.read(buffer, 0, nBytes, position)).buffer;
+        }
+        return new Uint8Array(nBytes);
+    }
+    /** Closes the file handle; later calls are no-ops. */
+    public async close() { const f = this._handle; this._handle = null; f && await f.close(); }
+    public async throw(value?: any) { await this.close(); return { done: true, value }; }
+    public async return(value?: any) { await this.close(); return { done: true, value }; }
+}
diff --git a/src/arrow/js/src/io/interfaces.ts b/src/arrow/js/src/io/interfaces.ts
new file mode 100644
index 000000000..4b5641ff1
--- /dev/null
+++ b/src/arrow/js/src/io/interfaces.ts
@@ -0,0 +1,179 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+import streamAdapters from './adapters';
+/**
+ * Shared, frozen iterator result meaning "finished": `done` is true and
+ * `value` is undefined.
+ * @ignore
+ */
+export const ITERATOR_DONE: any = Object.freeze({ done: true, value: void (0) });
+/**
+ * Alias for the Node.js `fs.promises.FileHandle` type.
+ * @ignore
+ */
+export type FileHandle = import('fs').promises.FileHandle;
+/**
+ * Plain-object shape of Arrow JSON input: a required `schema` plus optional
+ * `batches` and `dictionaries` arrays.
+ * @ignore
+ */
+export type ArrowJSONLike = { schema: any; batches?: any[]; dictionaries?: any[] };
+/**
+ * Options accepted when converting to a DOM `ReadableStream`.
+ * @ignore
+ */
+export type ReadableDOMStreamOptions = { type: 'bytes' | undefined; autoAllocateChunkSize?: number; highWaterMark?: number };
+/**
+ * Thin wrapper around an `ArrowJSONLike` object that exposes its parts
+ * through getters, substituting an empty array when `batches` or
+ * `dictionaries` is absent.
+ * @ignore
+ */
+export class ArrowJSON {
+    constructor(private _json: ArrowJSONLike) {}
+    public get schema(): any { return this._json['schema']; }
+    public get batches(): any[] { return (this._json['batches'] || []) as any[]; }
+    public get dictionaries(): any[] { return (this._json['dictionaries'] || []) as any[]; }
+}
+/**
+ * Read side of a stream of `T` values: promise-based `read`/`peek` that
+ * resolve to a value or `null`, plus async-iterator style
+ * `next`/`throw`/`return`, `cancel`, and a `closed` promise.
+ * @ignore
+ */
+export interface Readable<T> {
+    readonly closed: Promise<void>;
+    cancel(reason?: any): Promise<void>;
+    read(size?: number | null): Promise<T | null>;
+    peek(size?: number | null): Promise<T | null>;
+    throw(value?: any): Promise<IteratorResult<any>>;
+    return(value?: any): Promise<IteratorResult<any>>;
+    next(size?: number | null): Promise<IteratorResult<T>>;
+}
+/**
+ * Write side of a stream of `T` values: synchronous `write`, `close` and
+ * `abort`, plus a `closed` promise.
+ * @ignore
+ */
+export interface Writable<T> {
+    readonly closed: Promise<void>;
+    close(): void;
+    write(chunk: T): void;
+    abort(reason?: any): void;
+}
+/**
+ * A duplex object that is both `Readable` and `Writable`, is async-iterable
+ * over its readable values, and can be converted to a DOM or Node.js stream.
+ * @ignore
+ */
+export interface ReadableWritable<TReadable, TWritable> extends Readable<TReadable>, Writable<TWritable> {
+    [Symbol.asyncIterator](): AsyncIterableIterator<TReadable>;
+    toDOMStream(options?: ReadableDOMStreamOptions): ReadableStream<TReadable>;
+    toNodeStream(options?: import('stream').ReadableOptions): import('stream').Readable;
+}
+/**
+ * Base class that gives a readable source the piping methods of both DOM
+ * streams (`tee`, `pipeTo`, `pipeThrough`) and Node.js streams (`pipe`).
+ * Subclasses supply `toDOMStream` / `toNodeStream`; the converted stream is
+ * created on first use and cached, so repeated piping calls share it.
+ * @ignore
+ */
+export abstract class ReadableInterop<T> {
+    public abstract toDOMStream(options?: ReadableDOMStreamOptions): ReadableStream<T>;
+    public abstract toNodeStream(options?: import('stream').ReadableOptions): import('stream').Readable;
+    /** Splits the cached DOM stream into two branches. */
+    public tee(): [ReadableStream<T>, ReadableStream<T>] {
+        return this._getDOMStream().tee();
+    }
+    /** Pipes the cached Node.js stream into `writable`. */
+    public pipe<R extends NodeJS.WritableStream>(writable: R, options?: { end?: boolean }) {
+        return this._getNodeStream().pipe(writable, options);
+    }
+    /** Pipes the cached DOM stream into `writable`. */
+    public pipeTo(writable: WritableStream<T>, options?: PipeOptions) {
+        return this._getDOMStream().pipeTo(writable, options);
+    }
+    /** Pipes the cached DOM stream through `duplex` and returns its readable side. */
+    public pipeThrough<R extends ReadableStream<any>>(duplex: { writable: WritableStream<T>; readable: R }, options?: PipeOptions) {
+        return this._getDOMStream().pipeThrough(duplex, options);
+    }
+    protected _DOMStream?: ReadableStream<T>;
+    private _getDOMStream() {
+        return this._DOMStream || (this._DOMStream = this.toDOMStream());
+    }
+    protected _nodeStream?: import('stream').Readable;
+    private _getNodeStream() {
+        return this._nodeStream || (this._nodeStream = this.toNodeStream());
+    }
+}
+/**
+ * The `resolve` / `reject` pair captured from a pending promise so it can be
+ * settled later.
+ * @ignore
+ */
+type Resolution<T> = { resolve: (value?: T | PromiseLike<T>) => void; reject: (reason?: any) => void };
+/**
+ * Unbounded async queue usable as an async iterator.
+ *
+ * `write` hands a value to the oldest waiting consumer or, if none is
+ * waiting, buffers it. `next` returns a buffered value, a stored error,
+ * "done" once the queue is closed, or otherwise a promise that a later
+ * `write` / `abort` / `close` settles.
+ * @ignore
+ */
+export class AsyncQueue<TReadable = Uint8Array, TWritable = TReadable> extends ReadableInterop<TReadable>
+    implements AsyncIterableIterator<TReadable>, ReadableWritable<TReadable, TWritable> {
+    protected _values: TWritable[] = [];
+    protected _error?: { error: any };
+    protected _closedPromise: Promise<void>;
+    // resolver of `_closedPromise`; cleared on close, so it doubles as the "is open" flag
+    protected _closedPromiseResolve?: (value?: any) => void;
+    // consumers waiting in `next()` for a value that has not been written yet
+    protected resolvers: Resolution<IteratorResult<TReadable>>[] = [];
+    constructor() {
+        super();
+        this._closedPromise = new Promise((r) => this._closedPromiseResolve = r);
+    }
+    public get closed(): Promise<void> { return this._closedPromise; }
+    public async cancel(reason?: any) { await this.return(reason); }
+    /** Delivers `value` to the oldest waiting consumer, or buffers it. Throws if the queue is closed. */
+    public write(value: TWritable) {
+        if (this._ensureOpen()) {
+            this.resolvers.length <= 0
+                ? (this._values.push(value))
+                : (this.resolvers.shift()!.resolve({ done: false, value } as any));
+        }
+    }
+    /** While open: rejects the oldest waiting consumer, or stores the error for a later `next()`. */
+    public abort(value?: any) {
+        if (this._closedPromiseResolve) {
+            this.resolvers.length <= 0
+                ? (this._error = { error: value })
+                : (this.resolvers.shift()!.reject({ done: true, value }));
+        }
+    }
+    /** Completes every waiting consumer with "done" and resolves `closed`. Idempotent. */
+    public close() {
+        if (this._closedPromiseResolve) {
+            const { resolvers } = this;
+            while (resolvers.length > 0) {
+                resolvers.shift()!.resolve(ITERATOR_DONE);
+            }
+            this._closedPromiseResolve();
+            this._closedPromiseResolve = undefined;
+        }
+    }
+    public [Symbol.asyncIterator]() { return this; }
+    public toDOMStream(options?: ReadableDOMStreamOptions) {
+        // while open (or errored) stream from the queue itself; once closed
+        // cleanly only the buffered values remain, so iterate those directly
+        return streamAdapters.toDOMStream(
+            (this._closedPromiseResolve || this._error)
+                ? (this as AsyncIterable<TReadable>)
+                : (this._values as any) as Iterable<TReadable>,
+            options);
+    }
+    public toNodeStream(options?: import('stream').ReadableOptions) {
+        // same source selection as `toDOMStream`
+        return streamAdapters.toNodeStream(
+            (this._closedPromiseResolve || this._error)
+                ? (this as AsyncIterable<TReadable>)
+                : (this._values as any) as Iterable<TReadable>,
+            options);
+    }
+    public async throw(_?: any) { await this.abort(_); return ITERATOR_DONE; }
+    public async return(_?: any) { await this.close(); return ITERATOR_DONE; }
+    public async read(size?: number | null): Promise<TReadable | null> { return (await this.next(size, 'read')).value; }
+    public async peek(size?: number | null): Promise<TReadable | null> { return (await this.next(size, 'peek')).value; }
+    /** Arguments are accepted for subclasses; this base implementation ignores them. */
+    public next(..._args: any[]): Promise<IteratorResult<TReadable>> {
+        if (this._values.length > 0) {
+            return Promise.resolve({ done: false, value: this._values.shift()! } as any);
+        } else if (this._error) {
+            return Promise.reject({ done: true, value: this._error.error });
+        } else if (!this._closedPromiseResolve) {
+            return Promise.resolve(ITERATOR_DONE);
+        } else {
+            return new Promise<IteratorResult<TReadable>>((resolve, reject) => {
+                this.resolvers.push({ resolve, reject });
+            });
+        }
+    }
+    protected _ensureOpen() {
+        if (this._closedPromiseResolve) {
+            return true;
+        }
+        throw new Error(`AsyncQueue is closed`);
+    }
+}
diff --git a/src/arrow/js/src/io/node/builder.ts b/src/arrow/js/src/io/node/builder.ts
new file mode 100644
index 000000000..eb9579536
--- /dev/null
+++ b/src/arrow/js/src/io/node/builder.ts
@@ -0,0 +1,98 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+import { Duplex } from 'stream';
+import { DataType } from '../../type';
+import { Builder, BuilderOptions } from '../../builder/index';
+/**
+ * Options for `builderThroughNodeStream`: the Builder options plus settings
+ * that control how the wrapping Duplex flushes built chunks.
+ * @ignore
+ */
+export interface BuilderDuplexOptions<T extends DataType = any, TNull = any> extends BuilderOptions<T, TNull> {
+ /** Forwarded to the Duplex constructor; treated as `true` when omitted. */
+ autoDestroy?: boolean;
+ /** Initial flush threshold in `queueingStrategy` units; defaults to 1000 for 'count' and 2 ** 14 for 'bytes'. */
+ highWaterMark?: number;
+ /** Measure the builder by element count (default) or by byte length. */
+ queueingStrategy?: 'bytes' | 'count';
+ dictionaryHashFunction?: (value: any) => string | number;
+ valueToChildTypeId?: (builder: Builder<T, TNull>, value: any, offset: number) => number;
+}
+/**
+ * Creates a Node.js Duplex that appends every written value to a Builder
+ * created from `options` and emits the built Vectors on its readable side.
+ * (The first constructor argument and the closing brace had been stripped.)
+ * @ignore
+ */
+export function builderThroughNodeStream<T extends DataType = any, TNull = any>(options: BuilderDuplexOptions<T, TNull>) {
+ return new BuilderDuplex(Builder.new(options), options);
+}
+/**
+ * Completion callback handed to the stream hooks; called with nothing or with an optional error.
+ * @ignore
+ */
+type CB = (error?: Error | null | undefined) => void;
+/**
+ * Object-mode Duplex that feeds written values into a Builder and pushes the
+ * resulting Vectors to its readable side.
+ * @ignore
+ */
+class BuilderDuplex<T extends DataType = any, TNull = any> extends Duplex {
+ /** Set once the end-of-stream marker (`null`) has been pushed. */
+ private _finished: boolean;
+ /** Incremented just before each Vector is pushed. */
+ private _numChunks: number;
+ /** Flush threshold: starts at the configured highWaterMark, then follows the size passed to `_read`. */
+ private _desiredSize: number;
+ private _builder: Builder<T, TNull>;
+ /** Measures the builder by `length` or `byteLength`, depending on the queueing strategy. */
+ private _getSize: (builder: Builder<T, TNull>) => number;
+ /**
+ * @param builder Builder that receives every written value.
+ * @param options Flush configuration; the Duplex itself is created with a
+ * highWaterMark of 1, while the requested highWaterMark is kept as the flush threshold.
+ */
+ constructor(builder: Builder<T, TNull>, options: BuilderDuplexOptions<T, TNull>) {
+ const { queueingStrategy = 'count', autoDestroy = true } = options;
+ const { highWaterMark = queueingStrategy !== 'bytes' ? 1000 : 2 ** 14 } = options;
+ super({ autoDestroy, highWaterMark: 1, allowHalfOpen: true, writableObjectMode: true, readableObjectMode: true });
+ this._numChunks = 0;
+ this._finished = false;
+ this._builder = builder;
+ this._desiredSize = highWaterMark;
+ this._getSize = queueingStrategy !== 'bytes' ? builderLength : builderByteLength;
+ }
+ /** Readable hook: remembers the requested size as the new threshold and flushes if it is met. */
+ _read(size: number) {
+ this._maybeFlush(this._builder, this._desiredSize = size);
+ }
+ /** Writable hook for end-of-input: finishes the builder, flushes what is left, then signals completion. */
+ _final(cb?: CB) {
+ this._maybeFlush(this._builder.finish(), this._desiredSize);
+ cb && cb();
+ }
+ /** Writable hook: appends the value, flushes if the threshold is met, then acknowledges the write. */
+ _write(value: any, _: string, cb?: CB) {
+ const result = this._maybeFlush(
+ this._builder.append(value),
+ this._desiredSize
+ );
+ cb && cb();
+ return result;
+ }
+ /** Destroy hook: clears the builder and forwards the error (if any) to the callback. */
+ _destroy(err: Error | null, cb?: (error: Error | null) => void) {
+ this._builder.clear();
+ cb && cb(err);
+ }
+ /**
+ * Pushes a Vector when the builder's size has reached `size`. Once the
+ * builder is finished, pushes any remainder (or a single chunk if none was
+ * pushed yet) followed by the end marker, exactly once.
+ * @returns `false` when the builder is finished; otherwise whether the
+ * builder's size is still below `writableHighWaterMark`.
+ */
+ private _maybeFlush(builder: Builder<T, TNull>, size: number) {
+ if (this._getSize(builder) >= size) {
+ ++this._numChunks && this.push(builder.toVector());
+ }
+ if (builder.finished) {
+ if (builder.length > 0 || this._numChunks === 0) {
+ ++this._numChunks && this.push(builder.toVector());
+ }
+ if (!this._finished && (this._finished = true)) {
+ this.push(null);
+ }
+ return false;
+ }
+ return this._getSize(builder) < this.writableHighWaterMark;
+ }
+}
+/** Size function for the 'count' strategy: the builder's `length`. @ignore */ const builderLength = <T extends DataType = any>(builder: Builder<T>) => builder.length;
+/** Size function for the 'bytes' strategy: the builder's `byteLength`. @ignore */ const builderByteLength = <T extends DataType = any>(builder: Builder<T>) => builder.byteLength;
diff --git a/src/arrow/js/src/io/node/iterable.ts b/src/arrow/js/src/io/node/iterable.ts
new file mode 100644
index 000000000..457bc894d
--- /dev/null
+++ b/src/arrow/js/src/io/node/iterable.ts
@@ -0,0 +1,113 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+import { Readable } from 'stream';
+import { isIterable, isAsyncIterable } from '../../util/compat';
+/** Options type of Node's `Readable`, referenced through a type-only import. @ignore */
+type ReadableOptions = import('stream').ReadableOptions;
+/** Synchronous source iterator whose `next` accepts a requested size (or null). @ignore */
+type SourceIterator<T> = Generator<T, void, number | null>;
+/** Asynchronous counterpart of `SourceIterator`. @ignore */
+type AsyncSourceIterator<T> = AsyncGenerator<T, void, number | null>;
+/**
+ * Wraps an Iterable or AsyncIterable in a Node.js Readable.
+ * The async check runs first, so a source implementing both protocols is consumed asynchronously.
+ * @throws Error when `source` is neither iterable nor async iterable.
+ * @ignore
+ */
+export function toNodeStream<T>(source: Iterable<T> | AsyncIterable<T>, options?: ReadableOptions): Readable {
+ if (isAsyncIterable<T>(source)) { return new AsyncIterableReadable(source[Symbol.asyncIterator]() as AsyncSourceIterator<T>, options); }
+ if (isIterable<T>(source)) { return new IterableReadable(source[Symbol.iterator]() as SourceIterator<T>, options); }
+ /* istanbul ignore next */
+ throw new Error(`toNodeStream() must be called with an Iterable or AsyncIterable`);
+}
+/**
+ * Node.js Readable backed by a synchronous iterator.
+ * @ignore
+ */
+class IterableReadable<T extends Uint8Array | any> extends Readable {
+ /** True while a pull loop is running; prevents re-entrant pulls from `_read`. */
+ private _pulling: boolean;
+ /** True unless the stream was created with `objectMode`. */
+ private _bytesMode: boolean;
+ private _iterator: SourceIterator<T>;
+ constructor(it: SourceIterator<T>, options?: ReadableOptions) {
+ super(options);
+ this._iterator = it;
+ this._pulling = false;
+ this._bytesMode = !options || !options.objectMode;
+ }
+ /** Readable hook: starts a pull unless one is already in progress. */
+ _read(size: number) {
+ const it = this._iterator;
+ if (it && !this._pulling && (this._pulling = true)) {
+ this._pulling = this._pull(size, it);
+ }
+ }
+ /**
+ * Destroy hook: calls the iterator's `throw` when an error is given and
+ * `throw` exists, otherwise its `return`. The callback always receives null.
+ */
+ _destroy(e: Error | null, cb: (e: Error | null) => void) {
+ const it = this._iterator;
+ let fn: any;
+ it && (fn = e != null && it.throw || it.return);
+ fn?.call(it, e);
+ cb && cb(null);
+ }
+ /**
+ * Pushes iterator values until the stream stops being readable, `push`
+ * reports back-pressure, the size budget is spent, or the iterator is done.
+ * The requested size is forwarded to the iterator only in bytes mode.
+ * @returns `!this.readable`, which `_read` stores as the new pulling flag.
+ */
+ private _pull(size: number, it: SourceIterator<T>) {
+ const bm = this._bytesMode;
+ let r: IteratorResult<T> | null = null;
+ while (this.readable && !(r = it.next(bm ? size : null)).done) {
+ if (size != null) {
+ // In bytes mode a buffer view costs its byte length; anything else costs 1.
+ size -= (bm && ArrayBuffer.isView(r.value) ? r.value.byteLength : 1);
+ }
+ if (!this.push(r.value) || size <= 0) { break; }
+ }
+ // `(push(null) || true)` ends the stream and always lets the iterator be released.
+ if ((r?.done || !this.readable) && (this.push(null) || true)) {
+ it.return && it.return();
+ }
+ return !this.readable;
+ }
+}
+/**
+ * Node.js Readable backed by an asynchronous iterator.
+ * @ignore
+ */
+class AsyncIterableReadable<T extends Uint8Array | any> extends Readable {
+ /** True while an async pull is in flight; prevents overlapping pulls from `_read`. */
+ private _pulling: boolean;
+ /** True unless the stream was created with `objectMode`. */
+ private _bytesMode: boolean;
+ private _iterator: AsyncSourceIterator<T>;
+ constructor(it: AsyncSourceIterator<T>, options?: ReadableOptions) {
+ super(options);
+ this._iterator = it;
+ this._pulling = false;
+ this._bytesMode = !options || !options.objectMode;
+ }
+ /** Readable hook: starts an async pull unless one is already in flight. */
+ _read(size: number) {
+ const it = this._iterator;
+ if (it && !this._pulling && (this._pulling = true)) {
+ (async () => this._pulling = await this._pull(size, it))();
+ }
+ }
+ /**
+ * Destroy hook: calls the iterator's `throw` when an error is given and
+ * `throw` exists, otherwise its `return`, and invokes the callback with null
+ * once that settles. With no such method the callback runs immediately.
+ */
+ _destroy(e: Error | null, cb: (e: Error | null) => void) {
+ const it = this._iterator;
+ let fn: any;
+ it && (fn = e != null && it.throw || it.return);
+ fn?.call(it, e).then(() => cb && cb(null)) || (cb && cb(null));
+ }
+ /**
+ * Pushes iterator values until the stream stops being readable, `push`
+ * reports back-pressure, the size budget is spent, or the iterator is done.
+ * The requested size is forwarded to the iterator only in bytes mode.
+ * @returns `!this.readable`, which `_read` stores as the new pulling flag.
+ */
+ private async _pull(size: number, it: AsyncSourceIterator<T>) {
+ const bm = this._bytesMode;
+ let r: IteratorResult<T> | null = null;
+ while (this.readable && !(r = await it.next(bm ? size : null)).done) {
+ if (size != null) {
+ // In bytes mode a buffer view costs its byte length; anything else costs 1.
+ size -= (bm && ArrayBuffer.isView(r.value) ? r.value.byteLength : 1);
+ }
+ if (!this.push(r.value) || size <= 0) { break; }
+ }
+ // `(push(null) || true)` ends the stream and always lets the iterator be released.
+ if ((r?.done || !this.readable) && (this.push(null) || true)) {
+ it.return && it.return();
+ }
+ return !this.readable;
+ }
+}
diff --git a/src/arrow/js/src/io/node/reader.ts b/src/arrow/js/src/io/node/reader.ts
new file mode 100644
index 000000000..a51fb0b40
--- /dev/null
+++ b/src/arrow/js/src/io/node/reader.ts
@@ -0,0 +1,86 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+import { DataType } from '../../type';
+import { Duplex, DuplexOptions } from 'stream';
+import { RecordBatch } from '../../recordbatch';
+import { AsyncByteQueue } from '../../io/stream';
+import { RecordBatchReader } from '../../ipc/reader';
+/**
+ * Creates a Node.js Duplex whose writable side accepts bytes and whose
+ * readable side emits the RecordBatches decoded from them.
+ * @ignore
+ */
+export function recordBatchReaderThroughNodeStream<T extends { [key: string]: DataType } = any>(options?: DuplexOptions & { autoDestroy: boolean }) {
+ return new RecordBatchReaderDuplex<T>(options);
+}
+/**
+ * Completion callback handed to the stream hooks; called with nothing or with an optional error.
+ * @ignore
+ */
+type CB = (error?: Error | null | undefined) => void;
+/**
+ * Duplex with a byte-mode writable side and an object-mode readable side:
+ * written chunks are queued in an AsyncByteQueue, and a RecordBatchReader
+ * opened over that queue supplies the batches that are pushed out.
+ * @ignore
+ */
+class RecordBatchReaderDuplex<T extends { [key: string]: DataType } = any> extends Duplex {
+ /** True while an async pull is in flight. */
+ private _pulling = false;
+ /** Passed to the reader's `open`; defaults to true unless a boolean option is given. */
+ private _autoDestroy = true;
+ /** Created lazily on the first `_read`; nulled on destroy. */
+ private _reader: RecordBatchReader | null;
+ /** Buffers written bytes for the reader; nulled on destroy. */
+ private _asyncQueue: AsyncByteQueue | null;
+ constructor(options?: DuplexOptions & { autoDestroy: boolean }) {
+ super({ allowHalfOpen: false, ...options, readableObjectMode: true, writableObjectMode: false });
+ this._reader = null;
+ this._pulling = false;
+ this._asyncQueue = new AsyncByteQueue();
+ this._autoDestroy = options && (typeof options.autoDestroy === 'boolean') ? options.autoDestroy : true;
+ }
+ /** Writable hook for end-of-input: closes the byte queue. */
+ _final(cb?: CB) {
+ const aq = this._asyncQueue;
+ aq?.close();
+ cb && cb();
+ }
+ /** Writable hook: forwards the chunk to the byte queue and acknowledges it immediately. */
+ _write(x: any, _: string, cb: CB) {
+ const aq = this._asyncQueue;
+ aq?.write(x);
+ cb && cb();
+ return true;
+ }
+ /** Readable hook: opens the reader on first use, then pulls unless a pull is already in flight. */
+ _read(size: number) {
+ const aq = this._asyncQueue;
+ if (aq && !this._pulling && (this._pulling = true)) {
+ (async () => {
+ if (!this._reader) {
+ this._reader = await this._open(aq);
+ }
+ this._pulling = await this._pull(size, this._reader);
+ })();
+ }
+ }
+ /** Destroy hook: aborts the queue with the error (or closes it), drops both references, and calls back with null. */
+ _destroy(err: Error | null, cb: (error: Error | null) => void) {
+ const aq = this._asyncQueue;
+ if (aq) { err ? aq.abort(err) : aq.close(); }
+ cb(this._asyncQueue = this._reader = null);
+ }
+ /** Builds a RecordBatchReader over `source` and opens it with the configured `autoDestroy`. */
+ async _open(source: AsyncByteQueue) {
+ return await (await RecordBatchReader.from<T>(source)).open({ autoDestroy: this._autoDestroy });
+ }
+ /**
+ * Pushes batches until the stream stops being readable, `push` reports
+ * back-pressure, `size` batches were pushed, or the reader is done.
+ * When the reader is done and either auto-destroys or is closed after a
+ * reset-and-reopen, the stream is ended and the reader cancelled.
+ * @returns `!this.readable`, which `_read` stores as the new pulling flag.
+ */
+ async _pull(size: number, reader: RecordBatchReader<T>) {
+ let r: IteratorResult<RecordBatch<T>> | null = null;
+ while (this.readable && !(r = await reader.next()).done) {
+ if (!this.push(r.value) || (size != null && --size <= 0)) { break; }
+ }
+ if (!this.readable || (r?.done && (reader.autoDestroy || (await reader.reset().open()).closed))) {
+ this.push(null);
+ await reader.cancel();
+ }
+ return !this.readable;
+ }
+}
diff --git a/src/arrow/js/src/io/node/writer.ts b/src/arrow/js/src/io/node/writer.ts
new file mode 100644
index 000000000..79d61b9a3
--- /dev/null
+++ b/src/arrow/js/src/io/node/writer.ts
@@ -0,0 +1,77 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+import { DataType } from '../../type';
+import { Duplex, DuplexOptions } from 'stream';
+import { AsyncByteStream } from '../../io/stream';
+import { RecordBatchWriter } from '../../ipc/writer';
+/**
+ * Creates a Node.js Duplex whose writable side accepts values for a new
+ * writer of the bound class (`this`) and whose readable side emits the bytes
+ * that writer produces. `options` is passed to the writer constructor only.
+ * @ignore
+ */
+export function recordBatchWriterThroughNodeStream<T extends { [key: string]: DataType } = any>(this: typeof RecordBatchWriter, options?: DuplexOptions & { autoDestroy: boolean }) {
+ return new RecordBatchWriterDuplex(new this<T>(options));
+}
+/**
+ * Completion callback handed to the stream hooks; called with nothing or with an optional error.
+ * @ignore
+ */
+type CB = (error?: Error | null | undefined) => void;
+/**
+ * Duplex with an object-mode writable side and a byte-mode readable side:
+ * written values go to a RecordBatchWriter, and an AsyncByteStream over that
+ * writer supplies the bytes that are pushed out.
+ * @ignore
+ */
+class RecordBatchWriterDuplex<T extends { [key: string]: DataType } = any> extends Duplex {
+ /** True while an async pull is in flight. */
+ private _pulling = false;
+ /** Byte stream reading from the writer; nulled on destroy. */
+ private _reader: AsyncByteStream | null;
+ /** Writer receiving the written values; nulled on destroy. */
+ private _writer: RecordBatchWriter | null;
+ constructor(writer: RecordBatchWriter<T>, options?: DuplexOptions) {
+ super({ allowHalfOpen: false, ...options, writableObjectMode: true, readableObjectMode: false });
+ this._writer = writer;
+ this._reader = new AsyncByteStream(writer);
+ }
+ /** Writable hook for end-of-input: closes the writer. */
+ _final(cb?: CB) {
+ const writer = this._writer;
+ writer?.close();
+ cb && cb();
+ }
+ /** Writable hook: forwards the value to the writer and acknowledges it immediately. */
+ _write(x: any, _: string, cb: CB) {
+ const writer = this._writer;
+ writer?.write(x);
+ cb && cb();
+ return true;
+ }
+ /** Readable hook: starts an async pull unless one is already in flight. */
+ _read(size: number) {
+ const it = this._reader;
+ if (it && !this._pulling && (this._pulling = true)) {
+ (async () => this._pulling = await this._pull(size, it))();
+ }
+ }
+ /** Destroy hook: aborts the writer with the error (or closes it), drops both references, and calls back with null. */
+ _destroy(err: Error | null, cb: (error: Error | null) => void) {
+ const writer = this._writer;
+ if (writer) { err ? writer.abort(err) : writer.close(); }
+ cb(this._reader = this._writer = null);
+ }
+ /**
+ * Pushes byte chunks until the stream stops being readable, `push` reports
+ * back-pressure, the byte budget is spent, or the reader is done. When the
+ * reader is done or the stream is no longer readable, the stream is ended
+ * and the reader cancelled.
+ * @returns `!this.readable`, which `_read` stores as the new pulling flag.
+ */
+ async _pull(size: number, reader: AsyncByteStream) {
+ let r: IteratorResult<Uint8Array> | null = null;
+ while (this.readable && !(r = await reader.next(size || null)).done) {
+ if (size != null && r.value) {
+ size -= r.value.byteLength;
+ }
+ if (!this.push(r.value) || size <= 0) { break; }
+ }
+ if ((r?.done || !this.readable)) {
+ this.push(null);
+ await reader.cancel();
+ }
+ return !this.readable;
+ }
+}
diff --git a/src/arrow/js/src/io/stream.ts b/src/arrow/js/src/io/stream.ts
new file mode 100644
index 000000000..2384ab0b9
--- /dev/null
+++ b/src/arrow/js/src/io/stream.ts
@@ -0,0 +1,152 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+import streamAdapters from './adapters';
+import { decodeUtf8 } from '../util/utf8';
+import { ITERATOR_DONE, Readable, Writable, AsyncQueue } from './interfaces';
+import { toUint8Array, joinUint8Arrays, ArrayBufferViewInput } from '../util/buffer';
+import {
+ isPromise, isFetchResponse,
+ isIterable, isAsyncIterable,
+ isReadableDOMStream, isReadableNodeStream
+} from '../util/compat';
+/** Union of the sink kinds accepted for output: the library's Writable, a DOM WritableStream, a Node.js writable stream, or null. @ignore */
+export type WritableSink<T> = Writable<T> | WritableStream<T> | NodeJS.WritableStream | null;
+/** Union of the source kinds accepted for input: the library's Readable, a promise-like, an async iterable, a DOM ReadableStream, a Node.js readable stream, or null. @ignore */
+export type ReadableSource<T> = Readable<T> | PromiseLike<T> | AsyncIterable<T> | ReadableStream<T> | NodeJS.ReadableStream | null;
+/**
+ * AsyncQueue specialised for bytes: inputs are converted with `toUint8Array`
+ * before queuing, and the queued chunks can be joined into one buffer or string.
+ * @ignore
+ */
+export class AsyncByteQueue<T extends ArrayBufferViewInput = Uint8Array> extends AsyncQueue<Uint8Array, T> {
+ /** Converts `value` to a Uint8Array and queues it; zero-length chunks are dropped. */
+ public write(value: ArrayBufferViewInput | Uint8Array) {
+ if ((value = toUint8Array(value)).byteLength > 0) {
+ return super.write(value as T);
+ }
+ }
+ /** UTF-8 decodes the joined bytes; synchronous when `sync` is true, otherwise a promise. */
+ public toString(sync: true): string;
+ public toString(sync?: false): Promise<string>;
+ public toString(sync = false) {
+ return sync
+ ? decodeUtf8(this.toUint8Array(true))
+ : this.toUint8Array(false).then(decodeUtf8);
+ }
+ /**
+ * Joins queued chunks into one Uint8Array. With `sync` true only the values
+ * buffered right now are joined; otherwise the queue is iterated until it
+ * ends and every chunk received is joined.
+ */
+ public toUint8Array(sync: true): Uint8Array;
+ public toUint8Array(sync?: false): Promise<Uint8Array>;
+ public toUint8Array(sync = false) {
+ return sync ? joinUint8Arrays(this._values as any[])[0] : (async () => {
+ const buffers = [];
+ let byteLength = 0;
+ for await (const chunk of this) {
+ buffers.push(chunk);
+ byteLength += chunk.byteLength;
+ }
+ return joinUint8Arrays(buffers, byteLength)[0];
+ })();
+ }
+}
+/**
+ * Synchronous byte stream: an iterable iterator of Uint8Arrays that delegates
+ * every operation to a ByteStreamSource built from the given input.
+ * (The `next` and `read` bodies had been stripped down to a bare `return;`.)
+ * @ignore
+ */
+export class ByteStream implements IterableIterator<Uint8Array> {
+ // Only assigned when a source is supplied to the constructor.
+ private source!: ByteStreamSource<Uint8Array>;
+ constructor(source?: Iterable<ArrayBufferViewInput> | ArrayBufferViewInput) {
+ if (source) {
+ this.source = new ByteStreamSource(streamAdapters.fromIterable(source));
+ }
+ }
+ [Symbol.iterator]() { return this; }
+ public next(value?: any) { return this.source.next(value); }
+ public throw(value?: any) { return this.source.throw(value); }
+ public return(value?: any) { return this.source.return(value); }
+ public peek(size?: number | null) { return this.source.peek(size); }
+ public read(size?: number | null) { return this.source.read(size); }
+}
+/**
+ * Asynchronous byte stream: an async iterable iterator of Uint8Arrays that
+ * delegates every operation to an AsyncByteStreamSource.
+ * (The `next` and `read` bodies had been stripped down to a bare `return;`.)
+ * @ignore
+ */
+export class AsyncByteStream implements Readable<Uint8Array>, AsyncIterableIterator<Uint8Array> {
+ // Left unassigned when the constructor input matches none of the supported kinds.
+ private source!: AsyncByteStreamSource<Uint8Array>;
+ /**
+ * Picks an adapter by testing `source` in this order: another
+ * AsyncByteStream (its source is shared), AsyncByteQueue, Node.js readable,
+ * DOM ReadableStream, fetch Response (its body), Iterable, Promise, AsyncIterable.
+ */
+ constructor(source?: PromiseLike<ArrayBufferViewInput> | Response | ReadableStream<ArrayBufferViewInput> | NodeJS.ReadableStream | AsyncIterable<ArrayBufferViewInput> | Iterable<ArrayBufferViewInput>) {
+ if (source instanceof AsyncByteStream) {
+ this.source = (source as AsyncByteStream).source;
+ } else if (source instanceof AsyncByteQueue) {
+ this.source = new AsyncByteStreamSource(streamAdapters.fromAsyncIterable(source));
+ } else if (isReadableNodeStream(source)) {
+ this.source = new AsyncByteStreamSource(streamAdapters.fromNodeStream(source));
+ } else if (isReadableDOMStream<ArrayBufferViewInput>(source)) {
+ this.source = new AsyncByteStreamSource(streamAdapters.fromDOMStream(source));
+ } else if (isFetchResponse(source)) {
+ this.source = new AsyncByteStreamSource(streamAdapters.fromDOMStream(source.body!));
+ } else if (isIterable<ArrayBufferViewInput>(source)) {
+ this.source = new AsyncByteStreamSource(streamAdapters.fromIterable(source));
+ } else if (isPromise<ArrayBufferViewInput>(source)) {
+ this.source = new AsyncByteStreamSource(streamAdapters.fromAsyncIterable(source));
+ } else if (isAsyncIterable<ArrayBufferViewInput>(source)) {
+ this.source = new AsyncByteStreamSource(streamAdapters.fromAsyncIterable(source));
+ }
+ }
+ [Symbol.asyncIterator]() { return this; }
+ public next(value?: any) { return this.source.next(value); }
+ public throw(value?: any) { return this.source.throw(value); }
+ public return(value?: any) { return this.source.return(value); }
+ public get closed(): Promise<void> { return this.source.closed; }
+ public cancel(reason?: any) { return this.source.cancel(reason); }
+ public peek(size?: number | null) { return this.source.peek(size); }
+ public read(size?: number | null) { return this.source.read(size); }
+}
+/** Synchronous generator driven by `{ cmd, size }` requests, where `cmd` is 'peek' or 'read'. @ignore */
+type ByteStreamSourceIterator<T> = Generator<T, null, { cmd: 'peek' | 'read'; size?: number | null }>;
+/** Asynchronous counterpart of `ByteStreamSourceIterator`. @ignore */
+type AsyncByteStreamSourceIterator<T> = AsyncGenerator<T, null, { cmd: 'peek' | 'read'; size?: number | null }>;
+/**
+ * Thin wrapper over a command-driven generator: turns `peek`/`read`/`next`
+ * calls into `{ cmd, size }` requests sent to the underlying iterator.
+ * (The calls inside `peek`, `read` and `next` had been stripped.)
+ * @ignore
+ */
+class ByteStreamSource<T> {
+ constructor(protected source: ByteStreamSourceIterator<T>) {}
+ public cancel(reason?: any) { this.return(reason); }
+ public peek(size?: number | null): T | null { return this.next(size, 'peek').value; }
+ public read(size?: number | null): T | null { return this.next(size, 'read').value; }
+ public next(size?: number | null, cmd: 'peek' | 'read' = 'read') { return this.source.next({ cmd, size }); }
+ // throw/return fall back to ITERATOR_DONE when the iterator lacks the method;
+ // the result is returned as the prototype of a fresh object.
+ public throw(value?: any) { return Object.create((this.source.throw && this.source.throw(value)) || ITERATOR_DONE); }
+ public return(value?: any) { return Object.create((this.source.return && this.source.return(value)) || ITERATOR_DONE); }
+}
+/**
+ * Async wrapper over a command-driven generator (sync or async): turns
+ * `peek`/`read`/`next` calls into `{ cmd, size }` requests and exposes a
+ * `closed` promise that resolves once `throw` or `return` has run.
+ * (The calls inside `read`, `peek` and `next` had been stripped.)
+ * @ignore
+ */
+class AsyncByteStreamSource<T> implements Readable<T> {
+ private _closedPromise: Promise<void>;
+ /** Resolver for `_closedPromise`; cleared after its first use. */
+ private _closedPromiseResolve?: (value?: any) => void;
+ constructor (protected source: ByteStreamSourceIterator<T> | AsyncByteStreamSourceIterator<T>) {
+ this._closedPromise = new Promise((r) => this._closedPromiseResolve = r);
+ }
+ public async cancel(reason?: any) { await this.return(reason); }
+ public get closed(): Promise<void> { return this._closedPromise; }
+ public async read(size?: number | null): Promise<T | null> { return (await this.next(size, 'read')).value; }
+ public async peek(size?: number | null): Promise<T | null> { return (await this.next(size, 'peek')).value; }
+ public async next(size?: number | null, cmd: 'peek' | 'read' = 'read') { return (await this.source.next({ cmd, size })); }
+ /** Forwards to the iterator's `throw` (or ITERATOR_DONE if absent), then resolves `closed`. */
+ public async throw(value?: any) {
+ const result = (this.source.throw && await this.source.throw(value)) || ITERATOR_DONE;
+ this._closedPromiseResolve && this._closedPromiseResolve();
+ this._closedPromiseResolve = undefined;
+ return Object.create(result);
+ }
+ /** Forwards to the iterator's `return` (or ITERATOR_DONE if absent), then resolves `closed`. */
+ public async return(value?: any) {
+ const result = (this.source.return && await this.source.return(value)) || ITERATOR_DONE;
+ this._closedPromiseResolve && this._closedPromiseResolve();
+ this._closedPromiseResolve = undefined;
+ return Object.create(result);
+ }
+}
diff --git a/src/arrow/js/src/io/whatwg/builder.ts b/src/arrow/js/src/io/whatwg/builder.ts
new file mode 100644
index 000000000..c65511844
--- /dev/null
+++ b/src/arrow/js/src/io/whatwg/builder.ts
@@ -0,0 +1,116 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+import { DataType } from '../../type';
+import { Vector } from '../../vector';
+import { VectorType as V } from '../../interfaces';
+import { Builder, BuilderOptions } from '../../builder/index';
+/**
+ * Options for `builderThroughDOMStream`: the Builder options plus the
+ * queueing configuration of the readable and writable sides.
+ * @ignore
+ */
+export interface BuilderTransformOptions<T extends DataType = any, TNull = any> extends BuilderOptions<T, TNull> {
+ /** Measure chunks by element count (default) or by byte length. */
+ queueingStrategy?: 'bytes' | 'count';
+ dictionaryHashFunction?: (value: any) => string | number;
+ /** Only `highWaterMark` is read; it defaults to 1000 for 'count' and 2 ** 14 for 'bytes'. */
+ readableStrategy?: { highWaterMark?: number; size?: any; type?: 'bytes' };
+ /** Only `highWaterMark` is read; it defaults to 1000 for 'count' and 2 ** 14 for 'bytes'. */
+ writableStrategy?: { highWaterMark?: number; size?: any; type?: 'bytes' };
+ valueToChildTypeId?: (builder: Builder<T, TNull>, value: any, offset: number) => number;
+}
+/**
+ * Creates a `{ readable, writable }` pair of DOM streams: values written to
+ * `writable` are appended to a Builder and the built Vectors appear on `readable`.
+ * @ignore
+ */
+export function builderThroughDOMStream<T extends DataType = any, TNull = any>(options: BuilderTransformOptions<T, TNull>) {
+ return new BuilderTransform(options);
+}
+/**
+ * Pairs a WritableStream of values with a ReadableStream of Vectors, using a
+ * Builder in between. Values are appended inside the writable side's `size`
+ * callback; chunks are flushed once the buffered size reaches the readable
+ * controller's desired size.
+ * (The `Builder.new` call in the constructor had been stripped.)
+ * @ignore
+ */
+export class BuilderTransform<T extends DataType = any, TNull = any> {
+ public readable: ReadableStream<V<T>>;
+ public writable: WritableStream<T['TValue'] | TNull>;
+ /** Readable controller captured in `start`/`pull`; reset to null after every enqueue. */
+ public _controller: ReadableStreamDefaultController<V<T>> | null;
+ /** Incremented just before each Vector is enqueued. */
+ private _numChunks = 0;
+ /** Set once the readable side has been closed. */
+ private _finished = false;
+ /** Builder size measured after the latest append; reset to 0 on enqueue. */
+ private _bufferedSize = 0;
+ private _builder: Builder<T, TNull>;
+ private _getSize: (builder: Builder<T, TNull>) => number;
+ constructor(options: BuilderTransformOptions<T, TNull>) {
+ // Access properties by string indexers to defeat closure compiler
+ const {
+ ['readableStrategy']: readableStrategy,
+ ['writableStrategy']: writableStrategy,
+ ['queueingStrategy']: queueingStrategy = 'count',
+ ...builderOptions
+ } = options;
+ this._controller = null;
+ this._builder = Builder.new<T, TNull>(builderOptions);
+ this._getSize = queueingStrategy !== 'bytes' ? chunkLength : chunkByteLength;
+ const { ['highWaterMark']: readableHighWaterMark = queueingStrategy === 'bytes' ? 2 ** 14 : 1000 } = { ...readableStrategy };
+ const { ['highWaterMark']: writableHighWaterMark = queueingStrategy === 'bytes' ? 2 ** 14 : 1000 } = { ...writableStrategy };
+ this['readable'] = new ReadableStream<V<T>>({
+ ['cancel']: () => { this._builder.clear(); },
+ ['pull']: (c) => { this._maybeFlush(this._builder, this._controller = c); },
+ ['start']: (c) => { this._maybeFlush(this._builder, this._controller = c); },
+ }, {
+ 'highWaterMark': readableHighWaterMark,
+ 'size': queueingStrategy !== 'bytes' ? chunkLength : chunkByteLength,
+ });
+ this['writable'] = new WritableStream({
+ ['abort']: () => { this._builder.clear(); },
+ ['write']: () => { this._maybeFlush(this._builder, this._controller); },
+ ['close']: () => { this._maybeFlush(this._builder.finish(), this._controller); },
+ }, {
+ 'highWaterMark': writableHighWaterMark,
+ // The append happens here, so `write` above only needs to check for a flush.
+ 'size': (value: T['TValue'] | TNull) => this._writeValueAndReturnChunkSize(value),
+ });
+ }
+ /** Appends `value` to the builder and returns how much the measured size changed. */
+ private _writeValueAndReturnChunkSize(value: T['TValue'] | TNull) {
+ const bufferedSize = this._bufferedSize;
+ this._bufferedSize = this._getSize(this._builder.append(value));
+ return this._bufferedSize - bufferedSize;
+ }
+ /**
+ * Does nothing without a controller. Otherwise enqueues a Vector when the
+ * buffered size has reached the controller's desired size; once the builder
+ * is finished, enqueues any remainder (or a single chunk if none was
+ * enqueued yet) and closes the readable side exactly once.
+ */
+ private _maybeFlush(builder: Builder<T, TNull>, controller: ReadableStreamDefaultController<V<T>> | null) {
+ if (controller === null) { return; }
+ if (this._bufferedSize >= controller.desiredSize!) {
+ ++this._numChunks && this._enqueue(controller, builder.toVector());
+ }
+ if (builder.finished) {
+ if (builder.length > 0 || this._numChunks === 0) {
+ ++this._numChunks && this._enqueue(controller, builder.toVector());
+ }
+ if (!this._finished && (this._finished = true)) {
+ this._enqueue(controller, null);
+ }
+ }
+ }
+ /** Resets the buffered size and stored controller, then enqueues `chunk`, or closes the stream when it is null. */
+ private _enqueue(controller: ReadableStreamDefaultController<V<T>>, chunk: V<T> | null) {
+ this._bufferedSize = 0;
+ this._controller = null;
+ chunk === null ? controller.close() : controller.enqueue(chunk);
+ }
+}
+/** Size function for the 'count' strategy: the chunk's `length`. @ignore */ const chunkLength = <T extends DataType = any>(chunk: Vector<T> | Builder<T>) => chunk.length;
+/** Size function for the 'bytes' strategy: the chunk's `byteLength`. @ignore */ const chunkByteLength = <T extends DataType = any>(chunk: Vector<T> | Builder<T>) => chunk.byteLength;
diff --git a/src/arrow/js/src/io/whatwg/iterable.ts b/src/arrow/js/src/io/whatwg/iterable.ts
new file mode 100644
index 000000000..ce9e97369
--- /dev/null
+++ b/src/arrow/js/src/io/whatwg/iterable.ts
@@ -0,0 +1,93 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+import { toUint8Array } from '../../util/buffer';
+import { ReadableDOMStreamOptions } from '../../io/interfaces';
+import { isIterable, isAsyncIterable } from '../../util/compat';
+/** Synchronous source iterator whose `next` accepts a requested size (or null). @ignore */
+type SourceIterator<T> = Generator<T, void, number | null>;
+/** Asynchronous counterpart of `SourceIterator`. @ignore */
+type AsyncSourceIterator<T> = AsyncGenerator<T, void, number | null>;
+/**
+ * Wraps an Iterable or AsyncIterable in a DOM ReadableStream.
+ * The async check runs first, so a source implementing both protocols is consumed asynchronously.
+ * @throws Error when `source` is neither iterable nor async iterable.
+ * @ignore
+ */
+export function toDOMStream<T>(source: Iterable<T> | AsyncIterable<T>, options?: ReadableDOMStreamOptions): ReadableStream<T> {
+ if (isAsyncIterable<T>(source)) { return asyncIterableAsReadableDOMStream(source, options); }
+ if (isIterable<T>(source)) { return iterableAsReadableDOMStream(source, options); }
+ /* istanbul ignore next */
+ throw new Error(`toDOMStream() must be called with an Iterable or AsyncIterable`);
+}
+/**
+ * Builds a ReadableStream that drains a synchronous iterable.
+ * Bytes mode is on when `options.type === 'bytes'`; in that mode the stream's
+ * highWaterMark defaults to 2 ** 24 unless `options` supplies its own.
+ * (The `it.next(bm` call in the read loop and the closing brace had been stripped.)
+ * @ignore
+ */
+function iterableAsReadableDOMStream<T>(source: Iterable<T>, options?: ReadableDOMStreamOptions) {
+ let it: SourceIterator<T> | null = null;
+ const bm = (options?.type === 'bytes') || false;
+ const hwm = options?.highWaterMark || (2 ** 24);
+ return new ReadableStream<T>({
+ ...options as any,
+ start(controller) { next(controller, it || (it = source[Symbol.iterator]() as SourceIterator<T>)); },
+ pull(controller) { it ? (next(controller, it)) : controller.close(); },
+ cancel() { (it?.return && it.return() || true) && (it = null); }
+ }, { highWaterMark: bm ? hwm : undefined, ...options });
+ // Enqueues values until the controller's desired size is used up, closing the stream when the iterator is done.
+ function next(controller: ReadableStreamDefaultController<T>, it: SourceIterator<T>) {
+ let buf: Uint8Array;
+ let r: IteratorResult<T> | null = null;
+ let size = controller.desiredSize || null;
+ while (!(r = it.next(bm ? size : null)).done) {
+ if (ArrayBuffer.isView(r.value) && (buf = toUint8Array(r.value))) {
+ // In bytes mode charge the chunk's byte length; the `+ 1` offsets the `--size` below.
+ size != null && bm && (size = size - buf.byteLength + 1);
+ r.value = <any> buf;
+ }
+ controller.enqueue(r.value);
+ if (size != null && --size <= 0) { return; }
+ }
+ controller.close();
+ }
+}
+/**
+ * Builds a ReadableStream that drains an asynchronous iterable.
+ * Bytes mode is on when `options.type === 'bytes'`; in that mode the stream's
+ * highWaterMark defaults to 2 ** 24 unless `options` supplies its own.
+ * (The `it.next(bm` call in the read loop and the closing brace had been stripped.)
+ * @ignore
+ */
+function asyncIterableAsReadableDOMStream<T>(source: AsyncIterable<T>, options?: ReadableDOMStreamOptions) {
+ let it: AsyncSourceIterator<T> | null = null;
+ const bm = (options?.type === 'bytes') || false;
+ const hwm = options?.highWaterMark || (2 ** 24);
+ return new ReadableStream<T>({
+ ...options as any,
+ async start(controller) { await next(controller, it || (it = source[Symbol.asyncIterator]() as AsyncSourceIterator<T>)); },
+ async pull(controller) { it ? (await next(controller, it)) : controller.close(); },
+ async cancel() { (it?.return && await it.return() || true) && (it = null); },
+ }, { highWaterMark: bm ? hwm : undefined, ...options });
+ // Enqueues values until the controller's desired size is used up, closing the stream when the iterator is done.
+ async function next(controller: ReadableStreamDefaultController<T>, it: AsyncSourceIterator<T>) {
+ let buf: Uint8Array;
+ let r: IteratorResult<T> | null = null;
+ let size = controller.desiredSize || null;
+ while (!(r = await it.next(bm ? size : null)).done) {
+ if (ArrayBuffer.isView(r.value) && (buf = toUint8Array(r.value))) {
+ // In bytes mode charge the chunk's byte length; the `+ 1` offsets the `--size` below.
+ size != null && bm && (size = size - buf.byteLength + 1);
+ r.value = <any> buf;
+ }
+ controller.enqueue(r.value);
+ if (size != null && --size <= 0) { return; }
+ }
+ controller.close();
+ }
+}
diff --git a/src/arrow/js/src/io/whatwg/reader.ts b/src/arrow/js/src/io/whatwg/reader.ts
new file mode 100644
index 000000000..9e19bac53
--- /dev/null
+++ b/src/arrow/js/src/io/whatwg/reader.ts
@@ -0,0 +1,52 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+import { DataType } from '../../type';
+import { RecordBatch } from '../../recordbatch';
+import { AsyncByteQueue } from '../../io/stream';
+import { RecordBatchReader } from '../../ipc/reader';
+/**
+ * Creates a `{ writable, readable }` pair of DOM streams: bytes written to
+ * `writable` land in an AsyncByteQueue, and `readable` emits the RecordBatches
+ * that a RecordBatchReader decodes from that queue.
+ * (The `reader.next()` call in the read loop and the closing brace had been stripped.)
+ * @param writableStrategy Merged over the default `highWaterMark` of 2 ** 14 for the writable side.
+ * @param readableStrategy Passed to the reader's `open`.
+ * @ignore
+ */
+export function recordBatchReaderThroughDOMStream<T extends { [key: string]: DataType } = any>(writableStrategy?: ByteLengthQueuingStrategy, readableStrategy?: { autoDestroy: boolean }) {
+ const queue = new AsyncByteQueue();
+ let reader: RecordBatchReader<T> | null = null;
+ const readable = new ReadableStream<RecordBatch<T>>({
+ async cancel() { await queue.close(); },
+ async start(controller) { await next(controller, reader || (reader = await open())); },
+ async pull(controller) { reader ? await next(controller, reader) : controller.close(); }
+ });
+ return { writable: new WritableStream(queue, { 'highWaterMark': 2 ** 14, ...writableStrategy }), readable };
+ // Builds the reader over the shared queue and opens it.
+ async function open() {
+ return await (await RecordBatchReader.from<T>(queue)).open(readableStrategy);
+ }
+ // Enqueues batches until the controller's desired size is used up, closing the stream when the reader is done.
+ async function next(controller: ReadableStreamDefaultController<RecordBatch<T>>, reader: RecordBatchReader<T>) {
+ let size = controller.desiredSize;
+ let r: IteratorResult<RecordBatch<T>> | null = null;
+ while (!(r = await reader.next()).done) {
+ controller.enqueue(r.value);
+ if (size != null && --size <= 0) {
+ return;
+ }
+ }
+ controller.close();
+ }
+}
diff --git a/src/arrow/js/src/io/whatwg/writer.ts b/src/arrow/js/src/io/whatwg/writer.ts
new file mode 100644
index 000000000..49789bdd3
--- /dev/null
+++ b/src/arrow/js/src/io/whatwg/writer.ts
@@ -0,0 +1,50 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+import { DataType } from '../../type';
+import { RecordBatch } from '../../recordbatch';
+import { AsyncByteStream } from '../../io/stream';
+import { RecordBatchWriter } from '../../ipc/writer';
+/**
+ * Creates a WHATWG stream pair that turns written RecordBatches into bytes.
+ *
+ * Must be invoked with `this` bound to a RecordBatchWriter constructor. A writer
+ * of that class backs the returned `writable`; an AsyncByteStream over the writer
+ * feeds the returned byte-typed `readable`.
+ *
+ * @param writableStrategy Passed both to the writer's constructor and to the
+ * WritableStream wrapping it.
+ * @param readableStrategy Queuing strategy for the readable side; its fields
+ * override the default `highWaterMark` of 2 ** 14.
+ * @returns The `{ writable, readable }` pair.
+ * @ignore
+ */
+export function recordBatchWriterThroughDOMStream<T extends { [key: string]: DataType } = any>(
+    this: typeof RecordBatchWriter,
+    writableStrategy?: QueuingStrategy<RecordBatch<T>> & { autoDestroy: boolean },
+    readableStrategy?: { highWaterMark?: number; size?: any }
+) {
+
+    const writer = new this<T>(writableStrategy);
+    const reader = new AsyncByteStream(writer);
+    const readable = new ReadableStream({
+        type: 'bytes',
+        async cancel() { await reader.cancel(); },
+        async pull(controller) { await next(controller); },
+        async start(controller) { await next(controller); },
+    }, { 'highWaterMark': 2 ** 14, ...readableStrategy });
+
+    return { writable: new WritableStream(writer, writableStrategy), readable };
+
+    /**
+     * Enqueues chunks until the controller's desired size (counted in bytes)
+     * is used up, or closes the controller once the byte stream yields nothing.
+     */
+    async function next(controller: ReadableStreamDefaultController<Uint8Array>) {
+        let buf: Uint8Array | null = null;
+        let size = controller.desiredSize;
+        while (buf = await reader.read(size || null)) {
+            controller.enqueue(buf);
+            if (size != null && (size -= buf.byteLength) <= 0) { return; }
+        }
+        controller.close();
+    }
+}
diff --git a/src/arrow/js/src/ipc/message.ts b/src/arrow/js/src/ipc/message.ts
new file mode 100644
index 000000000..34c0aa308
--- /dev/null
+++ b/src/arrow/js/src/ipc/message.ts
@@ -0,0 +1,257 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+import { MessageHeader } from '../enum';
+import { flatbuffers } from 'flatbuffers';
+import ByteBuffer = flatbuffers.ByteBuffer;
+import { Message } from './metadata/message';
+import { isFileHandle } from '../util/compat';
+import { AsyncRandomAccessFile } from '../io/file';
+import { toUint8Array, ArrayBufferViewInput } from '../util/buffer';
+import { ByteStream, ReadableSource, AsyncByteStream } from '../io/stream';
+import { ArrowJSON, ArrowJSONLike, ITERATOR_DONE, FileHandle } from '../io/interfaces';
+/** Error text thrown by readMessage() when the message read does not have the requested header type. @ignore */ const invalidMessageType = (type: MessageHeader) => `Expected ${MessageHeader[type]} Message in stream, but was null or length 0.`;
+/** Error text thrown by readSchema() when no header could be obtained for the requested type. @ignore */ const nullMessage = (type: MessageHeader) => `Header pointer of flatbuffer-encoded ${MessageHeader[type]} Message is null or length 0.`;
+/** Error text thrown by readMetadata() when fewer metadata bytes than `expected` were read. @ignore */ const invalidMessageMetadata = (expected: number, actual: number) => `Expected to read ${expected} metadata bytes, but only read ${actual}.`;
+/** Error text thrown by readMessageBody() when fewer body bytes than `expected` were read. @ignore */ const invalidMessageBodyLength = (expected: number, actual: number) => `Expected to read ${expected} bytes for message body, but only read ${actual}.`;
+/**
+ * Synchronous reader that decodes a sequence of Messages from a ByteStream.
+ * Each message is read as a 32-bit metadata length followed by that many
+ * bytes of flatbuffer-encoded metadata.
+ * @ignore
+ */
+export class MessageReader implements IterableIterator<Message> {
+    protected source: ByteStream;
+    /** Wraps `source` in a ByteStream unless it already is one. */
+    constructor(source: ByteStream | ArrayBufferViewInput | Iterable<ArrayBufferViewInput>) {
+        this.source = source instanceof ByteStream ? source : new ByteStream(source);
+    }
+    public [Symbol.iterator](): IterableIterator<Message> { return this as IterableIterator<Message>; }
+    /** Reads the next metadata length and decodes the Message that follows it. */
+    public next(): IteratorResult<Message> {
+        let r;
+        if ((r = this.readMetadataLength()).done) { return ITERATOR_DONE; }
+        // ARROW-6313: If the first 4 bytes are continuation indicator (-1), read
+        // the next 4 for the 32-bit metadata length. Otherwise, assume this is a
+        // pre-v0.15 message, where the first 4 bytes are the metadata length.
+        if ((r.value === -1) &&
+            (r = this.readMetadataLength()).done) { return ITERATOR_DONE; }
+        if ((r = this.readMetadata(r.value)).done) { return ITERATOR_DONE; }
+        return (<any> r) as IteratorResult<Message>;
+    }
+    public throw(value?: any) { return this.source.throw(value); }
+    public return(value?: any) { return this.source.return(value); }
+    /**
+     * Reads the next Message.
+     * @param type When given, the header type the message must have.
+     * @returns The message, or null once the reader is done.
+     * @throws Error if `type` is given and the message has a different header type.
+     */
+    public readMessage<T extends MessageHeader>(type?: T | null) {
+        let r: IteratorResult<Message<T>>;
+        if ((r = this.next()).done) { return null; }
+        if ((type != null) && r.value.headerType !== type) {
+            throw new Error(invalidMessageType(type));
+        }
+        return r.value;
+    }
+    /**
+     * Reads `bodyLength` bytes of message body from the source.
+     * @returns An empty array when `bodyLength <= 0`, otherwise the bytes read.
+     * @throws Error if fewer than `bodyLength` bytes were read.
+     */
+    public readMessageBody(bodyLength: number): Uint8Array {
+        if (bodyLength <= 0) { return new Uint8Array(0); }
+        const buf = toUint8Array(this.source.read(bodyLength));
+        if (buf.byteLength < bodyLength) {
+            throw new Error(invalidMessageBodyLength(bodyLength, buf.byteLength));
+        }
+        // Hand back `buf` itself only when both checks pass; otherwise return a copy.
+        // 1. Work around bugs in fs.ReadStream's internal Buffer pooling: the
+        //    view's byteOffset must be a multiple of 8.
+        // 2. The view must lie entirely within its backing ArrayBuffer.
+        return /* 1. */ (buf.byteOffset % 8 === 0) &&
+               /* 2. */ (buf.byteOffset + buf.byteLength) <= buf.buffer.byteLength ? buf : buf.slice();
+    }
+    /**
+     * Reads the next Message as a Schema message and returns its header.
+     * @throws Error if `throwIfNull` is set and no schema was obtained.
+     */
+    public readSchema(throwIfNull = false) {
+        const type = MessageHeader.Schema;
+        const message = this.readMessage(type);
+        const schema = message?.header();
+        if (throwIfNull && !schema) {
+            throw new Error(nullMessage(type));
+        }
+        return schema;
+    }
+    /** Reads PADDING bytes as a 32-bit integer; a missing buffer or a value of 0 means done. */
+    protected readMetadataLength(): IteratorResult<number> {
+        const buf = this.source.read(PADDING);
+        const bb = buf && new ByteBuffer(buf);
+        const len = bb?.readInt32(0) || 0;
+        return { done: len === 0, value: len };
+    }
+    /**
+     * Reads `metadataLength` bytes and decodes them as a Message.
+     * @throws Error if fewer than `metadataLength` bytes were read.
+     */
+    protected readMetadata(metadataLength: number): IteratorResult<Message> {
+        const buf = this.source.read(metadataLength);
+        if (!buf) { return ITERATOR_DONE; }
+        if (buf.byteLength < metadataLength) {
+            throw new Error(invalidMessageMetadata(metadataLength, buf.byteLength));
+        }
+        return { done: false, value: Message.decode(buf) };
+    }
+}
+/**
+ * Asynchronous reader that decodes a sequence of Messages from an
+ * AsyncByteStream. Each message is read as a 32-bit metadata length followed
+ * by that many bytes of flatbuffer-encoded metadata.
+ * @ignore
+ */
+export class AsyncMessageReader implements AsyncIterableIterator<Message> {
+    protected source: AsyncByteStream;
+    constructor(source: ReadableSource<Uint8Array>);
+    constructor(source: FileHandle, byteLength?: number);
+    /**
+     * Uses `source` directly when it is an AsyncByteStream, wraps a file handle
+     * in an AsyncRandomAccessFile, and wraps anything else in an AsyncByteStream.
+     */
+    constructor(source: any, byteLength?: number) {
+        this.source = source instanceof AsyncByteStream ? source
+            : isFileHandle(source)
+            ? new AsyncRandomAccessFile(source, byteLength!)
+            : new AsyncByteStream(source);
+    }
+    public [Symbol.asyncIterator](): AsyncIterableIterator<Message> { return this as AsyncIterableIterator<Message>; }
+    /** Reads the next metadata length and decodes the Message that follows it. */
+    public async next(): Promise<IteratorResult<Message>> {
+        let r;
+        if ((r = await this.readMetadataLength()).done) { return ITERATOR_DONE; }
+        // ARROW-6313: If the first 4 bytes are continuation indicator (-1), read
+        // the next 4 for the 32-bit metadata length. Otherwise, assume this is a
+        // pre-v0.15 message, where the first 4 bytes are the metadata length.
+        if ((r.value === -1) &&
+            (r = await this.readMetadataLength()).done) { return ITERATOR_DONE; }
+        if ((r = await this.readMetadata(r.value)).done) { return ITERATOR_DONE; }
+        return (<any> r) as IteratorResult<Message>;
+    }
+    public async throw(value?: any) { return await this.source.throw(value); }
+    public async return(value?: any) { return await this.source.return(value); }
+    /**
+     * Reads the next Message.
+     * @param type When given, the header type the message must have.
+     * @returns The message, or null once the reader is done.
+     * @throws Error if `type` is given and the message has a different header type.
+     */
+    public async readMessage<T extends MessageHeader>(type?: T | null) {
+        let r: IteratorResult<Message<T>>;
+        if ((r = await this.next()).done) { return null; }
+        if ((type != null) && r.value.headerType !== type) {
+            throw new Error(invalidMessageType(type));
+        }
+        return r.value;
+    }
+    /**
+     * Reads `bodyLength` bytes of message body from the source.
+     * @returns An empty array when `bodyLength <= 0`, otherwise the bytes read.
+     * @throws Error if fewer than `bodyLength` bytes were read.
+     */
+    public async readMessageBody(bodyLength: number): Promise<Uint8Array> {
+        if (bodyLength <= 0) { return new Uint8Array(0); }
+        const buf = toUint8Array(await this.source.read(bodyLength));
+        if (buf.byteLength < bodyLength) {
+            throw new Error(invalidMessageBodyLength(bodyLength, buf.byteLength));
+        }
+        // Hand back `buf` itself only when both checks pass; otherwise return a copy.
+        // 1. Work around bugs in fs.ReadStream's internal Buffer pooling: the
+        //    view's byteOffset must be a multiple of 8.
+        // 2. The view must lie entirely within its backing ArrayBuffer.
+        return /* 1. */ (buf.byteOffset % 8 === 0) &&
+               /* 2. */ (buf.byteOffset + buf.byteLength) <= buf.buffer.byteLength ? buf : buf.slice();
+    }
+    /**
+     * Reads the next Message as a Schema message and returns its header.
+     * @throws Error if `throwIfNull` is set and no schema was obtained.
+     */
+    public async readSchema(throwIfNull = false) {
+        const type = MessageHeader.Schema;
+        const message = await this.readMessage(type);
+        const schema = message?.header();
+        if (throwIfNull && !schema) {
+            throw new Error(nullMessage(type));
+        }
+        return schema;
+    }
+    /** Reads PADDING bytes as a 32-bit integer; a missing buffer or a value of 0 means done. */
+    protected async readMetadataLength(): Promise<IteratorResult<number>> {
+        const buf = await this.source.read(PADDING);
+        const bb = buf && new ByteBuffer(buf);
+        const len = bb?.readInt32(0) || 0;
+        return { done: len === 0, value: len };
+    }
+    /**
+     * Reads `metadataLength` bytes and decodes them as a Message.
+     * @throws Error if fewer than `metadataLength` bytes were read.
+     */
+    protected async readMetadata(metadataLength: number): Promise<IteratorResult<Message>> {
+        const buf = await this.source.read(metadataLength);
+        if (!buf) { return ITERATOR_DONE; }
+        if (buf.byteLength < metadataLength) {
+            throw new Error(invalidMessageMetadata(metadataLength, buf.byteLength));
+        }
+        return { done: false, value: Message.decode(buf) };
+    }
+}
+/**
+ * MessageReader over an Arrow JSON document. It yields the schema message
+ * first, then one message per entry of `dictionaries`, then one per entry of
+ * `batches`. The columns of the message most recently yielded are kept so that
+ * readMessageBody() can return them.
+ * @ignore
+ */
+export class JSONMessageReader extends MessageReader {
+    private _schema = false;
+    private _json: ArrowJSON;
+    private _body: any[] = [];
+    private _batchIndex = 0;
+    private _dictionaryIndex = 0;
+    constructor(source: ArrowJSON | ArrowJSONLike) {
+        // The byte source of the base class is unused; give it an empty buffer.
+        super(new Uint8Array(0));
+        this._json = source instanceof ArrowJSON ? source : new ArrowJSON(source);
+    }
+    /** Yields the next message in schema, dictionaries, batches order. */
+    public next() {
+        const { _json } = this;
+        if (!this._schema) {
+            this._schema = true;
+            const message = Message.fromJSON(_json.schema, MessageHeader.Schema);
+            return { done: false, value: message };
+        }
+        if (this._dictionaryIndex < _json.dictionaries.length) {
+            const batch = _json.dictionaries[this._dictionaryIndex++];
+            this._body = batch['data']['columns'];
+            const message = Message.fromJSON(batch, MessageHeader.DictionaryBatch);
+            return { done: false, value: message };
+        }
+        if (this._batchIndex < _json.batches.length) {
+            const batch = _json.batches[this._batchIndex++];
+            this._body = batch['columns'];
+            const message = Message.fromJSON(batch, MessageHeader.RecordBatch);
+            return { done: false, value: message };
+        }
+        this._body = [];
+        // Report exhaustion explicitly: callers read `.done` on the result,
+        // which would throw if this method fell through and returned undefined.
+        return ITERATOR_DONE;
+    }
+    /**
+     * Returns the buffers of the current message's columns, flattened
+     * depth-first: for each column its VALIDITY, TYPE, OFFSET and DATA arrays
+     * (those that are present, in that order), then those of its children.
+     */
+    public readMessageBody(_bodyLength?: number) {
+        return flattenDataSources(this._body) as any;
+        function flattenDataSources(xs: any[]): any[][] {
+            return (xs || []).reduce<any[][]>((buffers, column: any) => [
+                ...buffers,
+                ...(column['VALIDITY'] && [column['VALIDITY']] || []),
+                ...(column['TYPE'] && [column['TYPE']] || []),
+                ...(column['OFFSET'] && [column['OFFSET']] || []),
+                ...(column['DATA'] && [column['DATA']] || []),
+                ...flattenDataSources(column['children'])
+            ], [] as any[][]);
+        }
+    }
+    /**
+     * Reads the next Message.
+     * @param type When given, the header type the message must have.
+     * @returns The message, or null once the reader is done.
+     * @throws Error if `type` is given and the message has a different header type.
+     */
+    public readMessage<T extends MessageHeader>(type?: T | null) {
+        let r: IteratorResult<Message<T>>;
+        if ((r = this.next()).done) { return null; }
+        if ((type != null) && r.value.headerType !== type) {
+            throw new Error(invalidMessageType(type));
+        }
+        return r.value;
+    }
+    /**
+     * Reads the next Message as a Schema message and returns its header.
+     * @throws Error if there is no message or it yields no schema.
+     */
+    public readSchema() {
+        const type = MessageHeader.Schema;
+        const message = this.readMessage(type);
+        const schema = message?.header();
+        if (!message || !schema) {
+            throw new Error(nullMessage(type));
+        }
+        return schema;
+    }
+}
+/** Byte count added to the magic length to form `magicAndPadding` and `magicX2AndPadding`. @ignore */
+export const PADDING = 4;
+/** The magic marker, as a string. @ignore */
+export const MAGIC_STR = 'ARROW1';
+/** The magic marker as bytes: one element per character of MAGIC_STR, filled in by the loop that follows. @ignore */
+export const MAGIC = new Uint8Array(MAGIC_STR.length);
+for (let i = 0; i < MAGIC_STR.length; i += 1 | 0) {
+ MAGIC[i] = MAGIC_STR.charCodeAt(i);
+/**
+ * Tests whether the bytes of `buffer` starting at `index` equal MAGIC.
+ * @param buffer Bytes to inspect.
+ * @param index Offset in `buffer` at which the comparison starts (default 0).
+ * @returns false at the first differing byte (including positions past the
+ * end of `buffer`, which read as undefined), true otherwise.
+ * @ignore
+ */
+export function checkForMagicArrowString(buffer: Uint8Array, index = 0) {
+ for (let i = -1, n = MAGIC.length; ++i < n;) {
+ if (MAGIC[i] !== buffer[index + i]) {
+ return false;
+ }
+ }
+ return true;
+/** Number of bytes in MAGIC. @ignore */
+export const magicLength = MAGIC.length;
+/** Length of one magic marker plus PADDING. @ignore */
+export const magicAndPadding = magicLength + PADDING;
+/** Length of two magic markers plus PADDING. @ignore */
+export const magicX2AndPadding = magicLength * 2 + PADDING;
diff --git a/src/arrow/js/src/ipc/metadata/file.ts b/src/arrow/js/src/ipc/metadata/file.ts
new file mode 100644
index 000000000..5a1be844e
--- /dev/null
+++ b/src/arrow/js/src/ipc/metadata/file.ts
@@ -0,0 +1,163 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+/* eslint-disable @typescript-eslint/naming-convention */
+import {
+ Block as _Block,
+ Footer as _Footer
+} from '../../fb/File';
+import { flatbuffers } from 'flatbuffers';
+import Long = flatbuffers.Long;
+import Builder = flatbuffers.Builder;
+import ByteBuffer = flatbuffers.ByteBuffer;
+import { Schema } from '../../schema';
+import { MetadataVersion } from '../../enum';
+import { toUint8Array } from '../../util/buffer';
+import { ArrayBufferViewInput } from '../../util/buffer';
+/**
+ * File footer metadata: the Schema, the metadata version, and the FileBlocks
+ * of the record batches and dictionary batches.
+ * @ignore
+ */
+class Footer_ {
+
+    /**
+     * Decodes a flatbuffer-encoded footer. The result reads its blocks from
+     * the flatbuffer on demand (see OffHeapFooter).
+     * @nocollapse
+     */
+    public static decode(buf: ArrayBufferViewInput) {
+        buf = new ByteBuffer(toUint8Array(buf));
+        const footer = _Footer.getRootAsFooter(buf);
+        const schema = Schema.decode(footer.schema()!);
+        return new OffHeapFooter(schema, footer) as Footer_;
+    }
+
+    /**
+     * Encodes `footer` as a flatbuffer and returns its bytes. The version
+     * written is always MetadataVersion.V4.
+     * @nocollapse
+     */
+    public static encode(footer: Footer_) {
+
+        const b: Builder = new Builder();
+        const schemaOffset = Schema.encode(b, footer.schema);
+
+        // Blocks are encoded in reverse order; the spread already yields a
+        // fresh array, so reversing it in place is safe.
+        _Footer.startRecordBatchesVector(b, footer.numRecordBatches);
+        [...footer.recordBatches()].reverse().forEach((rb) => FileBlock.encode(b, rb));
+        const recordBatchesOffset = b.endVector();
+
+        _Footer.startDictionariesVector(b, footer.numDictionaries);
+        [...footer.dictionaryBatches()].reverse().forEach((db) => FileBlock.encode(b, db));
+        const dictionaryBatchesOffset = b.endVector();
+
+        _Footer.startFooter(b);
+        _Footer.addSchema(b, schemaOffset);
+        _Footer.addVersion(b, MetadataVersion.V4);
+        _Footer.addRecordBatches(b, recordBatchesOffset);
+        _Footer.addDictionaries(b, dictionaryBatchesOffset);
+        _Footer.finishFooterBuffer(b, _Footer.endFooter(b));
+
+        return b.asUint8Array();
+    }
+
+    protected _recordBatches!: FileBlock[];
+    protected _dictionaryBatches!: FileBlock[];
+    public get numRecordBatches() { return this._recordBatches.length; }
+    public get numDictionaries() { return this._dictionaryBatches.length; }
+
+    /**
+     * @param schema Schema of the file.
+     * @param version Metadata version (default V4).
+     * @param recordBatches Record batch blocks; treated as empty when omitted.
+     * @param dictionaryBatches Dictionary batch blocks; treated as empty when omitted.
+     */
+    constructor(public schema: Schema,
+                public version: MetadataVersion = MetadataVersion.V4,
+                recordBatches?: FileBlock[], dictionaryBatches?: FileBlock[]) {
+        // Fall back to empty lists so the count getters and the get*Batch()
+        // lookups of this class do not dereference undefined.
+        this._recordBatches = recordBatches || [];
+        this._dictionaryBatches = dictionaryBatches || [];
+    }
+
+    /** Yields every non-null record batch block, in index order. */
+    public *recordBatches(): Iterable<FileBlock> {
+        for (let block, i = -1, n = this.numRecordBatches; ++i < n;) {
+            if (block = this.getRecordBatch(i)) { yield block; }
+        }
+    }
+
+    /** Yields every non-null dictionary batch block, in index order. */
+    public *dictionaryBatches(): Iterable<FileBlock> {
+        for (let block, i = -1, n = this.numDictionaries; ++i < n;) {
+            if (block = this.getDictionaryBatch(i)) { yield block; }
+        }
+    }
+
+    /** Returns the record batch block at `index`, or null when out of range. */
+    public getRecordBatch(index: number) {
+        return index >= 0
+            && index < this.numRecordBatches
+            && this._recordBatches[index] || null;
+    }
+
+    /** Returns the dictionary batch block at `index`, or null when out of range. */
+    public getDictionaryBatch(index: number) {
+        return index >= 0
+            && index < this.numDictionaries
+            && this._dictionaryBatches[index] || null;
+    }
+}
+export { Footer_ as Footer };
+/**
+ * Footer_ variant backed by a decoded flatbuffer `_Footer`: the counts and
+ * blocks are read from `_footer` on each access rather than from arrays.
+ * @ignore
+ */
+class OffHeapFooter extends Footer_ {
+ public get numRecordBatches() { return this._footer.recordBatchesLength(); }
+ public get numDictionaries() { return this._footer.dictionariesLength(); }
+ constructor(schema: Schema, protected _footer: _Footer) {
+ super(schema, _footer.version()); // no block arrays are passed to the base class
+ }
+ public getRecordBatch(index: number) { // decoded FileBlock at `index`, or null if out of range or absent
+ if (index >= 0 && index < this.numRecordBatches) {
+ const fileBlock = this._footer.recordBatches(index);
+ if (fileBlock) { return FileBlock.decode(fileBlock); }
+ }
+ return null;
+ }
+ public getDictionaryBatch(index: number) { // decoded FileBlock at `index`, or null if out of range or absent
+ if (index >= 0 && index < this.numDictionaries) {
+ const fileBlock = this._footer.dictionaries(index);
+ if (fileBlock) { return FileBlock.decode(fileBlock); }
+ }
+ return null;
+ }
+/**
+ * Describes one block of a file by three numbers: `offset`, `metaDataLength`
+ * and `bodyLength`.
+ * @ignore
+ */
+export class FileBlock {
+ /** Builds a FileBlock from a flatbuffer `_Block`. @nocollapse */
+ public static decode(block: _Block) {
+ return new FileBlock(block.metaDataLength(), block.bodyLength(), block.offset());
+ }
+ /** Writes `fileBlock` into builder `b` via `_Block.createBlock` and returns its result. @nocollapse */
+ public static encode(b: Builder, fileBlock: FileBlock) {
+ const { metaDataLength } = fileBlock;
+ const offset = new Long(fileBlock.offset, 0); // second Long argument is always 0
+ const bodyLength = new Long(fileBlock.bodyLength, 0); // second Long argument is always 0
+ return _Block.createBlock(b, offset, metaDataLength, bodyLength);
+ }
+ public offset: number;
+ public bodyLength: number;
+ public metaDataLength: number;
+ constructor(metaDataLength: number, bodyLength: Long | number, offset: Long | number) {
+ this.metaDataLength = metaDataLength;
+ this.offset = typeof offset === 'number' ? offset : offset.low; // NOTE(review): only the Long's `low` field is kept - confirm larger offsets cannot occur
+ this.bodyLength = typeof bodyLength === 'number' ? bodyLength : bodyLength.low; // NOTE(review): same `low`-only conversion as `offset`
+ }
diff --git a/src/arrow/js/src/ipc/metadata/json.ts b/src/arrow/js/src/ipc/metadata/json.ts
new file mode 100644
index 000000000..399615c31
--- /dev/null
+++ b/src/arrow/js/src/ipc/metadata/json.ts
@@ -0,0 +1,206 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+/* eslint-disable brace-style */
+import { Schema, Field } from '../../schema';
+import {
+ DataType, Dictionary, TimeBitWidth,
+ Utf8, Binary, Decimal, FixedSizeBinary,
+ List, FixedSizeList, Map_, Struct, Union,
+ Bool, Null, Int, Float, Date_, Time, Interval, Timestamp, IntBitWidth, Int32, TKeys,
+} from '../../type';
+import { DictionaryBatch, RecordBatch, FieldNode, BufferRegion } from './message';
+import { TimeUnit, Precision, IntervalUnit, UnionMode, DateUnit } from '../../enum';
+/**
+ * Builds a Schema from its JSON form.
+ * @param _schema JSON object whose `fields` and `customMetadata` entries are read.
+ * @param dictionaries Map from dictionary id to data type; handed to field
+ * decoding and to the Schema constructor. Defaults to a new empty Map.
+ * @ignore
+ */
+export function schemaFromJSON(_schema: any, dictionaries: Map<number, DataType> = new Map()) {
+ return new Schema(
+ schemaFieldsFromJSON(_schema, dictionaries),
+ customMetadataFromJSON(_schema['customMetadata']),
+ dictionaries
+ );
+/**
+ * Builds RecordBatch metadata from a JSON batch: `b['count']` becomes the
+ * length, and the field nodes and buffer regions are both derived from
+ * `b['columns']`.
+ * @ignore
+ */
+export function recordBatchFromJSON(b: any) {
+ return new RecordBatch(
+ b['count'],
+ fieldNodesFromJSON(b['columns']),
+ buffersFromJSON(b['columns'])
+ );
+/**
+ * Builds DictionaryBatch metadata from a JSON dictionary batch: the record
+ * batch is decoded from `b['data']`, and `b['id']` and `b['isDelta']` are
+ * passed through.
+ * @ignore
+ */
+export function dictionaryBatchFromJSON(b: any) {
+ return new DictionaryBatch(
+ recordBatchFromJSON(b['data']),
+ b['id'], b['isDelta']
+ );
+/**
+ * Converts every truthy entry of `_schema['fields']` with Field.fromJSON;
+ * a missing list is treated as empty.
+ * @ignore
+ */
+function schemaFieldsFromJSON(_schema: any, dictionaries?: Map<number, DataType>) {
+ return (_schema['fields'] || []).filter(Boolean).map((f: any) => Field.fromJSON(f, dictionaries));
+/**
+ * Converts every truthy entry of `_field['children']` with Field.fromJSON;
+ * a missing list is treated as empty.
+ * @ignore
+ */
+function fieldChildrenFromJSON(_field: any, dictionaries?: Map<number, DataType>): Field[] {
+ return (_field['children'] || []).filter(Boolean).map((f: any) => Field.fromJSON(f, dictionaries));
+/**
+ * Flattens JSON columns into FieldNodes, depth-first: each column's node is
+ * followed by the nodes of its `children`. A node takes its length from
+ * `column['count']` and its null count from `column['VALIDITY']`.
+ * A missing list yields [].
+ * @ignore
+ */
+function fieldNodesFromJSON(xs: any[]): FieldNode[] {
+ return (xs || []).reduce<FieldNode[]>((fieldNodes, column: any) => [
+ ...fieldNodes,
+ new FieldNode(
+ column['count'],
+ nullCountFromJSON(column['VALIDITY'])
+ ),
+ ...fieldNodesFromJSON(column['children'])
+ ], [] as FieldNode[]);
+/**
+ * Collects BufferRegions for JSON columns, depth-first. For each column, one
+ * region is pushed per present VALIDITY, TYPE, OFFSET and DATA array (in that
+ * order), then the column's `children` are processed.
+ * @param xs Columns to process; a missing list is treated as empty.
+ * @param buffers Accumulator the regions are appended to.
+ * @returns The accumulator.
+ * @ignore
+ */
+function buffersFromJSON(xs: any[], buffers: BufferRegion[] = []): BufferRegion[] {
+ for (let i = -1, n = (xs || []).length; ++i < n;) {
+ const column = xs[i];
+ column['VALIDITY'] && buffers.push(new BufferRegion(buffers.length, column['VALIDITY'].length)); // region offset is its index in `buffers`, not a byte offset
+ column['TYPE'] && buffers.push(new BufferRegion(buffers.length, column['TYPE'].length));
+ column['OFFSET'] && buffers.push(new BufferRegion(buffers.length, column['OFFSET'].length));
+ column['DATA'] && buffers.push(new BufferRegion(buffers.length, column['DATA'].length));
+ buffers = buffersFromJSON(column['children'], buffers);
+ }
+ return buffers;
+/**
+ * Counts the entries of `validity` that are strictly equal to 0; a missing
+ * list counts as 0.
+ * @ignore
+ */
+function nullCountFromJSON(validity: number[]) {
+ return (validity || []).reduce((sum, val) => sum + +(val === 0), 0);
+/**
+ * Builds a Field from its JSON form.
+ *
+ * Without a `dictionaries` map, or when the field has no `dictionary` entry,
+ * the field gets the plain decoded type. Otherwise it gets a Dictionary type
+ * wrapping the value type registered in `dictionaries` under the dictionary
+ * id; the value type is decoded and registered the first time an id is seen.
+ *
+ * @param _field JSON object describing the field.
+ * @param dictionaries Map from dictionary id to value type; may be updated.
+ * @ignore
+ */
+export function fieldFromJSON(_field: any, dictionaries?: Map<number, DataType>) {
+ let id: number;
+ let keys: TKeys | null;
+ let field: Field | void;
+ let dictMeta: any;
+ let type: DataType<any>;
+ let dictType: Dictionary;
+ // If no dictionary encoding
+ if (!dictionaries || !(dictMeta = _field['dictionary'])) {
+ type = typeFromJSON(_field, fieldChildrenFromJSON(_field, dictionaries));
+ field = new Field(_field['name'], type, _field['nullable'], customMetadataFromJSON(_field['customMetadata']));
+ }
+ // If dictionary encoded and the first time we've seen this dictionary id, decode
+ // the data type and child fields, then wrap in a Dictionary type and insert the
+ // data type into the dictionary types map.
+ else if (!dictionaries.has(id = dictMeta['id'])) {
+ // a dictionary index defaults to signed 32 bit int if unspecified
+ keys = (keys = dictMeta['indexType']) ? indexTypeFromJSON(keys) as TKeys : new Int32();
+ dictionaries.set(id, type = typeFromJSON(_field, fieldChildrenFromJSON(_field, dictionaries)));
+ dictType = new Dictionary(type, keys, id, dictMeta['isOrdered']);
+ field = new Field(_field['name'], dictType, _field['nullable'], customMetadataFromJSON(_field['customMetadata']));
+ }
+ // If dictionary encoded, and have already seen this dictionary Id in the schema, then reuse the
+ // data type and wrap in a new Dictionary type and field.
+ else {
+ // a dictionary index defaults to signed 32 bit int if unspecified
+ keys = (keys = dictMeta['indexType']) ? indexTypeFromJSON(keys) as TKeys : new Int32();
+ dictType = new Dictionary(dictionaries.get(id)!, keys, id, dictMeta['isOrdered']); // `id` was assigned by the `else if` condition above
+ field = new Field(_field['name'], dictType, _field['nullable'], customMetadataFromJSON(_field['customMetadata']));
+ }
+ return field || null;
+/**
+ * Copies the entries of a plain metadata object into a Map; a missing object
+ * yields an empty Map.
+ * @ignore
+ */
+function customMetadataFromJSON(_metadata?: Record<string, string>) {
+ return new Map<string, string>(Object.entries(_metadata || {}));
+/**
+ * Builds the Int type of a dictionary index from the `isSigned` and
+ * `bitWidth` entries of `_type`.
+ * @ignore
+ */
+function indexTypeFromJSON(_type: any) {
+ return new Int(_type['isSigned'], _type['bitWidth']);
+/**
+ * Builds a DataType from a JSON field, selected by `f['type']['name']`.
+ * @param f JSON field; type-specific parameters are read from `f['type']`.
+ * @param children Already decoded child fields, used by the nested types
+ * (list, struct, union, fixedsizelist, map).
+ * @throws Error when the type name is not one of the handled cases.
+ * @ignore
+ */
+function typeFromJSON(f: any, children?: Field[]): DataType<any> {
+ const typeId = f['type']['name'];
+ switch (typeId) { // types that take no parameters from f['type']
+ case 'NONE': return new Null();
+ case 'null': return new Null();
+ case 'binary': return new Binary();
+ case 'utf8': return new Utf8();
+ case 'bool': return new Bool();
+ case 'list': return new List((children || [])[0]);
+ case 'struct': return new Struct(children || []);
+ case 'struct_': return new Struct(children || []);
+ }
+ switch (typeId) { // types parameterised by entries of f['type']
+ case 'int': {
+ const t = f['type'];
+ return new Int(t['isSigned'], t['bitWidth'] as IntBitWidth);
+ }
+ case 'floatingpoint': {
+ const t = f['type'];
+ return new Float(Precision[t['precision']] as any); // enum looked up by the JSON value
+ }
+ case 'decimal': {
+ const t = f['type'];
+ return new Decimal(t['scale'], t['precision']);
+ }
+ case 'date': {
+ const t = f['type'];
+ return new Date_(DateUnit[t['unit']] as any);
+ }
+ case 'time': {
+ const t = f['type'];
+ return new Time(TimeUnit[t['unit']] as any, t['bitWidth'] as TimeBitWidth);
+ }
+ case 'timestamp': {
+ const t = f['type'];
+ return new Timestamp(TimeUnit[t['unit']] as any, t['timezone']);
+ }
+ case 'interval': {
+ const t = f['type'];
+ return new Interval(IntervalUnit[t['unit']] as any);
+ }
+ case 'union': {
+ const t = f['type'];
+ return new Union(UnionMode[t['mode']] as any, (t['typeIds'] || []), children || []);
+ }
+ case 'fixedsizebinary': {
+ const t = f['type'];
+ return new FixedSizeBinary(t['byteWidth']);
+ }
+ case 'fixedsizelist': {
+ const t = f['type'];
+ return new FixedSizeList(t['listSize'], (children || [])[0]);
+ }
+ case 'map': {
+ const t = f['type'];
+ return new Map_((children || [])[0], t['keysSorted']);
+ }
+ }
+ throw new Error(`Unrecognized type: "${typeId}"`);
diff --git a/src/arrow/js/src/ipc/metadata/message.ts b/src/arrow/js/src/ipc/metadata/message.ts
new file mode 100644
index 000000000..2ebb73e4c
--- /dev/null
+++ b/src/arrow/js/src/ipc/metadata/message.ts
@@ -0,0 +1,621 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+/* eslint-disable brace-style */
+import { flatbuffers } from 'flatbuffers';
+import {
+ Type,
+ Int as _Int,
+ Field as _Field,
+ Schema as _Schema,
+ Buffer as _Buffer,
+ KeyValue as _KeyValue,
+ Endianness as _Endianness,
+ DictionaryEncoding as _DictionaryEncoding,
+ FloatingPoint as _FloatingPoint,
+ Decimal as _Decimal,
+ Date as _Date,
+ Time as _Time,
+ Timestamp as _Timestamp,
+ Interval as _Interval,
+ Union as _Union,
+ FixedSizeBinary as _FixedSizeBinary,
+ FixedSizeList as _FixedSizeList,
+ Map as _Map,
+} from '../../fb/Schema';
+import {
+ Message as _Message,
+ FieldNode as _FieldNode,
+ RecordBatch as _RecordBatch,
+ DictionaryBatch as _DictionaryBatch,
+} from '../../fb/Message';
+import { Schema, Field } from '../../schema';
+import { toUint8Array } from '../../util/buffer';
+import { ArrayBufferViewInput } from '../../util/buffer';
+import { MessageHeader, MetadataVersion } from '../../enum';
+import { instance as typeAssembler } from '../../visitor/typeassembler';
+import { fieldFromJSON, schemaFromJSON, recordBatchFromJSON, dictionaryBatchFromJSON } from './json';
+import Long = flatbuffers.Long;
+import Builder = flatbuffers.Builder;
+import ByteBuffer = flatbuffers.ByteBuffer;
+import {
+ DataType, Dictionary, TimeBitWidth,
+ Utf8, Binary, Decimal, FixedSizeBinary,
+ List, FixedSizeList, Map_, Struct, Union,
+ Bool, Null, Int, Float, Date_, Time, Interval, Timestamp, IntBitWidth, Int32, TKeys,
+} from '../../type';
+/**
+ * Metadata of one message: its header type, metadata version and body length,
+ * plus a lazily evaluated header (Schema, RecordBatch or DictionaryBatch).
+ * @ignore
+ * @private
+ **/
+export class Message<T extends MessageHeader = any> {
+
+    /**
+     * Creates a Message whose header is built from JSON when header() is called.
+     * The body length is 0 and the version is V4.
+     * @nocollapse
+     */
+    public static fromJSON<T extends MessageHeader>(msg: any, headerType: T): Message<T> {
+        const message = new Message(0, MetadataVersion.V4, headerType);
+        message._createHeader = messageHeaderFromJSON(msg, headerType);
+        return message;
+    }
+
+    /**
+     * Decodes a flatbuffer-encoded Message. Body length, version and header
+     * type are read immediately; the header is decoded when header() is called.
+     * @nocollapse
+     */
+    public static decode(buf: ArrayBufferViewInput) {
+        buf = new ByteBuffer(toUint8Array(buf));
+        const _message = _Message.getRootAsMessage(buf);
+        const bodyLength: Long = _message.bodyLength()!;
+        const version: MetadataVersion = _message.version();
+        const headerType: MessageHeader = _message.headerType();
+        const message = new Message(bodyLength, version, headerType);
+        message._createHeader = decodeMessageHeader(_message, headerType);
+        return message;
+    }
+
+    /**
+     * Encodes `message` as a flatbuffer and returns its bytes. The version
+     * written is always MetadataVersion.V4. For a message that is neither a
+     * schema, record batch nor dictionary batch, the header offset stays -1.
+     * @nocollapse
+     */
+    public static encode<T extends MessageHeader>(message: Message<T>) {
+        const b = new Builder();
+        let headerOffset = -1;
+        if (message.isSchema()) {
+            headerOffset = Schema.encode(b, message.header() as Schema);
+        } else if (message.isRecordBatch()) {
+            headerOffset = RecordBatch.encode(b, message.header() as RecordBatch);
+        } else if (message.isDictionaryBatch()) {
+            headerOffset = DictionaryBatch.encode(b, message.header() as DictionaryBatch);
+        }
+        _Message.startMessage(b);
+        _Message.addVersion(b, MetadataVersion.V4);
+        _Message.addHeader(b, headerOffset);
+        _Message.addHeaderType(b, message.headerType);
+        _Message.addBodyLength(b, new Long(message.bodyLength, 0));
+        _Message.finishMessageBuffer(b, _Message.endMessage(b));
+        return b.asUint8Array();
+    }
+
+    /**
+     * Wraps an existing header in a V4 Message. `bodyLength` is ignored for a
+     * Schema header, whose message always has body length 0.
+     * @throws Error if `header` is not a Schema, RecordBatch or DictionaryBatch.
+     * @nocollapse
+     */
+    public static from(header: Schema | RecordBatch | DictionaryBatch, bodyLength = 0) {
+        if (header instanceof Schema) {
+            return new Message(0, MetadataVersion.V4, MessageHeader.Schema, header);
+        }
+        if (header instanceof RecordBatch) {
+            return new Message(bodyLength, MetadataVersion.V4, MessageHeader.RecordBatch, header);
+        }
+        if (header instanceof DictionaryBatch) {
+            return new Message(bodyLength, MetadataVersion.V4, MessageHeader.DictionaryBatch, header);
+        }
+        throw new Error(`Unrecognized Message header: ${header}`);
+    }
+
+    public body: Uint8Array;
+    protected _headerType: T;
+    protected _bodyLength: number;
+    protected _version: MetadataVersion;
+    /** Alias of `headerType`. */
+    public get type() { return this.headerType; }
+    public get version() { return this._version; }
+    public get headerType() { return this._headerType; }
+    public get bodyLength() { return this._bodyLength; }
+    protected _createHeader!: MessageHeaderDecoder;
+    /** Produces the header by invoking the stored header factory. */
+    public header() { return this._createHeader<T>(); }
+    public isSchema(): this is Message<MessageHeader.Schema> { return this.headerType === MessageHeader.Schema; }
+    public isRecordBatch(): this is Message<MessageHeader.RecordBatch> { return this.headerType === MessageHeader.RecordBatch; }
+    public isDictionaryBatch(): this is Message<MessageHeader.DictionaryBatch> { return this.headerType === MessageHeader.DictionaryBatch; }
+
+    /**
+     * @param bodyLength Body length; of a Long only the `low` field is kept.
+     * @param version Metadata version.
+     * @param headerType Kind of header this message carries.
+     * @param header Optional ready-made header returned by header().
+     */
+    constructor(bodyLength: Long | number, version: MetadataVersion, headerType: T, header?: any) {
+        this._version = version;
+        this._headerType = headerType;
+        this.body = new Uint8Array(0);
+        header && (this._createHeader = () => header);
+        this._bodyLength = typeof bodyLength === 'number' ? bodyLength : bodyLength.low;
+    }
+}
+/**
+ * Record batch metadata: the row count, the flattened field nodes and the
+ * buffer regions.
+ * @ignore
+ * @private
+ **/
+export class RecordBatch {
+    protected _length: number;
+    protected _nodes: FieldNode[];
+    protected _buffers: BufferRegion[];
+    public get nodes() { return this._nodes; }
+    public get length() { return this._length; }
+    public get buffers() { return this._buffers; }
+    /**
+     * @param length Row count; of a Long only the `low` field is kept.
+     * @param nodes Field nodes of the batch.
+     * @param buffers Buffer regions of the batch.
+     */
+    constructor(length: Long | number, nodes: FieldNode[], buffers: BufferRegion[]) {
+        this._nodes = nodes;
+        this._buffers = buffers;
+        this._length = typeof length === 'number' ? length : length.low;
+    }
+}
+/**
+ * Dictionary batch metadata: a RecordBatch together with the dictionary id
+ * and the delta flag. `length`, `nodes` and `buffers` are those of the
+ * wrapped RecordBatch.
+ * @ignore
+ * @private
+ **/
+export class DictionaryBatch {
+
+    protected _id: number;
+    protected _isDelta: boolean;
+    protected _data: RecordBatch;
+    public get id() { return this._id; }
+    public get data() { return this._data; }
+    public get isDelta() { return this._isDelta; }
+    public get length(): number { return this.data.length; }
+    public get nodes(): FieldNode[] { return this.data.nodes; }
+    public get buffers(): BufferRegion[] { return this.data.buffers; }
+
+    /**
+     * @param data RecordBatch metadata holding the dictionary values.
+     * @param id Dictionary id; of a Long only the `low` field is kept.
+     * @param isDelta Delta flag (default false).
+     */
+    constructor(data: RecordBatch, id: Long | number, isDelta = false) {
+        this._data = data;
+        this._isDelta = isDelta;
+        this._id = typeof id === 'number' ? id : id.low;
+    }
+}
+/**
+ * Offset and length of one buffer.
+ * @ignore
+ * @private
+ **/
+export class BufferRegion {
+    public offset: number;
+    public length: number;
+    /** Stores both values as numbers; of a Long only the `low` field is kept. */
+    constructor(offset: Long | number, length: Long | number) {
+        this.offset = typeof offset === 'number' ? offset : offset.low;
+        this.length = typeof length === 'number' ? length : length.low;
+    }
+}
+/**
+ * A (length, nullCount) pair describing one field node of a record batch.
+ * @ignore
+ * @private
+ **/
+export class FieldNode {
+    public length: number;
+    public nullCount: number;
+    /**
+     * @param length Node length; when given as a `Long`, only its `low` word is kept.
+     * @param nullCount Null count; when given as a `Long`, only its `low` word is kept.
+     */
+    constructor(length: Long | number, nullCount: Long | number) {
+        this.length = typeof length === 'number' ? length : length.low;
+        this.nullCount = typeof nullCount === 'number' ? nullCount : nullCount.low;
+    }
+}
+/**
+ * @ignore
+ * Builds a deferred header factory for a JSON-encoded message. Nothing is
+ * parsed until the returned function is called; it then dispatches on `type`
+ * to the matching `fromJSON` and throws for any other header type.
+ */
+function messageHeaderFromJSON(message: any, type: MessageHeader) {
+    return (() => {
+        switch (type) {
+            case MessageHeader.Schema: return Schema.fromJSON(message);
+            case MessageHeader.RecordBatch: return RecordBatch.fromJSON(message);
+            case MessageHeader.DictionaryBatch: return DictionaryBatch.fromJSON(message);
+        }
+        throw new Error(`Unrecognized Message type: { name: ${MessageHeader[type]}, type: ${type} }`);
+    }) as MessageHeaderDecoder;
+}
+/**
+ * @ignore
+ * Builds a deferred header factory for a flatbuffers-encoded message. Nothing
+ * is decoded until the returned function is called; it then dispatches on
+ * `type` to the matching `decode` and throws for any other header type.
+ */
+function decodeMessageHeader(message: _Message, type: MessageHeader) {
+    return (() => {
+        switch (type) {
+            case MessageHeader.Schema: return Schema.decode(message.header(new _Schema())!);
+            case MessageHeader.RecordBatch: return RecordBatch.decode(message.header(new _RecordBatch())!, message.version());
+            case MessageHeader.DictionaryBatch: return DictionaryBatch.decode(message.header(new _DictionaryBatch())!, message.version());
+        }
+        throw new Error(`Unrecognized Message type: { name: ${MessageHeader[type]}, type: ${type} }`);
+    }) as MessageHeaderDecoder;
+}
+Field['encode'] = encodeField; // Attach the codec functions defined in this file as static members of each class.
+Field['decode'] = decodeField;
+Field['fromJSON'] = fieldFromJSON;
+Schema['encode'] = encodeSchema;
+Schema['decode'] = decodeSchema;
+Schema['fromJSON'] = schemaFromJSON;
+RecordBatch['encode'] = encodeRecordBatch;
+RecordBatch['decode'] = decodeRecordBatch;
+RecordBatch['fromJSON'] = recordBatchFromJSON;
+DictionaryBatch['encode'] = encodeDictionaryBatch;
+DictionaryBatch['decode'] = decodeDictionaryBatch;
+DictionaryBatch['fromJSON'] = dictionaryBatchFromJSON;
+FieldNode['encode'] = encodeFieldNode; // FieldNode and BufferRegion only receive encode/decode here, no fromJSON.
+FieldNode['decode'] = decodeFieldNode;
+BufferRegion['encode'] = encodeBufferRegion;
+BufferRegion['decode'] = decodeBufferRegion;
+// Type-level counterpart of the static assignments: declares the
+// encode/decode/fromJSON members on Field and Schema for the compiler.
+declare module '../../schema' {
+    namespace Field {
+        export { encodeField as encode };
+        export { decodeField as decode };
+        export { fieldFromJSON as fromJSON };
+    }
+    namespace Schema {
+        export { encodeSchema as encode };
+        export { decodeSchema as decode };
+        export { schemaFromJSON as fromJSON };
+    }
+}
+// Type-level counterpart of the static assignments: declares the codec
+// members on the message metadata classes for the compiler.
+declare module './message' {
+    namespace RecordBatch {
+        export { encodeRecordBatch as encode };
+        export { decodeRecordBatch as decode };
+        export { recordBatchFromJSON as fromJSON };
+    }
+    namespace DictionaryBatch {
+        export { encodeDictionaryBatch as encode };
+        export { decodeDictionaryBatch as decode };
+        export { dictionaryBatchFromJSON as fromJSON };
+    }
+    namespace FieldNode {
+        export { encodeFieldNode as encode };
+        export { decodeFieldNode as decode };
+    }
+    namespace BufferRegion {
+        export { encodeBufferRegion as encode };
+        export { decodeBufferRegion as decode };
+    }
+}
+/**
+ * @ignore
+ * Decodes a flatbuffers schema into a `Schema`.
+ * @param _schema Flatbuffers schema table to read.
+ * @param dictionaries Map of dictionary id to value type; it is filled while
+ * the fields are decoded and then handed to the new `Schema`.
+ */
+function decodeSchema(_schema: _Schema, dictionaries: Map<number, DataType> = new Map()) {
+    const fields = decodeSchemaFields(_schema, dictionaries);
+    return new Schema(fields, decodeCustomMetadata(_schema), dictionaries);
+}
+/**
+ * @ignore
+ * Decodes a flatbuffers record batch into `RecordBatch` metadata.
+ * @param batch Flatbuffers record batch table to read.
+ * @param version Metadata version of the message, forwarded to the buffer
+ * decoder; defaults to V4.
+ */
+function decodeRecordBatch(batch: _RecordBatch, version = MetadataVersion.V4) {
+    return new RecordBatch(batch.length(), decodeFieldNodes(batch), decodeBuffers(batch, version));
+}
+/**
+ * @ignore
+ * Decodes a flatbuffers dictionary batch into `DictionaryBatch` metadata.
+ * @param batch Flatbuffers dictionary batch table to read.
+ * @param version Metadata version of the message, forwarded to the record
+ * batch decoder; defaults to V4.
+ */
+function decodeDictionaryBatch(batch: _DictionaryBatch, version = MetadataVersion.V4) {
+    // Argument order follows the DictionaryBatch constructor: (data, id, isDelta).
+    return new DictionaryBatch(RecordBatch.decode(batch.data()!, version), batch.id(), batch.isDelta());
+}
+/**
+ * @ignore
+ * Copies the offset and length of a flatbuffers buffer struct into a `BufferRegion`.
+ */
+function decodeBufferRegion(b: _Buffer) {
+    return new BufferRegion(b.offset(), b.length());
+}
+/**
+ * @ignore
+ * Copies the length and null count of a flatbuffers field node into a `FieldNode`.
+ */
+function decodeFieldNode(f: _FieldNode) {
+    return new FieldNode(f.length(), f.nullCount());
+}
+/**
+ * @ignore
+ * Decodes every field node of a flatbuffers record batch, in order.
+ * Entries the accessor returns as falsy are skipped, so the result is dense.
+ */
+function decodeFieldNodes(batch: _RecordBatch) {
+    const nodes = [] as FieldNode[];
+    // `i` walks the source vector; `j` is the next write slot in `nodes`.
+    for (let f, i = -1, j = -1, n = batch.nodesLength(); ++i < n;) {
+        if (f = batch.nodes(i)) {
+            nodes[++j] = FieldNode.decode(f);
+        }
+    }
+    return nodes;
+}
+/**
+ * @ignore
+ * Decodes every buffer region of a flatbuffers record batch, in order.
+ * Entries the accessor returns as falsy are skipped, so the result is dense.
+ * @param batch Flatbuffers record batch table to read.
+ * @param version Metadata version the message was written with.
+ */
+function decodeBuffers(batch: _RecordBatch, version: MetadataVersion) {
+    const bufferRegions = [] as BufferRegion[];
+    // `i` walks the source vector; `j` is the next write slot in `bufferRegions`.
+    for (let b, i = -1, j = -1, n = batch.buffersLength(); ++i < n;) {
+        if (b = batch.buffers(i)) {
+            // If this Arrow buffer was written before version 4,
+            // advance the buffer's bb_pos 8 bytes to skip past
+            // the now-removed page_id field
+            if (version < MetadataVersion.V4) {
+                b.bb_pos += (8 * (i + 1));
+            }
+            bufferRegions[++j] = BufferRegion.decode(b);
+        }
+    }
+    return bufferRegions;
+}
+/**
+ * @ignore
+ * Decodes the top-level fields of a flatbuffers schema, in order.
+ * Entries the accessor returns as falsy are skipped.
+ * @param schema Flatbuffers schema table to read.
+ * @param dictionaries Optional dictionary-type map forwarded to `Field.decode`.
+ */
+function decodeSchemaFields(schema: _Schema, dictionaries?: Map<number, DataType>) {
+    const fields = [] as Field[];
+    for (let f, i = -1, j = -1, n = schema.fieldsLength(); ++i < n;) {
+        if (f = schema.fields(i)) {
+            fields[++j] = Field.decode(f, dictionaries);
+        }
+    }
+    return fields;
+}
+/**
+ * @ignore
+ * Decodes the child fields of a flatbuffers field, in order.
+ * Entries the accessor returns as falsy are skipped.
+ * @param field Flatbuffers field table whose children are read.
+ * @param dictionaries Optional dictionary-type map forwarded to `Field.decode`.
+ */
+function decodeFieldChildren(field: _Field, dictionaries?: Map<number, DataType>): Field[] {
+    const children = [] as Field[];
+    for (let f, i = -1, j = -1, n = field.childrenLength(); ++i < n;) {
+        if (f = field.children(i)) {
+            children[++j] = Field.decode(f, dictionaries);
+        }
+    }
+    return children;
+}
+/**
+ * @ignore
+ * Decodes one flatbuffers field into a `Field`, wrapping its type in a
+ * `Dictionary` type when the field is dictionary encoded.
+ * @param f Flatbuffers field table to read.
+ * @param dictionaries Map of dictionary id to value type. When omitted, the
+ * field is decoded as if it had no dictionary encoding.
+ * @returns The decoded field, or `null` if no field was produced.
+ */
+function decodeField(f: _Field, dictionaries?: Map<number, DataType>) {
+    let id: number;
+    let field: Field | void;
+    let type: DataType<any>;
+    let keys: _Int | TKeys | null;
+    let dictType: Dictionary;
+    let dictMeta: _DictionaryEncoding | null;
+    // If no dictionary encoding
+    if (!dictionaries || !(dictMeta = f.dictionary())) {
+        type = decodeFieldType(f, decodeFieldChildren(f, dictionaries));
+        field = new Field(f.name()!, type, f.nullable(), decodeCustomMetadata(f));
+    }
+    // If dictionary encoded and the first time we've seen this dictionary id, decode
+    // the data type and child fields, then wrap in a Dictionary type and insert the
+    // data type into the dictionary types map.
+    else if (!dictionaries.has(id = dictMeta.id().low)) {
+        // a dictionary index defaults to signed 32 bit int if unspecified
+        keys = (keys = dictMeta.indexType()) ? decodeIndexType(keys) as TKeys : new Int32();
+        dictionaries.set(id, type = decodeFieldType(f, decodeFieldChildren(f, dictionaries)));
+        dictType = new Dictionary(type, keys, id, dictMeta.isOrdered());
+        field = new Field(f.name()!, dictType, f.nullable(), decodeCustomMetadata(f));
+    }
+    // If dictionary encoded, and have already seen this dictionary Id in the schema, then reuse the
+    // data type and wrap in a new Dictionary type and field.
+    else {
+        // a dictionary index defaults to signed 32 bit int if unspecified
+        keys = (keys = dictMeta.indexType()) ? decodeIndexType(keys) as TKeys : new Int32();
+        dictType = new Dictionary(dictionaries.get(id)!, keys, id, dictMeta.isOrdered());
+        field = new Field(f.name()!, dictType, f.nullable(), decodeCustomMetadata(f));
+    }
+    return field || null;
+}
+/**
+ * @ignore
+ * Collects the custom key/value metadata of a schema or field into a `Map`.
+ * Entries that are missing or have a null key are skipped. A missing `parent`
+ * yields an empty map.
+ */
+function decodeCustomMetadata(parent?: _Schema | _Field | null) {
+    const data = new Map<string, string>();
+    if (parent) {
+        // `| 0` coerces the reported length to an integer before looping.
+        for (let entry, key, i = -1, n = parent.customMetadataLength() | 0; ++i < n;) {
+            if ((entry = parent.customMetadata(i)) && (key = entry.key()) != null) {
+                data.set(key, entry.value()!);
+            }
+        }
+    }
+    return data;
+}
+/**
+ * @ignore
+ * Converts a flatbuffers integer type into an `Int` with the same signedness and bit width.
+ */
+function decodeIndexType(_type: _Int) {
+    return new Int(_type.isSigned(), _type.bitWidth() as IntBitWidth);
+}
+/**
+ * @ignore
+ * Maps the type tag of a flatbuffers field to the matching `DataType` instance.
+ * @param f Flatbuffers field table whose type is read.
+ * @param children Already-decoded child fields, used by the nested types
+ * (List, Struct, Union, FixedSizeList, Map).
+ * @throws Error when the type tag is not one of the handled cases.
+ */
+function decodeFieldType(f: _Field, children?: Field[]): DataType<any> {
+    const typeId = f.typeType();
+    // Types that need no parameters read from the flatbuffers type table.
+    switch (typeId) {
+        case Type['NONE']: return new Null();
+        case Type['Null']: return new Null();
+        case Type['Binary']: return new Binary();
+        case Type['Utf8']: return new Utf8();
+        case Type['Bool']: return new Bool();
+        case Type['List']: return new List((children || [])[0]);
+        case Type['Struct_']: return new Struct(children || []);
+    }
+    // Types whose parameters are read from the flatbuffers type table.
+    switch (typeId) {
+        case Type['Int']: {
+            const t = f.type(new _Int())!;
+            return new Int(t.isSigned(), t.bitWidth());
+        }
+        case Type['FloatingPoint']: {
+            const t = f.type(new _FloatingPoint())!;
+            return new Float(t.precision());
+        }
+        case Type['Decimal']: {
+            const t = f.type(new _Decimal())!;
+            return new Decimal(t.scale(), t.precision());
+        }
+        case Type['Date']: {
+            const t = f.type(new _Date())!;
+            return new Date_(t.unit());
+        }
+        case Type['Time']: {
+            const t = f.type(new _Time())!;
+            return new Time(t.unit(), t.bitWidth() as TimeBitWidth);
+        }
+        case Type['Timestamp']: {
+            const t = f.type(new _Timestamp())!;
+            return new Timestamp(t.unit(), t.timezone());
+        }
+        case Type['Interval']: {
+            const t = f.type(new _Interval())!;
+            return new Interval(t.unit());
+        }
+        case Type['Union']: {
+            const t = f.type(new _Union())!;
+            return new Union(t.mode(), t.typeIdsArray() || [], children || []);
+        }
+        case Type['FixedSizeBinary']: {
+            const t = f.type(new _FixedSizeBinary())!;
+            return new FixedSizeBinary(t.byteWidth());
+        }
+        case Type['FixedSizeList']: {
+            const t = f.type(new _FixedSizeList())!;
+            return new FixedSizeList(t.listSize(), (children || [])[0]);
+        }
+        case Type['Map']: {
+            const t = f.type(new _Map())!;
+            return new Map_((children || [])[0], t.keysSorted());
+        }
+    }
+    throw new Error(`Unrecognized type: "${Type[typeId]}" (${typeId})`);
+}
+/**
+ * @ignore
+ * Writes a `Schema` into the flatbuffers builder.
+ * @param b Flatbuffers builder that receives the encoded tables.
+ * @param schema Schema to encode.
+ * @returns The value of `_Schema.endSchema(b)`.
+ */
+function encodeSchema(b: Builder, schema: Schema) {
+    // Child tables are written first; the schema table then refers to their offsets.
+    const fieldOffsets = schema.fields.map((f) => Field.encode(b, f));
+    _Schema.startFieldsVector(b, fieldOffsets.length);
+    const fieldsVectorOffset = _Schema.createFieldsVector(b, fieldOffsets);
+    // -1 marks "no custom metadata", so the add call below is skipped.
+    const metadataOffset = !(schema.metadata && schema.metadata.size > 0) ? -1 :
+        _Schema.createCustomMetadataVector(b, [...schema.metadata].map(([k, v]) => {
+            const key = b.createString(`${k}`);
+            const val = b.createString(`${v}`);
+            _KeyValue.startKeyValue(b);
+            _KeyValue.addKey(b, key);
+            _KeyValue.addValue(b, val);
+            return _KeyValue.endKeyValue(b);
+        }));
+    _Schema.startSchema(b);
+    _Schema.addFields(b, fieldsVectorOffset);
+    _Schema.addEndianness(b, platformIsLittleEndian ? _Endianness.Little : _Endianness.Big);
+    if (metadataOffset !== -1) { _Schema.addCustomMetadata(b, metadataOffset); }
+    return _Schema.endSchema(b);
+}
+/**
+ * @ignore
+ * Writes a `Field`, including its type, children, optional dictionary
+ * encoding and custom metadata, into the flatbuffers builder.
+ * @param b Flatbuffers builder that receives the encoded tables.
+ * @param field Field to encode.
+ * @returns The value of `_Field.endField(b)`.
+ */
+function encodeField(b: Builder, field: Field) {
+    // -1 marks an optional part as absent; the matching add call is then skipped.
+    let nameOffset = -1;
+    let typeOffset = -1;
+    let dictionaryOffset = -1;
+    const type = field.type;
+    let typeId: Type = <any> field.typeId;
+    if (!DataType.isDictionary(type)) {
+        typeOffset = typeAssembler.visit(type, b)!;
+    } else {
+        // Dictionary-encoded field: the type written is the dictionary's
+        // value type, and the encoding itself is written separately.
+        typeId = type.dictionary.typeId;
+        dictionaryOffset = typeAssembler.visit(type, b)!;
+        typeOffset = typeAssembler.visit(type.dictionary, b)!;
+    }
+    const childOffsets = (type.children || []).map((f: Field) => Field.encode(b, f));
+    const childrenVectorOffset = _Field.createChildrenVector(b, childOffsets);
+    const metadataOffset = !(field.metadata && field.metadata.size > 0) ? -1 :
+        _Field.createCustomMetadataVector(b, [...field.metadata].map(([k, v]) => {
+            const key = b.createString(`${k}`);
+            const val = b.createString(`${v}`);
+            _KeyValue.startKeyValue(b);
+            _KeyValue.addKey(b, key);
+            _KeyValue.addValue(b, val);
+            return _KeyValue.endKeyValue(b);
+        }));
+    if (field.name) {
+        nameOffset = b.createString(field.name);
+    }
+    _Field.startField(b);
+    _Field.addType(b, typeOffset);
+    _Field.addTypeType(b, typeId);
+    _Field.addChildren(b, childrenVectorOffset);
+    _Field.addNullable(b, !!field.nullable);
+    if (nameOffset !== -1) { _Field.addName(b, nameOffset); }
+    if (dictionaryOffset !== -1) { _Field.addDictionary(b, dictionaryOffset); }
+    if (metadataOffset !== -1) { _Field.addCustomMetadata(b, metadataOffset); }
+    return _Field.endField(b);
+}
+/**
+ * @ignore
+ * Writes `RecordBatch` metadata (length, field nodes, buffer regions) into
+ * the flatbuffers builder.
+ * @param b Flatbuffers builder that receives the encoded tables.
+ * @param recordBatch Record batch metadata to encode.
+ * @returns The value of `_RecordBatch.endRecordBatch(b)`.
+ */
+function encodeRecordBatch(b: Builder, recordBatch: RecordBatch) {
+    const nodes = recordBatch.nodes || [];
+    const buffers = recordBatch.buffers || [];
+    // Entries are written from a reversed copy, leaving the inputs untouched.
+    // NOTE(review): presumably because the builder fills vectors back to front - confirm.
+    _RecordBatch.startNodesVector(b, nodes.length);
+    nodes.slice().reverse().forEach((n) => FieldNode.encode(b, n));
+    const nodesVectorOffset = b.endVector();
+    _RecordBatch.startBuffersVector(b, buffers.length);
+    buffers.slice().reverse().forEach((b_) => BufferRegion.encode(b, b_));
+    const buffersVectorOffset = b.endVector();
+    _RecordBatch.startRecordBatch(b);
+    _RecordBatch.addLength(b, new Long(recordBatch.length, 0));
+    _RecordBatch.addNodes(b, nodesVectorOffset);
+    _RecordBatch.addBuffers(b, buffersVectorOffset);
+    return _RecordBatch.endRecordBatch(b);
+}
+/**
+ * @ignore
+ * Writes `DictionaryBatch` metadata (id, delta flag, wrapped record batch)
+ * into the flatbuffers builder.
+ * @param b Flatbuffers builder that receives the encoded tables.
+ * @param dictionaryBatch Dictionary batch metadata to encode.
+ * @returns The value of `_DictionaryBatch.endDictionaryBatch(b)`.
+ */
+function encodeDictionaryBatch(b: Builder, dictionaryBatch: DictionaryBatch) {
+    // The wrapped record batch is written first so its offset can be referenced.
+    const dataOffset = RecordBatch.encode(b, dictionaryBatch.data);
+    _DictionaryBatch.startDictionaryBatch(b);
+    _DictionaryBatch.addId(b, new Long(dictionaryBatch.id, 0));
+    _DictionaryBatch.addIsDelta(b, dictionaryBatch.isDelta);
+    _DictionaryBatch.addData(b, dataOffset);
+    return _DictionaryBatch.endDictionaryBatch(b);
+}
+/**
+ * @ignore
+ * Writes a `FieldNode` into the builder, widening length and null count to `Long` with a zero high word.
+ */
+function encodeFieldNode(b: Builder, node: FieldNode) {
+    return _FieldNode.createFieldNode(b, new Long(node.length, 0), new Long(node.nullCount, 0));
+}
+/**
+ * @ignore
+ * Writes a `BufferRegion` into the builder, widening offset and length to `Long` with a zero high word.
+ */
+function encodeBufferRegion(b: Builder, node: BufferRegion) {
+    return _Buffer.createBuffer(b, new Long(node.offset, 0), new Long(node.length, 0));
+}
+/**
+ * @ignore
+ * `true` when typed arrays on this platform are little-endian. Computed once,
+ * when the module is evaluated.
+ */
+const platformIsLittleEndian = (function() {
+    const buffer = new ArrayBuffer(2);
+    // Write 256 explicitly as little-endian, then read it back natively.
+    new DataView(buffer).setInt16(0, 256, true /* littleEndian */);
+    // Int16Array uses the platform's endianness.
+    return new Int16Array(buffer)[0] === 256;
+})();
+/**
+ * @ignore
+ * Call signature of the zero-argument header factories returned by
+ * `messageHeaderFromJSON` and `decodeMessageHeader`: the result type is
+ * selected by the `MessageHeader` type argument, and is `never` for any
+ * header type other than Schema, RecordBatch or DictionaryBatch.
+ */
+type MessageHeaderDecoder = <T extends MessageHeader>() => T extends MessageHeader.Schema ? Schema
+ : T extends MessageHeader.RecordBatch ? RecordBatch
+ : T extends MessageHeader.DictionaryBatch ? DictionaryBatch : never;
diff --git a/src/arrow/js/src/ipc/reader.ts b/src/arrow/js/src/ipc/reader.ts
new file mode 100644
index 000000000..a150ac1bb
--- /dev/null
+++ b/src/arrow/js/src/ipc/reader.ts
@@ -0,0 +1,739 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+import { Vector } from '../vector';
+import { DataType } from '../type';
+import { MessageHeader } from '../enum';
+import { Footer } from './metadata/file';
+import { Schema, Field } from '../schema';
+import streamAdapters from '../io/adapters';
+import { Message } from './metadata/message';
+import * as metadata from './metadata/message';
+import { ArrayBufferViewInput } from '../util/buffer';
+import { ByteStream, AsyncByteStream } from '../io/stream';
+import { RandomAccessFile, AsyncRandomAccessFile } from '../io/file';
+import { VectorLoader, JSONVectorLoader } from '../visitor/vectorloader';
+import { RecordBatch, _InternalEmptyPlaceholderRecordBatch } from '../recordbatch';
+import {
+ FileHandle,
+ ArrowJSONLike,
+ ITERATOR_DONE,
+ ReadableInterop,
+} from '../io/interfaces';
+import {
+ MessageReader, AsyncMessageReader, JSONMessageReader,
+ checkForMagicArrowString, magicLength, magicAndPadding, magicX2AndPadding
+} from './message';
+import {
+ isPromise,
+ isIterable, isAsyncIterable,
+ isIteratorResult, isArrowJSON,
+ isFileHandle, isFetchResponse,
+ isReadableDOMStream, isReadableNodeStream
+} from '../util/compat';
+/** @ignore Arrow JSON source. */ export type FromArg0 = ArrowJSONLike;
+/** @ignore Promise-like of an Arrow JSON source. */ export type FromArg1 = PromiseLike<ArrowJSONLike>;
+/** @ignore In-memory bytes: one buffer/view or an iterable of them. */ export type FromArg2 = Iterable<ArrayBufferViewInput> | ArrayBufferViewInput;
+/** @ignore Promise-like of in-memory bytes. */ export type FromArg3 = PromiseLike<Iterable<ArrayBufferViewInput> | ArrayBufferViewInput>;
+/** @ignore Asynchronous byte sources: fetch Response, Node or DOM readable stream, or async iterable. */ export type FromArg4 = Response | NodeJS.ReadableStream | ReadableStream<ArrayBufferViewInput> | AsyncIterable<ArrayBufferViewInput>;
+/** @ignore File handle, or a promise-like of a file handle or of an asynchronous byte source. */ export type FromArg5 = FileHandle | PromiseLike<FileHandle> | PromiseLike<FromArg4>;
+/** @ignore Union of every source type listed above. */ export type FromArgs = FromArg0 | FromArg1 | FromArg2 | FromArg3 | FromArg4 | FromArg5;
+/** @ignore Options accepted by `open()`. */ type OpenOptions = { autoDestroy?: boolean };
+/** @ignore Either synchronous reader class. */ type RecordBatchReaders<T extends { [key: string]: DataType } = any> = RecordBatchFileReader<T> | RecordBatchStreamReader<T>;
+/** @ignore Either asynchronous reader class. */ type AsyncRecordBatchReaders<T extends { [key: string]: DataType } = any> = AsyncRecordBatchFileReader<T> | AsyncRecordBatchStreamReader<T>;
+/** @ignore Either file reader class, sync or async. */ type RecordBatchFileReaders<T extends { [key: string]: DataType } = any> = RecordBatchFileReader<T> | AsyncRecordBatchFileReader<T>;
+/** @ignore Either stream reader class, sync or async. */ type RecordBatchStreamReaders<T extends { [key: string]: DataType } = any> = RecordBatchStreamReader<T> | AsyncRecordBatchStreamReader<T>;
+/**
+ * Public reader facade for record batches. Every operation is delegated to
+ * the implementation object passed to the constructor, so whether a call
+ * returns a value or a Promise depends on that implementation.
+ */
+export class RecordBatchReader<T extends { [key: string]: DataType } = any> extends ReadableInterop<RecordBatch<T>> {
+    protected _impl: RecordBatchReaderImpls<T>;
+    protected constructor(impl: RecordBatchReaderImpls<T>) {
+        super();
+        this._impl = impl;
+    }
+    public get closed() { return this._impl.closed; }
+    public get schema() { return this._impl.schema; }
+    public get autoDestroy() { return this._impl.autoDestroy; }
+    public get dictionaries() { return this._impl.dictionaries; }
+    public get numDictionaries() { return this._impl.numDictionaries; }
+    public get numRecordBatches() { return this._impl.numRecordBatches; }
+    /** The file footer when the implementation is a file reader, otherwise `null`. */
+    public get footer(): Footer | null { return this._impl.isFile() ? this._impl.footer : null; }
+    public isSync(): this is RecordBatchReaders<T> { return this._impl.isSync(); }
+    public isAsync(): this is AsyncRecordBatchReaders<T> { return this._impl.isAsync(); }
+    public isFile(): this is RecordBatchFileReaders<T> { return this._impl.isFile(); }
+    public isStream(): this is RecordBatchStreamReaders<T> { return this._impl.isStream(); }
+    /** Advances the underlying implementation and returns its iterator result. */
+    public next() {
+        return this._impl.next();
+    }
+    public throw(value?: any) {
+        return this._impl.throw(value);
+    }
+    public return(value?: any) {
+        return this._impl.return(value);
+    }
+    public cancel() {
+        return this._impl.cancel();
+    }
+    /** Resets the implementation and drops any cached DOM/Node stream wrappers. */
+    public reset(schema?: Schema<T> | null): this {
+        this._impl.reset(schema);
+        this._DOMStream = undefined;
+        this._nodeStream = undefined;
+        return this;
+    }
+    /**
+     * Opens the underlying implementation. Resolves to this reader when the
+     * implementation opens asynchronously, otherwise returns this reader directly.
+     */
+    public open(options?: OpenOptions) {
+        const opening = this._impl.open(options);
+        return isPromise(opening) ? opening.then(() => this) : this;
+    }
+    /** Reads the batch at `index` from a file implementation; `null` for non-file readers. */
+    public readRecordBatch(index: number): RecordBatch<T> | null | Promise<RecordBatch<T> | null> {
+        return this._impl.isFile() ? this._impl.readRecordBatch(index) : null;
+    }
+    public [Symbol.iterator](): IterableIterator<RecordBatch<T>> {
+        return (<IterableIterator<RecordBatch<T>>> this._impl)[Symbol.iterator]();
+    }
+    public [Symbol.asyncIterator](): AsyncIterableIterator<RecordBatch<T>> {
+        return (<AsyncIterableIterator<RecordBatch<T>>> this._impl)[Symbol.asyncIterator]();
+    }
+    public toDOMStream() {
+        return streamAdapters.toDOMStream<RecordBatch<T>>(
+            (this.isSync()
+                ? { [Symbol.iterator]: () => this } as Iterable<RecordBatch<T>>
+                : { [Symbol.asyncIterator]: () => this } as AsyncIterable<RecordBatch<T>>));
+    }
+    public toNodeStream() {
+        return streamAdapters.toNodeStream<RecordBatch<T>>(
+            (this.isSync()
+                ? { [Symbol.iterator]: () => this } as Iterable<RecordBatch<T>>
+                : { [Symbol.asyncIterator]: () => this } as AsyncIterable<RecordBatch<T>>),
+            { objectMode: true });
+    }
+    /** @nocollapse */
+    // @ts-ignore
+    public static throughNode(options?: import('stream').DuplexOptions & { autoDestroy: boolean }): import('stream').Duplex {
+        throw new Error(`"throughNode" not available in this environment`);
+    }
+    /** @nocollapse */
+    public static throughDOM<T extends { [key: string]: DataType }>(
+        // @ts-ignore
+        writableStrategy?: ByteLengthQueuingStrategy,
+        // @ts-ignore
+        readableStrategy?: { autoDestroy: boolean }
+    ): { writable: WritableStream<Uint8Array>; readable: ReadableStream<RecordBatch<T>> } {
+        throw new Error(`"throughDOM" not available in this environment`);
+    }
+    public static from<T extends RecordBatchReader>(source: T): T;
+    public static from<T extends { [key: string]: DataType } = any>(source: FromArg0): RecordBatchStreamReader<T>;
+    public static from<T extends { [key: string]: DataType } = any>(source: FromArg1): Promise<RecordBatchStreamReader<T>>;
+    public static from<T extends { [key: string]: DataType } = any>(source: FromArg2): RecordBatchFileReader<T> | RecordBatchStreamReader<T>;
+    public static from<T extends { [key: string]: DataType } = any>(source: FromArg3): Promise<RecordBatchFileReader<T> | RecordBatchStreamReader<T>>;
+    public static from<T extends { [key: string]: DataType } = any>(source: FromArg4): Promise<RecordBatchFileReader<T> | AsyncRecordBatchReaders<T>>;
+    public static from<T extends { [key: string]: DataType } = any>(source: FromArg5): Promise<AsyncRecordBatchFileReader<T> | AsyncRecordBatchStreamReader<T>>;
+    /**
+     * Creates a reader for `source`, picking the factory from the source's
+     * runtime shape. An existing reader is returned unchanged; a promise is
+     * awaited and re-dispatched; anything unmatched is wrapped in a `ByteStream`.
+     * @nocollapse
+     */
+    public static from<T extends { [key: string]: DataType } = any>(source: any) {
+        if (source instanceof RecordBatchReader) {
+            return source;
+        } else if (isArrowJSON(source)) {
+            return fromArrowJSON<T>(source);
+        } else if (isFileHandle(source)) {
+            return fromFileHandle<T>(source);
+        } else if (isPromise<any>(source)) {
+            return (async () => await RecordBatchReader.from<any>(await source))();
+        } else if (isFetchResponse(source) || isReadableDOMStream(source) || isReadableNodeStream(source) || isAsyncIterable(source)) {
+            return fromAsyncByteStream<T>(new AsyncByteStream(source));
+        }
+        return fromByteStream<T>(new ByteStream(source));
+    }
+    public static readAll<T extends RecordBatchReader>(source: T): T extends RecordBatchReaders ? IterableIterator<T> : AsyncIterableIterator<T>;
+    public static readAll<T extends { [key: string]: DataType } = any>(source: FromArg0): IterableIterator<RecordBatchStreamReader<T>>;
+    public static readAll<T extends { [key: string]: DataType } = any>(source: FromArg1): AsyncIterableIterator<RecordBatchStreamReader<T>>;
+    public static readAll<T extends { [key: string]: DataType } = any>(source: FromArg2): IterableIterator<RecordBatchFileReader<T> | RecordBatchStreamReader<T>>;
+    public static readAll<T extends { [key: string]: DataType } = any>(source: FromArg3): AsyncIterableIterator<RecordBatchFileReader<T> | RecordBatchStreamReader<T>>;
+    public static readAll<T extends { [key: string]: DataType } = any>(source: FromArg4): AsyncIterableIterator<RecordBatchFileReader<T> | AsyncRecordBatchReaders<T>>;
+    public static readAll<T extends { [key: string]: DataType } = any>(source: FromArg5): AsyncIterableIterator<AsyncRecordBatchFileReader<T> | AsyncRecordBatchStreamReader<T>>;
+    /**
+     * Returns an iterator of readers over `source`: synchronous for sync
+     * readers and in-memory/JSON inputs, asynchronous for everything else.
+     * @nocollapse
+     */
+    public static readAll<T extends { [key: string]: DataType } = any>(source: any) {
+        if (source instanceof RecordBatchReader) {
+            return source.isSync() ? readAllSync(source) : readAllAsync(source as AsyncRecordBatchReaders<T>);
+        } else if (isArrowJSON(source) || ArrayBuffer.isView(source) || isIterable<ArrayBufferViewInput>(source) || isIteratorResult(source)) {
+            return readAllSync<T>(source) as IterableIterator<RecordBatchReaders<T>>;
+        }
+        return readAllAsync<T>(source) as AsyncIterableIterator<RecordBatchReaders<T> | AsyncRecordBatchReaders<T>>;
+    }
+}
+// Since TS is a structural type system, we define the following subclass stubs
+// so that concrete types exist to associate with the interfaces below.
+// The implementation for each RecordBatchReader is hidden away in the set of
+// `RecordBatchReaderImpl` classes in the second half of this file. This allows
+// us to export a single RecordBatchReader class, and swap out the impl based
+// on the io primitives or underlying arrow (JSON, file, or stream) at runtime.
+// Async/await makes our job a bit harder, since it forces everything to be
+// either fully sync or fully async. This is why the logic for the reader impls
+// has been duplicated into both sync and async variants. Since the RBR
+// delegates to its impl, an RBR with an AsyncRecordBatchFileReaderImpl for
+// example will return async/await-friendly Promises, but one with a (sync)
+// RecordBatchStreamReaderImpl will always return values. Nothing should be
+// different about their logic, aside from the async handling. This is also why
+// this code looks highly structured, as it should be nearly identical and easy
+// to follow.
+/**
+ * @ignore
+ * Synchronous stream reader stub. Iteration is forwarded to the sync
+ * implementation; async iteration re-yields the synchronous iterator.
+ */
+export class RecordBatchStreamReader<T extends { [key: string]: DataType } = any> extends RecordBatchReader<T> {
+    constructor(protected _impl: RecordBatchStreamReaderImpl<T>) { super (_impl); }
+    public [Symbol.iterator]() { return (this._impl as IterableIterator<RecordBatch<T>>)[Symbol.iterator](); }
+    public async *[Symbol.asyncIterator](): AsyncIterableIterator<RecordBatch<T>> { yield* this[Symbol.iterator](); }
+}
+/**
+ * @ignore
+ * Asynchronous stream reader stub. Async iteration is forwarded to the
+ * async implementation; synchronous iteration always throws.
+ */
+export class AsyncRecordBatchStreamReader<T extends { [key: string]: DataType } = any> extends RecordBatchReader<T> {
+    constructor(protected _impl: AsyncRecordBatchStreamReaderImpl<T>) { super (_impl); }
+    public [Symbol.iterator](): IterableIterator<RecordBatch<T>> { throw new Error(`AsyncRecordBatchStreamReader is not Iterable`); }
+    public [Symbol.asyncIterator]() { return (this._impl as AsyncIterableIterator<RecordBatch<T>>)[Symbol.asyncIterator](); }
+}
+/**
+ * @ignore
+ * Synchronous file reader stub; narrows the implementation type to the sync file impl.
+ */
+export class RecordBatchFileReader<T extends { [key: string]: DataType } = any> extends RecordBatchStreamReader<T> {
+    constructor(protected _impl: RecordBatchFileReaderImpl<T>) { super (_impl); }
+}
+/**
+ * @ignore
+ * Asynchronous file reader stub; narrows the implementation type to the async file impl.
+ */
+export class AsyncRecordBatchFileReader<T extends { [key: string]: DataType } = any> extends AsyncRecordBatchStreamReader<T> {
+    constructor(protected _impl: AsyncRecordBatchFileReaderImpl<T>) { super (_impl); }
+}
+// Now override the return types for each sync/async RecordBatchReader variant
+/**
+ * @ignore
+ * Synchronous method signatures, merged with the class of the same name.
+ */
+export interface RecordBatchStreamReader<T extends { [key: string]: DataType } = any> extends RecordBatchReader<T> {
+    open(options?: OpenOptions | undefined): this;
+    cancel(): void;
+    throw(value?: any): IteratorResult<any>;
+    return(value?: any): IteratorResult<any>;
+    next(value?: any): IteratorResult<RecordBatch<T>>;
+}
+/**
+ * @ignore
+ * Promise-returning method signatures, merged with the class of the same name.
+ */
+export interface AsyncRecordBatchStreamReader<T extends { [key: string]: DataType } = any> extends RecordBatchReader<T> {
+    open(options?: OpenOptions | undefined): Promise<this>;
+    cancel(): Promise<void>;
+    throw(value?: any): Promise<IteratorResult<any>>;
+    return(value?: any): Promise<IteratorResult<any>>;
+    next(value?: any): Promise<IteratorResult<RecordBatch<T>>>;
+}
+/**
+ * @ignore
+ * Synchronous `readRecordBatch` signature, merged with the class of the same name.
+ */
+export interface RecordBatchFileReader<T extends { [key: string]: DataType } = any> extends RecordBatchStreamReader<T> {
+    readRecordBatch(index: number): RecordBatch<T> | null;
+}
+/**
+ * @ignore
+ * Promise-returning `readRecordBatch` signature, merged with the class of the same name.
+ */
+export interface AsyncRecordBatchFileReader<T extends { [key: string]: DataType } = any> extends AsyncRecordBatchStreamReader<T> {
+    readRecordBatch(index: number): Promise<RecordBatch<T> | null>;
+}
+/**
+ * @ignore
+ * Union of every implementation class a `RecordBatchReader` can delegate to:
+ * the JSON reader plus the sync and async file and stream readers.
+ */
+type RecordBatchReaderImpls<T extends { [key: string]: DataType } = any> =
+ RecordBatchJSONReaderImpl<T> |
+ RecordBatchFileReaderImpl<T> |
+ RecordBatchStreamReaderImpl<T> |
+ AsyncRecordBatchFileReaderImpl<T> |
+ AsyncRecordBatchStreamReaderImpl<T>;
+/**
+ * @ignore
+ * State and type-guard members shared by every reader implementation.
+ */
+interface RecordBatchReaderImpl<T extends { [key: string]: DataType } = any> {
+    closed: boolean;
+    schema: Schema<T>;
+    autoDestroy: boolean;
+    dictionaries: Map<number, Vector>;
+    isFile(): this is RecordBatchFileReaders<T>;
+    isStream(): this is RecordBatchStreamReaders<T>;
+    isSync(): this is RecordBatchReaders<T>;
+    isAsync(): this is AsyncRecordBatchReaders<T>;
+    reset(schema?: Schema<T> | null): this;
+}
+/**
+ * @ignore
+ * Synchronous iterator surface of a stream reader implementation.
+ */
+interface RecordBatchStreamReaderImpl<T extends { [key: string]: DataType } = any> extends RecordBatchReaderImpl<T> {
+    open(options?: OpenOptions): this;
+    cancel(): void;
+    throw(value?: any): IteratorResult<any>;
+    return(value?: any): IteratorResult<any>;
+    next(value?: any): IteratorResult<RecordBatch<T>>;
+    [Symbol.iterator](): IterableIterator<RecordBatch<T>>;
+}
+/**
+ * @ignore
+ * Promise-returning iterator surface of an async stream reader implementation.
+ */
+interface AsyncRecordBatchStreamReaderImpl<T extends { [key: string]: DataType } = any> extends RecordBatchReaderImpl<T> {
+    open(options?: OpenOptions): Promise<this>;
+    cancel(): Promise<void>;
+    throw(value?: any): Promise<IteratorResult<any>>;
+    return(value?: any): Promise<IteratorResult<any>>;
+    next(value?: any): Promise<IteratorResult<RecordBatch<T>>>;
+    [Symbol.asyncIterator](): AsyncIterableIterator<RecordBatch<T>>;
+}
+/**
+ * @ignore
+ * Adds synchronous indexed batch access to the stream implementation surface.
+ */
+interface RecordBatchFileReaderImpl<T extends { [key: string]: DataType } = any> extends RecordBatchStreamReaderImpl<T> {
+    readRecordBatch(index: number): RecordBatch<T> | null;
+}
+/**
+ * @ignore
+ * Adds Promise-returning indexed batch access to the async stream implementation surface.
+ */
+interface AsyncRecordBatchFileReaderImpl<T extends { [key: string]: DataType } = any> extends AsyncRecordBatchStreamReaderImpl<T> {
+    readRecordBatch(index: number): Promise<RecordBatch<T> | null>;
+}
+/**
+ * @ignore
+ * Base class for the reader implementations. Holds the schema, the
+ * dictionary vectors seen so far and the message counters, and turns decoded
+ * message metadata plus a message body into record batches and dictionary
+ * vectors.
+ */
+abstract class RecordBatchReaderImpl<T extends { [key: string]: DataType } = any> implements RecordBatchReaderImpl<T> {
+    public schema!: Schema<T>;
+    public closed = false;
+    public autoDestroy = true;
+    public dictionaries: Map<number, Vector>;
+    protected _dictionaryIndex = 0;
+    protected _recordBatchIndex = 0;
+    public get numDictionaries() { return this._dictionaryIndex; }
+    public get numRecordBatches() { return this._recordBatchIndex; }
+    constructor(dictionaries = new Map<number, Vector>()) {
+        this.dictionaries = dictionaries;
+    }
+    // Every guard defaults to false; subclasses override the ones that apply.
+    public isSync(): this is RecordBatchReaders<T> { return false; }
+    public isAsync(): this is AsyncRecordBatchReaders<T> { return false; }
+    public isFile(): this is RecordBatchFileReaders<T> { return false; }
+    public isStream(): this is RecordBatchStreamReaders<T> { return false; }
+    /** Clears both counters, replaces the schema and starts a fresh dictionary map. */
+    public reset(schema?: Schema<T> | null) {
+        this._dictionaryIndex = 0;
+        this._recordBatchIndex = 0;
+        this.schema = <any> schema;
+        this.dictionaries = new Map();
+        return this;
+    }
+    /** Builds a record batch for the current schema from decoded metadata and a message body. */
+    protected _loadRecordBatch(header: metadata.RecordBatch, body: any) {
+        return new RecordBatch<T>(this.schema, header.length, this._loadVectors(header, body, this.schema.fields));
+    }
+    /**
+     * Produces the dictionary vector for a dictionary batch. A delta batch is
+     * concatenated onto the dictionary already held for that id; a first
+     * batch is loaded as-is; a non-delta batch for an id that is already held
+     * returns the held dictionary unchanged.
+     */
+    protected _loadDictionaryBatch(header: metadata.DictionaryBatch, body: any) {
+        const { id, isDelta, data } = header;
+        const { dictionaries, schema } = this;
+        const dictionary = dictionaries.get(id);
+        if (isDelta || !dictionary) {
+            const type = schema.dictionaries.get(id)!;
+            // Load the values once, then append them or use them directly.
+            const vector = Vector.new(this._loadVectors(data, body, [type])[0]);
+            return (dictionary && isDelta ? dictionary.concat(vector) : vector) as Vector;
+        }
+        return dictionary;
+    }
+    /** Loads one entry per element of `types` from `body`, using the header's nodes and buffers. */
+    protected _loadVectors(header: metadata.RecordBatch, body: any, types: (Field | DataType)[]) {
+        return new VectorLoader(body, header.nodes, header.buffers, this.dictionaries).visitMany(types);
+    }
+}
+/** @ignore */
+class RecordBatchStreamReaderImpl<T extends { [key: string]: DataType } = any> extends RecordBatchReaderImpl<T> implements IterableIterator<RecordBatch<T>> {
+ protected _reader: MessageReader;
+ protected _handle: ByteStream | ArrowJSONLike;
+ constructor(source: ByteStream | ArrowJSONLike, dictionaries?: Map<number, Vector>) {
+ super(dictionaries);
+ this._reader = !isArrowJSON(source)
+ ? new MessageReader(this._handle = source)
+ : new JSONMessageReader(this._handle = source);
+ }
+ public isSync(): this is RecordBatchReaders<T> { return true; }
+ public isStream(): this is RecordBatchStreamReaders<T> { return true; }
+ public [Symbol.iterator](): IterableIterator<RecordBatch<T>> {
+ return this as IterableIterator<RecordBatch<T>>;
+ }
+ public cancel() {
+ if (!this.closed && (this.closed = true)) {
+ this.reset()._reader.return();
+ this._reader = <any> null;
+ this.dictionaries = <any> null;
+ }
+ }
+ /**
+  * Opens the reader: resolves `autoDestroy` from the options and, when no
+  * schema has been set yet, reads one from the underlying message reader.
+  * If no schema can be read the reader is cancelled. Does nothing once the
+  * reader is closed.
+  * @returns this reader, for chaining.
+  */
+ public open(options?: OpenOptions) {
+ if (!this.closed) {
+ this.autoDestroy = shouldAutoDestroy(this, options);
+ // Keep an existing schema; otherwise read it (and cache it) from the stream.
+ if (!(this.schema || (this.schema = this._reader.readSchema()!))) {
+ this.cancel();
+ }
+ }
+ return this;
+ }
+ /**
+  * Iterator `throw` hook. When the reader is still open and `autoDestroy`
+  * is enabled, the reader is marked closed, reset, and the call is forwarded
+  * to the underlying message reader. Otherwise a "done" result is returned.
+  */
+ public throw(value?: any): IteratorResult<any> {
+     if (!this.closed && this.autoDestroy && (this.closed = true)) {
+         return this.reset()._reader.throw(value);
+     }
+     // Already closed, or the reader must stay alive: report completion
+     // instead of falling off the end and returning `undefined`.
+     return ITERATOR_DONE;
+ }
+ /**
+  * Iterator `return` hook, also used by `next()` to signal the end of the
+  * stream. When the reader is still open and `autoDestroy` is enabled, the
+  * reader is marked closed, reset, and the call is forwarded to the
+  * underlying message reader. Otherwise a "done" result is returned.
+  */
+ public return(value?: any): IteratorResult<any> {
+     if (!this.closed && this.autoDestroy && (this.closed = true)) {
+         return this.reset()._reader.return(value);
+     }
+     // `next()` returns this value directly, so it must always be a valid
+     // IteratorResult even when the reader is kept open (autoDestroy false).
+     return ITERATOR_DONE;
+ }
+ /**
+  * Reads messages until the next RecordBatch is available and returns it.
+  *
+  * Schema messages reset the reader to the new schema, dictionary batches
+  * are decoded and stored under their dictionary id, and the first record
+  * batch encountered is returned. If the stream ends before any record
+  * batch was seen but a schema is known, a single empty placeholder batch is
+  * emitted so consumers still observe the schema.
+  */
+ public next(): IteratorResult<RecordBatch<T>> {
+     if (this.closed) { return ITERATOR_DONE; }
+     let message: Message | null;
+     const { _reader: reader } = this;
+     while (message = this._readNextMessageAndValidate()) {
+         if (message.isSchema()) {
+             this.reset(message.header());
+         } else if (message.isRecordBatch()) {
+             this._recordBatchIndex++;
+             const header = message.header();
+             const buffer = reader.readMessageBody(message.bodyLength);
+             const recordBatch = this._loadRecordBatch(header, buffer);
+             return { done: false, value: recordBatch };
+         } else if (message.isDictionaryBatch()) {
+             this._dictionaryIndex++;
+             const header = message.header();
+             const buffer = reader.readMessageBody(message.bodyLength);
+             const vector = this._loadDictionaryBatch(header, buffer);
+             // Keyed by the same id `_loadDictionaryBatch` uses to look dictionaries up.
+             this.dictionaries.set(header.id, vector);
+         }
+     }
+     if (this.schema && this._recordBatchIndex === 0) {
+         this._recordBatchIndex++;
+         return { done: false, value: new _InternalEmptyPlaceholderRecordBatch<T>(this.schema) };
+     }
+     return this.return();
+ }
+ /**
+  * Reads the next message from the underlying message reader, passing the
+  * optional expected header type through to `readMessage`.
+  */
+ protected _readNextMessageAndValidate<T extends MessageHeader>(type?: T | null) {
+ return this._reader.readMessage<T>(type);
+ }
+/** @ignore */
+class AsyncRecordBatchStreamReaderImpl<T extends { [key: string]: DataType } = any> extends RecordBatchReaderImpl<T> implements AsyncIterableIterator<RecordBatch<T>> {
+ protected _handle: AsyncByteStream;
+ protected _reader: AsyncMessageReader;
+ /**
+  * @param source The asynchronous byte stream to read IPC messages from; also kept in `_handle`.
+  * @param dictionaries Optional dictionary map forwarded to the base class.
+  */
+ constructor(source: AsyncByteStream, dictionaries?: Map<number, Vector>) {
+ super(dictionaries);
+ this._reader = new AsyncMessageReader(this._handle = source);
+ }
+ /** Type guard marking this implementation as an asynchronous reader; always `true`. */
+ public isAsync(): this is AsyncRecordBatchReaders<T> { return true; }
+ /** Type guard marking this implementation as a stream reader; always `true`. */
+ public isStream(): this is RecordBatchStreamReaders<T> { return true; }
+ /** The reader is its own async iterator: `for await` calls `next()` on this instance. */
+ public [Symbol.asyncIterator](): AsyncIterableIterator<RecordBatch<T>> {
+ return this as AsyncIterableIterator<RecordBatch<T>>;
+ }
+ /**
+  * Closes the reader unconditionally (regardless of `autoDestroy`): marks it
+  * closed, resets it, awaits the end of the underlying message reader, and
+  * drops the references to the message reader and the dictionaries. No-op
+  * when the reader is already closed.
+  */
+ public async cancel() {
+     if (this.closed) { return; }
+     this.closed = true;
+     await this.reset()._reader.return();
+     this._reader = <any> null;
+     this.dictionaries = <any> null;
+ }
+ /**
+  * Opens the reader: resolves `autoDestroy` from the options and, when no
+  * schema has been set yet, awaits one from the underlying message reader.
+  * If no schema can be read the reader is cancelled. Does nothing once the
+  * reader is closed.
+  * @returns a promise of this reader, for chaining.
+  */
+ public async open(options?: OpenOptions) {
+ if (!this.closed) {
+ this.autoDestroy = shouldAutoDestroy(this, options);
+ // Keep an existing schema; otherwise read it (and cache it) from the stream.
+ if (!(this.schema || (this.schema = (await this._reader.readSchema())!))) {
+ await this.cancel();
+ }
+ }
+ return this;
+ }
+ /**
+  * Async iterator `throw` hook. When the reader is still open and
+  * `autoDestroy` is enabled, the reader is marked closed, reset, and the
+  * call is forwarded to the underlying message reader. Otherwise a "done"
+  * result is returned.
+  */
+ public async throw(value?: any): Promise<IteratorResult<any>> {
+     if (!this.closed && this.autoDestroy && (this.closed = true)) {
+         return await this.reset()._reader.throw(value);
+     }
+     // Never resolve to `undefined`: callers expect an IteratorResult.
+     return ITERATOR_DONE;
+ }
+ /**
+  * Async iterator `return` hook, also used by `next()` to signal the end of
+  * the stream. When the reader is still open and `autoDestroy` is enabled,
+  * the reader is marked closed, reset, and the call is forwarded to the
+  * underlying message reader. Otherwise a "done" result is returned.
+  */
+ public async return(value?: any): Promise<IteratorResult<any>> {
+     if (!this.closed && this.autoDestroy && (this.closed = true)) {
+         return await this.reset()._reader.return(value);
+     }
+     // `next()` resolves to this value directly, so it must always be a valid
+     // IteratorResult even when the reader is kept open (autoDestroy false).
+     return ITERATOR_DONE;
+ }
+ /**
+  * Reads messages until the next RecordBatch is available and resolves to it.
+  *
+  * Schema messages reset the reader to the new schema, dictionary batches
+  * are decoded and stored under their dictionary id, and the first record
+  * batch encountered is returned. If the stream ends before any record
+  * batch was seen but a schema is known, a single empty placeholder batch is
+  * emitted so consumers still observe the schema.
+  */
+ public async next() {
+     if (this.closed) { return ITERATOR_DONE; }
+     let message: Message | null;
+     const { _reader: reader } = this;
+     while (message = await this._readNextMessageAndValidate()) {
+         if (message.isSchema()) {
+             await this.reset(message.header());
+         } else if (message.isRecordBatch()) {
+             this._recordBatchIndex++;
+             const header = message.header();
+             const buffer = await reader.readMessageBody(message.bodyLength);
+             const recordBatch = this._loadRecordBatch(header, buffer);
+             return { done: false, value: recordBatch };
+         } else if (message.isDictionaryBatch()) {
+             this._dictionaryIndex++;
+             const header = message.header();
+             const buffer = await reader.readMessageBody(message.bodyLength);
+             const vector = this._loadDictionaryBatch(header, buffer);
+             // Keyed by the same id `_loadDictionaryBatch` uses to look dictionaries up.
+             this.dictionaries.set(header.id, vector);
+         }
+     }
+     if (this.schema && this._recordBatchIndex === 0) {
+         this._recordBatchIndex++;
+         return { done: false, value: new _InternalEmptyPlaceholderRecordBatch<T>(this.schema) };
+     }
+     return await this.return();
+ }
+ /**
+  * Awaits the next message from the underlying async message reader, passing
+  * the optional expected header type through to `readMessage`.
+  */
+ protected async _readNextMessageAndValidate<T extends MessageHeader>(type?: T | null) {
+ return await this._reader.readMessage<T>(type);
+ }
+/** @ignore */
+class RecordBatchFileReaderImpl<T extends { [key: string]: DataType } = any> extends RecordBatchStreamReaderImpl<T> {
+ protected _footer?: Footer;
+ protected _handle!: RandomAccessFile;
+ /** The decoded file footer. Only set once the footer has been read by `open()`; asserted non-null here. */
+ public get footer() { return this._footer!; }
+ /** Number of dictionary batches listed in the footer, or 0 before the footer has been read. */
+ public get numDictionaries() { return this._footer ? this._footer.numDictionaries : 0; }
+ /** Number of record batches listed in the footer, or 0 before the footer has been read. */
+ public get numRecordBatches() { return this._footer ? this._footer.numRecordBatches : 0; }
+ /**
+  * @param source An existing RandomAccessFile, or raw bytes that are wrapped in one.
+  * @param dictionaries Optional dictionary map forwarded to the base class.
+  */
+ constructor(source: RandomAccessFile | ArrayBufferViewInput, dictionaries?: Map<number, Vector>) {
+ super(source instanceof RandomAccessFile ? source : new RandomAccessFile(source), dictionaries);
+ }
+ /** Type guard marking this implementation as a synchronous reader; always `true`. */
+ public isSync(): this is RecordBatchReaders<T> { return true; }
+ /** Type guard marking this implementation as a file-format reader; always `true`. */
+ public isFile(): this is RecordBatchFileReaders<T> { return true; }
+ /**
+  * Opens the file reader. On first use the footer is read, the schema is
+  * taken from it, and every dictionary batch listed in the footer is loaded
+  * up front. The inherited `open` then resolves `autoDestroy` from `options`.
+  * @returns this reader, for chaining.
+  */
+ public open(options?: OpenOptions) {
+     if (!this.closed && !this._footer) {
+         this.schema = (this._footer = this._readFooter()).schema;
+         for (const block of this._footer.dictionaryBatches()) {
+             block && this._readDictionaryBatch(this._dictionaryIndex++);
+         }
+     }
+     // Delegate so `options` are honoured and callers can chain on the result.
+     return super.open(options);
+ }
+ /**
+  * Random-access read of the record batch at `index`, located through the
+  * footer's block table.
+  * @returns the decoded RecordBatch, or `null` when the reader is closed,
+  * the index has no block, the seek fails, or the message found there is
+  * not a record batch.
+  */
+ public readRecordBatch(index: number) {
+     if (this.closed) { return null; }
+     // Lazily read the footer so the block table is available.
+     if (!this._footer) { this.open(); }
+     const block = this._footer && this._footer.getRecordBatch(index);
+     if (block && this._handle.seek(block.offset)) {
+         const message = this._reader.readMessage(MessageHeader.RecordBatch);
+         if (message?.isRecordBatch()) {
+             const header = message.header();
+             const buffer = this._reader.readMessageBody(message.bodyLength);
+             return this._loadRecordBatch(header, buffer);
+         }
+     }
+     return null;
+ }
+ /**
+  * Loads the dictionary batch at `index` of the footer's block table and
+  * stores the resulting vector under its dictionary id. Silently does
+  * nothing when the block is missing, the seek fails, or the message found
+  * there is not a dictionary batch.
+  */
+ protected _readDictionaryBatch(index: number) {
+     const block = this._footer && this._footer.getDictionaryBatch(index);
+     if (block && this._handle.seek(block.offset)) {
+         const message = this._reader.readMessage(MessageHeader.DictionaryBatch);
+         if (message?.isDictionaryBatch()) {
+             const header = message.header();
+             const buffer = this._reader.readMessageBody(message.bodyLength);
+             const vector = this._loadDictionaryBatch(header, buffer);
+             this.dictionaries.set(header.id, vector);
+         }
+     }
+ }
+ /**
+  * Reads and decodes the file footer. The footer length is the int32 stored
+  * `magicAndPadding` bytes before the end of the file; the footer itself is
+  * the `length` bytes immediately preceding that int32.
+  */
+ protected _readFooter() {
+ const { _handle } = this;
+ const offset = _handle.size - magicAndPadding;
+ const length = _handle.readInt32(offset);
+ const buffer = _handle.readAt(offset - length, length);
+ return Footer.decode(buffer);
+ }
+ /**
+  * Sequential-iteration hook used by `next()`: seeks to the block of the
+  * record batch at the current `_recordBatchIndex` (per the footer) and
+  * reads the message stored there.
+  * @returns the message, or `null` once all record batches listed in the
+  * footer have been consumed, or when the block is missing or the seek fails.
+  */
+ protected _readNextMessageAndValidate<T extends MessageHeader>(type?: T | null): Message<T> | null {
+     // Lazily read the footer so the block table is available.
+     if (!this._footer) { this.open(); }
+     if (this._footer && this._recordBatchIndex < this.numRecordBatches) {
+         const block = this._footer.getRecordBatch(this._recordBatchIndex);
+         if (block && this._handle.seek(block.offset)) {
+             return this._reader.readMessage(type);
+         }
+     }
+     return null;
+ }
+/** @ignore */
+class AsyncRecordBatchFileReaderImpl<T extends { [key: string]: DataType } = any> extends AsyncRecordBatchStreamReaderImpl<T>
+ implements AsyncRecordBatchFileReaderImpl<T> {
+ protected _footer?: Footer;
+ protected _handle!: AsyncRandomAccessFile;
+ /** The decoded file footer. Only set once the footer has been read by `open()`; asserted non-null here. */
+ public get footer() { return this._footer!; }
+ /** Number of dictionary batches listed in the footer, or 0 before the footer has been read. */
+ public get numDictionaries() { return this._footer ? this._footer.numDictionaries : 0; }
+ /** Number of record batches listed in the footer, or 0 before the footer has been read. */
+ public get numRecordBatches() { return this._footer ? this._footer.numRecordBatches : 0; }
+ /**
+  * @param source A FileHandle (wrapped in an AsyncRandomAccessFile) or an existing AsyncRandomAccessFile.
+  * @param byteLength Optional size of the file in bytes; only used when `source` has to be wrapped.
+  * @param dictionaries Optional dictionary map forwarded to the base class.
+  */
+ constructor(source: FileHandle, byteLength?: number, dictionaries?: Map<number, Vector>);
+ constructor(source: FileHandle | AsyncRandomAccessFile, dictionaries?: Map<number, Vector>);
+ constructor(source: FileHandle | AsyncRandomAccessFile, ...rest: any[]) {
+     // The first rest argument is the byteLength slot unless it is already the
+     // dictionaries Map (second overload). Only a real number is accepted as a
+     // length, so `(handle, 1024)`, `(handle, 1024, map)`, `(handle, undefined, map)`
+     // and `(handle, map)` all resolve to the values the caller passed.
+     const first = rest[0] instanceof Map ? undefined : rest.shift();
+     const byteLength = typeof first === 'number' ? first : undefined;
+     const dictionaries = rest[0] instanceof Map ? <Map<number, Vector>> rest.shift() : undefined;
+     super(source instanceof AsyncRandomAccessFile ? source : new AsyncRandomAccessFile(source, byteLength), dictionaries);
+ }
+ /** Type guard marking this implementation as a file-format reader; always `true`. */
+ public isFile(): this is RecordBatchFileReaders<T> { return true; }
+ /** Type guard marking this implementation as an asynchronous reader; always `true`. */
+ public isAsync(): this is AsyncRecordBatchReaders<T> { return true; }
+ /**
+  * Opens the file reader. On first use the footer is read, the schema is
+  * taken from it, and every dictionary batch listed in the footer is loaded
+  * up front. The inherited `open` then resolves `autoDestroy` from `options`.
+  * @returns a promise of this reader, for chaining.
+  */
+ public async open(options?: OpenOptions) {
+     if (!this.closed && !this._footer) {
+         this.schema = (this._footer = await this._readFooter()).schema;
+         for (const block of this._footer.dictionaryBatches()) {
+             block && await this._readDictionaryBatch(this._dictionaryIndex++);
+         }
+     }
+     // Delegate so `options` are honoured and callers can chain on the result.
+     return await super.open(options);
+ }
+ /**
+  * Random-access read of the record batch at `index`, located through the
+  * footer's block table.
+  * @returns a promise of the decoded RecordBatch, or of `null` when the
+  * reader is closed, the index has no block, the seek fails, or the message
+  * found there is not a record batch.
+  */
+ public async readRecordBatch(index: number) {
+     if (this.closed) { return null; }
+     // Lazily read the footer so the block table is available.
+     if (!this._footer) { await this.open(); }
+     const block = this._footer && this._footer.getRecordBatch(index);
+     if (block && (await this._handle.seek(block.offset))) {
+         const message = await this._reader.readMessage(MessageHeader.RecordBatch);
+         if (message?.isRecordBatch()) {
+             const header = message.header();
+             const buffer = await this._reader.readMessageBody(message.bodyLength);
+             return this._loadRecordBatch(header, buffer);
+         }
+     }
+     return null;
+ }
+ /**
+  * Loads the dictionary batch at `index` of the footer's block table and
+  * stores the resulting vector under its dictionary id. Silently does
+  * nothing when the block is missing, the seek fails, or the message found
+  * there is not a dictionary batch.
+  */
+ protected async _readDictionaryBatch(index: number) {
+     const block = this._footer && this._footer.getDictionaryBatch(index);
+     if (block && (await this._handle.seek(block.offset))) {
+         const message = await this._reader.readMessage(MessageHeader.DictionaryBatch);
+         if (message?.isDictionaryBatch()) {
+             const header = message.header();
+             const buffer = await this._reader.readMessageBody(message.bodyLength);
+             const vector = this._loadDictionaryBatch(header, buffer);
+             this.dictionaries.set(header.id, vector);
+         }
+     }
+ }
+ /**
+  * Reads and decodes the file footer. The footer length is the int32 stored
+  * `magicAndPadding` bytes before the end of the file; the footer itself is
+  * the `length` bytes immediately preceding that int32.
+  */
+ protected async _readFooter() {
+ const { _handle } = this;
+ // Wait for any pending work on the handle before `size` is used below.
+ _handle._pending && await _handle._pending;
+ const offset = _handle.size - magicAndPadding;
+ const length = await _handle.readInt32(offset);
+ const buffer = await _handle.readAt(offset - length, length);
+ return Footer.decode(buffer);
+ }
+ /**
+  * Sequential-iteration hook used by `next()`: seeks to the block of the
+  * record batch at the current `_recordBatchIndex` (per the footer) and
+  * reads the message stored there.
+  * @returns a promise of the message, or of `null` once all record batches
+  * listed in the footer have been consumed, or when the block is missing or
+  * the seek fails.
+  */
+ protected async _readNextMessageAndValidate<T extends MessageHeader>(type?: T | null): Promise<Message<T> | null> {
+     // Lazily read the footer so the block table is available.
+     if (!this._footer) { await this.open(); }
+     if (this._footer && this._recordBatchIndex < this.numRecordBatches) {
+         const block = this._footer.getRecordBatch(this._recordBatchIndex);
+         if (block && await this._handle.seek(block.offset)) {
+             return await this._reader.readMessage(type);
+         }
+     }
+     return null;
+ }
+/**
+ * Synchronous reader for Arrow JSON input. It reuses the stream reader's
+ * message loop and only swaps the vector decoding step for a JSONVectorLoader.
+ * @ignore
+ */
+class RecordBatchJSONReaderImpl<T extends { [key: string]: DataType } = any> extends RecordBatchStreamReaderImpl<T> {
+ /**
+  * @param source The Arrow JSON object to read.
+  * @param dictionaries Optional dictionary map forwarded to the base class.
+  */
+ constructor(source: ArrowJSONLike, dictionaries?: Map<number, Vector>) {
+ super(source, dictionaries);
+ }
+ /** Decodes one vector per entry of `types` from a JSON message body using a JSONVectorLoader. */
+ protected _loadVectors(header: metadata.RecordBatch, body: any, types: (Field | DataType)[]) {
+ return new JSONVectorLoader(body, header.nodes, header.buffers, this.dictionaries).visitMany(types);
+ }
+// Define some helper functions and static implementations down here. There's
+// a bit of branching in the static methods that can lead to the same routines
+// being executed, so we've broken those out here for readability.
+/**
+ * Resolves the effective `autoDestroy` flag: an explicit boolean in
+ * `options` wins, otherwise the reader's current setting is kept.
+ * @ignore
+ */
+function shouldAutoDestroy(self: { autoDestroy: boolean }, options?: OpenOptions) {
+ return options && (typeof options['autoDestroy'] === 'boolean') ? options['autoDestroy'] : self['autoDestroy'];
+/**
+ * Yields the same reader once per logical stream found in `source`. The
+ * reader is opened with `autoDestroy: false` so it survives reaching the end
+ * of one stream; after each yield it is reset and re-opened, and iteration
+ * stops when re-opening leaves it closed. The reader is always cancelled
+ * when the generator finishes or is abandoned.
+ * @ignore
+ */
+function* readAllSync<T extends { [key: string]: DataType } = any>(source: RecordBatchReaders<T> | FromArg0 | FromArg2) {
+    const reader = RecordBatchReader.from<T>(<any> source) as RecordBatchReaders<T>;
+    try {
+        if (!reader.open({ autoDestroy: false }).closed) {
+            do { yield reader; } while (!(reader.reset().open()).closed);
+        }
+    } finally { reader.cancel(); }
+}
+/**
+ * Async counterpart of `readAllSync`: yields the same reader once per
+ * logical stream found in `source`. The reader is opened with
+ * `autoDestroy: false` so it survives reaching the end of one stream; after
+ * each yield it is reset and re-opened, and iteration stops when re-opening
+ * leaves it closed. The reader is always cancelled when the generator
+ * finishes or is abandoned.
+ * @ignore
+ */
+async function* readAllAsync<T extends { [key: string]: DataType } = any>(source: AsyncRecordBatchReaders<T> | FromArg1 | FromArg3 | FromArg4 | FromArg5) {
+    const reader = await RecordBatchReader.from<T>(<any> source) as RecordBatchReader<T>;
+    try {
+        if (!(await reader.open({ autoDestroy: false })).closed) {
+            do { yield reader; } while (!(await reader.reset().open()).closed);
+        }
+    } finally { await reader.cancel(); }
+}
+/**
+ * Wraps an Arrow JSON object in a RecordBatchStreamReader backed by the
+ * JSON reader implementation.
+ * @ignore
+ */
+function fromArrowJSON<T extends { [key: string]: DataType }>(source: ArrowJSONLike) {
+ return new RecordBatchStreamReader(new RecordBatchJSONReaderImpl<T>(source));
+/**
+ * Chooses a reader for a synchronous byte stream by peeking at its first
+ * bytes (the magic string length rounded up to a multiple of 8):
+ * - fewer than 4 bytes available: a stream reader over an empty source;
+ * - bytes start with the Arrow magic string: the stream is read in full and
+ *   handed to a file reader;
+ * - otherwise: a stream reader over the source.
+ * @ignore
+ */
+function fromByteStream<T extends { [key: string]: DataType }>(source: ByteStream) {
+    const bytes = source.peek((magicLength + 7) & ~7);
+    return bytes && bytes.byteLength >= 4 ? !checkForMagicArrowString(bytes)
+        ? new RecordBatchStreamReader(new RecordBatchStreamReaderImpl<T>(source))
+        : new RecordBatchFileReader(new RecordBatchFileReaderImpl<T>(source.read()))
+        : new RecordBatchStreamReader(new RecordBatchStreamReaderImpl<T>(function*(): any {}()));
+}
+/**
+ * Chooses a reader for an asynchronous byte stream by peeking at its first
+ * bytes (the magic string length rounded up to a multiple of 8):
+ * - fewer than 4 bytes available: an async stream reader over an empty source;
+ * - bytes start with the Arrow magic string: the stream is read in full and
+ *   handed to a (synchronous) file reader over the buffered bytes;
+ * - otherwise: an async stream reader over the source.
+ * @ignore
+ */
+async function fromAsyncByteStream<T extends { [key: string]: DataType }>(source: AsyncByteStream) {
+    const bytes = await source.peek((magicLength + 7) & ~7);
+    return bytes && bytes.byteLength >= 4 ? !checkForMagicArrowString(bytes)
+        ? new AsyncRecordBatchStreamReader(new AsyncRecordBatchStreamReaderImpl<T>(source))
+        : new RecordBatchFileReader(new RecordBatchFileReaderImpl<T>(await source.read()))
+        : new AsyncRecordBatchStreamReader(new AsyncRecordBatchStreamReaderImpl<T>(async function*(): any {}()));
+}
+/**
+ * Chooses a reader for a file handle. The file is treated as the Arrow file
+ * format when it is at least `magicX2AndPadding` bytes long and starts with
+ * the Arrow magic string; otherwise it is read as a stream.
+ * @ignore
+ */
+async function fromFileHandle<T extends { [key: string]: DataType }>(source: FileHandle) {
+ const { size } = await source.stat();
+ const file = new AsyncRandomAccessFile(source, size);
+ if (size >= magicX2AndPadding) {
+ if (checkForMagicArrowString(await file.readAt(0, (magicLength + 7) & ~7))) {
+ return new AsyncRecordBatchFileReader(new AsyncRecordBatchFileReaderImpl<T>(file));
+ }
+ }
+ return new AsyncRecordBatchStreamReader(new AsyncRecordBatchStreamReaderImpl<T>(file));
diff --git a/src/arrow/js/src/ipc/writer.ts b/src/arrow/js/src/ipc/writer.ts
new file mode 100644
index 000000000..12aa83355
--- /dev/null
+++ b/src/arrow/js/src/ipc/writer.ts
@@ -0,0 +1,492 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+import { Table } from '../table';
+import { MAGIC } from './message';
+import { Vector } from '../vector';
+import { Column } from '../column';
+import { DataType } from '../type';
+import { Schema, Field } from '../schema';
+import { Message } from './metadata/message';
+import * as metadata from './metadata/message';
+import { FileBlock, Footer } from './metadata/file';
+import { MessageHeader, MetadataVersion } from '../enum';
+import { compareSchemas } from '../visitor/typecomparator';
+import { WritableSink, AsyncByteQueue } from '../io/stream';
+import { VectorAssembler } from '../visitor/vectorassembler';
+import { JSONTypeAssembler } from '../visitor/jsontypeassembler';
+import { JSONVectorAssembler } from '../visitor/jsonvectorassembler';
+import { ArrayBufferViewInput, toUint8Array } from '../util/buffer';
+import { RecordBatch, _InternalEmptyPlaceholderRecordBatch } from '../recordbatch';
+import { Writable, ReadableInterop, ReadableDOMStreamOptions } from '../io/interfaces';
+import { isPromise, isAsyncIterable, isWritableDOMStream, isWritableNodeStream, isIterable, isObject } from '../util/compat';
+export interface RecordBatchStreamWriterOptions {
+ /**
+  * Whether the writer closes its underlying sink when `finish()` is called,
+  * or when a batch with a different schema is written after writing has
+  * started. When `false`, the writer is reset instead so it can be reused.
+  * Defaults to `true`.
+  */
+ autoDestroy?: boolean;
+ /**
+  * A flag indicating whether the RecordBatchWriter should construct pre-0.15.0
+  * encapsulated IPC Messages, which reserves 4 bytes for the Message metadata
+  * length instead of 8. Defaults to `false`.
+  * @see https://issues.apache.org/jira/browse/ARROW-6313
+  */
+ writeLegacyIpcFormat?: boolean;
+export class RecordBatchWriter<T extends { [key: string]: DataType } = any> extends ReadableInterop<Uint8Array> implements Writable<RecordBatch<T>> {
+ /**
+  * Placeholder that always throws.
+  * NOTE(review): presumably replaced by a Node-specific entry point, in the
+  * same way the DOM entry point assigns `throughDOM` — confirm.
+  * @nocollapse
+  */
+ // @ts-ignore
+ public static throughNode(options?: import('stream').DuplexOptions & { autoDestroy: boolean }): import('stream').Duplex {
+ throw new Error(`"throughNode" not available in this environment`);
+ }
+ /**
+  * Placeholder that always throws. The DOM entry point (`Arrow.dom.ts`)
+  * replaces this static with the real WhatWG stream implementation.
+  * @nocollapse
+  */
+ public static throughDOM<T extends { [key: string]: DataType }>(
+ // @ts-ignore
+ writableStrategy?: QueuingStrategy<RecordBatch<T>> & { autoDestroy: boolean },
+ // @ts-ignore
+ readableStrategy?: { highWaterMark?: number; size?: any }
+ ): { writable: WritableStream<Table<T> | RecordBatch<T>>; readable: ReadableStream<Uint8Array> } {
+ throw new Error(`"throughDOM" not available in this environment`);
+ }
+ /**
+  * @param options Writer options. Anything that is not an object is replaced
+  * by the defaults; non-boolean fields fall back to `autoDestroy: true` and
+  * `writeLegacyIpcFormat: false`.
+  */
+ constructor(options?: RecordBatchStreamWriterOptions) {
+ super();
+ isObject(options) || (options = { autoDestroy: true, writeLegacyIpcFormat: false });
+ this._autoDestroy = (typeof options.autoDestroy === 'boolean') ? options.autoDestroy : true;
+ this._writeLegacyIpcFormat = (typeof options.writeLegacyIpcFormat === 'boolean') ? options.writeLegacyIpcFormat : false;
+ }
+ // Number of bytes written to the sink so far; used as the offset of file blocks.
+ protected _position = 0;
+ // `_write` is a no-op until this is set (by `reset` when a schema is supplied).
+ protected _started = false;
+ // See RecordBatchStreamWriterOptions.autoDestroy.
+ protected _autoDestroy: boolean;
+ // See RecordBatchStreamWriterOptions.writeLegacyIpcFormat.
+ protected _writeLegacyIpcFormat: boolean;
+ // @ts-ignore
+ protected _sink = new AsyncByteQueue();
+ // Schema of the stream currently being written, or null when none has been written.
+ protected _schema: Schema | null = null;
+ // Blocks recorded by `_writeMessage` for every dictionary / record batch message.
+ protected _dictionaryBlocks: FileBlock[] = [];
+ protected _recordBatchBlocks: FileBlock[] = [];
+ // Per dictionary id: how many dictionary values have already been written.
+ protected _dictionaryDeltaOffsets = new Map<number, number>();
+ /**
+  * Returns the sink's contents as a string, by delegating to the sink's `toString`.
+  * @param sync Pass `true` for a synchronous result; otherwise a promise is returned.
+  */
+ public toString(sync: true): string;
+ public toString(sync?: false): Promise<string>;
+ public toString(sync: any = false) {
+ return this._sink.toString(sync) as Promise<string> | string;
+ }
+ /**
+  * Returns the sink's contents as a Uint8Array, by delegating to the sink's `toUint8Array`.
+  * @param sync Pass `true` for a synchronous result; otherwise a promise is returned.
+  */
+ public toUint8Array(sync: true): Uint8Array;
+ public toUint8Array(sync?: false): Promise<Uint8Array>;
+ public toUint8Array(sync: any = false) {
+ return this._sink.toUint8Array(sync) as Promise<Uint8Array> | Uint8Array;
+ }
+ /**
+  * Writes every record batch of `input` and then finishes the writer.
+  * Synchronous inputs (a Table or an iterable of batches) return the writer;
+  * async iterables and promises return a promise of the writer.
+  */
+ public writeAll(input: Table<T> | Iterable<RecordBatch<T>>): this;
+ public writeAll(input: AsyncIterable<RecordBatch<T>>): Promise<this>;
+ public writeAll(input: PromiseLike<AsyncIterable<RecordBatch<T>>>): Promise<this>;
+ public writeAll(input: PromiseLike<Table<T> | Iterable<RecordBatch<T>>>): Promise<this>;
+ public writeAll(input: PromiseLike<any> | Table<T> | Iterable<RecordBatch<T>> | AsyncIterable<RecordBatch<T>>) {
+ if (isPromise<any>(input)) {
+ // Unwrap the promise, then dispatch again on the resolved value.
+ return input.then((x) => this.writeAll(x));
+ } else if (isAsyncIterable<RecordBatch<T>>(input)) {
+ return writeAllAsync(this, input);
+ }
+ return writeAll(this, <any> input);
+ }
+ /** The sink's `closed` value. */
+ public get closed() { return this._sink.closed; }
+ /** Async-iterates the sink's output. */
+ public [Symbol.asyncIterator]() { return this._sink[Symbol.asyncIterator](); }
+ /** Exposes the sink's output as a DOM stream. */
+ public toDOMStream(options?: ReadableDOMStreamOptions) { return this._sink.toDOMStream(options); }
+ /** Exposes the sink's output as a Node stream. */
+ public toNodeStream(options?: import('stream').ReadableOptions) { return this._sink.toNodeStream(options); }
+ /**
+  * Resets the writer (which writes the footer if a stream was started) and
+  * then closes the sink.
+  */
+ public close() {
+ return this.reset()._sink.close();
+ }
+ /**
+  * Resets the writer (which writes the footer if a stream was started) and
+  * then aborts the sink with the given reason.
+  */
+ public abort(reason?: any) {
+ return this.reset()._sink.abort(reason);
+ }
+ /**
+  * Ends the current stream. With `autoDestroy` the sink is closed; without
+  * it the writer is reset against the same sink and schema so it can be
+  * reused.
+  * @returns this writer.
+  */
+ public finish() {
+     if (this._autoDestroy) {
+         this.close();
+     } else {
+         this.reset(this._sink, this._schema);
+     }
+     return this;
+ }
+ /**
+  * Ends the stream in progress (if any) and prepares the writer for a new one.
+  *
+  * @param sink Where to write. The current sink or any AsyncByteQueue is used
+  * directly; any other sink gets a fresh internal queue that is piped into it
+  * when it is a writable DOM or Node stream.
+  * @param schema Schema of the next stream. `null` clears the schema and
+  * rewinds the byte position; a schema that differs from the current one is
+  * stored and written immediately.
+  * @returns this writer.
+  */
+ public reset(sink: WritableSink<ArrayBufferViewInput> = this._sink, schema: Schema<T> | null = null) {
+ if ((sink === this._sink) || (sink instanceof AsyncByteQueue)) {
+ this._sink = sink as AsyncByteQueue;
+ } else {
+ this._sink = new AsyncByteQueue();
+ if (sink && isWritableDOMStream(sink)) {
+ this.toDOMStream({ type: 'bytes' }).pipeTo(sink);
+ } else if (sink && isWritableNodeStream(sink)) {
+ this.toNodeStream({ objectMode: false }).pipe(sink);
+ }
+ }
+ // Terminate the previous stream before any per-stream state is cleared.
+ if (this._started && this._schema) {
+ this._writeFooter(this._schema);
+ }
+ this._started = false;
+ this._dictionaryBlocks = [];
+ this._recordBatchBlocks = [];
+ this._dictionaryDeltaOffsets = new Map();
+ // When `schema` equals the current schema nothing is re-written and
+ // `_started` stays false.
+ if (!schema || !(compareSchemas(schema, this._schema))) {
+ if (schema === null) {
+ this._position = 0;
+ this._schema = null;
+ } else {
+ // `_started` must be set first: `_write` drops everything until it is.
+ this._started = true;
+ this._schema = schema;
+ this._writeSchema(schema);
+ }
+ }
+ return this;
+ }
+ /**
+  * Writes a Table, a RecordBatch, or an iterable of RecordBatches.
+  *
+  * A null/undefined payload, or a Table/RecordBatch without a schema,
+  * finishes the writer. When the payload's schema differs from the one being
+  * written, the writer either closes (stream already started and
+  * `autoDestroy` set) or is reset to the new schema before writing.
+  *
+  * @throws Error if the writer has no sink.
+  */
+ public write(payload?: Table<T> | RecordBatch<T> | Iterable<RecordBatch<T>> | null) {
+ let schema: Schema<T> | null = null;
+ if (!this._sink) {
+ throw new Error(`RecordBatchWriter is closed`);
+ } else if (payload == null) {
+ return this.finish() && undefined;
+ } else if (payload instanceof Table && !(schema = payload.schema)) {
+ return this.finish() && undefined;
+ } else if (payload instanceof RecordBatch && !(schema = payload.schema)) {
+ return this.finish() && undefined;
+ }
+ if (schema && !compareSchemas(schema, this._schema)) {
+ if (this._started && this._autoDestroy) {
+ return this.close();
+ }
+ this.reset(this._sink, schema);
+ }
+ if (payload instanceof RecordBatch) {
+ // Placeholder batches carry a schema but no data, so nothing is written for them.
+ if (!(payload instanceof _InternalEmptyPlaceholderRecordBatch)) {
+ this._writeRecordBatch(payload);
+ }
+ } else if (payload instanceof Table) {
+ this.writeAll(payload.chunks);
+ } else if (isIterable(payload)) {
+ this.writeAll(payload);
+ }
+ }
+ /**
+  * Writes one encapsulated IPC message: an optional continuation indicator,
+  * the int32 metadata length, the flatbuffer-encoded message, and zero
+  * padding up to `alignment`. Record batch and dictionary batch messages are
+  * also recorded as FileBlocks at the current byte position.
+  *
+  * @param message The message to encode and write.
+  * @param alignment Byte boundary that prefix + metadata are padded to.
+  * The mask arithmetic below assumes a power of two.
+  * @returns this writer.
+  */
+ protected _writeMessage<T extends MessageHeader>(message: Message<T>, alignment = 8) {
+ const a = alignment - 1;
+ const buffer = Message.encode(message);
+ const flatbufferSize = buffer.byteLength;
+ // 8-byte prefix = continuation indicator + length; legacy format has the length only.
+ const prefixSize = !this._writeLegacyIpcFormat ? 8 : 4;
+ const alignedSize = (flatbufferSize + prefixSize + a) & ~a;
+ const nPaddingBytes = alignedSize - flatbufferSize - prefixSize;
+ if (message.headerType === MessageHeader.RecordBatch) {
+ this._recordBatchBlocks.push(new FileBlock(alignedSize, message.bodyLength, this._position));
+ } else if (message.headerType === MessageHeader.DictionaryBatch) {
+ this._dictionaryBlocks.push(new FileBlock(alignedSize, message.bodyLength, this._position));
+ }
+ // If not in legacy pre-0.15.0 mode, write the stream continuation indicator
+ if (!this._writeLegacyIpcFormat) {
+ this._write(Int32Array.of(-1));
+ }
+ // Write the flatbuffer size prefix including padding
+ this._write(Int32Array.of(alignedSize - prefixSize));
+ // Write the flatbuffer
+ if (flatbufferSize > 0) { this._write(buffer); }
+ // Write any padding
+ return this._writePadding(nPaddingBytes);
+ }
+ /**
+  * Lowest-level write: converts `chunk` to bytes, appends them to the sink,
+  * and advances the byte position. Nothing is written before a stream has
+  * been started, and empty chunks are skipped.
+  * @returns this writer.
+  */
+ protected _write(chunk: ArrayBufferViewInput) {
+     if (!this._started) { return this; }
+     const bytes = toUint8Array(chunk);
+     if (bytes && bytes.byteLength > 0) {
+         this._sink.write(bytes);
+         this._position += bytes.byteLength;
+     }
+     return this;
+ }
+ /** Writes the schema as an IPC message. */
+ protected _writeSchema(schema: Schema<T>) {
+ return this._writeMessage(Message.from(schema));
+ }
+ /**
+  * Writes the end-of-stream marker: a zero length, preceded by the
+  * continuation indicator (-1) unless the legacy format is in use.
+  * `schema` is unused here.
+  */
+ // @ts-ignore
+ protected _writeFooter(schema: Schema<T>) {
+ // eos bytes
+ return this._writeLegacyIpcFormat
+ ? this._write(Int32Array.of(0))
+ : this._write(Int32Array.of(-1, 0));
+ }
+ /** Writes the Arrow magic bytes. */
+ protected _writeMagic() {
+ return this._write(MAGIC);
+ }
+ /** Writes `nBytes` zero bytes; a non-positive count writes nothing. */
+ protected _writePadding(nBytes: number) {
+ return nBytes > 0 ? this._write(new Uint8Array(nBytes)) : this;
+ }
+ /**
+  * Writes one record batch: first any dictionary values the batch needs,
+  * then the RecordBatch message, then the body buffers.
+  */
+ protected _writeRecordBatch(batch: RecordBatch<T>) {
+ const { byteLength, nodes, bufferRegions, buffers } = VectorAssembler.assemble(batch);
+ const recordBatch = new metadata.RecordBatch(batch.length, nodes, bufferRegions);
+ const message = Message.from(recordBatch, byteLength);
+ return this
+ ._writeDictionaries(batch)
+ ._writeMessage(message)
+ ._writeBodyBuffers(buffers);
+ }
+ /**
+  * Writes one dictionary batch message followed by its body buffers, and
+  * adds the number of values written to the running total for `id`.
+  * @param dictionary The dictionary values to write.
+  * @param id The dictionary id.
+  * @param isDelta Whether the batch is flagged as a delta.
+  */
+ protected _writeDictionaryBatch(dictionary: Vector, id: number, isDelta = false) {
+ this._dictionaryDeltaOffsets.set(id, dictionary.length + (this._dictionaryDeltaOffsets.get(id) || 0));
+ const { byteLength, nodes, bufferRegions, buffers } = VectorAssembler.assemble(dictionary);
+ const recordBatch = new metadata.RecordBatch(dictionary.length, nodes, bufferRegions);
+ const dictionaryBatch = new metadata.DictionaryBatch(recordBatch, id, isDelta);
+ const message = Message.from(dictionaryBatch, byteLength);
+ return this
+ ._writeMessage(message)
+ ._writeBodyBuffers(buffers);
+ }
+ /**
+  * Writes each non-empty body buffer followed by the zero padding needed to
+  * bring its length up to a multiple of 8 bytes. Missing or empty buffers
+  * are skipped entirely.
+  * @returns this writer.
+  */
+ protected _writeBodyBuffers(buffers: ArrayBufferView[]) {
+     for (const buffer of buffers) {
+         if (!buffer) { continue; }
+         const size = buffer.byteLength;
+         if (!(size > 0)) { continue; }
+         this._write(buffer);
+         const padding = ((size + 7) & ~7) - size;
+         if (padding > 0) {
+             this._writePadding(padding);
+         }
+     }
+     return this;
+ }
+ /**
+  * Writes the dictionary values of `batch` that have not been written yet.
+  * The first time an id is seen the whole dictionary is written; afterwards
+  * only the values past the already-written offset are written, flagged as
+  * delta batches. Chunked dictionaries are written one chunk per message.
+  */
+ protected _writeDictionaries(batch: RecordBatch<T>) {
+ for (let [id, dictionary] of batch.dictionaries) {
+ let offset = this._dictionaryDeltaOffsets.get(id) || 0;
+ // Skip dictionaries that gained no new values since the last write.
+ if (offset === 0 || (dictionary = dictionary.slice(offset)).length > 0) {
+ const chunks = 'chunks' in dictionary ? (dictionary as any).chunks : [dictionary];
+ for (const chunk of chunks) {
+ this._writeDictionaryBatch(chunk, id, offset > 0);
+ offset += chunk.length;
+ }
+ }
+ }
+ return this;
+ }
+/**
+ * RecordBatchWriter variant that adds a static `writeAll` convenience and
+ * otherwise inherits all writing behavior from the base class.
+ * @ignore
+ */
+export class RecordBatchStreamWriter<T extends { [key: string]: DataType } = any> extends RecordBatchWriter<T> {
+ public static writeAll<T extends { [key: string]: DataType } = any>(input: Table<T> | Iterable<RecordBatch<T>>, options?: RecordBatchStreamWriterOptions): RecordBatchStreamWriter<T>;
+ public static writeAll<T extends { [key: string]: DataType } = any>(input: AsyncIterable<RecordBatch<T>>, options?: RecordBatchStreamWriterOptions): Promise<RecordBatchStreamWriter<T>>;
+ public static writeAll<T extends { [key: string]: DataType } = any>(input: PromiseLike<AsyncIterable<RecordBatch<T>>>, options?: RecordBatchStreamWriterOptions): Promise<RecordBatchStreamWriter<T>>;
+ public static writeAll<T extends { [key: string]: DataType } = any>(input: PromiseLike<Table<T> | Iterable<RecordBatch<T>>>, options?: RecordBatchStreamWriterOptions): Promise<RecordBatchStreamWriter<T>>;
+ /**
+  * Creates a new stream writer with `options`, writes every batch of
+  * `input` to it and finishes it. Returns the writer for synchronous input
+  * and a promise of the writer for promises and async iterables.
+  * @nocollapse
+  */
+ public static writeAll<T extends { [key: string]: DataType } = any>(input: any, options?: RecordBatchStreamWriterOptions) {
+ const writer = new RecordBatchStreamWriter<T>(options);
+ if (isPromise<any>(input)) {
+ return input.then((x) => writer.writeAll(x));
+ } else if (isAsyncIterable<RecordBatch<T>>(input)) {
+ return writeAllAsync(writer, input);
+ }
+ return writeAll(writer, input);
+ }
+/** @ignore */
+export class RecordBatchFileWriter<T extends { [key: string]: DataType } = any> extends RecordBatchWriter<T> {
+ public static writeAll<T extends { [key: string]: DataType } = any>(input: Table<T> | Iterable<RecordBatch<T>>): RecordBatchFileWriter<T>;
+ public static writeAll<T extends { [key: string]: DataType } = any>(input: AsyncIterable<RecordBatch<T>>): Promise<RecordBatchFileWriter<T>>;
+ public static writeAll<T extends { [key: string]: DataType } = any>(input: PromiseLike<AsyncIterable<RecordBatch<T>>>): Promise<RecordBatchFileWriter<T>>;
+ public static writeAll<T extends { [key: string]: DataType } = any>(input: PromiseLike<Table<T> | Iterable<RecordBatch<T>>>): Promise<RecordBatchFileWriter<T>>;
+ /**
+  * Creates a new file writer, writes every batch of `input` to it and
+  * finishes it. Returns the writer for synchronous input and a promise of
+  * the writer for promises and async iterables.
+  * @nocollapse
+  */
+ public static writeAll<T extends { [key: string]: DataType } = any>(input: any) {
+ const writer = new RecordBatchFileWriter<T>();
+ if (isPromise<any>(input)) {
+ return input.then((x) => writer.writeAll(x));
+ } else if (isAsyncIterable<RecordBatch<T>>(input)) {
+ return writeAllAsync(writer, input);
+ }
+ return writeAll(writer, input);
+ }
+ /** Creates a file writer with default options; `autoDestroy` is always on. */
+ constructor() {
+ super();
+ this._autoDestroy = true;
+ }
+ /**
+  * Starts the file by writing the magic bytes followed by 2 bytes of
+  * padding. The schema itself is not written here; `_writeFooter` passes it
+  * to the Footer.
+  */
+ // @ts-ignore
+ protected _writeSchema(schema: Schema<T>) {
+ return this._writeMagic()._writePadding(2);
+ }
+ /**
+  * Ends the file: writes the end-of-stream marker, the encoded Footer
+  * (schema plus the record batch and dictionary blocks recorded while
+  * writing), the footer's byte length, and the closing magic bytes.
+  */
+ protected _writeFooter(schema: Schema<T>) {
+ const buffer = Footer.encode(new Footer(
+ schema, MetadataVersion.V4,
+ this._recordBatchBlocks, this._dictionaryBlocks
+ ));
+ return super
+ ._writeFooter(schema) // EOS bytes for sequential readers
+ ._write(buffer) // Write the flatbuffer
+ ._write(Int32Array.of(buffer.byteLength)) // then the footer size suffix
+ ._writeMagic(); // then the magic suffix
+ }
+/** @ignore */
+export class RecordBatchJSONWriter<T extends { [key: string]: DataType } = any> extends RecordBatchWriter<T> {
+ public static writeAll<T extends { [key: string]: DataType } = any>(this: typeof RecordBatchWriter, input: Table<T> | Iterable<RecordBatch<T>>): RecordBatchJSONWriter<T>;
+ // @ts-ignore
+ public static writeAll<T extends { [key: string]: DataType } = any>(this: typeof RecordBatchWriter, input: AsyncIterable<RecordBatch<T>>): Promise<RecordBatchJSONWriter<T>>;
+ public static writeAll<T extends { [key: string]: DataType } = any>(this: typeof RecordBatchWriter, input: PromiseLike<AsyncIterable<RecordBatch<T>>>): Promise<RecordBatchJSONWriter<T>>;
+ public static writeAll<T extends { [key: string]: DataType } = any>(this: typeof RecordBatchWriter, input: PromiseLike<Table<T> | Iterable<RecordBatch<T>>>): Promise<RecordBatchJSONWriter<T>>;
+ /**
+  * Creates a new JSON writer and delegates to its instance `writeAll`,
+  * which handles synchronous, promised and async-iterable input.
+  * @nocollapse
+  */
+ public static writeAll<T extends { [key: string]: DataType } = any>(this: typeof RecordBatchWriter, input: any) {
+ return new RecordBatchJSONWriter<T>().writeAll(input as any);
+ }
+ // Batches buffered by `_writeRecordBatch`; serialized when the writer is closed.
+ private _recordBatches: RecordBatch[];
+ // Batches that carry dictionaries; their dictionaries are serialized when the writer is closed.
+ private _dictionaries: RecordBatch[];
+ /** Creates a JSON writer with default options, `autoDestroy` on, and empty batch buffers. */
+ constructor() {
+ super();
+ this._autoDestroy = true;
+ this._recordBatches = [];
+ this._dictionaries = [];
+ }
+ /** No-op override: the JSON writer does not emit binary IPC messages. */
+ protected _writeMessage() { return this; }
+ /** No-op override: the closing JSON is written by `close()` instead. */
+ // @ts-ignore
+ protected _writeFooter(schema: Schema<T>) { return this; }
+ /**
+  * Opens the JSON document and writes its `"schema"` member, with every
+  * field of the schema converted by `fieldToJSON`. The document is left
+  * open; `close()` appends the remaining members and the closing brace.
+  */
+ protected _writeSchema(schema: Schema<T>) {
+     return this._write(`{\n "schema": ${
+         JSON.stringify({ fields: schema.fields.map(fieldToJSON) }, null, 2)
+     }`);
+ }
+ /**
+  * Defers dictionary output: batches that carry dictionaries are only
+  * remembered here and serialized by `close()`.
+  */
+ protected _writeDictionaries(batch: RecordBatch<T>) {
+ if (batch.dictionaries.size > 0) {
+ this._dictionaries.push(batch);
+ }
+ return this;
+ }
+ /**
+  * Writes one dictionary batch as a JSON array element, preceded by a comma
+  * separator for every element after the first, and adds the number of
+  * values written to the running total for `id`.
+  */
+ protected _writeDictionaryBatch(dictionary: Vector, id: number, isDelta = false) {
+ this._dictionaryDeltaOffsets.set(id, dictionary.length + (this._dictionaryDeltaOffsets.get(id) || 0));
+ this._write(this._dictionaryBlocks.length === 0 ? ` ` : `,\n `);
+ this._write(`${dictionaryBatchToJSON(dictionary, id, isDelta)}`);
+ // The block list is only used to count how many elements were written.
+ this._dictionaryBlocks.push(new FileBlock(0, 0, 0));
+ return this;
+ }
+ /** Buffers the batch (and notes its dictionaries); output happens in `close()`. */
+ protected _writeRecordBatch(batch: RecordBatch<T>) {
+ this._writeDictionaries(batch);
+ this._recordBatches.push(batch);
+ return this;
+ }
+ /**
+  * Serializes everything that was buffered and closes the writer: the
+  * `"dictionaries"` array (if any batch carried dictionaries), the
+  * `"batches"` array (if any batch was written), and the closing brace of
+  * the document (if a schema was written). The buffers are then cleared and
+  * the base `close()` is called.
+  */
+ public close() {
+ if (this._dictionaries.length > 0) {
+ this._write(`,\n "dictionaries": [\n`);
+ for (const batch of this._dictionaries) {
+ // The base implementation does the actual (delta-aware) dictionary writing.
+ super._writeDictionaries(batch);
+ }
+ this._write(`\n ]`);
+ }
+ if (this._recordBatches.length > 0) {
+ for (let i = -1, n = this._recordBatches.length; ++i < n;) {
+ this._write(i === 0 ? `,\n "batches": [\n ` : `,\n `);
+ this._write(`${recordBatchToJSON(this._recordBatches[i])}`);
+ this._recordBatchBlocks.push(new FileBlock(0, 0, 0));
+ }
+ this._write(`\n ]`);
+ }
+ if (this._schema) {
+ this._write(`\n}`);
+ }
+ this._dictionaries = [];
+ this._recordBatches = [];
+ return super.close();
+ }
+/**
+ * Writes every batch of a Table or iterable to `writer`, then finishes it.
+ * For a Table the writer is first reset to the table's schema, so the schema
+ * is written even when the table has no batches.
+ * @ignore
+ */
+function writeAll<T extends { [key: string]: DataType } = any>(writer: RecordBatchWriter<T>, input: Table<T> | Iterable<RecordBatch<T>>) {
+ let chunks = input as Iterable<RecordBatch<T>>;
+ if (input instanceof Table) {
+ chunks = input.chunks;
+ writer.reset(undefined, input.schema);
+ }
+ for (const batch of chunks) {
+ writer.write(batch);
+ }
+ return writer.finish();
+/**
+ * Writes every batch of an async iterable to `writer`, then finishes it.
+ * @ignore
+ */
+async function writeAllAsync<T extends { [key: string]: DataType } = any>(writer: RecordBatchWriter<T>, batches: AsyncIterable<RecordBatch<T>>) {
+ for await (const batch of batches) {
+ writer.write(batch);
+ }
+ return writer.finish();
+/**
+ * Converts a Field into its Arrow JSON description: name, nullability, the
+ * JSON form of its type, its children (recursively), and, for
+ * dictionary-encoded fields only, the dictionary id, ordering flag and
+ * index type. For other fields `dictionary` is `undefined`, which
+ * `JSON.stringify` omits from the output.
+ * @ignore
+ */
+function fieldToJSON({ name, type, nullable }: Field): Record<string, unknown> {
+    const assembler = new JSONTypeAssembler();
+    return {
+        'name': name, 'nullable': nullable,
+        'type': assembler.visit(type),
+        'children': (type.children || []).map(fieldToJSON),
+        'dictionary': !DataType.isDictionary(type) ? undefined : {
+            'id': type.id,
+            'isOrdered': type.isOrdered,
+            'indexType': assembler.visit(type.indices)
+        }
+    };
+}
+/**
+ * Serializes one dictionary batch to a JSON string. The dictionary values
+ * are assembled as a single column whose field is named after the id and is
+ * nullable only if the dictionary contains nulls.
+ * @ignore
+ */
+function dictionaryBatchToJSON(dictionary: Vector, id: number, isDelta = false) {
+ const field = new Field(`${id}`, dictionary.type, dictionary.nullCount > 0);
+ const columns = JSONVectorAssembler.assemble(new Column(field, [dictionary]));
+ return JSON.stringify({
+ 'id': id,
+ 'isDelta': isDelta,
+ 'data': {
+ 'count': dictionary.length,
+ 'columns': columns
+ }
+ }, null, 2);
+/**
+ * Serializes one record batch to a JSON string holding its row count and
+ * its assembled columns.
+ * @ignore
+ */
+function recordBatchToJSON(records: RecordBatch) {
+ return JSON.stringify({
+ 'count': records.length,
+ 'columns': JSONVectorAssembler.assemble(records)
+ }, null, 2);
diff --git a/src/arrow/js/src/recordbatch.ts b/src/arrow/js/src/recordbatch.ts
new file mode 100644
index 000000000..5463a387f
--- /dev/null
+++ b/src/arrow/js/src/recordbatch.ts
@@ -0,0 +1,151 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+import { Data } from './data';
+import { Table } from './table';
+import { Vector } from './vector';
+import { Visitor } from './visitor';
+import { Schema, Field } from './schema';
+import { isIterable } from './util/compat';
+import { Chunked } from './vector/chunked';
+import { selectFieldArgs } from './util/args';
+import { DataType, Struct, Dictionary } from './type';
+import { ensureSameLengthData } from './util/recordbatch';
+import { Clonable, Sliceable, Applicative } from './vector';
+import { StructVector, VectorBuilderOptions, VectorBuilderOptionsAsync } from './vector/index';
+type VectorMap = { [key: string]: Vector }; // string-keyed Vectors; accepted by the object overload of RecordBatch.new
+type Fields<T extends { [key: string]: DataType }> = (keyof T)[] | Field<T[keyof T]>[]; // either keys of T or Field instances
+type ChildData<T extends { [key: string]: DataType }> = (Data<T[keyof T]> | Vector<T[keyof T]>)[]; // array mixing Data and Vector children
+/**
+ * Interface merged with the `RecordBatch` class of the same name; it declares
+ * the RecordBatch-specific signatures of `concat`, `slice` and `clone`.
+ */
+export interface RecordBatch<T extends { [key: string]: DataType } = any> {
+    concat(...others: Vector<Struct<T>>[]): Table<T>;
+    slice(begin?: number, end?: number): RecordBatch<T>;
+    clone(data: Data<Struct<T>>, children?: Vector[]): RecordBatch<T>;
+}
+/**
+ * A StructVector that also carries the Schema describing its columns.
+ */
+export class RecordBatch<T extends { [key: string]: DataType } = any>
+    extends StructVector<T>
+    implements Clonable<RecordBatch<T>>,
+               Sliceable<RecordBatch<T>>,
+               Applicative<Struct<T>, Table<T>> {
+
+    public static from<T extends { [key: string]: DataType } = any, TNull = any>(options: VectorBuilderOptions<Struct<T>, TNull>): Table<T>;
+    public static from<T extends { [key: string]: DataType } = any, TNull = any>(options: VectorBuilderOptionsAsync<Struct<T>, TNull>): Promise<Table<T>>;
+    /**
+     * Builds a Table from builder options by delegating to `Table.from`.
+     * The `isIterable` check on `options.values` only selects which overload
+     * of `Table.from` is used (sync options vs. async options).
+     * @nocollapse
+     */
+    public static from<T extends { [key: string]: DataType } = any, TNull = any>(options: VectorBuilderOptions<Struct<T>, TNull> | VectorBuilderOptionsAsync<Struct<T>, TNull>) {
+        if (isIterable<(Struct<T>)['TValue'] | TNull>(options['values'])) {
+            return Table.from(options as VectorBuilderOptions<Struct<T>, TNull>);
+        }
+        return Table.from(options as VectorBuilderOptionsAsync<Struct<T>, TNull>);
+    }
+
+    public static new<T extends VectorMap = any>(children: T): RecordBatch<{ [P in keyof T]: T[P]['type'] }>;
+    public static new<T extends { [key: string]: DataType } = any>(children: ChildData<T>, fields?: Fields<T>): RecordBatch<T>;
+    /**
+     * Creates a RecordBatch from an object of Vectors, or from a list of
+     * children with an optional list of names/Fields.
+     *
+     * `selectFieldArgs` splits the arguments into fields and values; only
+     * values that are Vector instances are kept, and their Data is passed
+     * through `ensureSameLengthData` together with a Schema of the fields.
+     * @nocollapse
+     */
+    public static new<T extends { [key: string]: DataType } = any>(...args: any[]) {
+        const [fs, xs] = selectFieldArgs<T>(args);
+        const vs = xs.filter((x): x is Vector<T[keyof T]> => x instanceof Vector);
+        return new RecordBatch(...ensureSameLengthData(new Schema<T>(fs), vs.map((x) => x.data)));
+    }
+
+    protected _schema: Schema;
+    protected _dictionaries?: Map<number, Vector>;
+
+    constructor(schema: Schema<T>, length: number, children: (Data | Vector)[]);
+    constructor(schema: Schema<T>, data: Data<Struct<T>>, children?: Vector[]);
+    /**
+     * Accepts either `(schema, data, children?)` with ready-made Struct Data,
+     * or `(schema, length, children)`, in which case the Struct Data is built
+     * from the schema's fields and the supplied children.
+     */
+    constructor(...args: any[]) {
+        let data: Data<Struct<T>>;
+        const schema = args[0] as Schema<T>;
+        let children: Vector[] | undefined;
+        if (args[1] instanceof Data) {
+            [, data, children] = (args as [any, Data<Struct<T>>, Vector<T[keyof T]>[]?]);
+        } else {
+            const fields = schema.fields as Field<T[keyof T]>[];
+            const [, length, childData] = args as [any, number, Data<T[keyof T]>[]];
+            data = Data.Struct(new Struct<T>(fields), 0, length, 0, null, childData);
+        }
+        super(data, children);
+        this._schema = schema;
+    }
+
+    /** Returns a new RecordBatch over `data` that keeps this batch's Schema. */
+    public clone(data: Data<Struct<T>>, children = this._children) {
+        return new RecordBatch<T>(this._schema, data, children);
+    }
+
+    /**
+     * Flattens this batch and `others` into chunks and returns a Table with
+     * one RecordBatch (using this batch's Schema) per chunk.
+     */
+    public concat(...others: Vector<Struct<T>>[]): Table<T> {
+        const schema = this._schema, chunks = Chunked.flatten(this, ...others);
+        return new Table(schema, chunks.map(({ data }) => new RecordBatch(schema, data)));
+    }
+
+    public get schema() { return this._schema; }
+    public get numCols() { return this._schema.fields.length; }
+    /** Dictionaries used by this batch, collected on first access and cached. */
+    public get dictionaries() {
+        return this._dictionaries || (this._dictionaries = DictionaryCollector.collect(this));
+    }
+
+    /**
+     * Returns a RecordBatch containing only the named columns.
+     * Names that are not in the Schema are dropped.
+     */
+    public select<K extends keyof T = any>(...columnNames: K[]) {
+        const nameToIndex = this._schema.fields.reduce((m, f, i) => m.set(f.name as K, i), new Map<K, number>());
+        // Unknown names map to `undefined`, which fails `x > -1` and is filtered out.
+        return this.selectAt(...columnNames.map((columnName) => nameToIndex.get(columnName)!).filter((x) => x > -1));
+    }
+
+    /**
+     * Returns a RecordBatch containing only the columns at the given indices.
+     * Indices without child data are dropped.
+     */
+    public selectAt<K extends T[keyof T] = any>(...columnIndices: number[]) {
+        const schema = this._schema.selectAt(...columnIndices);
+        const childData = columnIndices.map((i) => this.data.childData[i]).filter(Boolean);
+        return new RecordBatch<{ [key: string]: K }>(schema, this.length, childData);
+    }
+}
+/**
+ * An internal class used by the `RecordBatchReader` and `RecordBatchWriter`
+ * implementations to differentiate between a stream with valid zero-length
+ * RecordBatches, and a stream with a Schema message, but no RecordBatches.
+ *
+ * The placeholder has length 0 and one empty child Data per schema field.
+ * @ignore
+ * @private
+ */
+/* eslint-disable @typescript-eslint/naming-convention */
+export class _InternalEmptyPlaceholderRecordBatch<T extends { [key: string]: DataType } = any> extends RecordBatch<T> {
+    constructor(schema: Schema<T>) {
+        super(schema, 0, schema.fields.map((f) => Data.new(f.type, 0, 0, 0)));
+    }
+}
+/**
+ * Visitor that walks a RecordBatch's Data tree and records every non-empty
+ * dictionary it finds, keyed by dictionary id.
+ * @ignore
+ */
+class DictionaryCollector extends Visitor {
+    public dictionaries = new Map<number, Vector>();
+
+    /** Collects the dictionaries referenced by `batch` into a new Map. */
+    public static collect<T extends RecordBatch>(batch: T) {
+        return new DictionaryCollector().visit(
+            batch.data, new Struct(batch.schema.fields)
+        ).dictionaries;
+    }
+
+    /**
+     * Records `data`'s dictionary when `type` is a Dictionary type; otherwise
+     * recurses into each child Data with the matching child field's type.
+     */
+    public visit(data: Data, type: DataType) {
+        if (DataType.isDictionary(type)) {
+            return this.visitDictionary(data, type);
+        } else {
+            data.childData.forEach((child, i) =>
+                this.visit(child, type.children[i].type));
+        }
+        return this;
+    }
+
+    /** Stores the dictionary under the type's id; missing or empty dictionaries are skipped. */
+    public visitDictionary(data: Data, type: Dictionary) {
+        const dictionary = data.dictionary;
+        if (dictionary && dictionary.length > 0) {
+            this.dictionaries.set(type.id, dictionary);
+        }
+        return this;
+    }
+}
diff --git a/src/arrow/js/src/schema.ts b/src/arrow/js/src/schema.ts
new file mode 100644
index 000000000..437ffa228
--- /dev/null
+++ b/src/arrow/js/src/schema.ts
@@ -0,0 +1,154 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+import { DataType } from './type';
+/**
+ * An ordered list of Fields plus string metadata and a map from dictionary
+ * id to the dictionary type referenced by those Fields.
+ */
+export class Schema<T extends { [key: string]: DataType } = any> {
+
+    public readonly fields: Field<T[keyof T]>[];
+    public readonly metadata: Map<string, string>;
+    public readonly dictionaries: Map<number, DataType>;
+
+    /**
+     * @param fields Fields of the schema, in order. Defaults to an empty list.
+     * @param metadata Optional metadata; an empty Map is used when omitted.
+     * @param dictionaries Optional id-to-dictionary map; when omitted it is
+     * collected from the fields by `generateDictionaryMap`.
+     */
+    constructor(fields: Field[] = [],
+                metadata?: Map<string, string> | null,
+                dictionaries?: Map<number, DataType> | null) {
+        this.fields = (fields || []) as Field<T[keyof T]>[];
+        this.metadata = metadata || new Map();
+        if (!dictionaries) {
+            // Use the normalized list so a null `fields` argument is handled
+            // the same way here as in the assignment above.
+            dictionaries = generateDictionaryMap(this.fields);
+        }
+        this.dictionaries = dictionaries;
+    }
+
+    public get [Symbol.toStringTag]() { return 'Schema'; }
+
+    /** Renders the schema as `Schema<{ 0: <field>, 1: <field>, ... }>`. */
+    public toString() {
+        return `Schema<{ ${this.fields.map((f, i) => `${i}: ${f}`).join(', ')} }>`;
+    }
+
+    /** Returns a Schema with only the fields whose name is in `columnNames`, in the original field order. */
+    public select<K extends keyof T = any>(...columnNames: K[]) {
+        const names = columnNames.reduce((xs, x) => (xs[x] = true) && xs, Object.create(null));
+        return new Schema<{ [P in K]: T[P] }>(this.fields.filter((f) => names[f.name]), this.metadata);
+    }
+
+    /** Returns a Schema with the fields at `columnIndices`, in the given order; out-of-range indices are dropped. */
+    public selectAt<K extends T[keyof T] = any>(...columnIndices: number[]) {
+        return new Schema<{ [key: string]: K }>(columnIndices.map((i) => this.fields[i]).filter(Boolean), this.metadata);
+    }
+
+    public assign<R extends { [key: string]: DataType } = any>(schema: Schema<R>): Schema<T & R>;
+    public assign<R extends { [key: string]: DataType } = any>(...fields: (Field<R[keyof R]> | Field<R[keyof R]>[])[]): Schema<T & R>;
+    /**
+     * Merges another Schema (or a list of Fields) into a new Schema.
+     *
+     * A Field whose name already exists replaces the current Field at the
+     * same position, with both Fields' metadata merged (the incoming values
+     * win). Fields with new names are appended. Schema metadata and
+     * dictionaries are merged the same way.
+     */
+    public assign<R extends { [key: string]: DataType } = any>(...args: (Schema<R> | Field<R[keyof R]> | Field<R[keyof R]>[])[]) {
+        const other = (args[0] instanceof Schema
+            ? args[0] as Schema<R>
+            : Array.isArray(args[0])
+                ? new Schema<R>(<Field<R[keyof R]>[]> args[0])
+                : new Schema<R>(<Field<R[keyof R]>[]> args));
+
+        const curFields = [...this.fields] as Field[];
+        const metadata = mergeMaps(mergeMaps(new Map(), this.metadata), other.metadata);
+        const newFields = other.fields.filter((f2) => {
+            const i = curFields.findIndex((f) => f.name === f2.name);
+            // `~i` is truthy when a match was found: overwrite in place and
+            // return false so the field is not also appended.
+            return ~i ? (curFields[i] = f2.clone({
+                metadata: mergeMaps(mergeMaps(new Map(), curFields[i].metadata), f2.metadata)
+            })) && false : true;
+        }) as Field[];
+
+        const newDictionaries = generateDictionaryMap(newFields, new Map());
+
+        return new Schema<T & R>(
+            [...curFields, ...newFields], metadata,
+            new Map([...this.dictionaries, ...newDictionaries])
+        );
+    }
+}
+/**
+ * A named, typed entry of a Schema, with a nullability flag and string
+ * metadata.
+ */
+export class Field<T extends DataType = any> {
+
+    public static new<T extends DataType = any>(props: { name: string | number; type: T; nullable?: boolean; metadata?: Map<string, string> | null }): Field<T>;
+    public static new<T extends DataType = any>(name: string | number | Field<T>, type: T, nullable?: boolean, metadata?: Map<string, string> | null): Field<T>;
+    /**
+     * Creates a Field from positional arguments or from a props object.
+     *
+     * When the first argument is an object, its `name` is always used, while
+     * its `type`, `nullable` and `metadata` only fill in positional arguments
+     * that were left `undefined`. The name is converted to a string.
+     * @nocollapse
+     */
+    public static new<T extends DataType = any>(...args: any[]) {
+        let [name, type, nullable, metadata] = args;
+        if (args[0] && typeof args[0] === 'object') {
+            ({ name } = args[0]);
+            (type === undefined) && (type = args[0].type);
+            (nullable === undefined) && (nullable = args[0].nullable);
+            (metadata === undefined) && (metadata = args[0].metadata);
+        }
+        return new Field<T>(`${name}`, type, nullable, metadata);
+    }
+
+    public readonly type: T;
+    public readonly name: string;
+    public readonly nullable: boolean;
+    public readonly metadata: Map<string, string>;
+
+    /**
+     * @param name Field name.
+     * @param type Field data type.
+     * @param nullable Whether the field may contain nulls; defaults to `false`.
+     * @param metadata Optional metadata; an empty Map is used when omitted.
+     */
+    constructor(name: string, type: T, nullable = false, metadata?: Map<string, string> | null) {
+        this.name = name;
+        this.type = type;
+        this.nullable = nullable;
+        this.metadata = metadata || new Map();
+    }
+
+    public get typeId() { return this.type.typeId; }
+    public get [Symbol.toStringTag]() { return 'Field'; }
+    /** Renders the field as `<name>: <type>`. */
+    public toString() { return `${this.name}: ${this.type}`; }
+
+    public clone<R extends DataType = T>(props: { name?: string | number; type?: R; nullable?: boolean; metadata?: Map<string, string> | null }): Field<R>;
+    public clone<R extends DataType = T>(name?: string | number | Field<T>, type?: R, nullable?: boolean, metadata?: Map<string, string> | null): Field<R>;
+    /**
+     * Returns a copy of this Field with some properties overridden, given
+     * either positionally or as a props object. Anything left `undefined`
+     * keeps this Field's value.
+     */
+    public clone<R extends DataType = T>(...args: any[]) {
+        let [name, type, nullable, metadata] = args;
+        (!args[0] || typeof args[0] !== 'object')
+            ? ([name = this.name, type = this.type, nullable = this.nullable, metadata = this.metadata] = args)
+            : ({name = this.name, type = this.type, nullable = this.nullable, metadata = this.metadata} = args[0]);
+        return Field.new<R>(name, type, nullable, metadata);
+    }
+}
+/**
+ * Returns a new Map holding the entries of `m1` followed by those of `m2`;
+ * for keys present in both, the value from `m2` wins. A `null` or missing
+ * argument is treated as an empty Map. Neither input is mutated.
+ * @ignore
+ */
+function mergeMaps<TKey, TVal>(m1?: Map<TKey, TVal> | null, m2?: Map<TKey, TVal> | null): Map<TKey, TVal> {
+    return new Map([...(m1 || new Map()), ...(m2 || new Map())]);
+}
+/**
+ * Recursively collects the dictionaries referenced by `fields` (and by the
+ * children of their types) into `dictionaries`, keyed by dictionary id.
+ *
+ * @param fields Fields to scan.
+ * @param dictionaries Map to fill; it is mutated and returned. A new Map is
+ * created when omitted.
+ * @returns The `dictionaries` Map.
+ * @throws Error when two fields use the same id for different dictionaries.
+ * @ignore
+ */
+function generateDictionaryMap(fields: Field[], dictionaries = new Map<number, DataType>()): Map<number, DataType> {
+    for (let i = -1, n = fields.length; ++i < n;) {
+        const field = fields[i];
+        const type = field.type;
+        if (DataType.isDictionary(type)) {
+            if (!dictionaries.has(type.id)) {
+                dictionaries.set(type.id, type.dictionary);
+            } else if (dictionaries.get(type.id) !== type.dictionary) {
+                throw new Error(`Cannot create Schema containing two different dictionaries with the same Id`);
+            }
+        }
+        if (type.children && type.children.length > 0) {
+            generateDictionaryMap(type.children, dictionaries);
+        }
+    }
+    return dictionaries;
+}
+// Assign these on the prototypes so the build's externs creator picks the
+// property names up and closure-compiler doesn't minify them away.
+(Schema.prototype as any).fields = null;
+(Schema.prototype as any).metadata = null;
+(Schema.prototype as any).dictionaries = null;
+(Field.prototype as any).type = null;
+(Field.prototype as any).name = null;
+(Field.prototype as any).nullable = null;
+(Field.prototype as any).metadata = null;
diff --git a/src/arrow/js/src/table.ts b/src/arrow/js/src/table.ts
new file mode 100644
index 000000000..d5e121de7
--- /dev/null
+++ b/src/arrow/js/src/table.ts
@@ -0,0 +1,289 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+import { Column } from './column';
+import { Data } from './data';
+import { TypedArray, TypedArrayDataType } from './interfaces';
+import { RecordBatchReader } from './ipc/reader';
+import { RecordBatchFileWriter, RecordBatchStreamWriter } from './ipc/writer';
+import { RecordBatch, _InternalEmptyPlaceholderRecordBatch } from './recordbatch';
+import { Field, Schema } from './schema';
+import { DataType, RowLike, Struct } from './type';
+import { selectArgs, selectColumnArgs } from './util/args';
+import { isAsyncIterable, isIterable, isPromise } from './util/compat';
+import { distributeColumnsIntoRecordBatches, distributeVectorsIntoRecordBatches } from './util/recordbatch';
+import { Applicative, Clonable, Sliceable } from './vector';
+import { Chunked, StructVector, Vector, VectorBuilderOptions, VectorBuilderOptionsAsync } from './vector/index';
+type VectorMap = { [key: string]: Vector | Exclude<TypedArray, Uint8ClampedArray> }; // string-keyed Vectors or typed arrays (Uint8ClampedArray excluded)
+type Fields<T extends { [key: string]: DataType }> = (keyof T)[] | Field<T[keyof T]>[]; // either keys of T or Field instances
+type ChildData<T extends { [key: string]: DataType }> = Data<T[keyof T]>[] | Vector<T[keyof T]>[]; // all Data or all Vectors
+type Columns<T extends { [key: string]: DataType }> = Column<T[keyof T]>[] | Column<T[keyof T]>[][]; // Columns, or arrays of Columns
+/**
+ * Interface merged with the `Table` class of the same name; it declares the
+ * Table-specific signatures of `get`, iteration, `slice`, `concat` and
+ * `clone`.
+ */
+export interface Table<T extends { [key: string]: DataType } = any> {
+    get(index: number): Struct<T>['TValue'];
+    [Symbol.iterator](): IterableIterator<RowLike<T>>;
+    slice(begin?: number, end?: number): Table<T>;
+    concat(...others: Vector<Struct<T>>[]): Table<T>;
+    clone(chunks?: RecordBatch<T>[], offsets?: Uint32Array): Table<T>;
+}
+/**
+ * A Chunked Struct vector whose chunks are RecordBatches, together with the
+ * Schema describing its columns.
+ */
+export class Table<T extends { [key: string]: DataType } = any>
+    extends Chunked<Struct<T>>
+    implements Clonable<Table<T>>,
+               Sliceable<Table<T>>,
+               Applicative<Struct<T>, Table<T>> {
+
+    /**
+     * Creates a Table with no RecordBatches for `schema` (an empty Schema by default).
+     * @nocollapse
+     */
+    public static empty<T extends { [key: string]: DataType } = Record<string, never>>(schema = new Schema<T>([])) { return new Table<T>(schema, []); }
+
+    public static from(): Table<Record<string, never>>;
+    public static from<T extends { [key: string]: DataType } = any>(source: RecordBatchReader<T>): Table<T>;
+    public static from<T extends { [key: string]: DataType } = any>(source: import('./ipc/reader').FromArg0): Table<T>;
+    public static from<T extends { [key: string]: DataType } = any>(source: import('./ipc/reader').FromArg2): Table<T>;
+    public static from<T extends { [key: string]: DataType } = any>(source: import('./ipc/reader').FromArg1): Promise<Table<T>>;
+    public static from<T extends { [key: string]: DataType } = any>(source: import('./ipc/reader').FromArg3): Promise<Table<T>>;
+    public static from<T extends { [key: string]: DataType } = any>(source: import('./ipc/reader').FromArg4): Promise<Table<T>>;
+    public static from<T extends { [key: string]: DataType } = any>(source: import('./ipc/reader').FromArg5): Promise<Table<T>>;
+    public static from<T extends { [key: string]: DataType } = any>(source: PromiseLike<RecordBatchReader<T>>): Promise<Table<T>>;
+    public static from<T extends { [key: string]: DataType } = any, TNull = any>(options: VectorBuilderOptions<Struct<T>, TNull>): Table<T>;
+    public static from<T extends { [key: string]: DataType } = any, TNull = any>(options: VectorBuilderOptionsAsync<Struct<T>, TNull>): Promise<Table<T>>;
+    /**
+     * Creates a Table from builder options or from anything
+     * `RecordBatchReader.from` accepts.
+     *
+     * - A falsy input yields an empty Table.
+     * - An object whose `values` is (async-)iterable and whose `type` is a
+     *   Struct is built through `StructVector.from`.
+     * - Otherwise the input is read with a RecordBatchReader: synchronously
+     *   when the reader is sync, else asynchronously (returning a Promise).
+     *   A reader without a schema yields an empty Table.
+     * @nocollapse
+     */
+    public static from<T extends { [key: string]: DataType } = any, TNull = any>(input?: any) {
+
+        if (!input) { return Table.empty(); }
+
+        if (typeof input === 'object') {
+            const table = isIterable(input['values']) ? tableFromIterable<T, TNull>(input)
+                : isAsyncIterable(input['values']) ? tableFromAsyncIterable<T, TNull>(input)
+                : null;
+            if (table !== null) { return table; }
+        }
+
+        let reader = RecordBatchReader.from<T>(input) as RecordBatchReader<T> | Promise<RecordBatchReader<T>>;
+
+        if (isPromise<RecordBatchReader<T>>(reader)) {
+            return (async () => await Table.from(await reader))();
+        }
+        if (reader.isSync() && (reader = reader.open())) {
+            return !reader.schema ? Table.empty() : new Table<T>(reader.schema, [...reader]);
+        }
+        return (async (opening) => {
+            const reader = await opening;
+            const schema = reader.schema;
+            const batches: RecordBatch[] = [];
+            if (schema) {
+                for await (const batch of reader) {
+                    batches.push(batch);
+                }
+                return new Table<T>(schema, batches);
+            }
+            return Table.empty();
+        })(reader.open());
+    }
+
+    /**
+     * Promise-returning wrapper around `Table.from`.
+     * @nocollapse
+     */
+    public static async fromAsync<T extends { [key: string]: DataType } = any>(source: import('./ipc/reader').FromArgs): Promise<Table<T>> {
+        return await Table.from<T>(source as any);
+    }
+
+    /**
+     * Creates a Table from a Struct vector, using the vector's child Data as
+     * the columns and its type's children as the Fields.
+     * @nocollapse
+     */
+    public static fromStruct<T extends { [key: string]: DataType } = any>(vector: Vector<Struct<T>>) {
+        return Table.new<T>(vector.data.childData as Data<T[keyof T]>[], vector.type.children);
+    }
+
+    /**
+     * @summary Create a new Table from a collection of Columns or Vectors,
+     * with an optional list of names or Fields.
+     *
+     *
+     * `Table.new` accepts an Object of
+     * Columns or Vectors, where the keys will be used as the field names
+     * for the Schema:
+     * ```ts
+     * const i32s = Int32Vector.from([1, 2, 3]);
+     * const f32s = Float32Vector.from([.1, .2, .3]);
+     * const table = Table.new({ i32: i32s, f32: f32s });
+     * assert(table.schema.fields[0].name === 'i32');
+     * ```
+     *
+     * It also accepts a list of Vectors with an optional list of names or
+     * Fields for the resulting Schema. If the list is omitted or a name is
+     * missing, the numeric index of each Vector will be used as the name:
+     * ```ts
+     * const i32s = Int32Vector.from([1, 2, 3]);
+     * const f32s = Float32Vector.from([.1, .2, .3]);
+     * const table = Table.new([i32s, f32s], ['i32']);
+     * assert(table.schema.fields[0].name === 'i32');
+     * assert(table.schema.fields[1].name === '1');
+     * ```
+     *
+     * If the supplied arguments are Columns, `Table.new` will infer the Schema
+     * from the Columns:
+     * ```ts
+     * const i32s = Column.new('i32', Int32Vector.from([1, 2, 3]));
+     * const f32s = Column.new('f32', Float32Vector.from([.1, .2, .3]));
+     * const table = Table.new(i32s, f32s);
+     * assert(table.schema.fields[0].name === 'i32');
+     * assert(table.schema.fields[1].name === 'f32');
+     * ```
+     *
+     * If the supplied Vector or Column lengths are unequal, `Table.new` will
+     * extend the lengths of the shorter Columns, allocating additional bytes
+     * to represent the additional null slots. The memory required to allocate
+     * these additional bitmaps can be computed as:
+     * ```ts
+     * let additionalBytes = 0;
+     * for (let vec of shorter_vectors) {
+     *     additionalBytes += (((longestLength - vec.length) + 63) & ~63) >> 3;
+     * }
+     * ```
+     *
+     * For example, an additional null bitmap for one million null values would require
+     * 125,000 bytes (`((1e6 + 63) & ~63) >> 3`), or approx. `0.12MiB`
+     */
+    public static new<T extends { [key: string]: DataType } = any>(...columns: Columns<T>): Table<T>;
+    public static new<T extends VectorMap = any>(children: T): Table<{ [P in keyof T]: T[P] extends Vector ? T[P]['type'] : T[P] extends Exclude<TypedArray, Uint8ClampedArray> ? TypedArrayDataType<T[P]> : never}>;
+    public static new<T extends { [key: string]: DataType } = any>(children: ChildData<T>, fields?: Fields<T>): Table<T>;
+    /** @nocollapse */
+    public static new(...cols: any[]) {
+        return new Table(...distributeColumnsIntoRecordBatches(selectColumnArgs(cols)));
+    }
+
+    constructor(table: Table<T>);
+    constructor(batches: RecordBatch<T>[]);
+    constructor(...batches: RecordBatch<T>[]);
+    constructor(schema: Schema<T>, batches: RecordBatch<T>[]);
+    constructor(schema: Schema<T>, ...batches: RecordBatch<T>[]);
+    /**
+     * Accepts another Table, a list (or spread) of RecordBatches, or a Schema
+     * followed by RecordBatches. Without an explicit Schema, the first
+     * batch's Schema is used.
+     *
+     * @throws TypeError when neither a Schema nor any RecordBatch is given.
+     */
+    constructor(...args: any[]) {
+
+        let schema: Schema<T> = null!;
+
+        if (args[0] instanceof Schema) { schema = args[0]; }
+
+        const chunks = args[0] instanceof Table ? (args[0] as Table<T>).chunks : selectArgs<RecordBatch<T>>(RecordBatch, args);
+
+        if (!schema && !(schema = chunks[0]?.schema)) {
+            throw new TypeError('Table must be initialized with a Schema or at least one RecordBatch');
+        }
+
+        // A Table with no batches still gets one zero-length placeholder chunk.
+        chunks[0] || (chunks[0] = new _InternalEmptyPlaceholderRecordBatch(schema));
+
+        super(new Struct(schema.fields), chunks);
+
+        this._schema = schema;
+        this._chunks = chunks;
+    }
+
+    protected _schema: Schema<T>;
+    // List of inner RecordBatches
+    protected _chunks: RecordBatch<T>[];
+    protected _children?: Column<T[keyof T]>[];
+
+    public get schema() { return this._schema; }
+    public get length() { return this._length; }
+    public get chunks() { return this._chunks; }
+    public get numCols() { return this._numChildren; }
+
+    /** Returns a new Table with this Table's Schema over `chunks` (this Table's chunks by default). */
+    public clone(chunks = this._chunks) {
+        return new Table<T>(this._schema, chunks);
+    }
+
+    /** Returns the Column with the given name, looked up through `getColumnIndex`. */
+    public getColumn<R extends keyof T>(name: R): Column<T[R]> {
+        return this.getColumnAt(this.getColumnIndex(name)) as Column<T[R]>;
+    }
+
+    /** Returns the Column at `index`, or `null`; an alias for `getChildAt`. */
+    public getColumnAt<R extends DataType = any>(index: number): Column<R> | null {
+        return this.getChildAt(index);
+    }
+
+    /** Returns the index of the first Field named `name`, or -1 when there is none. */
+    public getColumnIndex<R extends keyof T>(name: R) {
+        return this._schema.fields.findIndex((f) => f.name === name);
+    }
+
+    /**
+     * Returns the Column at `index`, built from that child of every chunk and
+     * cached for later calls. Returns `null` when the index is out of range
+     * or no chunk has a child at that index.
+     */
+    public getChildAt<R extends DataType = any>(index: number): Column<R> | null {
+        if (index < 0 || index >= this.numChildren) { return null; }
+        let field: Field<R>, child: Column<R>;
+        const fields = (this._schema as Schema<any>).fields;
+        const columns = this._children || (this._children = []) as Column[];
+        if (child = columns[index]) { return child as Column<R>; }
+        if (field = fields[index]) {
+            const chunks = this._chunks
+                .map((chunk) => chunk.getChildAt<R>(index))
+                .filter((vec): vec is Vector<R> => vec != null);
+            if (chunks.length > 0) {
+                return (columns[index] = new Column<R>(field, chunks));
+            }
+        }
+        return null;
+    }
+
+    /**
+     * Writes the Table with a RecordBatchStreamWriter (or a
+     * RecordBatchFileWriter when `stream` is false) and returns the bytes.
+     * The `encoding` argument is not used by this implementation.
+     */
+    // @ts-ignore
+    public serialize(encoding = 'binary', stream = true) {
+        const Writer = !stream
+            ? RecordBatchFileWriter
+            : RecordBatchStreamWriter;
+        return Writer.writeAll(this).toUint8Array(true);
+    }
+
+    /** Returns the Table's length. */
+    public count(): number {
+        return this._length;
+    }
+
+    /**
+     * Returns a Table containing only the named columns.
+     * Names that are not in the Schema are dropped.
+     */
+    public select<K extends keyof T = any>(...columnNames: K[]) {
+        const nameToIndex = this._schema.fields.reduce((m, f, i) => m.set(f.name as K, i), new Map<K, number>());
+        // Unknown names map to `undefined`, which fails `x > -1` and is filtered out.
+        return this.selectAt(...columnNames.map((columnName) => nameToIndex.get(columnName)!).filter((x) => x > -1));
+    }
+
+    /**
+     * Returns a Table containing only the columns at the given indices, with
+     * one RecordBatch per existing chunk.
+     */
+    public selectAt<K extends T[keyof T] = any>(...columnIndices: number[]) {
+        const schema = this._schema.selectAt<K>(...columnIndices);
+        return new Table(schema, this._chunks.map(({ length, data: { childData } }) => {
+            return new RecordBatch(schema, length, columnIndices.map((i) => childData[i]).filter(Boolean));
+        }));
+    }
+
+    /**
+     * Merges the columns of `other` into a new Table. A column of `other`
+     * whose name matches an existing column replaces it in place; the
+     * remaining columns of `other` are appended.
+     */
+    public assign<R extends { [key: string]: DataType } = any>(other: Table<R>) {
+
+        const fields = this._schema.fields;
+        // `indices`: positions in `other` of columns with new names.
+        // `oldToNew[i]`: position in `other` of the column replacing column `i`.
+        const [indices, oldToNew] = other.schema.fields.reduce((memo, f2, newIdx) => {
+            const [indices, oldToNew] = memo;
+            const i = fields.findIndex((f) => f.name === f2.name);
+            ~i ? (oldToNew[i] = newIdx) : indices.push(newIdx);
+            return memo;
+        }, [[], []] as number[][]);
+
+        const schema = this._schema.assign(other.schema);
+        const columns = [
+            ...fields.map((_f, i, _fs, j = oldToNew[i]) =>
+                (j === undefined ? this.getColumnAt(i) : other.getColumnAt(j))!),
+            ...indices.map((i) => other.getColumnAt(i)!)
+        ].filter(Boolean) as Column<(T & R)[keyof T | keyof R]>[];
+
+        return new Table<T & R>(...distributeVectorsIntoRecordBatches<any>(schema, columns));
+    }
+}
+/**
+ * Builds a Table from synchronous builder options when `input.type` is a
+ * Struct, by building a StructVector and converting it with
+ * `Table.fromStruct`.
+ *
+ * @returns The Table, or `null` when the type is not a Struct.
+ */
+function tableFromIterable<T extends { [key: string]: DataType } = any, TNull = any>(input: VectorBuilderOptions<Struct<T>, TNull>) {
+    const { type } = input;
+    if (type instanceof Struct) {
+        return Table.fromStruct(StructVector.from(input as VectorBuilderOptions<Struct<T>, TNull>));
+    }
+    return null;
+}
+/**
+ * Builds a Table from asynchronous builder options when `input.type` is a
+ * Struct, by building a StructVector and converting the result with
+ * `Table.fromStruct`.
+ *
+ * @returns A Promise of the Table, or `null` when the type is not a Struct.
+ */
+function tableFromAsyncIterable<T extends { [key: string]: DataType } = any, TNull = any>(input: VectorBuilderOptionsAsync<Struct<T>, TNull>) {
+    const { type } = input;
+    if (type instanceof Struct) {
+        return StructVector.from(input as VectorBuilderOptionsAsync<Struct<T>, TNull>).then((vector) => Table.fromStruct(vector));
+    }
+    return null;
+}
diff --git a/src/arrow/js/src/type.ts b/src/arrow/js/src/type.ts
new file mode 100644
index 000000000..7d5c051ad
--- /dev/null
+++ b/src/arrow/js/src/type.ts
@@ -0,0 +1,613 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+/* eslint-disable @typescript-eslint/naming-convention */
+import { Field } from './schema';
+import { flatbuffers } from 'flatbuffers';
+import { VectorType as V } from './interfaces';
+import { TypedArrayConstructor } from './interfaces';
+import Long = flatbuffers.Long;
+import {
+ Type,
+ Precision, UnionMode,
+ DateUnit, TimeUnit, IntervalUnit
+} from './enum';
+/** Bit widths accepted for the `bitWidth` of a Time type. @ignore */
+export type TimeBitWidth = 32 | 64;
+/** Bit widths accepted for the `bitWidth` of an Int type. @ignore */
+export type IntBitWidth = 8 | 16 | 32 | 64;
+/** Maps the string keys 'true' and 'false' to the matching boolean literal types. @ignore */
+export type IsSigned = { 'true': true; 'false': false };
+/** Row shape for a set of named child types: iterable as [name, value] pairs, one property per key, plus typed get/set accessors; any value may be null. @ignore */
+export type RowLike<T extends { [key: string]: DataType }> =
+ ( Iterable<[string, T[keyof T]['TValue'] | null]> )
+ & { [P in keyof T]: T[P]['TValue'] | null }
+ & { get<K extends keyof T>(key: K): T[K]['TValue'] | null }
+ & { set<K extends keyof T>(key: K, val: T[K]['TValue'] | null): void }
+ ;
+/** Map shape for a key/value type pair: a JS Map that is also indexable by key; any value may be null. @ignore */
+export type MapLike<K extends DataType = any, V extends DataType = any> =
+ { [P in K['TValue']]: V['TValue'] | null }
+ & ( Map<K['TValue'], V['TValue'] | null> )
+ ;
+/**
+ * Type-level members merged into the `DataType` class: they carry the
+ * type id, the backing array type, the element value type and the child
+ * fields so other generics can read them with indexed access (`T['TValue']`).
+ */
+export interface DataType<TType extends Type = Type, TChildren extends { [key: string]: DataType } = any> {
+    readonly TType: TType;
+    readonly TArray: any;
+    readonly TValue: any;
+    readonly ArrayType: any;
+    readonly children: Field<TChildren[keyof TChildren]>[];
+}
+/**
+ * An abstract base class for classes that encapsulate metadata about each of
+ * the logical types that Arrow can represent.
+ *
+ * The static `isX` guards compare `typeId` only, so they accept any object
+ * exposing a matching `typeId` and return false for null/undefined.
+ */
+export abstract class DataType<TType extends Type = Type, TChildren extends { [key: string]: DataType } = any> {
+    public [Symbol.toStringTag]: string;
+    /** @nocollapse */ static isNull(x: any): x is Null { return x?.typeId === Type.Null; }
+    /** @nocollapse */ static isInt(x: any): x is Int_ { return x?.typeId === Type.Int; }
+    /** @nocollapse */ static isFloat(x: any): x is Float { return x?.typeId === Type.Float; }
+    /** @nocollapse */ static isBinary(x: any): x is Binary { return x?.typeId === Type.Binary; }
+    /** @nocollapse */ static isUtf8(x: any): x is Utf8 { return x?.typeId === Type.Utf8; }
+    /** @nocollapse */ static isBool(x: any): x is Bool { return x?.typeId === Type.Bool; }
+    /** @nocollapse */ static isDecimal(x: any): x is Decimal { return x?.typeId === Type.Decimal; }
+    /** @nocollapse */ static isDate(x: any): x is Date_ { return x?.typeId === Type.Date; }
+    /** @nocollapse */ static isTime(x: any): x is Time_ { return x?.typeId === Type.Time; }
+    /** @nocollapse */ static isTimestamp(x: any): x is Timestamp_ { return x?.typeId === Type.Timestamp; }
+    /** @nocollapse */ static isInterval(x: any): x is Interval_ { return x?.typeId === Type.Interval; }
+    /** @nocollapse */ static isList(x: any): x is List { return x?.typeId === Type.List; }
+    /** @nocollapse */ static isStruct(x: any): x is Struct { return x?.typeId === Type.Struct; }
+    /** @nocollapse */ static isUnion(x: any): x is Union_ { return x?.typeId === Type.Union; }
+    /** @nocollapse */ static isFixedSizeBinary(x: any): x is FixedSizeBinary { return x?.typeId === Type.FixedSizeBinary; }
+    /** @nocollapse */ static isFixedSizeList(x: any): x is FixedSizeList { return x?.typeId === Type.FixedSizeList; }
+    /** @nocollapse */ static isMap(x: any): x is Map_ { return x?.typeId === Type.Map; }
+    /** @nocollapse */ static isDictionary(x: any): x is Dictionary { return x?.typeId === Type.Dictionary; }
+    /** Base implementation reports `Type.NONE`; subclasses override it. */
+    public get typeId(): TType { return <any> Type.NONE; }
+    /**
+     * Runs once at class definition: seeds prototype-level defaults
+     * (`children`, `ArrayType`) and the `Symbol.toStringTag` label.
+     */
+    protected static [Symbol.toStringTag] = ((proto: DataType) => {
+        (<any> proto).children = null;
+        (<any> proto).ArrayType = Array;
+        return proto[Symbol.toStringTag] = 'DataType';
+    })(DataType.prototype);
+}
+/** @ignore */
+export interface Null extends DataType<Type.Null> { TArray: void; TValue: null }
+/**
+ * The Null data type: its values are typed `null` and it has no array type.
+ * @ignore
+ */
+export class Null extends DataType<Type.Null> {
+    public toString() { return `Null`; }
+    public get typeId() { return Type.Null as Type.Null; }
+    /** Runs once at class definition to label the prototype 'Null'. */
+    protected static [Symbol.toStringTag] = ((proto: Null) => {
+        return proto[Symbol.toStringTag] = 'Null';
+    })(Null.prototype);
+}
+/** Type ids that an Int data type may carry. @ignore */
+type Ints = Type.Int | Type.Int8 | Type.Int16 | Type.Int32 | Type.Int64 | Type.Uint8 | Type.Uint16 | Type.Uint32 | Type.Uint64;
+/**
+ * Per-type-id lookup of the bit width, signedness, backing array type and
+ * element value type of each integer type. The 64-bit entries are backed by
+ * 32-bit typed arrays.
+ * @ignore
+ */
+type IType = {
+    [Type.Int   ]: { bitWidth: IntBitWidth; isSigned: true | false; TArray: IntArray; TValue: number | bigint | Int32Array | Uint32Array };
+    [Type.Int8  ]: { bitWidth: 8; isSigned: true; TArray: Int8Array; TValue: number };
+    [Type.Int16 ]: { bitWidth: 16; isSigned: true; TArray: Int16Array; TValue: number };
+    [Type.Int32 ]: { bitWidth: 32; isSigned: true; TArray: Int32Array; TValue: number };
+    [Type.Int64 ]: { bitWidth: 64; isSigned: true; TArray: Int32Array; TValue: bigint | Int32Array | Uint32Array };
+    [Type.Uint8 ]: { bitWidth: 8; isSigned: false; TArray: Uint8Array; TValue: number };
+    [Type.Uint16]: { bitWidth: 16; isSigned: false; TArray: Uint16Array; TValue: number };
+    [Type.Uint32]: { bitWidth: 32; isSigned: false; TArray: Uint32Array; TValue: number };
+    [Type.Uint64]: { bitWidth: 64; isSigned: false; TArray: Uint32Array; TValue: bigint | Int32Array | Uint32Array };
+};
+/** @ignore */
+interface Int_<T extends Ints = Ints> extends DataType<T> { TArray: IType[T]['TArray']; TValue: IType[T]['TValue'] }
+/**
+ * Integer data type described by its signedness and bit width.
+ * Exported under the public name `Int`.
+ * @ignore
+ */
+class Int_<T extends Ints = Ints> extends DataType<T> {
+    /**
+     * @param isSigned whether values are signed
+     * @param bitWidth number of bits per value (8, 16, 32 or 64)
+     */
+    constructor(public readonly isSigned: IType[T]['isSigned'],
+                public readonly bitWidth: IType[T]['bitWidth']) {
+        super();
+    }
+    public get typeId() { return Type.Int as T; }
+    /**
+     * Typed-array constructor backing this type. 64-bit integers use the
+     * 32-bit array constructors.
+     * @throws Error when `bitWidth` is not one of 8, 16, 32 or 64
+     */
+    public get ArrayType(): TypedArrayConstructor<IType[T]['TArray']> {
+        switch (this.bitWidth) {
+            case 8: return this.isSigned ? Int8Array : Uint8Array;
+            case 16: return this.isSigned ? Int16Array : Uint16Array;
+            case 32: return this.isSigned ? Int32Array : Uint32Array;
+            case 64: return this.isSigned ? Int32Array : Uint32Array;
+        }
+        throw new Error(`Unrecognized ${this[Symbol.toStringTag]} type`);
+    }
+    /** Renders e.g. `Int32` or `Uint8`. */
+    public toString() { return `${this.isSigned ? `I` : `Ui`}nt${this.bitWidth}`; }
+    /** Runs once at class definition: seeds prototype defaults and the 'Int' label. */
+    protected static [Symbol.toStringTag] = ((proto: Int_) => {
+        (<any> proto).isSigned = null;
+        (<any> proto).bitWidth = null;
+        return proto[Symbol.toStringTag] = 'Int';
+    })(Int_.prototype);
+}
+export { Int_ as Int };
+/** Signed 8-bit integer type. @ignore */
+export class Int8 extends Int_<Type.Int8> {
+    constructor() {
+        super(true, 8);
+    }
+}
+/** Signed 16-bit integer type. @ignore */
+export class Int16 extends Int_<Type.Int16> {
+    constructor() {
+        super(true, 16);
+    }
+}
+/** Signed 32-bit integer type. @ignore */
+export class Int32 extends Int_<Type.Int32> {
+    constructor() {
+        super(true, 32);
+    }
+}
+/** Signed 64-bit integer type. @ignore */
+export class Int64 extends Int_<Type.Int64> {
+    constructor() {
+        super(true, 64);
+    }
+}
+/** Unsigned 8-bit integer type. @ignore */
+export class Uint8 extends Int_<Type.Uint8> {
+    constructor() {
+        super(false, 8);
+    }
+}
+/** Unsigned 16-bit integer type. @ignore */
+export class Uint16 extends Int_<Type.Uint16> {
+    constructor() {
+        super(false, 16);
+    }
+}
+/** Unsigned 32-bit integer type. @ignore */
+export class Uint32 extends Int_<Type.Uint32> {
+    constructor() {
+        super(false, 32);
+    }
+}
+/** Unsigned 64-bit integer type. @ignore */
+export class Uint64 extends Int_<Type.Uint64> {
+    constructor() {
+        super(false, 64);
+    }
+}
+// Pin a fixed `ArrayType` value on each concrete prototype, shadowing the
+// computed getter inherited from the Int base class.
+for (const [ctor, value] of [
+    [Int8, Int8Array],
+    [Int16, Int16Array],
+    [Int32, Int32Array],
+    [Int64, Int32Array],
+    [Uint8, Uint8Array],
+    [Uint16, Uint16Array],
+    [Uint32, Uint32Array],
+    [Uint64, Uint32Array],
+] as [{ prototype: object }, unknown][]) {
+    Object.defineProperty(ctor.prototype, 'ArrayType', { value });
+}
+/** Type ids that a Float data type may carry. @ignore */
+type Floats = Type.Float | Type.Float16 | Type.Float32 | Type.Float64;
+/**
+ * Per-type-id lookup of the precision, backing array type and element value
+ * type of each floating point type. Half precision is backed by Uint16Array.
+ * @ignore
+ */
+type FType = {
+    [Type.Float  ]: { precision: Precision; TArray: FloatArray; TValue: number };
+    [Type.Float16]: { precision: Precision.HALF; TArray: Uint16Array; TValue: number };
+    [Type.Float32]: { precision: Precision.SINGLE; TArray: Float32Array; TValue: number };
+    [Type.Float64]: { precision: Precision.DOUBLE; TArray: Float64Array; TValue: number };
+};
+/** @ignore */
+export interface Float<T extends Floats = Floats> extends DataType<T> { TArray: FType[T]['TArray']; TValue: number }
+/**
+ * Floating point data type described by its precision.
+ * @ignore
+ */
+export class Float<T extends Floats = Floats> extends DataType<T> {
+    /** @param precision HALF, SINGLE or DOUBLE */
+    constructor(public readonly precision: Precision) {
+        super();
+    }
+    public get typeId() { return Type.Float as T; }
+    /**
+     * Typed-array constructor backing this type; half precision uses Uint16Array.
+     * @throws Error when `precision` is not a known Precision value
+     */
+    public get ArrayType(): TypedArrayConstructor<FType[T]['TArray']> {
+        switch (this.precision) {
+            case Precision.HALF: return Uint16Array;
+            case Precision.SINGLE: return Float32Array;
+            case Precision.DOUBLE: return Float64Array;
+        }
+        // @ts-ignore
+        throw new Error(`Unrecognized ${this[Symbol.toStringTag]} type`);
+    }
+    /** Renders `Float` followed by `precision << 5`, or 16 when that is 0. */
+    public toString() { return `Float${(this.precision << 5) || 16}`; }
+    /** Runs once at class definition: seeds prototype defaults and the 'Float' label. */
+    protected static [Symbol.toStringTag] = ((proto: Float) => {
+        (<any> proto).precision = null;
+        return proto[Symbol.toStringTag] = 'Float';
+    })(Float.prototype);
+}
+/** Half-precision float type. @ignore */
+export class Float16 extends Float<Type.Float16> {
+    constructor() {
+        super(Precision.HALF);
+    }
+}
+/** Single-precision float type. @ignore */
+export class Float32 extends Float<Type.Float32> {
+    constructor() {
+        super(Precision.SINGLE);
+    }
+}
+/** Double-precision float type. @ignore */
+export class Float64 extends Float<Type.Float64> {
+    constructor() {
+        super(Precision.DOUBLE);
+    }
+}
+// Pin a fixed `ArrayType` value on each concrete prototype, shadowing the
+// computed getter inherited from Float.
+for (const [ctor, value] of [
+    [Float16, Uint16Array],
+    [Float32, Float32Array],
+    [Float64, Float64Array],
+] as [{ prototype: object }, unknown][]) {
+    Object.defineProperty(ctor.prototype, 'ArrayType', { value });
+}
+/** @ignore */
+export interface Binary extends DataType<Type.Binary> { TArray: Uint8Array; TValue: Uint8Array; ArrayType: TypedArrayConstructor<Uint8Array> }
+/**
+ * Binary data type: values are Uint8Array and the backing array is Uint8Array.
+ * @ignore
+ */
+export class Binary extends DataType<Type.Binary> {
+    constructor() {
+        super();
+    }
+    public get typeId() { return Type.Binary as Type.Binary; }
+    public toString() { return `Binary`; }
+    /** Runs once at class definition: seeds the prototype `ArrayType` and the 'Binary' label. */
+    protected static [Symbol.toStringTag] = ((proto: Binary) => {
+        (<any> proto).ArrayType = Uint8Array;
+        return proto[Symbol.toStringTag] = 'Binary';
+    })(Binary.prototype);
+}
+/** @ignore */
+export interface Utf8 extends DataType<Type.Utf8> { TArray: Uint8Array; TValue: string; ArrayType: TypedArrayConstructor<Uint8Array> }
+/**
+ * Utf8 data type: values are strings and the backing array is Uint8Array.
+ * @ignore
+ */
+export class Utf8 extends DataType<Type.Utf8> {
+    constructor() {
+        super();
+    }
+    public get typeId() { return Type.Utf8 as Type.Utf8; }
+    public toString() { return `Utf8`; }
+    /** Runs once at class definition: seeds the prototype `ArrayType` and the 'Utf8' label. */
+    protected static [Symbol.toStringTag] = ((proto: Utf8) => {
+        (<any> proto).ArrayType = Uint8Array;
+        return proto[Symbol.toStringTag] = 'Utf8';
+    })(Utf8.prototype);
+}
+/** @ignore */
+export interface Bool extends DataType<Type.Bool> { TArray: Uint8Array; TValue: boolean; ArrayType: TypedArrayConstructor<Uint8Array> }
+/**
+ * Boolean data type: values are booleans and the backing array is Uint8Array.
+ * @ignore
+ */
+export class Bool extends DataType<Type.Bool> {
+    constructor() {
+        super();
+    }
+    public get typeId() { return Type.Bool as Type.Bool; }
+    public toString() { return `Bool`; }
+    /** Runs once at class definition: seeds the prototype `ArrayType` and the 'Bool' label. */
+    protected static [Symbol.toStringTag] = ((proto: Bool) => {
+        (<any> proto).ArrayType = Uint8Array;
+        return proto[Symbol.toStringTag] = 'Bool';
+    })(Bool.prototype);
+}
+/** @ignore */
+export interface Decimal extends DataType<Type.Decimal> { TArray: Uint32Array; TValue: Uint32Array; ArrayType: TypedArrayConstructor<Uint32Array> }
+/**
+ * Decimal data type described by a scale and a precision; values and the
+ * backing array are Uint32Array.
+ * @ignore
+ */
+export class Decimal extends DataType<Type.Decimal> {
+    /**
+     * @param scale     the decimal scale
+     * @param precision the decimal precision
+     */
+    constructor(public readonly scale: number,
+                public readonly precision: number) {
+        super();
+    }
+    public get typeId() { return Type.Decimal as Type.Decimal; }
+    /** Renders `Decimal[<precision>e<scale>]`, with an explicit `+` before a positive scale. */
+    public toString() { return `Decimal[${this.precision}e${this.scale > 0 ? `+` : ``}${this.scale}]`; }
+    /** Runs once at class definition: seeds prototype defaults and the 'Decimal' label. */
+    protected static [Symbol.toStringTag] = ((proto: Decimal) => {
+        (<any> proto).scale = null;
+        (<any> proto).precision = null;
+        (<any> proto).ArrayType = Uint32Array;
+        return proto[Symbol.toStringTag] = 'Decimal';
+    })(Decimal.prototype);
+}
+/** Type ids that a Date data type may carry. @ignore */
+export type Dates = Type.Date | Type.DateDay | Type.DateMillisecond;
+/** @ignore */
+export interface Date_<T extends Dates = Dates> extends DataType<T> { TArray: Int32Array; TValue: Date; ArrayType: TypedArrayConstructor<Int32Array> }
+/**
+ * Date data type described by its unit; values are JS `Date` objects and the
+ * backing array is Int32Array.
+ * @ignore
+ */
+export class Date_<T extends Dates = Dates> extends DataType<T> {
+    /** @param unit the DateUnit of the stored values */
+    constructor(public readonly unit: DateUnit) {
+        super();
+    }
+    public get typeId() { return Type.Date as T; }
+    /** Renders `Date<(unit + 1) * 32><UNIT_NAME>`. */
+    public toString() { return `Date${(this.unit + 1) * 32}<${DateUnit[this.unit]}>`; }
+    /** Runs once at class definition: seeds prototype defaults and the 'Date' label. */
+    protected static [Symbol.toStringTag] = ((proto: Date_) => {
+        (<any> proto).unit = null;
+        (<any> proto).ArrayType = Int32Array;
+        return proto[Symbol.toStringTag] = 'Date';
+    })(Date_.prototype);
+}
+/** Date type with the DAY unit. @ignore */
+export class DateDay extends Date_<Type.DateDay> {
+    constructor() {
+        super(DateUnit.DAY);
+    }
+}
+/** Date type with the MILLISECOND unit. @ignore */
+export class DateMillisecond extends Date_<Type.DateMillisecond> {
+    constructor() {
+        super(DateUnit.MILLISECOND);
+    }
+}
+/** Type ids that a Time data type may carry. @ignore */
+type Times = Type.Time | Type.TimeSecond | Type.TimeMillisecond | Type.TimeMicrosecond | Type.TimeNanosecond;
+/**
+ * Per-type-id lookup of the unit and element value type of each Time type.
+ * Second and millisecond values are numbers; microsecond and nanosecond
+ * values are Int32Array.
+ * @ignore
+ */
+type TimesType = {
+    [Type.Time           ]: { unit: TimeUnit; TValue: number | Int32Array };
+    [Type.TimeSecond     ]: { unit: TimeUnit.SECOND; TValue: number };
+    [Type.TimeMillisecond]: { unit: TimeUnit.MILLISECOND; TValue: number };
+    [Type.TimeMicrosecond]: { unit: TimeUnit.MICROSECOND; TValue: Int32Array };
+    [Type.TimeNanosecond ]: { unit: TimeUnit.NANOSECOND; TValue: Int32Array };
+};
+/** @ignore */
+interface Time_<T extends Times = Times> extends DataType<T> { TArray: Int32Array; TValue: TimesType[T]['TValue']; ArrayType: TypedArrayConstructor<Int32Array> }
+/**
+ * Time data type described by its unit and bit width; the backing array is
+ * Int32Array. Exported under the public name `Time`.
+ * @ignore
+ */
+class Time_<T extends Times = Times> extends DataType<T> {
+    /**
+     * @param unit     the TimeUnit of the stored values
+     * @param bitWidth 32 or 64
+     */
+    constructor(public readonly unit: TimesType[T]['unit'],
+                public readonly bitWidth: TimeBitWidth) {
+        super();
+    }
+    public get typeId() { return Type.Time as T; }
+    /** Renders `Time<bitWidth><UNIT_NAME>`. */
+    public toString() { return `Time${this.bitWidth}<${TimeUnit[this.unit]}>`; }
+    /** Runs once at class definition: seeds prototype defaults and the 'Time' label. */
+    protected static [Symbol.toStringTag] = ((proto: Time_) => {
+        (<any> proto).unit = null;
+        (<any> proto).bitWidth = null;
+        (<any> proto).ArrayType = Int32Array;
+        return proto[Symbol.toStringTag] = 'Time';
+    })(Time_.prototype);
+}
+export { Time_ as Time };
+/** 32-bit Time type with the SECOND unit. @ignore */
+export class TimeSecond extends Time_<Type.TimeSecond> {
+    constructor() {
+        super(TimeUnit.SECOND, 32);
+    }
+}
+/** 32-bit Time type with the MILLISECOND unit. @ignore */
+export class TimeMillisecond extends Time_<Type.TimeMillisecond> {
+    constructor() {
+        super(TimeUnit.MILLISECOND, 32);
+    }
+}
+/** 64-bit Time type with the MICROSECOND unit. @ignore */
+export class TimeMicrosecond extends Time_<Type.TimeMicrosecond> {
+    constructor() {
+        super(TimeUnit.MICROSECOND, 64);
+    }
+}
+/** 64-bit Time type with the NANOSECOND unit. @ignore */
+export class TimeNanosecond extends Time_<Type.TimeNanosecond> {
+    constructor() {
+        super(TimeUnit.NANOSECOND, 64);
+    }
+}
+/** Type ids that a Timestamp data type may carry. @ignore */
+type Timestamps = Type.Timestamp | Type.TimestampSecond | Type.TimestampMillisecond | Type.TimestampMicrosecond | Type.TimestampNanosecond;
+/** @ignore */
+interface Timestamp_<T extends Timestamps = Timestamps> extends DataType<T> { TArray: Int32Array; TValue: number; ArrayType: TypedArrayConstructor<Int32Array> }
+/**
+ * Timestamp data type described by its unit and an optional timezone;
+ * values are numbers and the backing array is Int32Array. Exported under the
+ * public name `Timestamp`.
+ * @ignore
+ */
+class Timestamp_<T extends Timestamps = Timestamps> extends DataType<T> {
+    /**
+     * @param unit     the TimeUnit of the stored values
+     * @param timezone optional timezone label, shown by `toString` when truthy
+     */
+    constructor(public readonly unit: TimeUnit,
+                public readonly timezone?: string | null) {
+        super();
+    }
+    public get typeId() { return Type.Timestamp as T; }
+    /** Renders `Timestamp<UNIT_NAME>` or `Timestamp<UNIT_NAME, timezone>`. */
+    public toString() { return `Timestamp<${TimeUnit[this.unit]}${this.timezone ? `, ${this.timezone}` : ``}>`; }
+    /** Runs once at class definition: seeds prototype defaults and the 'Timestamp' label. */
+    protected static [Symbol.toStringTag] = ((proto: Timestamp_) => {
+        (<any> proto).unit = null;
+        (<any> proto).timezone = null;
+        (<any> proto).ArrayType = Int32Array;
+        return proto[Symbol.toStringTag] = 'Timestamp';
+    })(Timestamp_.prototype);
+}
+export { Timestamp_ as Timestamp };
+/** Timestamp type with the SECOND unit. @ignore */
+export class TimestampSecond extends Timestamp_<Type.TimestampSecond> {
+    constructor(timezone?: string | null) {
+        super(TimeUnit.SECOND, timezone);
+    }
+}
+/** Timestamp type with the MILLISECOND unit. @ignore */
+export class TimestampMillisecond extends Timestamp_<Type.TimestampMillisecond> {
+    constructor(timezone?: string | null) {
+        super(TimeUnit.MILLISECOND, timezone);
+    }
+}
+/** Timestamp type with the MICROSECOND unit. @ignore */
+export class TimestampMicrosecond extends Timestamp_<Type.TimestampMicrosecond> {
+    constructor(timezone?: string | null) {
+        super(TimeUnit.MICROSECOND, timezone);
+    }
+}
+/** Timestamp type with the NANOSECOND unit. @ignore */
+export class TimestampNanosecond extends Timestamp_<Type.TimestampNanosecond> {
+    constructor(timezone?: string | null) {
+        super(TimeUnit.NANOSECOND, timezone);
+    }
+}
+/** Type ids that an Interval data type may carry. @ignore */
+type Intervals = Type.Interval | Type.IntervalDayTime | Type.IntervalYearMonth;
+/** @ignore */
+interface Interval_<T extends Intervals = Intervals> extends DataType<T> { TArray: Int32Array; TValue: Int32Array; ArrayType: TypedArrayConstructor<Int32Array> }
+/**
+ * Interval data type described by its unit; values and the backing array
+ * are Int32Array. Exported under the public name `Interval`.
+ * @ignore
+ */
+class Interval_<T extends Intervals = Intervals> extends DataType<T> {
+    /** @param unit the IntervalUnit of the stored values */
+    constructor(public readonly unit: IntervalUnit) {
+        super();
+    }
+    public get typeId() { return Type.Interval as T; }
+    /** Renders `Interval<UNIT_NAME>`. */
+    public toString() { return `Interval<${IntervalUnit[this.unit]}>`; }
+    /** Runs once at class definition: seeds prototype defaults and the 'Interval' label. */
+    protected static [Symbol.toStringTag] = ((proto: Interval_) => {
+        (<any> proto).unit = null;
+        (<any> proto).ArrayType = Int32Array;
+        return proto[Symbol.toStringTag] = 'Interval';
+    })(Interval_.prototype);
+}
+export { Interval_ as Interval };
+/** Interval type with the DAY_TIME unit. @ignore */
+export class IntervalDayTime extends Interval_<Type.IntervalDayTime> {
+    constructor() {
+        super(IntervalUnit.DAY_TIME);
+    }
+}
+/** Interval type with the YEAR_MONTH unit. @ignore */
+export class IntervalYearMonth extends Interval_<Type.IntervalYearMonth> {
+    constructor() {
+        super(IntervalUnit.YEAR_MONTH);
+    }
+}
+/** @ignore */
+export interface List<T extends DataType = any> extends DataType<Type.List, { [0]: T }> { TArray: IterableArrayLike<T>; TValue: V<T> }
+/**
+ * Variable-length list data type with a single child field describing the
+ * element type.
+ * @ignore
+ */
+export class List<T extends DataType = any> extends DataType<Type.List, { [0]: T }> {
+    /** @param child the field describing the list's elements */
+    constructor(child: Field<T>) {
+        super();
+        this.children = [child];
+    }
+    public readonly children: Field<T>[];
+    public get typeId() { return Type.List as Type.List; }
+    /** Renders `List<elementType>`. */
+    public toString() { return `List<${this.valueType}>`; }
+    /** The element data type, taken from the single child field. */
+    public get valueType(): T { return this.children[0].type as T; }
+    /** The single child field. */
+    public get valueField(): Field<T> { return this.children[0] as Field<T>; }
+    /** Delegates to the element type's `ArrayType`. */
+    public get ArrayType(): T['ArrayType'] { return this.valueType.ArrayType; }
+    /** Runs once at class definition: seeds prototype defaults and the 'List' label. */
+    protected static [Symbol.toStringTag] = ((proto: List) => {
+        (<any> proto).children = null;
+        return proto[Symbol.toStringTag] = 'List';
+    })(List.prototype);
+}
+/** @ignore */
+export interface Struct<T extends { [key: string]: DataType } = any> extends DataType<Type.Struct> { TArray: IterableArrayLike<RowLike<T>>; TValue: RowLike<T>; dataTypes: T }
+/**
+ * Struct data type: an ordered set of named child fields. Values are
+ * row-like objects keyed by field name.
+ * @ignore
+ */
+export class Struct<T extends { [key: string]: DataType } = any> extends DataType<Type.Struct, T> {
+    public readonly children: Field<T[keyof T]>[];
+    /** @param children the struct's child fields, in order */
+    constructor(children: Field<T[keyof T]>[]) {
+        super();
+        this.children = children;
+    }
+    public get typeId() { return Type.Struct as Type.Struct; }
+    /** Renders `Struct<{name:type, ...}>` from the child fields. */
+    public toString() { return `Struct<{${this.children.map((f) => `${f.name}:${f.type}`).join(`, `)}}>`; }
+    /** Runs once at class definition: seeds prototype defaults and the 'Struct' label. */
+    protected static [Symbol.toStringTag] = ((proto: Struct) => {
+        (<any> proto).children = null;
+        return proto[Symbol.toStringTag] = 'Struct';
+    })(Struct.prototype);
+}
+/** Type ids that a Union data type may carry. @ignore */
+type Unions = Type.Union | Type.DenseUnion | Type.SparseUnion;
+/** @ignore */
+interface Union_<T extends Unions = Unions> extends DataType<T> { TArray: Int8Array; TValue: any; ArrayType: TypedArrayConstructor<Int8Array> }
+/**
+ * Union data type: a set of child fields plus the type ids that select
+ * among them. Exported under the public name `Union`.
+ * @ignore
+ */
+class Union_<T extends Unions = Unions> extends DataType<T> {
+    public readonly mode: UnionMode;
+    public readonly typeIds: Int32Array;
+    public readonly children: Field<any>[];
+    /** Lookup from a type id to its position in `typeIds`. */
+    public readonly typeIdToChildIndex: { [key: number]: number };
+    /**
+     * @param mode     the UnionMode of this union
+     * @param typeIds  the type ids; copied into a new Int32Array
+     * @param children the child fields
+     */
+    constructor(mode: UnionMode,
+                typeIds: number[] | Int32Array,
+                children: Field<any>[]) {
+        super();
+        this.mode = mode;
+        this.children = children;
+        this.typeIds = typeIds = Int32Array.from(typeIds);
+        // Prototype-less object so type ids never collide with Object.prototype keys.
+        this.typeIdToChildIndex = typeIds.reduce((typeIdToChildIndex, typeId, idx) => {
+            typeIdToChildIndex[typeId] = idx;
+            return typeIdToChildIndex;
+        }, Object.create(null) as { [key: number]: number });
+    }
+    public get typeId() { return Type.Union as T; }
+    /** Renders `<tag><typeA | typeB | ...>` from the child field types. */
+    public toString() {
+        return `${this[Symbol.toStringTag]}<${
+            this.children.map((x) => `${x.type}`).join(` | `)
+        }>`;
+    }
+    /** Runs once at class definition: seeds prototype defaults and the 'Union' label. */
+    protected static [Symbol.toStringTag] = ((proto: Union_) => {
+        (<any> proto).mode = null;
+        (<any> proto).typeIds = null;
+        (<any> proto).children = null;
+        (<any> proto).typeIdToChildIndex = null;
+        (<any> proto).ArrayType = Int8Array;
+        return proto[Symbol.toStringTag] = 'Union';
+    })(Union_.prototype);
+}
+export { Union_ as Union };
+/**
+ * Union type fixed to `UnionMode.Dense`.
+ * @ignore
+ */
+export class DenseUnion extends Union_<Type.DenseUnion> {
+    /**
+     * @param typeIds  the union's type ids
+     * @param children the union's child fields
+     */
+    constructor(typeIds: number[] | Int32Array, children: Field[]) {
+        super(UnionMode.Dense, typeIds, children);
+    }
+}
+/**
+ * Union type fixed to `UnionMode.Sparse`.
+ * @ignore
+ */
+export class SparseUnion extends Union_<Type.SparseUnion> {
+    /**
+     * @param typeIds  the union's type ids
+     * @param children the union's child fields
+     */
+    constructor(typeIds: number[] | Int32Array, children: Field[]) {
+        super(UnionMode.Sparse, typeIds, children);
+    }
+}
+/** @ignore */
+export interface FixedSizeBinary extends DataType<Type.FixedSizeBinary> { TArray: Uint8Array; TValue: Uint8Array; ArrayType: TypedArrayConstructor<Uint8Array> }
+/**
+ * Fixed-width binary data type; values and the backing array are Uint8Array.
+ * @ignore
+ */
+export class FixedSizeBinary extends DataType<Type.FixedSizeBinary> {
+    /** @param byteWidth number of bytes in every value */
+    constructor(public readonly byteWidth: number) {
+        super();
+    }
+    public get typeId() { return Type.FixedSizeBinary as Type.FixedSizeBinary; }
+    /** Renders `FixedSizeBinary[byteWidth]`. */
+    public toString() { return `FixedSizeBinary[${this.byteWidth}]`; }
+    /** Runs once at class definition: seeds prototype defaults and the 'FixedSizeBinary' label. */
+    protected static [Symbol.toStringTag] = ((proto: FixedSizeBinary) => {
+        (<any> proto).byteWidth = null;
+        (<any> proto).ArrayType = Uint8Array;
+        return proto[Symbol.toStringTag] = 'FixedSizeBinary';
+    })(FixedSizeBinary.prototype);
+}
+/** @ignore */
+export interface FixedSizeList<T extends DataType = any> extends DataType<Type.FixedSizeList> { TArray: IterableArrayLike<T['TArray']>; TValue: V<T> }
+/**
+ * Fixed-length list data type with a single child field describing the
+ * element type.
+ * @ignore
+ */
+export class FixedSizeList<T extends DataType = any> extends DataType<Type.FixedSizeList, { [0]: T }> {
+    public readonly children: Field<T>[];
+    /**
+     * @param listSize number of elements in every list value
+     * @param child    the field describing the list's elements
+     */
+    constructor(public readonly listSize: number, child: Field<T>) {
+        super();
+        this.children = [child];
+    }
+    public get typeId() { return Type.FixedSizeList as Type.FixedSizeList; }
+    /** The element data type, taken from the single child field. */
+    public get valueType(): T { return this.children[0].type as T; }
+    /** The single child field. */
+    public get valueField(): Field<T> { return this.children[0] as Field<T>; }
+    /** Delegates to the element type's `ArrayType`. */
+    public get ArrayType(): T['ArrayType'] { return this.valueType.ArrayType; }
+    /** Renders `FixedSizeList[listSize]<elementType>`. */
+    public toString() { return `FixedSizeList[${this.listSize}]<${this.valueType}>`; }
+    /** Runs once at class definition: seeds prototype defaults and the 'FixedSizeList' label. */
+    protected static [Symbol.toStringTag] = ((proto: FixedSizeList) => {
+        (<any> proto).children = null;
+        (<any> proto).listSize = null;
+        return proto[Symbol.toStringTag] = 'FixedSizeList';
+    })(FixedSizeList.prototype);
+}
+/** @ignore */
+export interface Map_<TKey extends DataType = any, TValue extends DataType = any> extends DataType<Type.Map> {
+    TArray: IterableArrayLike<Map<TKey['TValue'], TValue['TValue'] | null>>;
+    TChild: Struct<{ key: TKey; value: TValue }>;
+    TValue: MapLike<TKey, TValue>;
+}
+/**
+ * Map data type: its single child is a Struct field whose first child is the
+ * key and whose second child is the value.
+ * @ignore
+ */
+export class Map_<TKey extends DataType = any, TValue extends DataType = any> extends DataType<Type.Map> {
+    /**
+     * @param child      the `{ key, value }` Struct field describing entries
+     * @param keysSorted flag stored on the type; defaults to false
+     */
+    constructor(child: Field<Struct<{ key: TKey; value: TValue }>>, keysSorted = false) {
+        super();
+        this.children = [child];
+        this.keysSorted = keysSorted;
+    }
+    public readonly keysSorted: boolean;
+    public readonly children: Field<Struct<{ key: TKey; value: TValue }>>[];
+    public get typeId() { return Type.Map as Type.Map; }
+    /** Type of the entry struct's first child. */
+    public get keyType(): TKey { return this.children[0].type.children[0].type as TKey; }
+    /** Type of the entry struct's second child. */
+    public get valueType(): TValue { return this.children[0].type.children[1].type as TValue; }
+    /** Renders `Map<{name:type, ...}>` from the entry struct's child fields. */
+    public toString() { return `Map<{${this.children[0].type.children.map((f) => `${f.name}:${f.type}`).join(`, `)}}>`; }
+    /** Runs once at class definition: seeds prototype defaults and the 'Map_' label. */
+    protected static [Symbol.toStringTag] = ((proto: Map_) => {
+        (<any> proto).children = null;
+        (<any> proto).keysSorted = null;
+        return proto[Symbol.toStringTag] = 'Map_';
+    })(Map_.prototype);
+}
+/**
+ * Returns the next id from a module-private counter: 0 on the first call,
+ * then 1, 2, ...
+ * @ignore
+ */
+const getId = (() => {
+    let atomicDictionaryId = -1;
+    return () => ++atomicDictionaryId;
+})();
+/** Integer types usable as dictionary index (key) types. @ignore */
+export type TKeys = Int8 | Int16 | Int32 | Uint8 | Uint16 | Uint32;
+/** @ignore */
+export interface Dictionary<T extends DataType = any, TKey extends TKeys = TKeys> extends DataType<Type.Dictionary> { TArray: TKey['TArray']; TValue: T['TValue'] }
+/**
+ * Dictionary-encoded data type: integer `indices` referring to values of the
+ * `dictionary` type.
+ * @ignore
+ */
+export class Dictionary<T extends DataType = any, TKey extends TKeys = TKeys> extends DataType<Type.Dictionary> {
+    public readonly id: number;
+    public readonly indices: TKey;
+    public readonly dictionary: T;
+    public readonly isOrdered: boolean;
+    /**
+     * @param dictionary the type of the dictionary's values
+     * @param indices    the integer type of the indices
+     * @param id         optional id; a fresh one is generated when null or
+     *                   undefined, and only the `low` word of a Long is kept
+     * @param isOrdered  stored as `false` when omitted or falsy
+     */
+    constructor(dictionary: T, indices: TKey, id?: Long | number | null, isOrdered?: boolean | null) {
+        super();
+        this.indices = indices;
+        this.dictionary = dictionary;
+        this.isOrdered = isOrdered || false;
+        this.id = id == null ? getId() : typeof id === 'number' ? id : id.low;
+    }
+    public get typeId() { return Type.Dictionary as Type.Dictionary; }
+    /** Delegates to the dictionary value type's children. */
+    public get children() { return this.dictionary.children; }
+    /** The dictionary value type. */
+    public get valueType(): T { return this.dictionary as T; }
+    /** Delegates to the dictionary value type's `ArrayType`. */
+    public get ArrayType(): T['ArrayType'] { return this.dictionary.ArrayType; }
+    /** Renders `Dictionary<indicesType, valueType>`. */
+    public toString() { return `Dictionary<${this.indices}, ${this.dictionary}>`; }
+    /** Runs once at class definition: seeds prototype defaults and the 'Dictionary' label. */
+    protected static [Symbol.toStringTag] = ((proto: Dictionary) => {
+        (<any> proto).id = null;
+        (<any> proto).indices = null;
+        (<any> proto).isOrdered = null;
+        (<any> proto).dictionary = null;
+        return proto[Symbol.toStringTag] = 'Dictionary';
+    })(Dictionary.prototype);
+}
+/** An array-like (indexable, with `length`) that can also be iterated. @ignore */
+export interface IterableArrayLike<T = any> extends ArrayLike<T>, Iterable<T> {}
+/** Typed arrays that may back a Float type; half precision uses Uint16Array. @ignore */
+export type FloatArray = Uint16Array | Float32Array | Float64Array;
+/** Typed arrays that may back an Int type; there is no 64-bit member. @ignore */
+export type IntArray = Int8Array | Int16Array | Int32Array | Uint8Array | Uint16Array | Uint32Array;
+/**
+ * Returns the stride associated with a data type, selected by `typeId`.
+ * Presumably this is the number of backing-array elements per logical value
+ * (TODO confirm against callers).
+ *
+ * - Decimal: 4; Timestamp: 2
+ * - Date / Interval: 1 + the numeric value of `unit`
+ * - Int / Time: 2 when `bitWidth` exceeds 32, otherwise 1
+ * - FixedSizeList: `listSize`; FixedSizeBinary: `byteWidth`
+ * - anything else: 1
+ * @ignore
+ */
+export function strideForType(type: DataType) {
+    const t: any = type;
+    switch (type.typeId) {
+        case Type.Decimal: return 4;
+        case Type.Timestamp: return 2;
+        case Type.Date: return 1 + (t as Date_).unit;
+        case Type.Interval: return 1 + (t as Interval_).unit;
+        // Unary plus turns the boolean comparison into 0 or 1.
+        case Type.Int: return 1 + +((t as Int_).bitWidth > 32);
+        case Type.Time: return 1 + +((t as Time_).bitWidth > 32);
+        case Type.FixedSizeList: return (t as FixedSizeList).listSize;
+        case Type.FixedSizeBinary: return (t as FixedSizeBinary).byteWidth;
+        default: return 1;
+    }
+}
diff --git a/src/arrow/js/src/util/args.ts b/src/arrow/js/src/util/args.ts
new file mode 100644
index 000000000..25f571999
--- /dev/null
+++ b/src/arrow/js/src/util/args.ts
@@ -0,0 +1,196 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+import { Data } from '../data';
+import { Field } from '../schema';
+import { Column } from '../column';
+import { Vector } from '../vector';
+import { DataType, Float32, Float64, FloatArray, IntArray, Int16, Int32, Int64, Int8, Uint16, Uint32, Uint64, Uint8 } from '../type';
+import { Chunked } from '../vector/chunked';
+import { BigIntArray, TypedArray as TypedArray_ } from '../interfaces';
+import { FloatArrayCtor } from '../vector/float';
+import { IntArrayCtor } from '../vector/int';
+type RecordBatchCtor = typeof import('../recordbatch').RecordBatch; // type of the RecordBatch constructor, referenced through a type-level import
+const isArray = Array.isArray; // local alias for Array.isArray
+type TypedArray = Exclude<TypedArray_ | BigIntArray, Uint8ClampedArray>; // typed arrays including BigInt arrays, minus Uint8ClampedArray
+/**
+ * Type guard for typed arrays: true when `arr` is an ArrayBuffer view that
+ * exposes `BYTES_PER_ELEMENT` (which excludes DataView).
+ * @ignore
+ */
+export function isTypedArray(arr: any): arr is TypedArray {
+    return ArrayBuffer.isView(arr) && 'BYTES_PER_ELEMENT' in arr;
+}
+/** Typed-array constructors accepted by `arrayTypeToDataType`. @ignore */
+type ArrayCtor = FloatArrayCtor | IntArrayCtor;
+/**
+ * Maps a typed-array constructor to the matching Arrow data type class.
+ *
+ * @param ctor a typed-array constructor
+ * @returns the data type class (not an instance), or `null` when `ctor` is
+ *          not one of the listed constructors
+ * @ignore
+ */
+export function arrayTypeToDataType(ctor: ArrayCtor) {
+    switch (ctor) {
+        case Int8Array: return Int8;
+        case Int16Array: return Int16;
+        case Int32Array: return Int32;
+        case BigInt64Array: return Int64;
+        case Uint8Array: return Uint8;
+        case Uint16Array: return Uint16;
+        case Uint32Array: return Uint32;
+        case BigUint64Array: return Uint64;
+        case Float32Array: return Float32;
+        case Float64Array: return Float64;
+        default: return null;
+    }
+}
+/**
+ * Wraps a typed array in a Vector whose data type is chosen from the array's
+ * constructor.
+ *
+ * @param array the typed array to wrap; it is passed through as the values buffer
+ * @returns a Vector over `array`
+ * @throws TypeError when the array's constructor has no matching data type
+ * @ignore
+ */
+function vectorFromTypedArray(array: TypedArray): Vector {
+    const ArrowType = arrayTypeToDataType(array.constructor as ArrayCtor);
+    if (!ArrowType) {
+        throw new TypeError('Unrecognized Array input');
+    }
+    const type = new ArrowType();
+    // Offset 0, full length, null count 0; the first buffer slot is left undefined.
+    const data = Data.new(type, 0, array.length, 0, [undefined, array as IntArray | FloatArray]);
+    return Vector.new(data);
+}
+/**
+ * Collects, from a possibly nested argument list, every value that is an
+ * instance of `Ctor`.
+ * @ignore
+ */
+export const selectArgs = <T>(Ctor: any, vals: any[]) => {
+    return _selectArgs(Ctor, vals, [], 0) as T[];
+};
+/**
+ * Normalizes a flexible argument list into Columns.
+ *
+ * Fields and values are first paired up by `_selectFieldArgs`; each value is
+ * then wrapped according to its kind:
+ * - Column: re-created with its field cloned from the selected field
+ * - Vector: wrapped in a Column for the selected field
+ * - typed array: converted to a Vector, then wrapped
+ * - anything else: an empty Column for the selected field
+ * @ignore
+ */
+export const selectColumnArgs = <T extends { [key: string]: DataType }>(args: any[]) => {
+    const [fields, values] = _selectFieldArgs<T>(args, [[], []]);
+    return values.map((x, i) =>
+        x instanceof Column ? Column.new(x.field.clone(fields[i]), x) :
+        x instanceof Vector ? Column.new(fields[i], x) as Column<T[keyof T]> :
+        isTypedArray(x) ? Column.new(fields[i], vectorFromTypedArray(x)) as Column<T[keyof T]> :
+        Column.new(fields[i], [] as Vector<T[keyof T]>[]));
+};
+/**
+ * Pairs up fields and values from a flexible argument list; see `_selectFieldArgs`.
+ * @ignore
+ */
+export const selectFieldArgs = <T extends { [key: string]: DataType }>(args: any[]) => {
+    return _selectFieldArgs<T>(args, [[], []]);
+};
+/**
+ * Collects `Ctor` instances from a nested argument list, expanding Chunked
+ * values into their chunks.
+ * @ignore
+ */
+export const selectChunkArgs = <T>(Ctor: any, vals: any[]) => {
+    return _selectChunkArgs(Ctor, vals, [], 0) as T[];
+};
+/**
+ * Collects Vectors from a nested argument list, expanding `Ctor` instances
+ * into their child Vectors.
+ * @ignore
+ */
+export const selectVectorChildrenArgs = <T extends Vector>(Ctor: RecordBatchCtor, vals: any[]) => {
+    return _selectVectorChildrenArgs(Ctor, vals, [], 0) as T[];
+};
+/**
+ * Collects Columns from a nested argument list, expanding `Ctor` instances
+ * into one Column per child.
+ * @ignore
+ */
+export const selectColumnChildrenArgs = <T extends Column>(Ctor: RecordBatchCtor, vals: any[]) => {
+    return _selectColumnChildrenArgs(Ctor, vals, [], 0) as T[];
+};
+/**
+ * Depth-first walk over `vals` that appends every `Ctor` instance to `res`.
+ *
+ * @param Ctor constructor to match with `instanceof`
+ * @param vals values to scan; nested arrays are scanned recursively
+ * @param res  output array, mutated in place
+ * @param idx  position in `res` at which to start writing
+ * @returns `res`
+ * @ignore
+ */
+function _selectArgs<T>(Ctor: any, vals: any[], res: T[], idx: number) {
+    let value: any, j = idx;
+    let i = -1;
+    const n = vals.length;
+    while (++i < n) {
+        if (isArray(value = vals[i])) {
+            // The recursive call writes into `res`; resume after what it added.
+            j = _selectArgs(Ctor, value, res, j).length;
+        } else if (value instanceof Ctor) { res[j++] = value; }
+    }
+    return res;
+}
+/**
+ * Like `_selectArgs`, but a Chunked value is expanded into its `chunks`
+ * before matching.
+ *
+ * @param Ctor constructor to match with `instanceof`
+ * @param vals values to scan; nested arrays are scanned recursively
+ * @param res  output array, mutated in place
+ * @param idx  position in `res` at which to start writing
+ * @returns `res`
+ * @ignore
+ */
+function _selectChunkArgs<T>(Ctor: any, vals: any[], res: T[], idx: number) {
+    let value: any, j = idx;
+    let i = -1;
+    const n = vals.length;
+    while (++i < n) {
+        if (isArray(value = vals[i])) {
+            j = _selectChunkArgs(Ctor, value, res, j).length;
+        } else if (value instanceof Chunked) {
+            j = _selectChunkArgs(Ctor, value.chunks, res, j).length;
+        } else if (value instanceof Ctor) { res[j++] = value; }
+    }
+    return res;
+}
+/**
+ * Walks `vals` and appends Vectors to `res`. A `Ctor` instance contributes
+ * one child Vector per schema field; a plain Vector is appended as is.
+ *
+ * @param Ctor constructor whose instances are expanded into child Vectors
+ * @param vals values to scan; nested arrays are scanned recursively
+ * @param res  output array, mutated in place
+ * @param idx  position in `res` at which to start writing
+ * @returns `res`
+ * @ignore
+ */
+function _selectVectorChildrenArgs<T extends Vector>(Ctor: RecordBatchCtor, vals: any[], res: T[], idx: number) {
+    let value: any, j = idx;
+    let i = -1;
+    const n = vals.length;
+    while (++i < n) {
+        if (isArray(value = vals[i])) {
+            j = _selectVectorChildrenArgs(Ctor, value, res, j).length;
+        } else if (value instanceof Ctor) {
+            j = _selectArgs(Vector, value.schema.fields.map((_, i) => value.getChildAt(i)!), res, j).length;
+        } else if (value instanceof Vector) { res[j++] = value as T; }
+    }
+    return res;
+}
+/**
+ * Walks `vals` and appends Columns to `res`. A `Ctor` instance contributes
+ * one Column per schema field, pairing the field with the child at the same
+ * position; a plain Column is appended as is.
+ *
+ * @param Ctor constructor whose instances are expanded into Columns
+ * @param vals values to scan; nested arrays are scanned recursively
+ * @param res  output array, mutated in place
+ * @param idx  position in `res` at which to start writing
+ * @returns `res`
+ * @ignore
+ */
+function _selectColumnChildrenArgs<T extends Column>(Ctor: RecordBatchCtor, vals: any[], res: T[], idx: number) {
+    let value: any, j = idx;
+    let i = -1;
+    const n = vals.length;
+    while (++i < n) {
+        if (isArray(value = vals[i])) {
+            j = _selectColumnChildrenArgs(Ctor, value, res, j).length;
+        } else if (value instanceof Ctor) {
+            j = _selectArgs(Column, value.schema.fields.map((f, i) => Column.new(f, value.getChildAt(i)!)), res, j).length;
+        } else if (value instanceof Column) { res[j++] = value as T; }
+    }
+    return res;
+}
+/**
+ * Reducer that splits `[key, value]` entries into two parallel arrays:
+ * the key goes to `xs[0][i]`, the value to `xs[1][i]`, and `xs` is returned.
+ * @ignore
+ */
+const toKeysAndValues = (xs: [any[], any[]], [k, v]: [any, any], i: number) => {
+    xs[0][i] = k;
+    xs[1][i] = v;
+    return xs;
+};
+/**
+ * Pairs up fields and values from a flexible argument list.
+ *
+ * Accepted shapes, as handled below:
+ * - no arguments: `ret` is returned untouched
+ * - one argument: a falsy value returns `ret`; an array is unpacked and
+ *   processed recursively; a plain object (not a Data, Vector, typed array
+ *   or DataType) is split into its keys and values
+ * - several arguments: a trailing array is taken as the list of names,
+ *   and a leading array is taken as the list of values
+ *
+ * Each value then yields a field: Columns clone their own field, DataTypes
+ * and values with a `type` get a new nullable Field named by the matching
+ * key, falling back to the positional index. Data values are converted to
+ * Vectors. Values that match none of these are skipped.
+ *
+ * @param vals the raw arguments
+ * @param ret  `[fields, values]` accumulator, mutated in place
+ * @returns `ret`
+ * @ignore
+ */
+function _selectFieldArgs<T extends { [key: string]: DataType }>(vals: any[], ret: [Field<T[keyof T]>[], (Vector<T[keyof T]> | TypedArray)[]]): [Field<T[keyof T]>[], (T[keyof T] | Vector<T[keyof T]> | TypedArray)[]] {
+    let keys: any[];
+    let n: number;
+    switch (n = vals.length) {
+        case 0: return ret;
+        case 1:
+            keys = ret[0];
+            if (!(vals[0])) { return ret; }
+            if (isArray(vals[0])) { return _selectFieldArgs(vals[0], ret); }
+            if (!(vals[0] instanceof Data || vals[0] instanceof Vector || isTypedArray(vals[0]) || vals[0] instanceof DataType)) {
+                // Plain object: its entries become the keys and values.
+                [keys, vals] = Object.entries(vals[0]).reduce(toKeysAndValues, ret);
+            }
+            break;
+        default:
+            !isArray(keys = vals[n - 1])
+                ? (vals = isArray(vals[0]) ? vals[0] : vals, keys = [])
+                : (vals = isArray(vals[0]) ? vals[0] : vals.slice(0, n - 1));
+    }
+    let fieldIndex = -1;
+    let valueIndex = -1;
+    let idx = -1;
+    const len = vals.length;
+    let field: number | string | Field<T[keyof T]>;
+    let val: Vector<T[keyof T]> | Data<T[keyof T]>;
+    const [fields, values] = ret as [Field<T[keyof T]>[], any[]];
+    while (++idx < len) {
+        val = vals[idx];
+        if (val instanceof Column && (values[++valueIndex] = val)) {
+            fields[++fieldIndex] = val.field.clone(keys[idx], val.type, true);
+        } else {
+            // Use the key at this position, or the position itself when absent.
+            ({ [idx]: field = idx } = keys);
+            if (val instanceof DataType && (values[++valueIndex] = val)) {
+                fields[++fieldIndex] = Field.new(field, val as DataType, true) as Field<T[keyof T]>;
+            } else if (val?.type && (values[++valueIndex] = val)) {
+                val instanceof Data && (values[valueIndex] = val = Vector.new(val) as Vector);
+                fields[++fieldIndex] = Field.new(field, val.type, true) as Field<T[keyof T]>;
+            }
+        }
+    }
+    return ret;
+}
diff --git a/src/arrow/js/src/util/bit.ts b/src/arrow/js/src/util/bit.ts
new file mode 100644
index 000000000..e4c3d267e
--- /dev/null
+++ b/src/arrow/js/src/util/bit.ts
@@ -0,0 +1,161 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+/**
+ * Test a single bit of `byte`. The signature matches the `get` callback of `BitIterator`.
+ * @param _data Unused.
+ * @param _index Unused.
+ * @param byte The byte value to inspect.
+ * @param bit Position of the bit inside `byte`, used as the shift amount.
+ * @returns `true` when that bit is set, `false` otherwise.
+ * @ignore
+ */
+export function getBool(_data: any, _index: number, byte: number, bit: number) {
+ return (byte & 1 << bit) !== 0; // `<<` binds tighter than `&`, so this is byte & (1 << bit)
+/**
+ * Read a single bit of `byte` as a number. The signature matches the `get` callback of `BitIterator`.
+ * @param _data Unused.
+ * @param _index Unused.
+ * @param byte The byte value to inspect.
+ * @param bit Position of the bit inside `byte`, used as the shift amount.
+ * @returns The masked bit shifted back down to position 0, typed as `0 | 1`.
+ * @ignore
+ */
+export function getBit(_data: any, _index: number, byte: number, bit: number): 0 | 1 {
+ return (byte & 1 << bit) >> bit as (0 | 1); // mask the bit, then shift it back down to position 0
+/**
+ * Set or clear bit number `index` of the bitmap `bytes`, in place.
+ * The byte is `bytes[index >> 3]` and the bit within it is `index % 8`.
+ * @param bytes The bitmap to modify.
+ * @param index The bit position, counted from the start of `bytes`.
+ * @param value Truthy sets the bit, falsy clears it.
+ * @returns `true` when `value` is truthy, `false` otherwise.
+ * @ignore
+ */
+export function setBool(bytes: Uint8Array, index: number, value: any) {
+ return value ?
+ !!(bytes[index >> 3] |= (1 << (index % 8))) || true : // set the bit; `|| true` pins the result to true
+ !(bytes[index >> 3] &= ~(1 << (index % 8))) && false ; // clear the bit; `&& false` pins the result to false
+/**
+ * Return a bitmap that starts at bit `offset` of `bitmap` and whose byte length is a
+ * multiple of 8.
+ *
+ * When `offset` is 0 and `bitmap` is already a multiple of 8 bytes long, the input is
+ * returned as-is. Otherwise a new zero-filled array is allocated and filled from `offset`.
+ *
+ * @param offset Bit position at which the result should start.
+ * @param length Number of bits to read; only used when `offset` is not a multiple of 8.
+ * @param bitmap The source bitmap.
+ * @returns Either `bitmap` itself or a newly allocated Uint8Array.
+ * @ignore
+ */
+export function truncateBitmap(offset: number, length: number, bitmap: Uint8Array) {
+ const alignedSize = (bitmap.byteLength + 7) & ~7; // byteLength rounded up to a multiple of 8
+ if (offset > 0 || bitmap.byteLength < alignedSize) {
+ const bytes = new Uint8Array(alignedSize);
+ // If the offset is a multiple of 8 bits, it's safe to slice the bitmap
+ bytes.set(offset % 8 === 0 ? bitmap.subarray(offset >> 3) :
+ // Otherwise iterate each bit from the offset and return a new one
+ packBools(new BitIterator(bitmap, offset, length, null, getBool)).subarray(0, alignedSize));
+ return bytes;
+ }
+ return bitmap;
+/**
+ * Pack an iterable of truthy/falsy values into a bitmap, one bit per value, filling each
+ * byte from bit 0 upwards.
+ * @param values The values to pack; each is tested for truthiness.
+ * @returns A new Uint8Array whose length is the number of packed bytes rounded up to a
+ * multiple of 8. An empty input still produces one zero byte before rounding.
+ * @ignore
+ */
+export function packBools(values: Iterable<any>) {
+ const xs: number[] = [];
+ let i = 0, bit = 0, byte = 0;
+ for (const value of values) {
+ value && (byte |= 1 << bit);
+ if (++bit === 8) { // byte is full: store it and start a new one
+ xs[i++] = byte;
+ byte = bit = 0;
+ }
+ }
+ if (i === 0 || bit > 0) { xs[i++] = byte; } // flush a partial byte, or emit a single byte when nothing was stored
+ const b = new Uint8Array((xs.length + 7) & ~7); // round the byte count up to a multiple of 8
+ b.set(xs);
+ return b;
+/**
+ * Iterator over `length` consecutive bits of a Uint8Array, starting at bit `begin`.
+ *
+ * For every bit it calls `get(context, index, byte, bit)` and yields the result, where
+ * `index` counts from 0, `byte` is the current byte value and `bit` is the position of
+ * the current bit inside that byte. `getBool` and `getBit` have this callback shape.
+ * @ignore
+ */
+export class BitIterator<T> implements IterableIterator<T> {
+ bit: number; // position of the next bit inside `byte`; 8 means the byte is used up
+ byte: number; // value of the byte currently being read
+ byteIndex: number; // index of the NEXT byte to load from `bytes`
+ index: number; // number of bits produced so far
+ constructor(
+ private bytes: Uint8Array,
+ begin: number,
+ private length: number,
+ private context: any,
+ private get: (context: any, index: number, byte: number, bit: number) => T
+ ) {
+ this.bit = begin % 8;
+ this.byteIndex = begin >> 3;
+ this.byte = bytes[this.byteIndex++]; // load the first byte and move the cursor past it
+ this.index = 0;
+ }
+ next(): IteratorResult<T> {
+ if (this.index < this.length) {
+ if (this.bit === 8) { // current byte used up: load the next one
+ this.bit = 0;
+ this.byte = this.bytes[this.byteIndex++];
+ }
+ return {
+ value: this.get(this.context, this.index++, this.byte, this.bit++)
+ };
+ }
+ return { done: true, value: null };
+ }
+ [Symbol.iterator]() {
+ return this;
+ }
+/**
+ * Compute the population count (the number of bits set to 1) for a range of bits in a Uint8Array.
+ * @param data The Uint8Array of bits for which to compute the population count.
+ * @param lhs The range's left-hand side (or start) bit
+ * @param rhs The range's right-hand side (or end) bit
+ */
+/**
+ * Count the bits set to 1 in the half-open bit range `[lhs, rhs)` of `data`.
+ *
+ * Ranges shorter than 8 bits are counted bit by bit. Longer ranges are split into an
+ * unaligned head, an unaligned tail (both shorter than 8 bits, handled by recursion) and
+ * a byte-aligned middle that is counted with `popcnt_array`.
+ * @returns The number of set bits, or 0 when the range is empty or reversed.
+ * @ignore
+ */
+export function popcnt_bit_range(data: Uint8Array, lhs: number, rhs: number): number {
+ if (rhs - lhs <= 0) { return 0; }
+ // If the bit range is less than one byte, sum the 1 bits in the bit range
+ if (rhs - lhs < 8) {
+ let sum = 0;
+ for (const bit of new BitIterator(data, lhs, rhs - lhs, data, getBit)) {
+ sum += bit;
+ }
+ return sum;
+ }
+ // Get the next lowest multiple of 8 from the right hand side
+ const rhsInside = rhs >> 3 << 3;
+ // Get the next highest multiple of 8 from the left hand side
+ const lhsInside = lhs + (lhs % 8 === 0 ? 0 : 8 - lhs % 8);
+ return (
+ // Get the popcnt of bits between the left hand side, and the next highest multiple of 8
+ popcnt_bit_range(data, lhs, lhsInside) +
+ // Get the popcnt of bits between the right hand side, and the next lowest multiple of 8
+ popcnt_bit_range(data, rhsInside, rhs) +
+ // Get the popcnt of all bits between the left and right hand sides' multiples of 8
+ popcnt_array(data, lhsInside >> 3, (rhsInside - lhsInside) >> 3)
+ );
+/**
+ * Count the bits set to 1 in a run of bytes of `arr`.
+ * @param arr The view whose bytes are counted.
+ * @param byteOffset First byte to count, relative to the start of `arr`; defaults to 0.
+ * @param byteLength Number of bytes to count; when omitted, counting runs to the end of `arr`.
+ * @returns The number of set bits.
+ * @ignore
+ */
+export function popcnt_array(arr: ArrayBufferView, byteOffset?: number, byteLength?: number) {
+ let cnt = 0, pos = byteOffset! | 0; // `| 0` turns an omitted offset into 0
+ const view = new DataView(arr.buffer, arr.byteOffset, arr.byteLength);
+ const len = byteLength === void 0 ? arr.byteLength : pos + byteLength; // end position, not a count
+ while (len - pos >= 4) { // 4 bytes at a time; byte order does not affect a bit count
+ cnt += popcnt_uint32(view.getUint32(pos));
+ pos += 4;
+ }
+ while (len - pos >= 2) {
+ cnt += popcnt_uint32(view.getUint16(pos));
+ pos += 2;
+ }
+ while (len - pos >= 1) {
+ cnt += popcnt_uint32(view.getUint8(pos));
+ pos += 1;
+ }
+ return cnt;
+/**
+ * Count the bits set to 1 in the low 32 bits of `uint32`, without looping over the bits:
+ * neighbouring bit groups are summed in place (pairs, then nibbles, then bytes) and the
+ * four byte sums are added together by the final multiplication.
+ * @param uint32 The value to inspect; it is truncated to 32 bits first.
+ * @returns The number of set bits, 0 to 32.
+ * @ignore
+ */
+export function popcnt_uint32(uint32: number): number {
+ let i = uint32 | 0;
+ i = i - ((i >>> 1) & 0x55555555); // each 2-bit group now holds the count of its two bits
+ i = (i & 0x33333333) + ((i >>> 2) & 0x33333333); // each 4-bit group holds the count of its four bits
+ return (((i + (i >>> 4)) & 0x0F0F0F0F) * 0x01010101) >>> 24; // per-byte counts, summed into the top byte
diff --git a/src/arrow/js/src/util/bn.ts b/src/arrow/js/src/util/bn.ts
new file mode 100644
index 000000000..7c71969a4
--- /dev/null
+++ b/src/arrow/js/src/util/bn.ts
@@ -0,0 +1,231 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+import { ArrayBufferViewInput, toArrayBufferView } from './buffer';
+import { TypedArray, TypedArrayConstructor } from '../interfaces';
+import { BigIntArray, BigIntArrayConstructor } from '../interfaces';
+import { BigIntAvailable, BigInt64Array, BigUint64Array } from './compat';
+/**
+ * Globally registered symbol (`Symbol.for`) that is set to `true` on `BigNum.prototype`,
+ * so big-number wrappers can be recognised by checking for this key.
+ * @ignore
+ */
+export const isArrowBigNumSymbol = Symbol.for('isArrowBigNum');
+/** @ignore */ type BigNumArray = IntArray | UintArray; // any integer typed array accepted as big-number storage
+/** @ignore */ type IntArray = Int8Array | Int16Array | Int32Array; // signed element types
+/** @ignore */ type UintArray = Uint8Array | Uint16Array | Uint32Array | Uint8ClampedArray; // unsigned element types
+/**
+ * Shared constructor body for the big-number wrappers. It builds a typed array of the
+ * kind named by `this['TypedArray']` and re-parents it onto `this.constructor.prototype`,
+ * so the returned object is a typed array that also carries the BigNum methods.
+ *
+ * With a single argument the input goes through `toArrayBufferView`; with more arguments
+ * they are passed straight to the typed-array constructor.
+ * @ignore
+ */
+function BigNum(this: any, x: any, ...xs: any) {
+ if (xs.length === 0) {
+ return Object.setPrototypeOf(toArrayBufferView(this['TypedArray'], x), this.constructor.prototype);
+ }
+ return Object.setPrototypeOf(new this['TypedArray'](x, ...xs), this.constructor.prototype);
+BigNum.prototype[isArrowBigNumSymbol] = true; // marker used to recognise big-number wrappers
+BigNum.prototype.toJSON = function<T extends BN<BigNumArray>>(this: T) { return `"${bignumToString(this)}"`; }; // decimal string wrapped in literal double quotes
+BigNum.prototype.valueOf = function<T extends BN<BigNumArray>>(this: T) { return bignumToNumber(this); };
+BigNum.prototype.toString = function<T extends BN<BigNumArray>>(this: T) { return bignumToString(this); };
+BigNum.prototype[Symbol.toPrimitive] = function<T extends BN<BigNumArray>>(this: T, hint: 'string' | 'number' | 'default' = 'default') {
+ switch (hint) {
+ case 'number': return bignumToNumber(this);
+ case 'string': return bignumToString(this);
+ case 'default': return bignumToBigInt(this);
+ }
+ // @ts-ignore
+ return bignumToString(this); // fallback for any hint not matched above
+/**
+ * The argument lists accepted by the big-number constructors: an optional length,
+ * an iterable of numbers or bigints, or a buffer with optional offset and length.
+ * @ignore
+ */
+type TypedArrayConstructorArgs =
+ [number | void] |
+ [Iterable<number> | Iterable<bigint>] |
+ [ArrayBufferLike, number | void, number | void] ;
+/**
+ * Signed big-number constructor; delegates to `BigNum`. Its prototype (configured below)
+ * selects Int32Array storage, BigInt64Array for bigint access, and `signed: true`.
+ * @ignore
+ */
+function SignedBigNum(this: any, ...args: TypedArrayConstructorArgs) { return BigNum.apply(this, args); }
+/** @ignore */
+function UnsignedBigNum(this: any, ...args: TypedArrayConstructorArgs) { return BigNum.apply(this, args); } // Uint32Array storage, `signed: false`
+/** @ignore */
+function DecimalBigNum(this: any, ...args: TypedArrayConstructorArgs) { return BigNum.apply(this, args); } // Uint32Array storage, but `signed: true`
+Object.setPrototypeOf(SignedBigNum.prototype, Object.create(Int32Array.prototype)); // inherit the typed-array methods
+Object.setPrototypeOf(UnsignedBigNum.prototype, Object.create(Uint32Array.prototype));
+Object.setPrototypeOf(DecimalBigNum.prototype, Object.create(Uint32Array.prototype));
+Object.assign(SignedBigNum.prototype, BigNum.prototype, { 'constructor': SignedBigNum, 'signed': true, 'TypedArray': Int32Array, 'BigIntArray': BigInt64Array }); // copy the BigNum methods, then the per-kind settings
+Object.assign(UnsignedBigNum.prototype, BigNum.prototype, { 'constructor': UnsignedBigNum, 'signed': false, 'TypedArray': Uint32Array, 'BigIntArray': BigUint64Array });
+Object.assign(DecimalBigNum.prototype, BigNum.prototype, { 'constructor': DecimalBigNum, 'signed': true, 'TypedArray': Uint32Array, 'BigIntArray': BigUint64Array });
+/**
+ * Down-convert a big number to a JS `number`. Precision is lost once the value needs
+ * more than 53 significant bits.
+ *
+ * The storage is read as 32-bit words, least-significant word first, and word `i` is
+ * weighted by `2 ** (32 * i)`. Only the most significant word decides the sign: when
+ * `signed` is set and that word is negative, the two's-complement magnitude is
+ * accumulated and negated at the end. This keeps small negative values of any width
+ * exact (for example a 128-bit -1).
+ *
+ * For 64-bit inputs this returns the same results as before. The previous implementation
+ * weighted the high word of each pair by `i ** 32`, which is only correct for the first
+ * pair, never weighted the low word, and sign-extended every high word.
+ *
+ * @param bn The big number to convert.
+ * @returns The closest `number` to the stored integer; 0 for empty storage.
+ * @ignore
+ */
+function bignumToNumber<T extends BN<BigNumArray>>(bn: T) {
+    const { buffer, byteOffset, length, 'signed': signed } = bn;
+    const words = new Int32Array(buffer, byteOffset, length);
+    const n = words.length;
+    // Only the top word carries the sign of a two's-complement integer.
+    const negative = signed && n > 0 && words[n - 1] < 0;
+    let number = 0;
+    let carry = 1; // the "+1" of two's-complement negation, rippling upwards
+    for (let i = 0; i < n; ++i) {
+        let word = words[i] >>> 0;
+        if (negative) {
+            // Negate word by word: invert, add the carry, pass any overflow on.
+            word = (~word >>> 0) + carry;
+            carry = word > 0xFFFFFFFF ? 1 : 0;
+            word = word >>> 0;
+        }
+        number += word * (2 ** (32 * i));
+    }
+    return negative ? -number : number;
+}
+/**
+ * Convert a big number to its decimal string. Assigned below, depending on whether
+ * BigInt is available in this environment.
+ * @ignore
+ */
+export let bignumToString: { <T extends BN<BigNumArray>>(a: T): string };
+/**
+ * Convert a big number to a bigint where possible. Without BigInt support, and for
+ * inputs that are not exactly 8 bytes long, this yields the decimal string instead.
+ * @ignore
+ */
+export let bignumToBigInt: { <T extends BN<BigNumArray>>(a: T): bigint };
+if (!BigIntAvailable) {
+ bignumToString = decimalToString;
+ bignumToBigInt = <any> bignumToString; // no BigInt: fall back to the string form
+} else {
+ bignumToBigInt = (<T extends BN<BigNumArray>>(a: T) => a.byteLength === 8 ? new a['BigIntArray'](a.buffer, a.byteOffset, 1)[0] : <any>decimalToString(a)); // 8-byte values are read directly as one 64-bit element
+ bignumToString = (<T extends BN<BigNumArray>>(a: T) => a.byteLength === 8 ? `${new a['BigIntArray'](a.buffer, a.byteOffset, 1)[0]}` : decimalToString(a));
+/**
+ * Render the bytes of `a` as a decimal string by repeated long division by 10.
+ *
+ * The input is copied as 16-bit limbs and the copy is reversed, so the input itself is
+ * not modified. Each pass of the outer loop divides the whole copy by 10 in place and
+ * prepends the remainder as the next digit. The loop stops once the first four 32-bit
+ * words of the copy (16 bytes) are all zero.
+ *
+ * NOTE(review): the reversal presumably puts the most significant limb first, i.e. the
+ * input is stored least-significant first. Confirm against the callers.
+ * @ignore
+ */
+function decimalToString<T extends BN<BigNumArray>>(a: T) {
+ let digits = '';
+ const base64 = new Uint32Array(2); // scratch: [0] = running dividend/remainder, [1] = quotient
+ let base32 = new Uint16Array(a.buffer, a.byteOffset, a.byteLength / 2); // the input viewed as 16-bit limbs
+ const checks = new Uint32Array((base32 = new Uint16Array(base32).reverse()).buffer); // reversed COPY, also viewed as 32-bit words
+ let i = -1;
+ const n = base32.length - 1;
+ do {
+ for (base64[0] = base32[i = 0]; i < n;) {
+ base32[i++] = base64[1] = base64[0] / 10; // storing into a typed array truncates to the integer quotient
+ base64[0] = ((base64[0] - base64[1] * 10) << 16) + base32[i]; // carry the remainder into the next limb
+ }
+ base32[i] = base64[1] = base64[0] / 10; // last limb
+ base64[0] = base64[0] - base64[1] * 10; // remainder of the whole division: the next digit
+ digits = `${base64[0]}${digits}`;
+ } while (checks[0] || checks[1] || checks[2] || checks[3]);
+ return digits ? digits : `0`;
+/**
+ * Factory for big-number wrappers around integer typed arrays.
+ *
+ * The static methods return an object that is both a typed array and a `BN` (see the
+ * `BN` interface), built by `SignedBigNum`, `UnsignedBigNum` or `DecimalBigNum`.
+ * `new BN(num, isSigned)` is equivalent to `BN.new(num, isSigned)`.
+ * @ignore
+ */
+export class BN<T extends BigNumArray> {
+    /**
+     * Wrap `num`, choosing the kind of big number as follows:
+     *  - `isSigned === true`: signed; `isSigned === false`: unsigned;
+     *  - otherwise signed if `num` is an Int8Array, Int16Array, Int32Array or BigInt64Array;
+     *  - otherwise decimal if `num` is exactly 16 bytes long;
+     *  - otherwise unsigned.
+     * @nocollapse
+     */
+    public static new<T extends BigNumArray>(num: T, isSigned?: boolean): (T & BN<T>) {
+        switch (isSigned) {
+            case true: return new (<any> SignedBigNum)(num) as (T & BN<T>);
+            case false: return new (<any> UnsignedBigNum)(num) as (T & BN<T>);
+        }
+        switch (num.constructor) {
+            case Int8Array:
+            case Int16Array:
+            case Int32Array:
+            case BigInt64Array:
+                return new (<any> SignedBigNum)(num) as (T & BN<T>);
+        }
+        if (num.byteLength === 16) {
+            return new (<any> DecimalBigNum)(num) as (T & BN<T>);
+        }
+        return new (<any> UnsignedBigNum)(num) as (T & BN<T>);
+    }
+    /**
+     * Wrap `num` as a signed big number.
+     * @nocollapse
+     */
+    public static signed<T extends IntArray>(num: T): (T & BN<T>) {
+        return new (<any> SignedBigNum)(num) as (T & BN<T>);
+    }
+    /**
+     * Wrap `num` as an unsigned big number.
+     * @nocollapse
+     */
+    public static unsigned<T extends UintArray>(num: T): (T & BN<T>) {
+        return new (<any> UnsignedBigNum)(num) as (T & BN<T>);
+    }
+    /**
+     * Wrap `num` as a decimal big number.
+     * @nocollapse
+     */
+    public static decimal<T extends UintArray>(num: T): (T & BN<T>) {
+        return new (<any> DecimalBigNum)(num) as (T & BN<T>);
+    }
+    /**
+     * Returning an object from the constructor makes `new BN(...)` evaluate to the
+     * wrapper produced by the static factory, not to a plain `BN` instance.
+     */
+    constructor(num: T, isSigned?: boolean) {
+        return BN.new(num, isSigned) as any;
+    }
+}
+/**
+ * Instance shape of a big-number wrapper: the typed-array surface of `TypedArrayLike`
+ * plus the conversions installed on `BigNum.prototype`. The declaration merges with the
+ * `BN` class of the same name.
+ * @ignore
+ */
+export interface BN<T extends BigNumArray> extends TypedArrayLike<T> {
+ new<T extends ArrayBufferViewInput>(buffer: T, signed?: boolean): T;
+ readonly signed: boolean; // whether the value is interpreted as signed
+ readonly TypedArray: TypedArrayConstructor<TypedArray>; // typed-array constructor used for the storage
+ readonly BigIntArray: BigIntArrayConstructor<BigIntArray>; // 64-bit array constructor used for bigint access
+ [Symbol.toStringTag]:
+ 'Int8Array' |
+ 'Int16Array' |
+ 'Int32Array' |
+ 'Uint8Array' |
+ 'Uint16Array' |
+ 'Uint32Array' |
+ 'Uint8ClampedArray';
+ /**
+ * Convert the bytes to their (positive) decimal representation for printing
+ */
+ toString(): string;
+ /**
+ * Down-convert the bytes to a 53-bit precision integer. Invoked by JS for
+ * arithmetic operators, like `+`. Easy (and unsafe) way to convert BN to
+ * number via `+bn_inst`
+ */
+ valueOf(): number;
+ /**
+ * Return the JSON representation of the bytes. Must be wrapped in double-quotes,
+ * so it's compatible with JSON.stringify().
+ */
+ toJSON(): string;
+ [Symbol.toPrimitive](hint?: any): number | string | bigint; // result type depends on the hint: 'number', 'string' or 'default'
+/**
+ * The subset of the built-in typed-array API that a big-number wrapper exposes,
+ * with array-returning methods and callback `array` arguments typed as the
+ * underlying array type `T`.
+ * @ignore
+ */
+interface TypedArrayLike<T extends BigNumArray> {
+ readonly length: number;
+ readonly buffer: ArrayBuffer;
+ readonly byteLength: number;
+ readonly byteOffset: number;
+ readonly BYTES_PER_ELEMENT: number;
+ includes(searchElement: number, fromIndex?: number | undefined): boolean;
+ copyWithin(target: number, start: number, end?: number | undefined): this;
+ every(callbackfn: (value: number, index: number, array: T) => boolean, thisArg?: any): boolean;
+ fill(value: number, start?: number | undefined, end?: number | undefined): this;
+ filter(callbackfn: (value: number, index: number, array: T) => boolean, thisArg?: any): T;
+ find(predicate: (value: number, index: number, obj: T) => boolean, thisArg?: any): number | undefined;
+ findIndex(predicate: (value: number, index: number, obj: T) => boolean, thisArg?: any): number;
+ forEach(callbackfn: (value: number, index: number, array: T) => void, thisArg?: any): void;
+ indexOf(searchElement: number, fromIndex?: number | undefined): number;
+ join(separator?: string | undefined): string;
+ lastIndexOf(searchElement: number, fromIndex?: number | undefined): number;
+ map(callbackfn: (value: number, index: number, array: T) => number, thisArg?: any): T;
+ reduce(callbackfn: (previousValue: number, currentValue: number, currentIndex: number, array: T) => number): number;
+ reduce(callbackfn: (previousValue: number, currentValue: number, currentIndex: number, array: T) => number, initialValue: number): number;
+ reduce<U>(callbackfn: (previousValue: U, currentValue: number, currentIndex: number, array: T) => U, initialValue: U): U;
+ reduceRight(callbackfn: (previousValue: number, currentValue: number, currentIndex: number, array: T) => number): number;
+ reduceRight(callbackfn: (previousValue: number, currentValue: number, currentIndex: number, array: T) => number, initialValue: number): number;
+ reduceRight<U>(callbackfn: (previousValue: U, currentValue: number, currentIndex: number, array: T) => U, initialValue: U): U;
+ reverse(): T;
+ set(array: ArrayLike<number>, offset?: number | undefined): void;
+ slice(start?: number | undefined, end?: number | undefined): T;
+ some(callbackfn: (value: number, index: number, array: T) => boolean, thisArg?: any): boolean;
+ sort(compareFn?: ((a: number, b: number) => number) | undefined): this;
+ subarray(begin: number, end?: number | undefined): T;
+ toLocaleString(): string;
+ entries(): IterableIterator<[number, number]>;
+ keys(): IterableIterator<number>;
+ values(): IterableIterator<number>;
diff --git a/src/arrow/js/src/util/buffer.ts b/src/arrow/js/src/util/buffer.ts
new file mode 100644
index 000000000..86dae86c6
--- /dev/null
+++ b/src/arrow/js/src/util/buffer.ts
@@ -0,0 +1,235 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+import { flatbuffers } from 'flatbuffers';
+import { encodeUtf8 } from '../util/utf8';
+import ByteBuffer = flatbuffers.ByteBuffer;
+import { TypedArray, TypedArrayConstructor } from '../interfaces';
+import { BigIntArray, BigIntArrayConstructor } from '../interfaces';
+import { isPromise, isIterable, isAsyncIterable, isIteratorResult, BigInt64Array, BigUint64Array } from './compat';
+/**
+ * `SharedArrayBuffer` when the environment defines it, otherwise `ArrayBuffer`, so that
+ * `instanceof SharedArrayBuf` checks never reference an undefined global.
+ * @ignore
+ */
+const SharedArrayBuf = (typeof SharedArrayBuffer !== 'undefined' ? SharedArrayBuffer : ArrayBuffer);
+/**
+ * Merge neighbouring chunks that are views over the same ArrayBuffer into a single view,
+ * so a later join does not have to copy them. Chunks over different buffers, chunks
+ * that start before the previous result, and chunks separated from it by a gap are kept
+ * as separate entries.
+ *
+ * NOTE(review): a chunk that lies entirely inside the previous one also passes the
+ * checks, and the merged view then ends at that chunk's end, i.e. it becomes shorter.
+ * Confirm that callers never pass such overlapping chunks.
+ *
+ * @param chunks The chunks, in order.
+ * @returns A new array of chunks; falsy entries of the input are dropped.
+ * @ignore
+ */
+function collapseContiguousByteRanges(chunks: Uint8Array[]) {
+ const result = chunks[0] ? [chunks[0]] : [];
+ let xOffset: number, yOffset: number, xLen: number, yLen: number;
+ for (let x, y, i = 0, j = 0, n = chunks.length; ++i < n;) { // starts at i = 1; chunks[0] seeded `result`
+ x = result[j]; // last chunk emitted so far
+ y = chunks[i]; // candidate to merge into x
+ // continue if x and y don't share the same underlying ArrayBuffer, or if x isn't before y
+ if (!x || !y || x.buffer !== y.buffer || y.byteOffset < x.byteOffset) {
+ y && (result[++j] = y);
+ continue;
+ }
+ ({ byteOffset: xOffset, byteLength: xLen } = x);
+ ({ byteOffset: yOffset, byteLength: yLen } = y);
+ // continue if the byte ranges of x and y aren't contiguous
+ if ((xOffset + xLen) < yOffset || (yOffset + yLen) < xOffset) { // the second test cannot be true here: y starts at or after x
+ y && (result[++j] = y);
+ continue;
+ }
+ result[j] = new Uint8Array(x.buffer, xOffset, yOffset - xOffset + yLen); // one view from the start of x to the end of y
+ }
+ return result;
+/**
+ * Copy bytes from `source` into `target`, regardless of their element types.
+ * @param target The view to write into.
+ * @param source The view to read from.
+ * @param targetByteOffset Byte position inside `target` at which writing starts; defaults to 0.
+ * @param sourceByteLength Number of bytes to copy; defaults to all of `source`. It is
+ * capped at `target.byteLength`, but the cap does not take `targetByteOffset` into
+ * account, so `set` throws a RangeError if the copy would run past the end of `target`.
+ * @returns `target`.
+ * @ignore
+ */
+export function memcpy<TTarget extends ArrayBufferView, TSource extends ArrayBufferView>(target: TTarget, source: TSource, targetByteOffset = 0, sourceByteLength = source.byteLength) {
+ const targetByteLength = target.byteLength;
+ const dst = new Uint8Array(target.buffer, target.byteOffset, targetByteLength); // byte view over all of target
+ const src = new Uint8Array(source.buffer, source.byteOffset, Math.min(sourceByteLength, targetByteLength)); // byte view over the part of source to copy
+ dst.set(src, targetByteOffset);
+ return target;
+/**
+ * Take the first `size` bytes from a list of chunks as one Uint8Array.
+ *
+ * @param chunks The chunks to read from, in order.
+ * @param size Number of bytes wanted. When omitted, `null` or 0, or when larger than
+ * the total, all bytes are taken.
+ * @returns A tuple of:
+ *  1. the joined bytes. If the first chunk alone satisfies the request, this is a
+ *     subarray of it and nothing is copied;
+ *  2. the chunks (or chunk remainder) that were not consumed;
+ *  3. the number of bytes left in those remaining chunks.
+ * @ignore
+ */
+export function joinUint8Arrays(chunks: Uint8Array[], size?: number | null): [Uint8Array, Uint8Array[], number] {
+ // collapse chunks that share the same underlying ArrayBuffer and whose byte ranges overlap,
+ // to avoid unnecessarily copying the bytes to do this buffer join. This is a common case during
+ // streaming, where we may be reading partial byte ranges out of the same underlying ArrayBuffer
+ const result = collapseContiguousByteRanges(chunks);
+ const byteLength = result.reduce((x, b) => x + b.byteLength, 0); // total bytes available
+ let source: Uint8Array, sliced: Uint8Array, buffer: Uint8Array | void;
+ let offset = 0, index = -1;
+ const length = Math.min(size || Infinity, byteLength); // bytes that will actually be returned
+ for (let n = result.length; ++index < n;) {
+ source = result[index];
+ sliced = source.subarray(0, Math.min(source.length, length - offset)); // the part of this chunk still needed
+ if (length <= (offset + sliced.length)) { // this chunk completes the request
+ if (sliced.length < source.length) {
+ result[index] = source.subarray(sliced.length); // keep the unused tail for the caller
+ } else if (sliced.length === source.length) { index++; } // chunk fully consumed: drop it from the remainder
+ buffer ? memcpy(buffer, sliced, offset) : (buffer = sliced); // no buffer yet means one chunk was enough: no copy
+ break;
+ }
+ memcpy(buffer || (buffer = new Uint8Array(length)), sliced, offset); // allocate the output lazily on the first copy
+ offset += sliced.length;
+ }
+ return [buffer || new Uint8Array(0), result.slice(index), byteLength - (buffer ? buffer.byteLength : 0)];
+/**
+ * Everything `toArrayBufferView` accepts: views, buffers, iterables and array-likes of
+ * numbers, flatbuffers ByteBuffers, strings, `null`/`undefined`, and any of those wrapped
+ * in an iterator result or a stream read result.
+ * @ignore
+ */
+export type ArrayBufferViewInput = ArrayBufferView | ArrayBufferLike | ArrayBufferView | Iterable<number> | ArrayLike<number> | ByteBuffer | string | null | undefined |
+ IteratorResult<ArrayBufferView | ArrayBufferLike | ArrayBufferView | Iterable<number> | ArrayLike<number> | ByteBuffer | string | null | undefined> |
+ ReadableStreamReadResult<ArrayBufferView | ArrayBufferLike | ArrayBufferView | Iterable<number> | ArrayLike<number> | ByteBuffer | string | null | undefined> ;
+/**
+ * Coerce `input` into an instance of the given typed-array constructor.
+ *
+ * Handling, in order:
+ *  - an iterator result is unwrapped to its `value`;
+ *  - an existing instance of the constructor is returned as-is (re-wrapped when the
+ *    constructor is Uint8Array, see the inline comment);
+ *  - a falsy value gives an empty array;
+ *  - a string is UTF-8 encoded first;
+ *  - an ArrayBuffer or SharedArrayBuffer gets a view over the whole buffer;
+ *  - a flatbuffers ByteBuffer is converted from its `bytes()`;
+ *  - any other non-view value goes through the constructor's `from`;
+ *  - any other view is reinterpreted over the same bytes, without copying.
+ *
+ * @param ArrayBufferViewCtor The typed-array constructor to produce.
+ * @param input The value to convert.
+ * @ignore
+ */
+export function toArrayBufferView<T extends TypedArray>(ArrayBufferViewCtor: TypedArrayConstructor<T>, input: ArrayBufferViewInput): T;
+export function toArrayBufferView<T extends BigIntArray>(ArrayBufferViewCtor: BigIntArrayConstructor<T>, input: ArrayBufferViewInput): T;
+export function toArrayBufferView(ArrayBufferViewCtor: any, input: ArrayBufferViewInput) {
+ let value: any = isIteratorResult(input) ? input.value : input;
+ if (value instanceof ArrayBufferViewCtor) {
+ if (ArrayBufferViewCtor === Uint8Array) {
+ // Node's `Buffer` class passes the `instanceof Uint8Array` check, but we need
+ // a real Uint8Array, since Buffer#slice isn't the same as Uint8Array#slice :/
+ return new ArrayBufferViewCtor(value.buffer, value.byteOffset, value.byteLength);
+ }
+ return value;
+ }
+ if (!value) { return new ArrayBufferViewCtor(0); }
+ if (typeof value === 'string') { value = encodeUtf8(value); }
+ if (value instanceof ArrayBuffer) { return new ArrayBufferViewCtor(value); }
+ if (value instanceof SharedArrayBuf) { return new ArrayBufferViewCtor(value); }
+ if (value instanceof ByteBuffer) { return toArrayBufferView(ArrayBufferViewCtor, value.bytes()); }
+ return !ArrayBuffer.isView(value) ? ArrayBufferViewCtor.from(value) : value.byteLength <= 0 ? new ArrayBufferViewCtor(0)
+ : new ArrayBufferViewCtor(value.buffer, value.byteOffset, value.byteLength / ArrayBufferViewCtor.BYTES_PER_ELEMENT); // element count = byte length / element size
+/** @ignore */ export const toInt8Array = (input: ArrayBufferViewInput) => toArrayBufferView(Int8Array, input); // this and the following helpers each bind toArrayBufferView to one constructor
+/** @ignore */ export const toInt16Array = (input: ArrayBufferViewInput) => toArrayBufferView(Int16Array, input);
+/** @ignore */ export const toInt32Array = (input: ArrayBufferViewInput) => toArrayBufferView(Int32Array, input);
+/** @ignore */ export const toBigInt64Array = (input: ArrayBufferViewInput) => toArrayBufferView(BigInt64Array, input);
+/** @ignore */ export const toUint8Array = (input: ArrayBufferViewInput) => toArrayBufferView(Uint8Array, input);
+/** @ignore */ export const toUint16Array = (input: ArrayBufferViewInput) => toArrayBufferView(Uint16Array, input);
+/** @ignore */ export const toUint32Array = (input: ArrayBufferViewInput) => toArrayBufferView(Uint32Array, input);
+/** @ignore */ export const toBigUint64Array = (input: ArrayBufferViewInput) => toArrayBufferView(BigUint64Array, input);
+/** @ignore */ export const toFloat32Array = (input: ArrayBufferViewInput) => toArrayBufferView(Float32Array, input);
+/** @ignore */ export const toFloat64Array = (input: ArrayBufferViewInput) => toArrayBufferView(Float64Array, input);
+/** @ignore */ export const toUint8ClampedArray = (input: ArrayBufferViewInput) => toArrayBufferView(Uint8ClampedArray, input);
+/**
+ * Input accepted by `toArrayBufferViewIterator`: a single convertible value, or an
+ * iterable of them.
+ * @ignore
+ */
+type ArrayBufferViewIteratorInput = Iterable<ArrayBufferViewInput> | ArrayBufferViewInput;
+/**
+ * Advance `iterator` by one step, discarding the result, and hand the same iterator back.
+ *
+ * The generators in this file that are wrapped in `pump` yield once before they have
+ * pulled anything from their source, so that the value a consumer passes to `next()`
+ * can be forwarded to the source. This priming step consumes that first, empty yield.
+ * @param iterator The sync or async iterator to prime.
+ * @returns `iterator`.
+ * @ignore
+ */
+const pump = <T extends Iterator<any> | AsyncIterator<any>>(iterator: T) => { iterator.next(); return iterator; };
+/**
+ * Lazily convert a source of byte-like values into typed arrays of type `ArrayCtor`.
+ *
+ * A string, view, ArrayBuffer, SharedArrayBuffer or any other non-iterable value is
+ * treated as a single chunk; an iterable is consumed element by element. Each chunk is
+ * converted with `toArrayBufferView`. A value the consumer passes to `next(value)` is
+ * forwarded to the source iterator's `next`.
+ *
+ * @param ArrayCtor The typed-array constructor to produce.
+ * @param source The value or iterable to convert.
+ * @returns (as the generator's return value) an empty `ArrayCtor` instance.
+ * @ignore
+ */
+export function* toArrayBufferViewIterator<T extends TypedArray>(ArrayCtor: TypedArrayConstructor<T>, source: ArrayBufferViewIteratorInput) {
+    const wrap = function*<T>(x: T) { yield x; };
+    const buffers: Iterable<ArrayBufferViewInput> =
+        (typeof source === 'string') ? wrap(source)
+        : (ArrayBuffer.isView(source)) ? wrap(source)
+        : (source instanceof ArrayBuffer) ? wrap(source)
+        : (source instanceof SharedArrayBuf) ? wrap(source)
+        : !isIterable<ArrayBufferViewInput>(source) ? wrap(source) : source;
+    yield* pump((function* (it: Iterator<ArrayBufferViewInput, any, number | undefined>): Generator<T, void, number | undefined> {
+        let r: IteratorResult<any> = <any> null;
+        do {
+            // Yield the previous result converted to a typed array, then pull the next one,
+            // forwarding whatever the consumer sent in. On the very first pass `r` is null,
+            // which converts to an empty array; `pump` is expected to consume that first
+            // yield so consumers never see it.
+            r = it.next(yield toArrayBufferView(ArrayCtor, r));
+        } while (!r.done);
+    })(buffers[Symbol.iterator]()));
+    return new ArrayCtor();
+}
+/** @ignore */ export const toInt8ArrayIterator = (input: ArrayBufferViewIteratorInput) => toArrayBufferViewIterator(Int8Array, input); // this and the following helpers each bind toArrayBufferViewIterator to one constructor
+/** @ignore */ export const toInt16ArrayIterator = (input: ArrayBufferViewIteratorInput) => toArrayBufferViewIterator(Int16Array, input);
+/** @ignore */ export const toInt32ArrayIterator = (input: ArrayBufferViewIteratorInput) => toArrayBufferViewIterator(Int32Array, input);
+/** @ignore */ export const toUint8ArrayIterator = (input: ArrayBufferViewIteratorInput) => toArrayBufferViewIterator(Uint8Array, input);
+/** @ignore */ export const toUint16ArrayIterator = (input: ArrayBufferViewIteratorInput) => toArrayBufferViewIterator(Uint16Array, input);
+/** @ignore */ export const toUint32ArrayIterator = (input: ArrayBufferViewIteratorInput) => toArrayBufferViewIterator(Uint32Array, input);
+/** @ignore */ export const toFloat32ArrayIterator = (input: ArrayBufferViewIteratorInput) => toArrayBufferViewIterator(Float32Array, input);
+/** @ignore */ export const toFloat64ArrayIterator = (input: ArrayBufferViewIteratorInput) => toArrayBufferViewIterator(Float64Array, input);
+/** @ignore */ export const toUint8ClampedArrayIterator = (input: ArrayBufferViewIteratorInput) => toArrayBufferViewIterator(Uint8ClampedArray, input);
+/**
+ * Input accepted by `toArrayBufferViewAsyncIterator`: a single convertible value, a
+ * promise of one, or a sync or async iterable of them.
+ * @ignore
+ */
+type ArrayBufferViewAsyncIteratorInput = AsyncIterable<ArrayBufferViewInput> | Iterable<ArrayBufferViewInput> | PromiseLike<ArrayBufferViewInput> | ArrayBufferViewInput;
+/**
+ * Async counterpart of `toArrayBufferViewIterator`: lazily convert a source of byte-like
+ * values into typed arrays of type `ArrayCtor`.
+ *
+ * A promise is awaited and its resolved value is processed in its place. A string, view,
+ * ArrayBuffer, SharedArrayBuffer or any other non-iterable value is treated as a single
+ * chunk. Sync iterables are adapted to async ones, and async iterables are used directly.
+ * A value the consumer passes to `next(value)` is forwarded to the source iterator.
+ *
+ * @param ArrayCtor The typed-array constructor to produce.
+ * @param source The value, promise or (async) iterable to convert.
+ * @returns (as the generator's return value) an empty `ArrayCtor` instance.
+ * @ignore
+ */
+export async function* toArrayBufferViewAsyncIterator<T extends TypedArray>(ArrayCtor: TypedArrayConstructor<T>, source: ArrayBufferViewAsyncIteratorInput): AsyncGenerator<T, T, number | undefined> {
+    // if a Promise, unwrap the Promise and iterate the resolved value
+    if (isPromise<ArrayBufferViewInput>(source)) {
+        return yield* toArrayBufferViewAsyncIterator(ArrayCtor, await source);
+    }
+    const wrap = async function*<T>(x: T) { yield await x; };
+    const emit = async function* <T extends Iterable<any>>(source: T) {
+        yield* pump((function*(it: Iterator<any>) {
+            let r: IteratorResult<any> = <any> null;
+            do {
+                // Re-yield each value of the sync source, forwarding what the consumer sends in.
+                // `r` is null on the first pass, hence the optional chaining.
+                r = it.next(yield r?.value);
+            } while (!r.done);
+        })(source[Symbol.iterator]()));
+    };
+    const buffers: AsyncIterable<ArrayBufferViewInput> =
+        (typeof source === 'string') ? wrap(source) // if string, wrap in an AsyncIterableIterator
+        : (ArrayBuffer.isView(source)) ? wrap(source) // if TypedArray, wrap in an AsyncIterableIterator
+        : (source instanceof ArrayBuffer) ? wrap(source) // if ArrayBuffer, wrap in an AsyncIterableIterator
+        : (source instanceof SharedArrayBuf) ? wrap(source) // if SharedArrayBuffer, wrap in an AsyncIterableIterator
+        : isIterable<ArrayBufferViewInput>(source) ? emit(source) // If Iterable, wrap in an AsyncIterableIterator and compose the `next` values
+        : !isAsyncIterable<ArrayBufferViewInput>(source) ? wrap(source) // If not an AsyncIterable, treat as a sentinel and wrap in an AsyncIterableIterator
+        : source; // otherwise if AsyncIterable, use it
+    yield* pump((async function* (it: AsyncIterator<ArrayBufferViewInput, any, number | undefined>): AsyncGenerator<T, void, number | undefined> {
+        let r: IteratorResult<any> = <any> null;
+        do {
+            // Yield the previous result converted to a typed array, then await the next one.
+            // The first pass converts `null` into an empty array; `pump` is expected to
+            // consume that first yield so consumers never see it.
+            r = await it.next(yield toArrayBufferView(ArrayCtor, r));
+        } while (!r.done);
+    })(buffers[Symbol.asyncIterator]()));
+    return new ArrayCtor();
+}
+/** @ignore */ export const toInt8ArrayAsyncIterator = (input: ArrayBufferViewAsyncIteratorInput) => toArrayBufferViewAsyncIterator(Int8Array, input); // this and the following helpers each bind toArrayBufferViewAsyncIterator to one constructor
+/** @ignore */ export const toInt16ArrayAsyncIterator = (input: ArrayBufferViewAsyncIteratorInput) => toArrayBufferViewAsyncIterator(Int16Array, input);
+/** @ignore */ export const toInt32ArrayAsyncIterator = (input: ArrayBufferViewAsyncIteratorInput) => toArrayBufferViewAsyncIterator(Int32Array, input);
+/** @ignore */ export const toUint8ArrayAsyncIterator = (input: ArrayBufferViewAsyncIteratorInput) => toArrayBufferViewAsyncIterator(Uint8Array, input);
+/** @ignore */ export const toUint16ArrayAsyncIterator = (input: ArrayBufferViewAsyncIteratorInput) => toArrayBufferViewAsyncIterator(Uint16Array, input);
+/** @ignore */ export const toUint32ArrayAsyncIterator = (input: ArrayBufferViewAsyncIteratorInput) => toArrayBufferViewAsyncIterator(Uint32Array, input);
+/** @ignore */ export const toFloat32ArrayAsyncIterator = (input: ArrayBufferViewAsyncIteratorInput) => toArrayBufferViewAsyncIterator(Float32Array, input);
+/** @ignore */ export const toFloat64ArrayAsyncIterator = (input: ArrayBufferViewAsyncIteratorInput) => toArrayBufferViewAsyncIterator(Float64Array, input);
+/** @ignore */ export const toUint8ClampedArrayAsyncIterator = (input: ArrayBufferViewAsyncIteratorInput) => toArrayBufferViewAsyncIterator(Uint8ClampedArray, input);
+/**
+ * Add `offset` to the first `length + 1` entries of an offsets array.
+ *
+ * When `offset` is 0 the input array is returned unchanged. Otherwise the entries are
+ * copied first (`slice`), so the input array is never modified.
+ *
+ * @param offset The amount added to every entry. NOTE(review): callers presumably pass
+ * the negated first offset so that the result starts at 0. Confirm at the call sites.
+ * @param length Number of values described; `length + 1` offsets are processed.
+ * @param valueOffsets The offsets to rebase.
+ * @returns The input array, or a shifted copy of its first `length + 1` entries.
+ * @ignore
+ */
+export function rebaseValueOffsets(offset: number, length: number, valueOffsets: Int32Array) {
+ // If we have a non-zero offset, create a new offsets array with the values
+ // shifted by the start offset, such that the new start offset is 0
+ if (offset !== 0) {
+ valueOffsets = valueOffsets.slice(0, length + 1); // copy, so the caller's array stays intact
+ for (let i = -1; ++i <= length;) { // i runs from 0 to length inclusive
+ valueOffsets[i] += offset;
+ }
+ }
+ return valueOffsets;
+/**
+ * Shallow equality of two array-likes: same `length`, and every pair of elements at the
+ * same index is strictly equal (`===`).
+ * @returns `true` when both have the same length and contents; two empty inputs are equal.
+ * @ignore
+ */
+export function compareArrayLike<T extends ArrayLike<any>>(a: T, b: T) {
+ let i = 0;
+ const n = a.length;
+ if (n !== b.length) { return false; }
+ if (n > 0) {
+ do { if (a[i] !== b[i]) { return false; } } while (++i < n); // stop at the first mismatch
+ }
+ return true;
diff --git a/src/arrow/js/src/util/compat.ts b/src/arrow/js/src/util/compat.ts
new file mode 100644
index 000000000..62fcb772e
--- /dev/null
+++ b/src/arrow/js/src/util/compat.ts
@@ -0,0 +1,178 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+import { ReadableInterop, ArrowJSONLike } from '../io/interfaces';
+/** Alias for Node's `fs.ReadStream`, referenced through an `import()` type. @ignore */
+type FSReadStream = import('fs').ReadStream;
+/** Alias for Node's `fs.promises.FileHandle`, referenced through an `import()` type. @ignore */
+type FileHandle = import('fs').promises.FileHandle;
+/**
+ * Handle returned by `Observable.subscribe`; exposes a single `unsubscribe` callback.
+ * @ignore
+ */
+export interface Subscription {
+ unsubscribe: () => void;
+/**
+ * Callback set accepted by `Observable.subscribe`: `next` receives a value of
+ * type `T`, `error` receives an arbitrary error value, `complete` takes no
+ * arguments. `closed` is an optional boolean whose meaning is not defined here.
+ * @ignore
+ */
+export interface Observer<T> {
+ closed?: boolean;
+ next: (value: T) => void;
+ error: (err: any) => void;
+ complete: () => void;
+/**
+ * Minimal observable shape: `subscribe` takes an `Observer<T>` and returns a `Subscription`.
+ * @ignore
+ */
+export interface Observable<T> {
+ subscribe: (observer: Observer<T>) => Subscription;
+/**
+ * Resolves the `BigInt` constructor for the current environment.
+ *
+ * Yields `[BigInt, true]` when a global `BigInt` exists. Otherwise yields
+ * `[stub, false]`, where calling the stub, `stub.asIntN` or `stub.asUintN`
+ * throws `Error('BigInt is not available in this environment')`.
+ * @ignore
+ */
+const [BigIntCtor, BigIntAvailable] = (() => {
+ const BigIntUnavailableError = () => { throw new Error('BigInt is not available in this environment'); }; // throws itself, so the `throw` at each call site never receives a value
+ function BigIntUnavailable() { throw BigIntUnavailableError(); }
+ BigIntUnavailable.asIntN = () => { throw BigIntUnavailableError(); };
+ BigIntUnavailable.asUintN = () => { throw BigIntUnavailableError(); };
+ return typeof BigInt !== 'undefined' ? [BigInt, true] : [<any> BigIntUnavailable, false];
+})() as [BigIntConstructor, boolean];
+/**
+ * Resolves the `BigInt64Array` constructor for the current environment.
+ *
+ * Yields `[BigInt64Array, true]` when the global exists. Otherwise yields
+ * `[stub, false]`; the stub reports `BYTES_PER_ELEMENT` as 8, and its
+ * constructor, `of` and `from` throw
+ * `Error('BigInt64Array is not available in this environment')`.
+ * @ignore
+ */
+const [BigInt64ArrayCtor, BigInt64ArrayAvailable] = (() => {
+ const BigInt64ArrayUnavailableError = () => { throw new Error('BigInt64Array is not available in this environment'); }; // throws itself; callers' `throw` never receives a value
+ class BigInt64ArrayUnavailable {
+ static get BYTES_PER_ELEMENT() { return 8; }
+ static of() { throw BigInt64ArrayUnavailableError(); }
+ static from() { throw BigInt64ArrayUnavailableError(); }
+ constructor() { throw BigInt64ArrayUnavailableError(); }
+ }
+ return typeof BigInt64Array !== 'undefined' ? [BigInt64Array, true] : [<any> BigInt64ArrayUnavailable, false];
+})() as [BigInt64ArrayConstructor, boolean];
+/**
+ * Resolves the `BigUint64Array` constructor for the current environment.
+ *
+ * Yields `[BigUint64Array, true]` when the global exists. Otherwise yields
+ * `[stub, false]`; the stub reports `BYTES_PER_ELEMENT` as 8, and its
+ * constructor, `of` and `from` throw
+ * `Error('BigUint64Array is not available in this environment')`.
+ * @ignore
+ */
+const [BigUint64ArrayCtor, BigUint64ArrayAvailable] = (() => {
+ const BigUint64ArrayUnavailableError = () => { throw new Error('BigUint64Array is not available in this environment'); }; // throws itself; callers' `throw` never receives a value
+ class BigUint64ArrayUnavailable {
+ static get BYTES_PER_ELEMENT() { return 8; }
+ static of() { throw BigUint64ArrayUnavailableError(); }
+ static from() { throw BigUint64ArrayUnavailableError(); }
+ constructor() { throw BigUint64ArrayUnavailableError(); }
+ }
+ return typeof BigUint64Array !== 'undefined' ? [BigUint64Array, true] : [<any> BigUint64ArrayUnavailable, false];
+})() as [BigUint64ArrayConstructor, boolean];
+export { BigIntCtor as BigInt, BigIntAvailable };
+export { BigInt64ArrayCtor as BigInt64Array, BigInt64ArrayAvailable };
+export { BigUint64ArrayCtor as BigUint64Array, BigUint64ArrayAvailable };
+/** True when `typeof x` is `'number'`. @ignore */ const isNumber = (x: any) => typeof x === 'number';
+/** True when `typeof x` is `'boolean'`. @ignore */ const isBoolean = (x: any) => typeof x === 'boolean';
+/** True when `typeof x` is `'function'`. @ignore */ const isFunction = (x: any) => typeof x === 'function';
+/**
+ * True for any value that is neither `null` nor `undefined` and that
+ * `Object(x)` returns unchanged, i.e. objects and functions but not primitives.
+ * @ignore
+ */
+// eslint-disable-next-line @typescript-eslint/ban-types
+export const isObject = (x: any): x is Object => x != null && Object(x) === x;
+/**
+ * Duck-type check for thenables: passes `isObject` and has a function-valued `then`.
+ * @ignore
+ */
+export const isPromise = <T = any>(x: any): x is PromiseLike<T> => {
+ return isObject(x) && isFunction(x.then);
+/**
+ * Duck-type check for observables: passes `isObject` and has a function-valued `subscribe`.
+ * @ignore
+ */
+export const isObservable = <T = any>(x: any): x is Observable<T> => {
+ return isObject(x) && isFunction(x.subscribe);
+/**
+ * True when `x` passes `isObject` and `x[Symbol.iterator]` is a function.
+ * @ignore
+ */
+export const isIterable = <T = any>(x: any): x is Iterable<T> => {
+ return isObject(x) && isFunction(x[Symbol.iterator]);
+/**
+ * True when `x` passes `isObject` and `x[Symbol.asyncIterator]` is a function.
+ * @ignore
+ */
+export const isAsyncIterable = <T = any>(x: any): x is AsyncIterable<T> => {
+ return isObject(x) && isFunction(x[Symbol.asyncIterator]);
+/**
+ * True when `x` passes `isObject` and its `schema` property does too.
+ * No other property of `x` is inspected.
+ * @ignore
+ */
+export const isArrowJSON = (x: any): x is ArrowJSONLike => {
+ return isObject(x) && isObject(x['schema']);
+/**
+ * True when `x` passes `isObject` and has a `length` property of type number.
+ * @ignore
+ */
+export const isArrayLike = <T = any>(x: any): x is ArrayLike<T> => {
+ return isObject(x) && isNumber(x['length']);
+/**
+ * True when `x` passes `isObject` and has both a `done` and a `value` key.
+ * The `in` operator is used, so inherited keys count and the values are not inspected.
+ * @ignore
+ */
+export const isIteratorResult = <T = any>(x: any): x is IteratorResult<T> => {
+ return isObject(x) && ('done' in x) && ('value' in x);
+/**
+ * True when `x` passes `isObject` and has function-valued `abort`, `close`,
+ * `start` and `write` members; all four are required by this check.
+ * @ignore
+ */
+export const isUnderlyingSink = <T = any>(x: any): x is UnderlyingSink<T> => {
+ return isObject(x) &&
+ isFunction(x['abort']) &&
+ isFunction(x['close']) &&
+ isFunction(x['start']) &&
+ isFunction(x['write']);
+/**
+ * True when `x` passes `isObject`, has a function-valued `stat` and a numeric `fd`.
+ * @ignore
+ */
+export const isFileHandle = (x: any): x is FileHandle => {
+ return isObject(x) && isFunction(x['stat']) && isNumber(x['fd']);
+/**
+ * True when `x` passes `isReadableNodeStream` and has a numeric `bytesRead`.
+ * @ignore
+ */
+export const isFSReadStream = (x: any): x is FSReadStream => {
+ return isReadableNodeStream(x) && isNumber((<any> x)['bytesRead']);
+/**
+ * True when `x` passes `isObject` and its `body` passes `isReadableDOMStream`.
+ * @ignore
+ */
+export const isFetchResponse = (x: any): x is Response => {
+ return isObject(x) && isReadableDOMStream(x['body']);
+/**
+ * True when `x` passes `isObject`, has function-valued `abort` and
+ * `getWriter`, and is not an instance of `ReadableInterop`.
+ * @ignore
+ */
+export const isWritableDOMStream = <T = any>(x: any): x is WritableStream<T> => {
+ return isObject(x) &&
+ isFunction(x['abort']) &&
+ isFunction(x['getWriter']) &&
+ !(x instanceof ReadableInterop);
+/**
+ * True when `x` passes `isObject`, has function-valued `cancel` and
+ * `getReader`, and is not an instance of `ReadableInterop`.
+ * @ignore
+ */
+export const isReadableDOMStream = <T = any>(x: any): x is ReadableStream<T> => {
+ return isObject(x) &&
+ isFunction(x['cancel']) &&
+ isFunction(x['getReader']) &&
+ !(x instanceof ReadableInterop);
+/**
+ * True when `x` passes `isObject`, has function-valued `end` and `write`,
+ * a boolean `writable`, and is not an instance of `ReadableInterop`.
+ * @ignore
+ */
+export const isWritableNodeStream = (x: any): x is NodeJS.WritableStream => {
+ return isObject(x) &&
+ isFunction(x['end']) &&
+ isFunction(x['write']) &&
+ isBoolean(x['writable']) &&
+ !(x instanceof ReadableInterop);
+/**
+ * True when `x` passes `isObject`, has function-valued `read` and `pipe`,
+ * a boolean `readable`, and is not an instance of `ReadableInterop`.
+ * @ignore
+ */
+export const isReadableNodeStream = (x: any): x is NodeJS.ReadableStream => {
+ return isObject(x) &&
+ isFunction(x['read']) &&
+ isFunction(x['pipe']) &&
+ isBoolean(x['readable']) &&
+ !(x instanceof ReadableInterop);
diff --git a/src/arrow/js/src/util/fn.ts b/src/arrow/js/src/util/fn.ts
new file mode 100644
index 000000000..a58f9d337
--- /dev/null
+++ b/src/arrow/js/src/util/fn.ts
@@ -0,0 +1,31 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+/**
+ * Wraps `visit` in a zero-argument function that forwards its own `this` as `node`.
+ *
+ * @param visit callback receiving the wrapper's `this`
+ * @returns a function suitable for use as a method; it returns whatever `visit` returns
+ * @ignore
+ */
+export function partial0<T>(visit: (node: T) => any) {
+ return function(this: T) { return visit(this); };
+/**
+ * Wraps `visit` in a one-argument function that forwards its own `this` as
+ * `node`, followed by the argument `a`.
+ *
+ * @param visit callback receiving the wrapper's `this` and `a`
+ * @returns a function suitable for use as a method; it returns whatever `visit` returns
+ * @ignore
+ */
+export function partial1<T>(visit: (node: T, a: any) => any) {
+ return function(this: T, a: any) { return visit(this, a); };
+/**
+ * Wraps `visit` in a two-argument function that forwards its own `this` as
+ * `node`, followed by the arguments `a` and `b`.
+ *
+ * @param visit callback receiving the wrapper's `this`, `a` and `b`
+ * @returns a function suitable for use as a method; it returns whatever `visit` returns
+ * @ignore
+ */
+export function partial2<T>(visit: (node: T, a: any, b: any) => any) {
+ return function(this: T, a: any, b: any) { return visit(this, a, b); };
diff --git a/src/arrow/js/src/util/int.ts b/src/arrow/js/src/util/int.ts
new file mode 100644
index 000000000..147106dbb
--- /dev/null
+++ b/src/arrow/js/src/util/int.ts
@@ -0,0 +1,440 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+/** 2^16 (65536); `BaseInt64._times` writes it into the high word as a carry. @ignore */
+const carryBit16 = 1 << 16;
+/**
+ * Formats a number as `0x`-prefixed lowercase hex without zero padding.
+ * Negative inputs are first offset by 2^32 (`0xFFFFFFFF + value + 1`).
+ *
+ * @param value the number to format
+ * @returns the hex string, e.g. `0xff` for 255
+ * @ignore
+ */
+function intAsHex(value: number): string {
+ if (value < 0) {
+ value = 0xFFFFFFFF + value + 1;
+ }
+ return `0x${value.toString(16)}`;
+/** Largest number of decimal digits the `fromString` parsers consume per chunk. @ignore */
+const kInt32DecimalDigits = 8;
+/**
+ * `kPowersOfTen[n]` is 10 to the power `n`, for `n` from 0 through 8.
+ * The `fromString` parsers index it with the digit count of the current chunk.
+ * @ignore
+ */
+const kPowersOfTen = [1,
+ 10,
+ 100,
+ 1000,
+ 10000,
+ 100000,
+ 1000000,
+ 10000000,
+ 100000000];
+/**
+ * Shared 64-bit integer storage and arithmetic over a `Uint32Array`.
+ *
+ * `buffer[0]` holds the low 32 bits and `buffer[1]` the high 32 bits (see
+ * `low()` and `high()`). `_times` and `_plus` mutate `this.buffer` in place;
+ * bits beyond 64 are discarded by the 32-bit element stores.
+ * @ignore
+ */
+export class BaseInt64 {
+ constructor (protected buffer: Uint32Array) {}
+ public high(): number { return this.buffer[1]; }
+ public low (): number { return this.buffer[0]; }
+ protected _times(other: BaseInt64) { // in-place multiply by `other`; returns `this`
+ // Break the left and right numbers into 16 bit chunks
+ // so that we can multiply them without overflow.
+ const L = new Uint32Array([
+ this.buffer[1] >>> 16,
+ this.buffer[1] & 0xFFFF,
+ this.buffer[0] >>> 16,
+ this.buffer[0] & 0xFFFF
+ ]);
+ const R = new Uint32Array([
+ other.buffer[1] >>> 16,
+ other.buffer[1] & 0xFFFF,
+ other.buffer[0] >>> 16,
+ other.buffer[0] & 0xFFFF
+ ]);
+ let product = L[3] * R[3];
+ this.buffer[0] = product & 0xFFFF;
+ let sum = product >>> 16;
+ product = L[2] * R[3];
+ sum += product;
+ product = (L[3] * R[2]) >>> 0;
+ sum += product;
+ this.buffer[0] += sum << 16;
+ this.buffer[1] = (sum >>> 0 < product ? carryBit16 : 0); // carry when the 32-bit view of `sum` wrapped below the last addend
+ this.buffer[1] += sum >>> 16;
+ this.buffer[1] += L[1] * R[3] + L[2] * R[2] + L[3] * R[1];
+ this.buffer[1] += (L[0] * R[3] + L[1] * R[2] + L[2] * R[1] + L[3] * R[0]) << 16;
+ return this;
+ }
+ protected _plus(other: BaseInt64) { // in-place add of `other`; has no return statement
+ const sum = (this.buffer[0] + other.buffer[0]) >>> 0;
+ this.buffer[1] += other.buffer[1];
+ if (sum < (this.buffer[0] >>> 0)) { // low word wrapped, so carry one into the high word
+ ++this.buffer[1];
+ }
+ this.buffer[0] = sum;
+ }
+ public lessThan(other: BaseInt64): boolean { // compares the stored words as unsigned values, high word first
+ return this.buffer[1] < other.buffer[1] ||
+ (this.buffer[1] === other.buffer[1] && this.buffer[0] < other.buffer[0]);
+ }
+ public equals(other: BaseInt64): boolean { // true when both words match
+ return this.buffer[1] === other.buffer[1] && this.buffer[0] == other.buffer[0];
+ }
+ public greaterThan(other: BaseInt64): boolean { // delegates to `other.lessThan(this)`
+ return other.lessThan(this);
+ }
+ public hex(): string { // high word then low word, separated by one space
+ return `${intAsHex(this.buffer[1])} ${intAsHex(this.buffer[0])}`;
+ }
+/**
+ * Unsigned 64-bit integer stored in a `Uint32Array` as `[low, high]`.
+ *
+ * `times` and `plus` mutate the receiver and return it. The static
+ * `multiply` and `add` helpers operate on a copy of `left`, so neither
+ * operand is modified.
+ * @ignore
+ */
+export class Uint64 extends BaseInt64 {
+    /** Multiplies this value by `other` in place and returns `this`. */
+    public times(other: Uint64): Uint64 {
+        this._times(other);
+        return this;
+    }
+    /** Adds `other` to this value in place and returns `this`. */
+    public plus(other: Uint64): Uint64 {
+        this._plus(other);
+        return this;
+    }
+    /**
+     * Builds a `Uint64` from a string, or from anything else via its `toString()`.
+     * @param val the value to parse as base-10 digits
+     * @param out_buffer zero-filled two-word buffer that receives the result
+     * @nocollapse
+     */
+    public static from(val: any, out_buffer = new Uint32Array(2)): Uint64 {
+        return Uint64.fromString(
+            typeof(val) === 'string' ? val : val.toString(),
+            out_buffer
+        );
+    }
+    /** @nocollapse */
+    public static fromNumber(num: number, out_buffer = new Uint32Array(2)): Uint64 {
+        // Always parse numbers as strings - pulling out high and low bits
+        // directly seems to lose precision sometimes
+        // For example:
+        // > -4613034156400212000 >>> 0
+        // 721782784
+        // The correct lower 32-bits are 721782752
+        return Uint64.fromString(num.toString(), out_buffer);
+    }
+    /**
+     * Parses a string of base-10 digits, at most `kInt32DecimalDigits` at a
+     * time: for every chunk, `out = out * 10^digits + chunk`.
+     * @param str the decimal digits to parse
+     * @param out_buffer two-word buffer that receives the result; the running
+     * value starts from its current contents, so it must be zero-filled
+     * @nocollapse
+     */
+    public static fromString(str: string, out_buffer = new Uint32Array(2)): Uint64 {
+        const length = str.length;
+        const out = new Uint64(out_buffer);
+        for (let posn = 0; posn < length;) {
+            const group = kInt32DecimalDigits < length - posn ?
+                kInt32DecimalDigits : length - posn;
+            const chunk = new Uint64(new Uint32Array([parseInt(str.substr(posn, group), 10), 0]));
+            const multiple = new Uint64(new Uint32Array([kPowersOfTen[group], 0]));
+            // Shift the digits accumulated so far, then append this chunk.
+            out.times(multiple);
+            out.plus(chunk);
+            posn += group;
+        }
+        return out;
+    }
+    /**
+     * Parses every entry with `Uint64.from` into one packed array that holds
+     * two 32-bit words (`low`, `high`) per value.
+     * @nocollapse
+     */
+    public static convertArray(values: (string|number)[]): Uint32Array {
+        const data = new Uint32Array(values.length * 2);
+        for (let i = -1, n = values.length; ++i < n;) {
+            // Each value writes straight into its own two-word window of `data`.
+            Uint64.from(values[i], new Uint32Array(data.buffer, data.byteOffset + 2 * i * 4, 2));
+        }
+        return data;
+    }
+    /** Returns `left * right` as a new `Uint64`. @nocollapse */
+    public static multiply(left: Uint64, right: Uint64): Uint64 {
+        const rtrn = new Uint64(new Uint32Array(left.buffer));
+        return rtrn.times(right);
+    }
+    /** Returns `left + right` as a new `Uint64`. @nocollapse */
+    public static add(left: Uint64, right: Uint64): Uint64 {
+        const rtrn = new Uint64(new Uint32Array(left.buffer));
+        return rtrn.plus(right);
+    }
+}
+/**
+ * Signed (two's-complement) 64-bit integer stored in a `Uint32Array` as
+ * `[low, high]`.
+ *
+ * `negate`, `times` and `plus` mutate the receiver and return it. The static
+ * `multiply` and `add` helpers operate on a copy of `left`, so neither
+ * operand is modified.
+ * @ignore
+ */
+export class Int64 extends BaseInt64 {
+    /** Replaces this value with its two's-complement negation and returns `this`. */
+    public negate(): Int64 {
+        this.buffer[0] = ~this.buffer[0] + 1;
+        this.buffer[1] = ~this.buffer[1];
+        // The +1 carries into the high word only when the low word wrapped to 0.
+        if (this.buffer[0] == 0) { ++this.buffer[1]; }
+        return this;
+    }
+    /** Multiplies this value by `other` in place and returns `this`. */
+    public times(other: Int64): Int64 {
+        this._times(other);
+        return this;
+    }
+    /** Adds `other` to this value in place and returns `this`. */
+    public plus(other: Int64): Int64 {
+        this._plus(other);
+        return this;
+    }
+    /** Signed comparison: the high words are compared as signed 32-bit values. */
+    public lessThan(other: Int64): boolean {
+        // force high bytes to be signed
+        const this_high = this.buffer[1] << 0;
+        const other_high = other.buffer[1] << 0;
+        return this_high < other_high ||
+            (this_high === other_high && this.buffer[0] < other.buffer[0]);
+    }
+    /**
+     * Builds an `Int64` from a string, or from anything else via its `toString()`.
+     * @param val the value to parse as an optionally `-`-prefixed base-10 number
+     * @param out_buffer zero-filled two-word buffer that receives the result
+     * @nocollapse
+     */
+    public static from(val: any, out_buffer = new Uint32Array(2)): Int64 {
+        return Int64.fromString(
+            typeof(val) === 'string' ? val : val.toString(),
+            out_buffer
+        );
+    }
+    /** @nocollapse */
+    public static fromNumber(num: number, out_buffer = new Uint32Array(2)): Int64 {
+        // Always parse numbers as strings - pulling out high and low bits
+        // directly seems to lose precision sometimes
+        // For example:
+        // > -4613034156400212000 >>> 0
+        // 721782784
+        // The correct lower 32-bits are 721782752
+        return Int64.fromString(num.toString(), out_buffer);
+    }
+    /**
+     * Parses an optionally `-`-prefixed string of base-10 digits, at most
+     * `kInt32DecimalDigits` at a time: for every chunk,
+     * `out = out * 10^digits + chunk`. The magnitude is negated at the end
+     * when the string starts with `-`.
+     * @param str the decimal text to parse
+     * @param out_buffer two-word buffer that receives the result; the running
+     * value starts from its current contents, so it must be zero-filled
+     * @nocollapse
+     */
+    public static fromString(str: string, out_buffer = new Uint32Array(2)): Int64 {
+        // TODO: Assert that out_buffer is 0 and length = 2
+        const negate = str.startsWith('-');
+        const length = str.length;
+        const out = new Int64(out_buffer);
+        for (let posn = negate ? 1 : 0; posn < length;) {
+            const group = kInt32DecimalDigits < length - posn ?
+                kInt32DecimalDigits : length - posn;
+            const chunk = new Int64(new Uint32Array([parseInt(str.substr(posn, group), 10), 0]));
+            const multiple = new Int64(new Uint32Array([kPowersOfTen[group], 0]));
+            // Shift the digits accumulated so far, then append this chunk.
+            out.times(multiple);
+            out.plus(chunk);
+            posn += group;
+        }
+        return negate ? out.negate() : out;
+    }
+    /**
+     * Parses every entry with `Int64.from` into one packed array that holds
+     * two 32-bit words (`low`, `high`) per value.
+     * @nocollapse
+     */
+    public static convertArray(values: (string|number)[]): Uint32Array {
+        const data = new Uint32Array(values.length * 2);
+        for (let i = -1, n = values.length; ++i < n;) {
+            // Each value writes straight into its own two-word window of `data`.
+            Int64.from(values[i], new Uint32Array(data.buffer, data.byteOffset + 2 * i * 4, 2));
+        }
+        return data;
+    }
+    /** Returns `left * right` as a new `Int64`. @nocollapse */
+    public static multiply(left: Int64, right: Int64): Int64 {
+        const rtrn = new Int64(new Uint32Array(left.buffer));
+        return rtrn.times(right);
+    }
+    /** Returns `left + right` as a new `Int64`. @nocollapse */
+    public static add(left: Int64, right: Int64): Int64 {
+        const rtrn = new Int64(new Uint32Array(left.buffer));
+        return rtrn.plus(right);
+    }
+}
+/**
+ * Signed (two's-complement) 128-bit integer stored in a four-element
+ * `Uint32Array`, least-significant word first.
+ *
+ * `negate`, `times` and `plus` mutate the receiver and return it; bits beyond
+ * 128 are discarded. The static `multiply` and `add` helpers operate on a
+ * copy of `left`, so neither operand is modified.
+ * @ignore
+ */
+export class Int128 {
+    constructor (private buffer: Uint32Array) {
+        // buffer[3] MSB (high)
+        // buffer[2]
+        // buffer[1]
+        // buffer[0] LSB (low)
+    }
+    /** Upper 64 bits as an `Int64` that shares memory with this value. */
+    public high(): Int64 {
+        return new Int64(new Uint32Array(this.buffer.buffer, this.buffer.byteOffset + 8, 2));
+    }
+    /** Lower 64 bits as an `Int64` that shares memory with this value. */
+    public low(): Int64 {
+        return new Int64(new Uint32Array(this.buffer.buffer, this.buffer.byteOffset, 2));
+    }
+    /** Replaces this value with its two's-complement negation and returns `this`. */
+    public negate(): Int128 {
+        this.buffer[0] = ~this.buffer[0] + 1;
+        this.buffer[1] = ~this.buffer[1];
+        this.buffer[2] = ~this.buffer[2];
+        this.buffer[3] = ~this.buffer[3];
+        // The +1 moves up one word only while the word below wrapped to 0.
+        // Testing each word on its own would add a spurious carry whenever an
+        // inverted word happens to be 0 without any carry reaching it.
+        if (this.buffer[0] === 0) {
+            ++this.buffer[1];
+            if (this.buffer[1] === 0) {
+                ++this.buffer[2];
+                if (this.buffer[2] === 0) { ++this.buffer[3]; }
+            }
+        }
+        return this;
+    }
+    /** Multiplies this value by `other` in place and returns `this`. */
+    public times(other: Int128): Int128 {
+        // Break the left and right numbers into 32 bit chunks
+        // so that we can multiply them without overflow.
+        const L0 = new Uint64(new Uint32Array([this.buffer[3], 0]));
+        const L1 = new Uint64(new Uint32Array([this.buffer[2], 0]));
+        const L2 = new Uint64(new Uint32Array([this.buffer[1], 0]));
+        const L3 = new Uint64(new Uint32Array([this.buffer[0], 0]));
+        const R0 = new Uint64(new Uint32Array([other.buffer[3], 0]));
+        const R1 = new Uint64(new Uint32Array([other.buffer[2], 0]));
+        const R2 = new Uint64(new Uint32Array([other.buffer[1], 0]));
+        const R3 = new Uint64(new Uint32Array([other.buffer[0], 0]));
+        let product = Uint64.multiply(L3, R3);
+        this.buffer[0] = product.low();
+        // `sum` collects everything that lands in word 1 and above.
+        const sum = new Uint64(new Uint32Array([product.high(), 0]));
+        product = Uint64.multiply(L2, R3);
+        sum.plus(product);
+        product = Uint64.multiply(L3, R2);
+        sum.plus(product);
+        this.buffer[1] = sum.low();
+        // A wrapped 64-bit sum is smaller than the last addend: carry into word 3.
+        this.buffer[3] = (sum.lessThan(product) ? 1 : 0);
+        this.buffer[2] = sum.high();
+        // `high` is a view over words 2 and 3, so these additions write into this.buffer.
+        const high = new Uint64(new Uint32Array(this.buffer.buffer, this.buffer.byteOffset + 8, 2));
+        high.plus(Uint64.multiply(L1, R3))
+            .plus(Uint64.multiply(L2, R2))
+            .plus(Uint64.multiply(L3, R1));
+        this.buffer[3] += Uint64.multiply(L0, R3)
+            .plus(Uint64.multiply(L1, R2))
+            .plus(Uint64.multiply(L2, R1))
+            .plus(Uint64.multiply(L3, R0)).low();
+        return this;
+    }
+    /** Adds `other` to this value in place and returns `this`. */
+    public plus(other: Int128): Int128 {
+        let carry = 0;
+        for (let i = 0; i < 4; ++i) {
+            // Each partial sum is below 2^33, so it is exact as a double.
+            const sum = this.buffer[i] + other.buffer[i] + carry;
+            carry = sum > 0xFFFFFFFF ? 1 : 0;
+            // The Uint32Array store keeps only the low 32 bits.
+            this.buffer[i] = sum;
+        }
+        return this;
+    }
+    /** Four space-separated hex words, most significant first. */
+    public hex(): string {
+        return `${intAsHex(this.buffer[3])} ${intAsHex(this.buffer[2])} ${intAsHex(this.buffer[1])} ${intAsHex(this.buffer[0])}`;
+    }
+    /** Returns `left * right` as a new `Int128`. @nocollapse */
+    public static multiply(left: Int128, right: Int128): Int128 {
+        const rtrn = new Int128(new Uint32Array(left.buffer));
+        return rtrn.times(right);
+    }
+    /** Returns `left + right` as a new `Int128`. @nocollapse */
+    public static add(left: Int128, right: Int128): Int128 {
+        const rtrn = new Int128(new Uint32Array(left.buffer));
+        return rtrn.plus(right);
+    }
+    /**
+     * Builds an `Int128` from a string, or from anything else via its `toString()`.
+     * @param val the value to parse as an optionally `-`-prefixed base-10 number
+     * @param out_buffer zero-filled four-word buffer that receives the result
+     * @nocollapse
+     */
+    public static from(val: any, out_buffer = new Uint32Array(4)): Int128 {
+        return Int128.fromString(
+            typeof(val) === 'string' ? val : val.toString(),
+            out_buffer
+        );
+    }
+    /** @nocollapse */
+    public static fromNumber(num: number, out_buffer = new Uint32Array(4)): Int128 {
+        // Always parse numbers as strings - pulling out high and low bits
+        // directly seems to lose precision sometimes
+        // For example:
+        // > -4613034156400212000 >>> 0
+        // 721782784
+        // The correct lower 32-bits are 721782752
+        return Int128.fromString(num.toString(), out_buffer);
+    }
+    /**
+     * Parses an optionally `-`-prefixed string of base-10 digits, at most
+     * `kInt32DecimalDigits` at a time: for every chunk,
+     * `out = out * 10^digits + chunk`. The magnitude is negated at the end
+     * when the string starts with `-`.
+     * @param str the decimal text to parse
+     * @param out_buffer four-word buffer that receives the result; the running
+     * value starts from its current contents, so it must be zero-filled
+     * @nocollapse
+     */
+    public static fromString(str: string, out_buffer = new Uint32Array(4)): Int128 {
+        // TODO: Assert that out_buffer is 0 and length = 4
+        const negate = str.startsWith('-');
+        const length = str.length;
+        const out = new Int128(out_buffer);
+        for (let posn = negate ? 1 : 0; posn < length;) {
+            const group = kInt32DecimalDigits < length - posn ?
+                kInt32DecimalDigits : length - posn;
+            const chunk = new Int128(new Uint32Array([parseInt(str.substr(posn, group), 10), 0, 0, 0]));
+            const multiple = new Int128(new Uint32Array([kPowersOfTen[group], 0, 0, 0]));
+            // Shift the digits accumulated so far, then append this chunk.
+            out.times(multiple);
+            out.plus(chunk);
+            posn += group;
+        }
+        return negate ? out.negate() : out;
+    }
+    /**
+     * Parses every entry with `Int128.from` into one packed array that holds
+     * four 32-bit words per value, least-significant word first.
+     * @nocollapse
+     */
+    public static convertArray(values: (string|number)[]): Uint32Array {
+        // TODO: Distinguish between string and number at compile-time
+        const data = new Uint32Array(values.length * 4);
+        for (let i = -1, n = values.length; ++i < n;) {
+            // Each value writes straight into its own four-word window of `data`.
+            Int128.from(values[i], new Uint32Array(data.buffer, data.byteOffset + 4 * 4 * i, 4));
+        }
+        return data;
+    }
+}
diff --git a/src/arrow/js/src/util/math.ts b/src/arrow/js/src/util/math.ts
new file mode 100644
index 000000000..47678e1a9
--- /dev/null
+++ b/src/arrow/js/src/util/math.ts
@@ -0,0 +1,105 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+const f64 = new Float64Array(1); // one-element scratch double
+const u32 = new Uint32Array(f64.buffer); // two 32-bit words aliasing the same 8 bytes as f64
+/**
+ * Convert uint16 (logically a float16) to a JS float64. Inspired by numpy's `npy_half_to_double`.
+ *
+ * @param h {number} the uint16 to convert
+ * @private
+ * @ignore
+ */
+export function uint16ToFloat64(h: number) {
+ const expo = (h & 0x7C00) >> 10; // bits 10-14: the 5-bit exponent
+ const sigf = (h & 0x03FF) / 1024; // bits 0-9: the fraction, scaled into [0, 1)
+ const sign = (-1) ** ((h & 0x8000) >> 15); // bit 15: +1 when clear, -1 when set
+ switch (expo) {
+ case 0x1F: return sign * (sigf ? NaN : 1 / 0); // all-ones exponent: NaN if any fraction bit is set, else +/-Infinity
+ case 0x00: return sign * (sigf ? 6.103515625e-5 * sigf : 0); // zero exponent: subnormal (2^-14 * fraction) or signed zero
+ }
+ return sign * (2 ** (expo - 15)) * (1 + sigf); // normal value: exponent bias 15, implicit leading 1
+/**
+ * Convert a float64 to uint16 (assuming the float64 is logically a float16). Inspired by numpy's `npy_double_to_half`.
+ *
+ * @param d {number} The float64 to convert
+ * @private
+ * @ignore
+ */
+export function float64ToUint16(d: number) {
+ if (d !== d) { return 0x7E00; } // NaN
+ f64[0] = d; // store the double so its two 32-bit halves can be read through u32
+ // Magic numbers:
+ // 0x80000000 = 10000000 00000000 00000000 00000000 -- masks the 32nd bit
+ // 0x7ff00000 = 01111111 11110000 00000000 00000000 -- masks the 21st-31st bits
+ // 0x000fffff = 00000000 00001111 11111111 11111111 -- masks the 1st-20th bit
+ const sign = (u32[1] & 0x80000000) >> 16 & 0xFFFF; // float64 sign bit moved down to bit 15
+ let expo = (u32[1] & 0x7ff00000), sigf = 0x0000; // NOTE(review): reads u32[1] as the high word, which presumes a little-endian host - confirm
+ if (expo >= 0x40f00000) {
+ //
+ // If exponent overflowed, the float16 is either NaN or Infinity.
+ // Rules to propagate the sign bit: mantissa > 0 ? NaN : +/-Infinity
+ //
+ // Magic numbers:
+ // 0x40F00000 = 01000000 11110000 00000000 00000000 -- 6-bit exponent overflow
+ // 0x7C000000 = 01111100 00000000 00000000 00000000 -- masks the 27th-31st bits
+ //
+ // returns:
+ // qNaN, aka 32256 decimal, 0x7E00 hex, or 01111110 00000000 binary
+ // sNaN, aka 32000 decimal, 0x7D00 hex, or 01111101 00000000 binary
+ // +inf, aka 31744 decimal, 0x7C00 hex, or 01111100 00000000 binary
+ // -inf, aka 64512 decimal, 0xFC00 hex, or 11111100 00000000 binary
+ //
+ // If mantissa is greater than 23 bits, set to +Infinity like numpy
+ if (u32[0] > 0) {
+ expo = 0x7C00;
+ } else {
+ expo = (expo & 0x7C000000) >> 16;
+ sigf = (u32[1] & 0x000fffff) >> 10;
+ }
+ } else if (expo <= 0x3f000000) {
+ //
+ // If exponent underflowed, the float is either signed zero or subnormal.
+ //
+ // Magic numbers:
+ // 0x3F000000 = 00111111 00000000 00000000 00000000 -- 6-bit exponent underflow
+ //
+ sigf = 0x100000 + (u32[1] & 0x000fffff);
+ sigf = 0x100000 + (sigf << ((expo >> 20) - 998)) >> 21;
+ expo = 0;
+ } else {
+ //
+ // No overflow or underflow, rebase the exponent and round the mantissa
+ // Magic numbers:
+ // 0x200 = 00000010 00000000 -- masks off the 10th bit
+ //
+ // Ensure the first mantissa bit (the 10th one) is 1 and round
+ expo = (expo - 0x3f000000) >> 10;
+ sigf = ((u32[1] & 0x000fffff) + 0x200) >> 10;
+ }
+ return sign | expo | sigf & 0xFFFF; // `&` binds tighter than `|`, so the mask applies to sigf only
diff --git a/src/arrow/js/src/util/pretty.ts b/src/arrow/js/src/util/pretty.ts
new file mode 100644
index 000000000..a189fc490
--- /dev/null
+++ b/src/arrow/js/src/util/pretty.ts
@@ -0,0 +1,37 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+/** The `undefined` value, produced with `void (0)`. @ignore */ const undf = void (0);
+/**
+ * Renders a value as display text.
+ *
+ * `null` and `undefined` become their names, numbers and bigints are
+ * interpolated, and strings are wrapped in double quotes without escaping.
+ * Any other value uses `x[Symbol.toPrimitive]('string')` when that member is
+ * a function, is interpolated inside square brackets when it is an
+ * ArrayBuffer view, and otherwise goes through `JSON.stringify`.
+ *
+ * @param x the value to render
+ * @returns the rendered text, or whatever the `Symbol.toPrimitive` hook or `JSON.stringify` returns
+ * @ignore
+ */
+export function valueToString(x: any) {
+ if (x === null) { return 'null'; }
+ if (x === undf) { return 'undefined'; }
+ switch (typeof x) {
+ case 'number': return `${x}`;
+ case 'bigint': return `${x}`;
+ case 'string': return `"${x}"`;
+ }
+ // If [Symbol.toPrimitive] is implemented (like in BN)
+ // use it instead of JSON.stringify(). This ensures we
+ // print BigInts, Decimals, and Binary in their native
+ // representation
+ if (typeof x[Symbol.toPrimitive] === 'function') {
+ return x[Symbol.toPrimitive]('string');
+ }
+ return ArrayBuffer.isView(x) ? `[${x}]` : JSON.stringify(x);
diff --git a/src/arrow/js/src/util/recordbatch.ts b/src/arrow/js/src/util/recordbatch.ts
new file mode 100644
index 000000000..37a630858
--- /dev/null
+++ b/src/arrow/js/src/util/recordbatch.ts
@@ -0,0 +1,121 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+import { Column } from '../column';
+import { Vector } from '../vector';
+import { DataType } from '../type';
+import { Data, Buffers } from '../data';
+import { Schema, Field } from '../schema';
+import { Chunked } from '../vector/chunked';
+import { RecordBatch } from '../recordbatch';
+const noopBuf = new Uint8Array(0); // shared zero-length placeholder buffer
+const nullBufs = (bitmapLength: number) => <unknown> [ // four-slot buffer list; only slot 2 is allocated per call
+ noopBuf, noopBuf, new Uint8Array(bitmapLength), noopBuf // slot 2 is zero-filled; presumably the validity bitmap slot of `Buffers` - confirm
+] as Buffers<any>;
+/**
+ * Produces one `Data` per entry of `chunks`, each with length `batchLength`.
+ *
+ * A chunk whose length already equals `batchLength` is reused as is. Any
+ * other chunk is resized with `_changeLengthAndBackfillNullBitmap`, and a
+ * missing chunk is replaced by a `Data` of the field's type whose null count
+ * equals `batchLength` and whose bitmap is zero-filled. In both of those cases
+ * the matching field is cloned as nullable if it was not nullable already.
+ *
+ * @param schema schema whose fields line up with `chunks` by index
+ * @param chunks one `Data` per column; entries may be missing
+ * @param batchLength target length; defaults to the longest chunk
+ * @returns `[schema built from the possibly widened fields, batchLength, data]`
+ * @ignore
+ */
+export function ensureSameLengthData<T extends { [key: string]: DataType } = any>(
+    schema: Schema<T>,
+    chunks: Data<T[keyof T]>[],
+    batchLength = chunks.reduce((l, c) => Math.max(l, c.length), 0)
+) {
+    let data: Data<T[keyof T]>;
+    let field: Field<T[keyof T]>;
+    let i = -1;
+    const n = chunks.length;
+    const fields = [...schema.fields];
+    const batchData = [] as Data<T[keyof T]>[];
+    // Bitmap size in bytes: one bit per row, rounded up to a multiple of 64 bits.
+    const bitmapLength = ((batchLength + 63) & ~63) >> 3;
+    while (++i < n) {
+        if ((data = chunks[i]) && data.length === batchLength) {
+            batchData[i] = data;
+        } else {
+            // Padding introduces nulls, so the field has to admit them.
+            (field = fields[i]).nullable || (fields[i] = fields[i].clone({ nullable: true }) as Field<T[keyof T]>);
+            batchData[i] = data ? data._changeLengthAndBackfillNullBitmap(batchLength)
+                : Data.new(field.type, 0, batchLength, batchLength, nullBufs(bitmapLength)) as Data<T[keyof T]>;
+        }
+    }
+    return [new Schema<T>(fields), batchLength, batchData] as [Schema<T>, number, Data<T[keyof T]>[]];
+}
+/**
+ * Builds a schema from each column's `field` and forwards the columns to
+ * `distributeVectorsIntoRecordBatches`.
+ *
+ * @param columns the columns to distribute
+ * @returns the `[schema, batches]` pair produced by `distributeVectorsIntoRecordBatches`
+ * @ignore
+ */
+export function distributeColumnsIntoRecordBatches<T extends { [key: string]: DataType } = any>(columns: Column<T[keyof T]>[]): [Schema<T>, RecordBatch<T>[]] {
+    return distributeVectorsIntoRecordBatches<T>(new Schema<T>(columns.map(({ field }) => field)), columns);
+}
+/**
+ * Flattens every vector into a list of `Data` chunks and forwards them to
+ * `uniformlyDistributeChunksAcrossRecordBatches`. A `Chunked` vector
+ * contributes the `data` of each of its chunks; any other vector contributes
+ * its own `data` as a single chunk.
+ *
+ * @param schema schema whose fields line up with `vecs` by index
+ * @param vecs the vectors to distribute
+ * @returns the `[schema, batches]` pair produced by the distribution step
+ * @ignore
+ */
+export function distributeVectorsIntoRecordBatches<T extends { [key: string]: DataType } = any>(schema: Schema<T>, vecs: (Vector<T[keyof T]> | Chunked<T[keyof T]>)[]): [Schema<T>, RecordBatch<T>[]] {
+    return uniformlyDistributeChunksAcrossRecordBatches<T>(schema, vecs.map((v) => v instanceof Chunked ? v.chunks.map((c) => c.data) : [v.data]));
+}
+/**
+ * Regroups per-column chunk lists into record batches in which every column
+ * has the same length.
+ *
+ * Each pass takes the next chunk from every column, uses the shortest chunk
+ * length as the batch length, and lets `distributeChildData` trim or pad the
+ * chunks to match. Passes whose batch length is 0, or in which no column had
+ * a chunk left, emit no batch. The inner `columns` arrays are consumed.
+ *
+ * @param schema schema whose fields line up with `columns` by index
+ * @param columns one chunk list per column
+ * @returns `[schema rebuilt from the possibly widened fields, batches]`
+ * @ignore
+ */
+function uniformlyDistributeChunksAcrossRecordBatches<T extends { [key: string]: DataType } = any>(schema: Schema<T>, columns: Data<T[keyof T]>[][]): [Schema<T>, RecordBatch<T>[]] {
+    const fields = [...schema.fields];
+    const batchArgs = [] as [number, Data<T[keyof T]>[]][];
+    // Passes still to run; distributeChildData may raise it when it pushes a remainder back.
+    const memo = { numBatches: columns.reduce((n, c) => Math.max(n, c.length), 0) };
+    let numBatches = 0, batchLength = 0;
+    let i = -1;
+    const numColumns = columns.length;
+    let child: Data<T[keyof T]>, childData: Data<T[keyof T]>[] = [];
+    while (memo.numBatches-- > 0) {
+        for (batchLength = Number.POSITIVE_INFINITY, i = -1; ++i < numColumns;) {
+            childData[i] = child = columns[i].shift()!;
+            // Exhausted columns leave the running minimum untouched.
+            batchLength = Math.min(batchLength, child ? child.length : batchLength);
+        }
+        // Still infinite means no column had a chunk left.
+        if (isFinite(batchLength)) {
+            childData = distributeChildData(fields, batchLength, childData, columns, memo);
+            if (batchLength > 0) {
+                // Copy, because `childData` is reused by the next pass.
+                batchArgs[numBatches++] = [batchLength, childData.slice()];
+            }
+        }
+    }
+    return [
+        schema = new Schema<T>(fields, schema.metadata),
+        batchArgs.map((xs) => new RecordBatch(schema, ...xs))
+    ];
+}
+/**
+ * Makes every entry of `childData` exactly `batchLength` long, in place.
+ *
+ * A chunk of the right length is kept. A longer chunk is cut to its first
+ * `batchLength` rows, and the slice taken for the rest is put back at the
+ * front of that column's list. A shorter chunk is resized with
+ * `_changeLengthAndBackfillNullBitmap`, and a missing chunk is replaced by a
+ * `Data` of the field's type whose null count equals `batchLength`; in both of
+ * those cases the field is cloned as nullable if it was not nullable already.
+ *
+ * @param fields fields lining up with `columns` by index; entries may be replaced
+ * @param batchLength target length of every chunk
+ * @param childData current chunk of each column; entries may be missing
+ * @param columns remaining chunk lists, which receive pushed-back remainders
+ * @param memo shared counter, raised so that pushed-back chunks get processed
+ * @returns the same `childData` array
+ * @ignore
+ */
+function distributeChildData<T extends { [key: string]: DataType } = any>(fields: Field<T[keyof T]>[], batchLength: number, childData: Data<T[keyof T]>[], columns: Data<T[keyof T]>[][], memo: { numBatches: number }) {
+    let data: Data<T[keyof T]>;
+    let field: Field<T[keyof T]>;
+    let length = 0, i = -1;
+    const n = columns.length;
+    // Bitmap size in bytes: one bit per row, rounded up to a multiple of 64 bits.
+    const bitmapLength = ((batchLength + 63) & ~63) >> 3;
+    while (++i < n) {
+        if ((data = childData[i]) && ((length = data.length) >= batchLength)) {
+            if (length === batchLength) {
+                childData[i] = data;
+            } else {
+                childData[i] = data.slice(0, batchLength);
+                data = data.slice(batchLength, length - batchLength);
+                // unshift() returns the new list length, i.e. the passes this column still needs.
+                memo.numBatches = Math.max(memo.numBatches, columns[i].unshift(data));
+            }
+        } else {
+            // Padding introduces nulls, so the field has to admit them.
+            (field = fields[i]).nullable || (fields[i] = field.clone({ nullable: true }) as Field<T[keyof T]>);
+            childData[i] = data ? data._changeLengthAndBackfillNullBitmap(batchLength)
+                : Data.new(field.type, 0, batchLength, batchLength, nullBufs(bitmapLength)) as Data<T[keyof T]>;
+        }
+    }
+    return childData;
+}
diff --git a/src/arrow/js/src/util/utf8.ts b/src/arrow/js/src/util/utf8.ts
new file mode 100644
index 000000000..b6f8fcdb8
--- /dev/null
+++ b/src/arrow/js/src/util/utf8.ts
@@ -0,0 +1,24 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+const decoder = new TextDecoder('utf-8'); // single decoder instance shared by every decodeUtf8 call
+/** Decodes `buffer` as UTF-8 text with the shared `TextDecoder`; `buffer` may be omitted. @ignore */
+export const decodeUtf8 = (buffer?: BufferSource) => decoder.decode(buffer);
+const encoder = new TextEncoder(); // single encoder instance shared by every encodeUtf8 call
+/** Encodes `value` to UTF-8 bytes with the shared `TextEncoder`; `value` may be omitted. @ignore */
+export const encodeUtf8 = (value?: string) => encoder.encode(value);
diff --git a/src/arrow/js/src/util/vector.ts b/src/arrow/js/src/util/vector.ts
new file mode 100644
index 000000000..a6cfd0373
--- /dev/null
+++ b/src/arrow/js/src/util/vector.ts
@@ -0,0 +1,198 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+import { Vector } from '../vector';
+import { MapRow, StructRow } from '../vector/row';
+import { compareArrayLike } from '../util/buffer';
+import { BigInt, BigIntAvailable } from './compat';
+/**
+ * Minimal shape the clamp helpers need: a `length`, and optionally a `stride`.
+ * @ignore
+ */
+type RangeLike = { length: number; stride?: number };
+/**
+ * Continuation called by `clampIndex` with the source and the adjusted index.
+ * @ignore
+ */
+type ClampThen<T extends RangeLike> = (source: T, index: number) => any;
+/**
+ * Continuation called by `clampRange`. Note that `clampRange` passes its normalized
+ * begin and end positions as the second and third arguments, despite the parameter
+ * names `offset` and `length` used here.
+ * @ignore
+ */
+type ClampRangeThen<T extends RangeLike> = (source: T, offset: number, length: number) => any;
+export function clampIndex<T extends RangeLike>(source: T, index: number): number;
+export function clampIndex<T extends RangeLike, N extends ClampThen<T> = ClampThen<T>>(source: T, index: number, then: N): ReturnType<N>;
+/**
+ * Resolves a possibly negative `index` against `source.length`, counting negative
+ * values back from the end. Non-negative indices are passed through unchanged; no
+ * upper bound is applied.
+ *
+ * NOTE(review): `index % length` is zero for exact negative multiples of `length`, so
+ * `index === -length` resolves to `length` (one past the end) rather than 0, and a
+ * zero `length` yields NaN for negative indices. Confirm whether callers rely on this.
+ *
+ * @param source Object whose `length` is used to resolve negative indices.
+ * @param index The index to adjust.
+ * @param then Optional continuation; when supplied, its result for `(source, adjusted)` is returned.
+ * @returns The adjusted index, or the value returned by `then`.
+ * @ignore
+ */
+export function clampIndex<T extends RangeLike, N extends ClampThen<T> = ClampThen<T>>(source: T, index: number, then?: N) {
+ const length = source.length;
+ const adjust = index > -1 ? index : (length + (index % length)); // negative index: count back from the end
+ return then ? then(source, adjust) : adjust;
+/**
+ * Module-level scratch variable used by `clampRange` while swapping its two bounds.
+ * @ignore
+ */
+let tmp: number;
+export function clampRange<T extends RangeLike>(source: T, begin: number | undefined, end: number | undefined): [number, number];
+export function clampRange<T extends RangeLike, N extends ClampRangeThen<T> = ClampRangeThen<T>>(source: T, begin: number | undefined, end: number | undefined, then: N): ReturnType<N>;
+/**
+ * Normalizes a `[begin, end)` pair against `source.length`.
+ *
+ * Non-numeric `begin` defaults to 0 and non-numeric `end` defaults to the length.
+ * Negative positions wrap around the length. If the end lands before the begin the
+ * two are swapped (unlike `Array.prototype.slice`, which would produce an empty
+ * range), and the end is finally capped at the length.
+ *
+ * NOTE(review): with a zero length, the modulo used for negative positions yields NaN.
+ *
+ * @param source Object whose `length` bounds the range (treated as 0 when undefined).
+ * @param begin Requested start position, or undefined.
+ * @param end Requested end position, or undefined.
+ * @param then Optional continuation; when supplied, its result for `(source, lhs, rhs)` is returned.
+ * @returns The `[lhs, rhs]` tuple, or the value returned by `then`.
+ * @ignore
+ */
+export function clampRange<T extends RangeLike, N extends ClampRangeThen<T> = ClampRangeThen<T>>(source: T, begin: number | undefined, end: number | undefined, then?: N) {
+ // Adjust args similar to Array.prototype.slice. Normalize begin/end to
+ // clamp between 0 and length, and wrap around on negative indices, e.g.
+ // slice(-1, 5) or slice(5, -1)
+ const { length: len = 0 } = source;
+ let lhs = typeof begin !== 'number' ? 0 : begin;
+ let rhs = typeof end !== 'number' ? len : end;
+ // wrap around on negative start/end positions
+ (lhs < 0) && (lhs = ((lhs % len) + len) % len);
+ (rhs < 0) && (rhs = ((rhs % len) + len) % len);
+ // ensure lhs <= rhs by swapping the two bounds through the module-level `tmp`
+ (rhs < lhs) && (tmp = lhs, lhs = rhs, rhs = tmp);
+ // ensure rhs <= length
+ (rhs > len) && (rhs = len);
+ return then ? then(source, lhs, rhs) : [lhs, rhs];
+const big0 = BigIntAvailable ? BigInt(0) : 0; // zero as a bigint when BigInt is available, otherwise the number 0
+const isNaNFast = (value: any) => value !== value; // true only for values that are not strictly equal to themselves, i.e. NaN
+/**
+ * Builds a predicate that tests whether a candidate value matches `search`.
+ *
+ * The comparison strategy is chosen once, from the runtime type of `search`, in this
+ * order: primitives (including null, NaN and bigint), Date, ArrayBuffer views, Map,
+ * Array, Vector, and finally any other object.
+ *
+ * @param search The value later candidates are compared against.
+ * @returns A function that takes a candidate value and reports whether it matches.
+ * @ignore
+ */
+export function createElementComparator(search: any) {
+ const typeofSearch = typeof search;
+ // Compare primitives (null is also handled here)
+ if (typeofSearch !== 'object' || search === null) {
+ // Compare NaN: NaN never equals itself, so match any candidate that is also NaN
+ if (isNaNFast(search)) {
+ return isNaNFast;
+ }
+ return typeofSearch !== 'bigint'
+ ? (value: any) => value === search
+ : (value: any) => (big0 + value) === search; // presumably adds big0 to coerce the candidate before the strict comparison; confirm
+ }
+ // Compare Dates by their numeric time value
+ if (search instanceof Date) {
+ const valueOfSearch = search.valueOf();
+ return (value: any) => value instanceof Date ? (value.valueOf() === valueOfSearch) : false;
+ }
+ // Compare TypedArrays (anything ArrayBuffer.isView accepts)
+ if (ArrayBuffer.isView(search)) {
+ return (value: any) => value ? compareArrayLike(search, value) : false;
+ }
+ // Compare Maps and Rows
+ if (search instanceof Map) { return creatMapComparator(search); }
+ // Compare Array-likes
+ if (Array.isArray(search)) { return createArrayLikeComparator(search); }
+ // Compare Vectors
+ if (search instanceof Vector) { return createVectorComparator(search); }
+ // Compare non-empty Objects
+ return createObjectComparator(search);
+/**
+ * Builds one element comparator per entry of `lhs` and combines them into a
+ * single container comparator via `createSubElementsComparator`.
+ * @ignore
+ */
+function createArrayLikeComparator(lhs: ArrayLike<any>) {
+ const comparators = [] as ((x: any) => boolean)[];
+ for (let i = -1, n = lhs.length; ++i < n;) {
+ comparators[i] = createElementComparator(lhs[i]);
+ }
+ return createSubElementsComparator(comparators);
+/**
+ * Builds one element comparator per value of the Map `lhs`, in the Map's iteration
+ * order, and combines them via `createSubElementsComparator`. The Map's keys are
+ * not captured here.
+ * @ignore
+ */
+function creatMapComparator(lhs: Map<any, any>) {
+ let i = -1;
+ const comparators = [] as ((x: any) => boolean)[];
+ lhs.forEach((v) => comparators[++i] = createElementComparator(v));
+ return createSubElementsComparator(comparators);
+/**
+ * Builds one element comparator per element of the Vector `lhs` (read with
+ * `lhs.get(i)`) and combines them via `createSubElementsComparator`.
+ * @ignore
+ */
+function createVectorComparator(lhs: Vector<any>) {
+ const comparators = [] as ((x: any) => boolean)[];
+ for (let i = -1, n = lhs.length; ++i < n;) {
+ comparators[i] = createElementComparator(lhs.get(i));
+ }
+ return createSubElementsComparator(comparators);
+/**
+ * Builds a comparator for a plain object: one element comparator per own enumerable
+ * key of `lhs`, combined via `createSubElementsComparator` together with the key list.
+ * An object with no keys produces a comparator that never matches.
+ * @ignore
+ */
+function createObjectComparator(lhs: any) {
+ const keys = Object.keys(lhs);
+ // Only compare non-empty Objects
+ if (keys.length === 0) { return () => false; }
+ const comparators = [] as ((x: any) => boolean)[];
+ for (let i = -1, n = keys.length; ++i < n;) {
+ comparators[i] = createElementComparator(lhs[keys[i]]);
+ }
+ return createSubElementsComparator(comparators, keys);
+function createSubElementsComparator(comparators: ((x: any) => boolean)[], keys?: Iterable<string>) { // combines per-element comparators into one container comparator, dispatching on the candidate's constructor
+ return (rhs: any) => {
+ if (!rhs || typeof rhs !== 'object') { // only non-null objects can match a container
+ return false;
+ }
+ switch (rhs.constructor) {
+ case Array: return compareArray(comparators, rhs);
+ case Map:
+ case MapRow:
+ case StructRow:
+ return compareObject(comparators, rhs, rhs.keys()); // the key sequence is taken from the candidate itself
+ case Object:
+ case undefined: // support `Object.create(null)` objects
+ return compareObject(comparators, rhs, keys || Object.keys(rhs)); // use the captured keys when provided, else the candidate's own
+ }
+ return rhs instanceof Vector ? compareVector(comparators, rhs) : false; // any other constructor matches only if it is a Vector
+ };
+function compareArray(comparators: ((x: any) => boolean)[], arr: any[]) { // true when `arr` has exactly one element per comparator and every comparator accepts its element
+ const n = comparators.length;
+ if (arr.length !== n) { return false; } // different lengths can never match
+ for (let i = -1; ++i < n;) {
+ if (!(comparators[i](arr[i]))) { return false; } // stop at the first mismatch
+ }
+ return true;
+function compareVector(comparators: ((x: any) => boolean)[], vec: Vector) { // same contract as compareArray, reading elements with `vec.get(i)`
+ const n = comparators.length;
+ if (vec.length !== n) { return false; } // different lengths can never match
+ for (let i = -1; ++i < n;) {
+ if (!(comparators[i](vec.get(i)))) { return false; } // stop at the first mismatch
+ }
+ return true;
+function compareObject(comparators: ((x: any) => boolean)[], obj: Map<any, any>, keys: Iterable<string>) { // walks the expected keys, the candidate's keys and the candidate's values in lockstep
+ const lKeyItr = keys[Symbol.iterator]();
+ const rKeyItr = obj instanceof Map ? obj.keys() : Object.keys(obj)[Symbol.iterator](); // non-Map candidates are read through Object.keys
+ const rValItr = obj instanceof Map ? obj.values() : Object.values(obj)[Symbol.iterator](); // non-Map candidates are read through Object.values
+ let i = 0;
+ const n = comparators.length;
+ let rVal =; // NOTE(review): the initializer is missing in this rendering of the patch; restore from upstream
+ let lKey =; // NOTE(review): initializer missing, as above
+ let rKey =; // NOTE(review): initializer missing, as above
+ for (; i < n && !lKey.done && !rKey.done && !rVal.done;
+ ++i, lKey =, rKey =, rVal = { // NOTE(review): the expressions that advance the three iterators are missing here as well
+ if (lKey.value !== rKey.value || !comparators[i](rVal.value)) { // keys must be identical and the value must satisfy its comparator
+ break;
+ }
+ }
+ if (i === n && lKey.done && rKey.done && rVal.done) { // match only if every comparator ran and all three iterators are exhausted
+ return true;
+ }
+ lKeyItr.return && lKeyItr.return(); // on mismatch, close each iterator that exposes `return`
+ rKeyItr.return && rKeyItr.return();
+ rValItr.return && rValItr.return();
+ return false;
diff --git a/src/arrow/js/src/vector.ts b/src/arrow/js/src/vector.ts
new file mode 100644
index 000000000..bd7838cdf
--- /dev/null
+++ b/src/arrow/js/src/vector.ts
@@ -0,0 +1,73 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+import { Data } from './data';
+import { DataType } from './type';
+import { Chunked } from './vector/chunked';
+/**
+ * Contract for vectors that can produce a copy of type `R` via `clone`.
+ * @ignore
+ */
+export interface Clonable<R extends AbstractVector> {
+ clone(...args: any[]): R;
+/**
+ * Contract for vectors that can return a sub-range of type `R` via `slice`.
+ * @ignore
+ */
+export interface Sliceable<R extends AbstractVector> {
+ slice(begin?: number, end?: number): R;
+/**
+ * Contract for vectors that can be concatenated with other vectors of the same
+ * data type, yielding `R`, and that expose `Symbol.isConcatSpreadable`.
+ * @ignore
+ */
+export interface Applicative<T extends DataType, R extends Chunked> {
+ concat(...others: Vector<T>[]): R;
+ readonly [Symbol.isConcatSpreadable]: boolean;
+export interface AbstractVector<T extends DataType = any> // interface half of AbstractVector: mixes in the clone/slice/concat contracts and type-level fields
+ extends Clonable<AbstractVector<T>>,
+ Sliceable<AbstractVector<T>>,
+ Applicative<T, Chunked<T>> {
+ readonly TType: T['TType']; // type-level mirror of the DataType's TType
+ readonly TArray: T['TArray']; // type-level mirror of the DataType's TArray
+ readonly TValue: T['TValue']; // type-level mirror of the DataType's TValue
+export abstract class AbstractVector<T extends DataType = any> implements Iterable<T['TValue'] | null> { // abstract surface every vector implementation must provide
+ public abstract readonly data: Data<T>;
+ public abstract readonly type: T;
+ public abstract readonly typeId: T['TType'];
+ public abstract readonly length: number;
+ public abstract readonly stride: number;
+ public abstract readonly nullCount: number;
+ public abstract readonly byteLength: number;
+ public abstract readonly numChildren: number;
+ public abstract readonly ArrayType: T['ArrayType'];
+ public abstract isValid(index: number): boolean;
+ public abstract get(index: number): T['TValue'] | null;
+ public abstract set(index: number, value: T['TValue'] | null): void;
+ public abstract indexOf(value: T['TValue'] | null, fromIndex?: number): number;
+ public abstract [Symbol.iterator](): IterableIterator<T['TValue'] | null>;
+ public abstract toArray(): T['TArray'];
+ public abstract getChildAt<R extends DataType = any>(index: number): Vector<R> | null;
+(AbstractVector.prototype as any).data = null; // gives the prototype itself a null `data` property
+export { AbstractVector as Vector }; // the class is exported publicly under the name `Vector`
diff --git a/src/arrow/js/src/vector/base.ts b/src/arrow/js/src/vector/base.ts
new file mode 100644
index 000000000..2ceecdda4
--- /dev/null
+++ b/src/arrow/js/src/vector/base.ts
@@ -0,0 +1,111 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+import { Data } from '../data';
+import { Type } from '../enum';
+import { DataType } from '../type';
+import { Chunked } from './chunked';
+import { clampRange } from '../util/vector';
+import { VectorType as V } from '../interfaces';
+import { AbstractVector, Vector, Clonable, Sliceable, Applicative } from '../vector';
+/**
+ * Interface half of BaseVector: narrows the return types of `slice`, `concat` and
+ * `clone` to the concrete vector type `V<T>` / `Chunked<T>`.
+ * @ignore
+ */
+export interface BaseVector<T extends DataType = any> extends Clonable<V<T>>, Sliceable<V<T>>, Applicative<T, Chunked<T>> {
+ slice(begin?: number, end?: number): V<T>;
+ concat(...others: Vector<T>[]): Chunked<T>;
+ clone<R extends DataType = T>(data: Data<R>, children?: Vector<R>[]): V<R>;
+/**
+ * Base class for vectors constructed from a single `Data` instance, with an
+ * optional list of already-built child vectors.
+ * @ignore
+ */
+export abstract class BaseVector<T extends DataType = any> extends AbstractVector<T>
+ implements Clonable<V<T>>, Sliceable<V<T>>, Applicative<T, Chunked<T>> {
+ protected _children?: Vector[]; // child vectors; also populated lazily by getChildAt
+ constructor(data: Data<T>, children?: Vector[]) {
+ super();
+ this._children = children;
+ this.numChildren = data.childData.length; // child count comes from the data, not from `children`
+ this._bindDataAccessors( = data); // NOTE(review): the assignment target before `= data` is missing in this rendering of the patch; restore from upstream
+ }
+ public readonly data: Data<T>;
+ public readonly numChildren: number;
+ // NOTE(review): every getter below written as `return;` has lost its returned expression in this rendering of the patch; restore from upstream
+ public get type() { return; }
+ public get typeId() { return; }
+ public get length() { return; }
+ public get offset() { return; }
+ public get stride() { return; }
+ public get nullCount() { return; }
+ public get byteLength() { return; }
+ public get VectorName() { return `${Type[this.typeId]}Vector`; } // name of the Type enum member for this typeId, suffixed with "Vector"
+ public get ArrayType(): T['ArrayType'] { return this.type.ArrayType; }
+ public get values() { return; }
+ public get typeIds() { return; }
+ public get nullBitmap() { return; }
+ public get valueOffsets() { return; }
+ public get [Symbol.toStringTag]() { return `${this.VectorName}<${this.type[Symbol.toStringTag]}>`; } // formatted as VectorName<type tag>
+ public clone<R extends DataType = T>(data: Data<R>, children = this._children) { // `children` defaults to this vector's current child list
+ return<R>(data, children) as any; // NOTE(review): the constructor expression before `<R>` is missing in this rendering of the patch; restore from upstream
+ }
+ public concat(...others: Vector<T>[]) { // returns a Chunked made of this vector followed by `others`
+ return Chunked.concat<T>(this, ...others);
+ }
+ public slice(begin?: number, end?: number) {
+ // Adjust args similar to Array.prototype.slice. Normalize begin/end to
+ // clamp between 0 and length, and wrap around on negative indices, e.g.
+ // slice(-1, 5) or slice(5, -1). The normalized bounds are handed to _sliceInternal.
+ return clampRange(this, begin, end, this._sliceInternal);
+ }
+ public isValid(index: number): boolean { // reports whether the validity bit for `index` is set; always true when nullCount is 0
+ if (this.nullCount > 0) {
+ const idx = this.offset + index; // bit position in the bitmap, shifted by this vector's offset
+ const val = this.nullBitmap[idx >> 3]; // bitmap byte holding that bit (8 bits per byte)
+ const mask = (val & (1 << (idx % 8))); // isolate the bit within the byte
+ return mask !== 0;
+ }
+ return true;
+ }
+ public getChildAt<R extends DataType = any>(index: number): Vector<R> | null { // returns null when `index` is outside [0, numChildren)
+ return index < 0 || index >= this.numChildren ? null : (
+ (this._children || (this._children = []))[index] || // reuse a cached child when one exists
+ (this._children[index] =<R>([index] as Data<R>)) // NOTE(review): the expression that builds the child is missing tokens in this rendering of the patch; restore from upstream
+ ) as Vector<R>;
+ }
+ public toJSON() { return [...this]; } // serializes as a plain array of the iterated values
+ protected _sliceInternal(self: this, begin: number, end: number) { // receives `self` explicitly because clampRange calls it as a plain function
+ return self.clone(, end - begin), null!); // NOTE(review): the first argument is missing tokens in this rendering of the patch; restore from upstream
+ }
+ // @ts-ignore
+ protected _bindDataAccessors(data: Data<T>) {
+ // Intentionally empty here. Implementation in src/vectors/index.ts due to circular dependency/packaging shenanigans
+ }
+(BaseVector.prototype as any)[Symbol.isConcatSpreadable] = true; // sets the isConcatSpreadable flag once on the prototype
diff --git a/src/arrow/js/src/vector/binary.ts b/src/arrow/js/src/vector/binary.ts
new file mode 100644
index 000000000..603187a78
--- /dev/null
+++ b/src/arrow/js/src/vector/binary.ts
@@ -0,0 +1,27 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+import { Vector } from '../vector';
+import { BaseVector } from './base';
+import { Binary, Utf8 } from '../type';
+/**
+ * Vector of `Binary` values.
+ * @ignore
+ */
+export class BinaryVector extends BaseVector<Binary> {
+ public asUtf8() {
+ return Utf8())); // NOTE(review): this expression is missing tokens in this rendering of the patch; restore from upstream
+ }
diff --git a/src/arrow/js/src/vector/bool.ts b/src/arrow/js/src/vector/bool.ts
new file mode 100644
index 000000000..b555f4692
--- /dev/null
+++ b/src/arrow/js/src/vector/bool.ts
@@ -0,0 +1,35 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+import { Bool } from '../type';
+import { Chunked } from './chunked';
+import { BaseVector } from './base';
+import { VectorBuilderOptions } from './index';
+import { vectorFromValuesWithType } from './index';
+import { VectorBuilderOptionsAsync } from './index';
+/**
+ * Vector of `Bool` values.
+ * @ignore
+ */
+export class BoolVector extends BaseVector<Bool> {
+ public static from<TNull = any>(input: Iterable<boolean | TNull>): BoolVector;
+ public static from<TNull = any>(input: AsyncIterable<boolean | TNull>): Promise<BoolVector>;
+ public static from<TNull = any>(input: VectorBuilderOptions<Bool, boolean | TNull>): Chunked<Bool>;
+ public static from<TNull = any>(input: VectorBuilderOptionsAsync<Bool, boolean | TNull>): Promise<Chunked<Bool>>;
+ /**
+ * Builds a Bool vector from `input` by delegating to `vectorFromValuesWithType`
+ * with a factory that creates a new `Bool` type. The overloads above describe the
+ * result for each kind of input.
+ * @nocollapse
+ */
+ public static from<TNull = any>(input: Iterable<boolean | TNull> | AsyncIterable<boolean | TNull> | VectorBuilderOptions<Bool, boolean | TNull> | VectorBuilderOptionsAsync<Bool, boolean | TNull>) {
+ return vectorFromValuesWithType(() => new Bool(), input);
+ }
diff --git a/src/arrow/js/src/vector/chunked.ts b/src/arrow/js/src/vector/chunked.ts
new file mode 100644
index 000000000..656c4a1b6
--- /dev/null
+++ b/src/arrow/js/src/vector/chunked.ts
@@ -0,0 +1,320 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+import { Data } from '../data';
+import { Field } from '../schema';
+import { clampRange } from '../util/vector';
+import { DataType, Dictionary } from '../type';
+import { selectChunkArgs } from '../util/args';
+import { DictionaryVector } from './dictionary';
+import { AbstractVector, Vector } from '../vector';
+import { Clonable, Sliceable, Applicative } from '../vector';
+/**
+ * Type of `Chunked#dictionary`: a Vector of the dictionary type for Dictionary
+ * data types, otherwise null.
+ * @ignore
+ */
+type ChunkedDict<T extends DataType> = T extends Dictionary ? Vector<T['dictionary']> : null | never;
+/**
+ * Type of `Chunked#indices`: a Vector or Chunked of the index type for Dictionary
+ * data types, otherwise null.
+ * @ignore
+ */
+type ChunkedKeys<T extends DataType> = T extends Dictionary ? Vector<T['indices']> | Chunked<T['indices']> : null | never;
+/**
+ * Continuation accepted by `Chunked#search`; it receives the Chunked, the index of
+ * the chunk that was found, and the index of the value inside that chunk.
+ * @ignore
+ */
+export type SearchContinuation<T extends Chunked> = (column: T, chunkIndex: number, valueIndex: number) => any;
+/**
+ * Iterator that walks a list of chunk vectors in order, moving on to the next
+ * chunk's iterator once the current one is exhausted.
+ * @ignore
+ */
+class ChunkedIterator<T extends DataType> implements IterableIterator<T['TValue'] | null> {
+ private chunkIndex = 0; // index of the chunk currently being iterated
+ private chunkIterator: IterableIterator<T['TValue'] | null>;
+ constructor(
+ private chunks: Vector<T>[],
+ ) {
+ this.chunkIterator = this.getChunkIterator(); // NOTE(review): reads chunks[0] unconditionally, so an empty chunk list would fail here; confirm callers never pass one
+ }
+ next(): IteratorResult<T['TValue'] | null> {
+ while (this.chunkIndex < this.chunks.length) {
+ const next =; // NOTE(review): the initializer is missing in this rendering of the patch; restore from upstream
+ if (!next.done) {
+ return next;
+ }
+ if (++this.chunkIndex < this.chunks.length) { // advance to the next chunk, if there is one
+ this.chunkIterator = this.getChunkIterator();
+ }
+ }
+ return {done: true, value: null};
+ }
+ getChunkIterator() { // fresh iterator over the chunk at the current chunkIndex
+ return this.chunks[this.chunkIndex][Symbol.iterator]();
+ }
+ [Symbol.iterator]() { // the iterator is its own iterable
+ return this;
+ }
+/**
+ * A vector composed of an ordered list of chunk vectors that share one data type.
+ * @ignore
+ */
+export class Chunked<T extends DataType = any>
+ extends AbstractVector<T>
+ implements Clonable<Chunked<T>>,
+ Sliceable<Chunked<T>>,
+ Applicative<T, Chunked<T>> {
+ /**
+ * Collects the given vectors and arrays of vectors into one list, delegating
+ * to `selectChunkArgs`.
+ * @nocollapse
+ */
+ public static flatten<T extends DataType>(...vectors: (Vector<T> | Vector<T>[])[]) {
+ return selectChunkArgs<Vector<T>>(Vector, vectors);
+ }
+ /**
+ * Flattens the arguments and wraps them in a new Chunked whose type is taken
+ * from the first chunk.
+ * NOTE(review): `chunks[0].type` is read unconditionally, so calling this with
+ * no vectors would throw; confirm callers always pass at least one.
+ * @nocollapse
+ */
+ public static concat<T extends DataType>(...vectors: (Vector<T> | Vector<T>[])[]) {
+ const chunks = Chunked.flatten<T>(...vectors);
+ return new Chunked<T>(chunks[0].type, chunks);
+ }
+ protected _type: T;
+ protected _length: number;
+ protected _chunks: Vector<T>[];
+ protected _numChildren: number;
+ protected _children?: Chunked[]; // cache of child columns, filled lazily by getChildAt
+ protected _nullCount = -1; // -1 means "not computed yet"; see the nullCount getter
+ protected _chunkOffsets: Uint32Array; // starting offset of each chunk, plus the total length as the final entry
+ constructor(type: T, chunks: Vector<T>[] = [], offsets = calculateOffsets(chunks)) {
+ super();
+ this._type = type;
+ this._chunks = chunks;
+ this._chunkOffsets = offsets;
+ this._length = offsets[offsets.length - 1]; // total length is the last cumulative offset
+ this._numChildren = (this._type.children || []).length;
+ }
+ public get type() { return this._type; }
+ public get length() { return this._length; }
+ public get chunks() { return this._chunks; }
+ public get typeId(): T['TType'] { return this._type.typeId; }
+ public get VectorName() { return `Chunked<${this._type}>`; }
+ public get data(): Data<T> { // data of the first chunk only, or null when there are no chunks
+ return this._chunks[0] ? this._chunks[0].data : <any> null;
+ }
+ public get ArrayType() { return this._type.ArrayType; }
+ public get numChildren() { return this._numChildren; }
+ public get stride() { return this._chunks[0] ? this._chunks[0].stride : 1; } // stride of the first chunk, defaulting to 1 when empty
+ public get byteLength(): number { // sum of the byteLength of every chunk
+ return this._chunks.reduce((byteLength, chunk) => byteLength + chunk.byteLength, 0);
+ }
+ public get nullCount() { // sum of the chunks' null counts, computed on first access and then cached
+ let nullCount = this._nullCount;
+ if (nullCount < 0) {
+ this._nullCount = nullCount = this._chunks.reduce((x, { nullCount }) => x + nullCount, 0);
+ }
+ return nullCount;
+ }
+ protected _indices?: ChunkedKeys<T>; // cached result of the `indices` getter
+ public get indices(): ChunkedKeys<T> | null { // dictionary indices across all chunks, or null for non-dictionary types
+ if (DataType.isDictionary(this._type)) {
+ if (!this._indices) {
+ const chunks = (<any> this._chunks) as DictionaryVector<T, any>[];
+ this._indices = (chunks.length === 1
+ ? chunks[0].indices
+ : Chunked.concat( => x.indices))) as ChunkedKeys<T>; // NOTE(review): the argument expression is missing tokens in this rendering of the patch; restore from upstream
+ }
+ return this._indices;
+ }
+ return null;
+ }
+ public get dictionary(): ChunkedDict<T> | null { // dictionary of the last chunk, or null for non-dictionary types
+ if (DataType.isDictionary(this._type)) {
+ return this._chunks[this._chunks.length - 1].data.dictionary as ChunkedDict<T>; // NOTE(review): would throw when there are no chunks; confirm that cannot happen
+ }
+ return null;
+ }
+ public [Symbol.iterator](): IterableIterator<T['TValue'] | null> { // iterates every chunk's values in chunk order
+ return new ChunkedIterator(this._chunks);
+ }
+ public clone(chunks = this._chunks): Chunked<T> { // new Chunked with the same type over `chunks`; offsets are recalculated by the constructor
+ return new Chunked(this._type, chunks);
+ }
+ public concat(...others: Vector<T>[]): Chunked<T> { // new Chunked made of this instance followed by `others`, flattened
+ return this.clone(Chunked.flatten(this, ...others));
+ }
+ public slice(begin?: number, end?: number): Chunked<T> { // bounds are normalized by clampRange, then applied by _sliceInternal
+ return clampRange(this, begin, end, this._sliceInternal);
+ }
+ public getChildAt<R extends DataType = any>(index: number): Chunked<R> | null { // returns the child column at `index` as a Chunked, or null
+ if (index < 0 || index >= this._numChildren) { return null; }
+ const columns = this._children || (this._children = []);
+ let child: Chunked<R>, field: Field<R>, chunks: Vector<R>[];
+ if (child = columns[index]) { return child; } // reuse the cached child column
+ if (field = ((this._type.children || [])[index] as Field<R>)) {
+ chunks = this._chunks
+ .map((vector) => vector.getChildAt<R>(index)) // take the same child from every chunk
+ .filter((vec): vec is Vector<R> => vec != null); // drop chunks that have no such child
+ if (chunks.length > 0) {
+ return (columns[index] = new Chunked<R>(field.type, chunks)); // cache and return
+ }
+ }
+ return null;
+ }
+ public search(index: number): [number, number] | null;
+ public search<N extends SearchContinuation<Chunked<T>>>(index: number, then?: N): ReturnType<N>;
+ public search<N extends SearchContinuation<Chunked<T>>>(index: number, then?: N) { // maps a flat index to [chunkIndex, indexInChunk], or to `then`'s result; null when out of bounds
+ const idx = index;
+ // binary search to find the child vector and value indices
+ const offsets = this._chunkOffsets;
+ let rhs = offsets.length - 1;
+ // return early if out of bounds, or if there's just one child
+ if (idx < 0 ) { return null; }
+ if (idx >= offsets[rhs]) { return null; }
+ if (rhs <= 1 ) { return then ? then(this, 0, idx) : [0, idx]; }
+ let lhs = 0, pos = 0, mid = 0;
+ do {
+ if (lhs + 1 === rhs) { // window narrowed to one chunk; `pos` holds that chunk's starting offset
+ return then ? then(this, lhs, idx - pos) : [lhs, idx - pos];
+ }
+ mid = lhs + ((rhs - lhs) / 2) | 0; // midpoint, truncated to an integer by `| 0`
+ idx >= offsets[mid] ? (lhs = mid) : (rhs = mid);
+ } while (idx < offsets[rhs] && idx >= (pos = offsets[lhs]));
+ return null;
+ }
+ // NOTE(review): the call expressions at the start of the statements below (before `, this.isValidInternal`, `, this.getInternal`, etc.) are missing in this rendering of the patch; restore from upstream
+ public isValid(index: number): boolean {
+ return !!, this.isValidInternal);
+ }
+ public get(index: number): T['TValue'] | null {
+ return, this.getInternal);
+ }
+ public set(index: number, value: T['TValue'] | null): void {
+, ({ chunks }, i, j) => chunks[i].set(j, value));
+ }
+ public indexOf(element: T['TValue'], offset?: number): number {
+ if (offset && typeof offset === 'number') { // a non-zero numeric offset takes the chunk-lookup path
+ return, (self, i, j) => this.indexOfInternal(self, i, j, element))!;
+ }
+ return this.indexOfInternal(this, 0, Math.max(0, offset || 0), element); // otherwise scan from the first chunk
+ }
+ public toArray(): T['TArray'] { // concatenates every chunk's toArray() result into one array
+ const { chunks } = this;
+ const n = chunks.length;
+ let ArrayType: any = this._type.ArrayType;
+ if (n <= 0) { return new ArrayType(0); } // no chunks: empty array of the type's ArrayType
+ if (n <= 1) { return chunks[0].toArray(); } // single chunk: return its array directly
+ let len = 0;
+ const src = new Array(n);
+ for (let i = -1; ++i < n;) {
+ len += (src[i] = chunks[i].toArray()).length; // materialize each chunk and total the lengths
+ }
+ if (ArrayType !== src[0].constructor) { // prefer the constructor the first chunk actually produced
+ ArrayType = src[0].constructor;
+ }
+ const dst = new ArrayType(len);
+ const set: any = ArrayType === Array ? arraySet : typedSet; // plain arrays are copied element-wise, typed arrays with set()
+ for (let i = -1, idx = 0; ++i < n;) {
+ idx = set(src[i], dst, idx); // each copy returns the next write position
+ }
+ return dst;
+ }
+ protected getInternal({ _chunks }: Chunked<T>, i: number, j: number) { return _chunks[i].get(j); } // value at position j of chunk i
+ protected isValidInternal({ _chunks }: Chunked<T>, i: number, j: number) { return _chunks[i].isValid(j); } // validity at position j of chunk i
+ /**
+ * Scans the chunks for `element`, starting at position `fromIndex` of chunk
+ * `chunkIndex` and continuing from the start of each later chunk.
+ *
+ * @param chunkIndex Index of the chunk in which the scan starts.
+ * @param fromIndex Position inside the starting chunk at which to begin.
+ * @param element The value to look for.
+ * @returns The index of the first match relative to the whole Chunked vector, or -1.
+ */
+ protected indexOfInternal({ _chunks }: Chunked<T>, chunkIndex: number, fromIndex: number, element: T['TValue']) {
+ const n = _chunks.length;
+ // Seed the running offset with the lengths of the chunks that are skipped, so the
+ // result is an index into the whole vector rather than one relative to chunkIndex.
+ let offset = 0;
+ for (let k = 0; k < chunkIndex && k < n; ++k) {
+ offset += _chunks[k].length;
+ }
+ let i = chunkIndex - 1;
+ let start = fromIndex, found = -1;
+ while (++i < n) {
+ if (~(found = _chunks[i].indexOf(element, start))) {
+ return offset + found;
+ }
+ start = 0; // only the first chunk scanned honours fromIndex
+ offset += _chunks[i].length;
+ }
+ return -1;
+ }
+ protected _sliceInternal(self: Chunked<T>, begin: number, end: number) { // builds a new Chunked covering [begin, end); takes `self` explicitly because clampRange calls it as a plain function
+ const slices: Vector<T>[] = [];
+ const { chunks, _chunkOffsets: chunkOffsets } = self;
+ for (let i = -1, n = chunks.length; ++i < n;) {
+ const chunk = chunks[i];
+ const chunkLength = chunk.length;
+ const chunkOffset = chunkOffsets[i];
+ // If the child is to the right of the slice boundary, we can stop
+ if (chunkOffset >= end) { break; }
+ // If the child is to the left of the slice boundary, exclude
+ if (begin >= chunkOffset + chunkLength) { continue; }
+ // If the child is between both left and right boundaries, include w/o slicing
+ if (chunkOffset >= begin && (chunkOffset + chunkLength) <= end) {
+ slices.push(chunk);
+ continue;
+ }
+ // If the child overlaps one of the slice boundaries, include that slice
+ const from = Math.max(0, begin - chunkOffset); // chunk-relative start
+ const to = Math.min(end - chunkOffset, chunkLength); // chunk-relative end
+ slices.push(chunk.slice(from, to) as Vector<T>);
+ }
+ return self.clone(slices);
+ }
+/**
+ * Computes cumulative start offsets for a list of vectors. The result has one more
+ * entry than there are vectors: entry 0 is 0 and the last entry is the total length.
+ * @ignore
+ */
+function calculateOffsets<T extends DataType>(vectors: Vector<T>[]) {
+ const offsets = new Uint32Array((vectors || []).length + 1); // a missing list is treated as empty
+ let offset = offsets[0] = 0;
+ const length = offsets.length;
+ for (let index = 0; ++index < length;) {
+ offsets[index] = (offset += vectors[index - 1].length); // offsets[k] is the end of vector k-1
+ }
+ return offsets;
+/**
+ * Copies `src` into `dst` at `offset` using the typed array's `set`, and returns
+ * the position just past the copied values.
+ * @ignore
+ */
+const typedSet = (src: TypedArray, dst: TypedArray, offset: number) => {
+ dst.set(src, offset);
+ return (offset + src.length);
+/**
+ * Copies `src` into `dst` element by element starting at `offset`, and returns
+ * the position just past the copied values.
+ * @ignore
+ */
+const arraySet = (src: any[], dst: any[], offset: number) => {
+ let idx = offset;
+ for (let i = -1, n = src.length; ++i < n;) {
+ dst[idx++] = src[i];
+ }
+ return idx;
+/**
+ * Structural type covering the typed-array members this module relies on:
+ * `length`, numeric indexing and `set`.
+ * @ignore
+ */
+interface TypedArray extends ArrayBufferView {
+ readonly length: number;
+ readonly [n: number]: number;
+ set(array: ArrayLike<number>, offset?: number): void;
diff --git a/src/arrow/js/src/vector/date.ts b/src/arrow/js/src/vector/date.ts
new file mode 100644
index 000000000..8c2b7a563
--- /dev/null
+++ b/src/arrow/js/src/vector/date.ts
@@ -0,0 +1,51 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+import { DateUnit } from '../enum';
+import { Chunked } from './chunked';
+import { BaseVector } from './base';
+import { VectorType as V } from '../interfaces';
+import { VectorBuilderOptions } from './index';
+import { vectorFromValuesWithType } from './index';
+import { VectorBuilderOptionsAsync } from './index';
+import { Date_, DateDay, DateMillisecond } from '../type';
+/**
+ * Argument tuple for the two-argument form of `DateVector.from`: the Date values
+ * followed by the date unit.
+ * @ignore
+ */
+type FromArgs<T extends Date_> = [Iterable<Date>, T['unit']];
+/**
+ * Vector of date values, parameterized by the concrete `Date_` type.
+ * @ignore
+ */
+export class DateVector<T extends Date_ = Date_> extends BaseVector<T> {
+ public static from<T extends DateUnit.DAY>(...args: FromArgs<DateDay>): V<DateDay>;
+ public static from<T extends DateUnit.MILLISECOND>(...args: FromArgs<DateMillisecond>): V<DateMillisecond>;
+ public static from<T extends Date_, TNull = any>(input: Iterable<Date | TNull>): V<T>;
+ public static from<T extends Date_, TNull = any>(input: AsyncIterable<Date | TNull>): Promise<V<T>>;
+ public static from<T extends Date_, TNull = any>(input: VectorBuilderOptions<T, Date | TNull>): Chunked<T>;
+ public static from<T extends Date_, TNull = any>(input: VectorBuilderOptionsAsync<T, Date | TNull>): Promise<Chunked<T>>;
+ /**
+ * Builds a date vector by delegating to `vectorFromValuesWithType`.
+ * With two arguments (values, unit) the type is `DateDay` when the unit is
+ * `DateUnit.DAY` and `DateMillisecond` otherwise. With a single argument the
+ * type is always `DateMillisecond`.
+ * @nocollapse
+ */
+ public static from<T extends Date_, TNull = any>(...args: FromArgs<T> | [Iterable<Date | TNull> | AsyncIterable<Date | TNull> | VectorBuilderOptions<T, Date | TNull> | VectorBuilderOptionsAsync<T, Date | TNull>]) {
+ if (args.length === 2) {
+ return vectorFromValuesWithType(() => args[1] === DateUnit.DAY ? new DateDay() : new DateMillisecond() as T, args[0]);
+ }
+ return vectorFromValuesWithType(() => new DateMillisecond() as T, args[0]);
+ }
+/**
+ * DateVector specialized to the `DateDay` type; adds no members of its own.
+ * @ignore
+ */
+export class DateDayVector extends DateVector<DateDay> {}
+/**
+ * DateVector specialized to the `DateMillisecond` type; adds no members of its own.
+ * @ignore
+ */
+export class DateMillisecondVector extends DateVector<DateMillisecond> {}
diff --git a/src/arrow/js/src/vector/decimal.ts b/src/arrow/js/src/vector/decimal.ts
new file mode 100644
index 000000000..a1056fd4f
--- /dev/null
+++ b/src/arrow/js/src/vector/decimal.ts
@@ -0,0 +1,22 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+import { Decimal } from '../type';
+import { BaseVector } from './base';
+/**
+ * Vector of `Decimal` values; adds no members beyond those of BaseVector.
+ * @ignore
+ */
+export class DecimalVector extends BaseVector<Decimal> {}
diff --git a/src/arrow/js/src/vector/dictionary.ts b/src/arrow/js/src/vector/dictionary.ts
new file mode 100644
index 000000000..4b39dbe97
--- /dev/null
+++ b/src/arrow/js/src/vector/dictionary.ts
@@ -0,0 +1,60 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+import { Data } from '../data';
+import { Vector } from '../vector';
+import { BaseVector } from './base';
+import { VectorType as V } from '../interfaces';
+import { VectorBuilderOptions } from './index';
+import { vectorFromValuesWithType } from './index';
+import { VectorBuilderOptionsAsync } from './index';
+import { DataType, Dictionary, TKeys } from '../type';
+/**
+ * Argument tuple for the three-argument form of `DictionaryVector.from`, which
+ * destructures it as `[values, indices, keys]`.
+ * @ignore
+ */
+type FromArgs<T extends DataType = any, TKey extends TKeys = TKeys> = [Vector<T>, TKey, ArrayLike<number> | TKey['TArray']];
+/**
+ * Vector over `Dictionary<T, TKey>` data. Exposes the key vector as `indices`, the
+ * value vector as `dictionary`, and accessors that read or write either side.
+ * @ignore
+ */
+export class DictionaryVector<T extends DataType = any, TKey extends TKeys = TKeys> extends BaseVector<Dictionary<T, TKey>> {
+    public static from<T extends DataType = any, TKey extends TKeys = TKeys>(...args: FromArgs<T, TKey>): V<Dictionary<T, TKey>>;
+    public static from<T extends DataType = any, TKey extends TKeys = TKeys>(input: VectorBuilderOptions<Dictionary<T, TKey>>): Vector<Dictionary<T, TKey>>;
+    public static from<T extends DataType = any, TKey extends TKeys = TKeys>(input: VectorBuilderOptionsAsync<Dictionary<T, TKey>>): Promise<Vector<Dictionary<T, TKey>>>;
+    /**
+     * Implementation behind the overloads above.
+     *
+     * With exactly three arguments `(values, indices, keys)` a `Dictionary` type is
+     * built from `values.type` and `indices`, and a vector of `keys.length` entries
+     * is created over `keys` and `values`. Any other call is treated as builder
+     * options and forwarded to `vectorFromValuesWithType`, using `args[0].type`.
+     * @nocollapse
+     */
+    public static from<T extends DataType = any, TKey extends TKeys = TKeys>(...args: any[]) {
+        if (args.length === 3) {
+            const [values, indices, keys] = args as FromArgs<T, TKey>;
+            const type = new Dictionary(values.type, indices, null, null);
+            // NOTE(review): the callee of this statement was truncated in the source text
+            // (only `, 0, keys.length, 0, null, keys, values));` survived); restored as
+            // Vector.new(Data.Dictionary(type, ...)) -- confirm against upstream.
+            return Vector.new(Data.Dictionary(type, 0, keys.length, 0, null, keys, values));
+        }
+        return vectorFromValuesWithType(() => args[0].type, args[0]);
+    }
+
+    constructor(data: Data<Dictionary<T, TKey>>) {
+        super(data);
+        // NOTE(review): the right-hand side was missing in the source text; restored as a
+        // vector over the same data re-typed to the key type -- confirm against upstream.
+        this.indices = Vector.new(data.clone(this.type.indices));
+    }
+
+    /** Vector of the keys; assigned once in the constructor. */
+    public readonly indices: V<TKey>;
+
+    /**
+     * The vector of dictionary values.
+     * NOTE(review): the returned expression was missing in the source text; restored as
+     * `this.data.dictionary` -- confirm against upstream.
+     */
+    public get dictionary() { return <Vector<T>> this.data.dictionary; }
+    /** Returns `this.dictionary.indexOf(value)`. */
+    public reverseLookup(value: T) { return this.dictionary.indexOf(value); }
+    /** Reads the key stored at position `idx` of `indices`. */
+    public getKey(idx: number): TKey['TValue'] | null { return this.indices.get(idx); }
+    /** Reads the dictionary value stored under `key`. */
+    public getValue(key: number): T['TValue'] | null { return this.dictionary.get(key); }
+    /** Writes `key` at position `idx` of `indices`. */
+    public setKey(idx: number, key: TKey['TValue'] | null) { return this.indices.set(idx, key); }
+    /** Writes `value` into the dictionary under `key`. */
+    public setValue(key: number, value: T['TValue'] | null) { return this.dictionary.set(key, value); }
+}
+(DictionaryVector.prototype as any).indices = null; // prototype-level default for `indices`; the cast sidesteps the `readonly` declaration
diff --git a/src/arrow/js/src/vector/fixedsizebinary.ts b/src/arrow/js/src/vector/fixedsizebinary.ts
new file mode 100644
index 000000000..779be19ff
--- /dev/null
+++ b/src/arrow/js/src/vector/fixedsizebinary.ts
@@ -0,0 +1,22 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+import { BaseVector } from './base';
+import { FixedSizeBinary } from '../type';
+/**
+ * `BaseVector` specialised to the `FixedSizeBinary` type; declares no members of its own.
+ * @ignore
+ */
+export class FixedSizeBinaryVector extends BaseVector<FixedSizeBinary> {}
diff --git a/src/arrow/js/src/vector/fixedsizelist.ts b/src/arrow/js/src/vector/fixedsizelist.ts
new file mode 100644
index 000000000..13637021f
--- /dev/null
+++ b/src/arrow/js/src/vector/fixedsizelist.ts
@@ -0,0 +1,22 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+import { BaseVector } from './base';
+import { DataType, FixedSizeList } from '../type';
+/**
+ * `BaseVector` specialised to `FixedSizeList<T>`; declares no members of its own.
+ * @ignore
+ */
+export class FixedSizeListVector<T extends DataType = any> extends BaseVector<FixedSizeList<T>> {}
diff --git a/src/arrow/js/src/vector/float.ts b/src/arrow/js/src/vector/float.ts
new file mode 100644
index 000000000..8260d2b27
--- /dev/null
+++ b/src/arrow/js/src/vector/float.ts
@@ -0,0 +1,144 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+import { Data } from '../data';
+import { Vector } from '../vector';
+import { Chunked } from './chunked';
+import { BaseVector } from './base';
+import { VectorBuilderOptions } from './index';
+import { vectorFromValuesWithType } from './index';
+import { VectorBuilderOptionsAsync } from './index';
+import { Float, Float16, Float32, Float64, FloatArray } from '../type';
+import { VectorType as V, TypedArrayConstructor } from '../interfaces';
+/**
+ * Union of the constructors `FloatVector.from` may be invoked on (its `this` type):
+ * the base class and each of its three width-specific subclasses.
+ * @ignore
+ */
+type FloatVectorConstructors =
+ typeof FloatVector |
+ typeof Float16Vector |
+ typeof Float32Vector |
+ typeof Float64Vector ;
+/**
+ * Every input shape `FloatVector.from` accepts: a float typed array, a sync or
+ * async iterable of values (or `TNull` placeholders), or builder options.
+ * @ignore
+ */
+type FromInput<T extends Float, TNull = any> =
+ FloatArray |
+ Iterable<T['TValue'] | TNull> |
+ AsyncIterable<T['TValue'] | TNull> |
+ VectorBuilderOptions<T, TNull> |
+ VectorBuilderOptionsAsync<T, TNull> ;
+/**
+ * Constructor type of the typed arrays covered by `FloatArray`.
+ * @ignore
+ */
+export type FloatArrayCtor = TypedArrayConstructor<FloatArray>;
+/**
+ * Base vector class for the floating-point types. Its static `from` builds a vector
+ * from a typed array, a sync/async iterable of values, or builder options.
+ * @ignore
+ */
+export class FloatVector<T extends Float = Float> extends BaseVector<T> {
+
+    // Guaranteed zero-copy variants
+    public static from(this: typeof FloatVector, input: Uint16Array): Float16Vector;
+    public static from(this: typeof FloatVector, input: Float32Array): Float32Vector;
+    public static from(this: typeof FloatVector, input: Float64Array): Float64Vector;
+
+    // Zero-copy if input is a TypedArray of the same type as the
+    // Vector that from is called on, otherwise uses the Builders
+    public static from<TNull = any>(this: typeof Float16Vector, input: FromInput<Float16, TNull>): Float16Vector;
+    public static from<TNull = any>(this: typeof Float32Vector, input: FromInput<Float32, TNull>): Float32Vector;
+    public static from<TNull = any>(this: typeof Float64Vector, input: FromInput<Float64, TNull>): Float64Vector;
+
+    // Not zero-copy
+    public static from<T extends Float, TNull = any>(this: typeof FloatVector, input: Iterable<T['TValue'] | TNull>): V<T>;
+    public static from<T extends Float, TNull = any>(this: typeof FloatVector, input: AsyncIterable<T['TValue'] | TNull>): Promise<V<T>>;
+    public static from<T extends Float, TNull = any>(this: typeof FloatVector, input: VectorBuilderOptions<T, TNull>): Chunked<T>;
+    public static from<T extends Float, TNull = any>(this: typeof FloatVector, input: VectorBuilderOptionsAsync<T, TNull>): Promise<Chunked<T>>;
+    /**
+     * Implementation behind the overloads above.
+     *
+     * The target type is taken from the constructor `from` was called on; when that is
+     * the base `FloatVector` and `input` is a buffer or view, it is taken from the
+     * input's constructor instead. A matching typed array is wrapped directly; every
+     * other resolvable case goes through `vectorFromValuesWithType`.
+     *
+     * @param input typed array, iterable, async iterable, or builder options
+     * @throws TypeError when no float type can be determined for `input`
+     * @nocollapse
+     */
+    public static from<T extends Float, TNull = any>(this: FloatVectorConstructors, input: FromInput<T, TNull>) {
+
+        let ArrowType = vectorTypeToDataType(this);
+
+        if ((input instanceof ArrayBuffer) || ArrayBuffer.isView(input)) {
+
+            const InputType = arrayTypeToDataType(input.constructor as FloatArrayCtor) || ArrowType;
+            // Special case, infer the Arrow DataType from the input if calling the base
+            // FloatVector.from with a TypedArray, e.g. `FloatVector.from(new Float32Array())`
+            if (ArrowType === null) {
+                ArrowType = InputType;
+            }
+            // If the DataType inferred from the Vector constructor matches the
+            // DataType inferred from the input arguments, return zero-copy view
+            if (ArrowType && ArrowType === InputType) {
+                const type = new ArrowType();
+                const length = input.byteLength / type.ArrayType.BYTES_PER_ELEMENT;
+                // If the ArrowType is Float16 but the input type isn't a Uint16Array,
+                // let the Float16Builder handle casting the input values to Uint16s.
+                if (!convertTo16Bit(ArrowType, input.constructor)) {
+                    // NOTE(review): the callee of this statement was truncated in the source
+                    // text; restored as Vector.new(Data.Float(type, ...)) -- confirm against upstream.
+                    return Vector.new(Data.Float(type, 0, length, 0, null, input as FloatArray));
+                }
+            }
+        }
+
+        if (ArrowType) {
+            // If the DataType inferred from the Vector constructor is different than
+            // the DataType inferred from the input TypedArray, or if input isn't a
+            // TypedArray, use the Builders to construct the result Vector
+            return vectorFromValuesWithType(() => new ArrowType!() as T, input);
+        }
+
+        if ((input instanceof DataView) || (input instanceof ArrayBuffer)) {
+            // NOTE(review): the interpolated expression was empty in the source text;
+            // restored as the input's constructor name -- confirm against upstream.
+            throw new TypeError(`Cannot infer float type from instance of ${input.constructor.name}`);
+        }
+
+        throw new TypeError('Unrecognized FloatVector input');
+    }
+}
+/**
+ * `FloatVector` specialised to `Float16`, with helpers that copy the values out as
+ * 32-bit or 64-bit float arrays.
+ * @ignore
+ */
+export class Float16Vector extends FloatVector<Float16> {
+    // Since JS doesn't have half floats, `toArray()` returns a zero-copy slice
+    // of the underlying Uint16Array data. This behavior ensures we don't incur
+    // extra compute or copies if you're calling `toArray()` in order to create
+    // a buffer for something like WebGL. But if you're using JS and want typed
+    // arrays of 4-to-8-byte precision, these methods will enumerate the values
+    // and clamp to the desired byte lengths.
+    /** Iterates this vector into a new `Float32Array`. */
+    public toFloat32Array() { return new Float32Array(this as Iterable<number>); }
+    /** Iterates this vector into a new `Float64Array`. */
+    public toFloat64Array() { return new Float64Array(this as Iterable<number>); }
+}
+/**
+ * `FloatVector` specialised to `Float32`; declares no members of its own.
+ * @ignore
+ */
+export class Float32Vector extends FloatVector<Float32> {}
+/**
+ * `FloatVector` specialised to `Float64`; declares no members of its own.
+ * @ignore
+ */
+export class Float64Vector extends FloatVector<Float64> {}
+/**
+ * True when the target type is `Float16` but the input's constructor is not
+ * `Uint16Array`, i.e. the input cannot be used as half-float storage as-is.
+ * @ignore
+ */
+const convertTo16Bit = (typeCtor: any, dataCtor: any) => {
+    return (typeCtor === Float16) && (dataCtor !== Uint16Array);
+};
+/**
+ * Maps a typed-array constructor to the float DataType class it stores, or `null`
+ * for any other constructor.
+ * @ignore
+ */
+const arrayTypeToDataType = (ctor: FloatArrayCtor) => {
+    switch (ctor) {
+        case Uint16Array:    return Float16;
+        case Float32Array:   return Float32;
+        case Float64Array:   return Float64;
+        default: return null;
+    }
+};
+/**
+ * Maps a width-specific float vector constructor to its DataType class. Returns
+ * `null` for anything else, including the base `FloatVector`.
+ * @ignore
+ */
+const vectorTypeToDataType = (ctor: FloatVectorConstructors) => {
+    switch (ctor) {
+        case Float16Vector: return Float16;
+        case Float32Vector: return Float32;
+        case Float64Vector: return Float64;
+        default: return null;
+    }
+};
diff --git a/src/arrow/js/src/vector/index.ts b/src/arrow/js/src/vector/index.ts
new file mode 100644
index 000000000..30f5e3cfa
--- /dev/null
+++ b/src/arrow/js/src/vector/index.ts
@@ -0,0 +1,207 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+export { Vector } from '../vector';
+export { BaseVector } from './base';
+export { BinaryVector } from './binary';
+export { BoolVector } from './bool';
+export { Chunked } from './chunked';
+export { DateVector, DateDayVector, DateMillisecondVector } from './date';
+export { DecimalVector } from './decimal';
+export { DictionaryVector } from './dictionary';
+export { FixedSizeBinaryVector } from './fixedsizebinary';
+export { FixedSizeListVector } from './fixedsizelist';
+export { FloatVector, Float16Vector, Float32Vector, Float64Vector } from './float';
+export { IntervalVector, IntervalDayTimeVector, IntervalYearMonthVector } from './interval';
+export { IntVector, Int8Vector, Int16Vector, Int32Vector, Int64Vector, Uint8Vector, Uint16Vector, Uint32Vector, Uint64Vector } from './int';
+export { ListVector } from './list';
+export { MapVector } from './map';
+export { NullVector } from './null';
+export { StructVector } from './struct';
+export { TimestampVector, TimestampSecondVector, TimestampMillisecondVector, TimestampMicrosecondVector, TimestampNanosecondVector } from './timestamp';
+export { TimeVector, TimeSecondVector, TimeMillisecondVector, TimeMicrosecondVector, TimeNanosecondVector } from './time';
+export { UnionVector, DenseUnionVector, SparseUnionVector } from './union';
+export { Utf8Vector } from './utf8';
+export { MapRow, StructRow } from './row';
+import * as fn from '../util/fn';
+import { Data } from '../data';
+import { Type } from '../enum';
+import { Vector } from '../vector';
+import { DataType } from '../type';
+import { Chunked } from './chunked';
+import { BaseVector } from './base';
+import { setBool } from '../util/bit';
+import { isIterable, isAsyncIterable } from '../util/compat';
+import { Builder, IterableBuilderOptions } from '../builder';
+import { VectorType as V, VectorCtorArgs } from '../interfaces';
+import { instance as getVisitor } from '../visitor/get';
+import { instance as setVisitor } from '../visitor/set';
+import { instance as indexOfVisitor } from '../visitor/indexof';
+import { instance as toArrayVisitor } from '../visitor/toarray';
+import { instance as iteratorVisitor } from '../visitor/iterator';
+import { instance as byteWidthVisitor } from '../visitor/bytewidth';
+import { instance as getVectorConstructor } from '../visitor/vectorctor';
+// Module augmentation: declares the static `new` and `from` members that are
+// assigned onto `Vector` further down in this file.
+declare module '../vector' {
+    namespace Vector {
+        export { newVector as new };
+        export { vectorFrom as from };
+    }
+}
+// Module augmentation: declares the static `from` on `BaseVector` and the
+// accessor methods whose implementations are attached to the prototype below.
+declare module './base' {
+    namespace BaseVector {
+        export { vectorFrom as from };
+    }
+    interface BaseVector<T extends DataType> {
+        get(index: number): T['TValue'] | null;
+        set(index: number, value: T['TValue'] | null): void;
+        indexOf(value: T['TValue'] | null, fromIndex?: number): number;
+        toArray(): T['TArray'];
+        getByteWidth(): number;
+        [Symbol.iterator](): IterableIterator<T['TValue'] | null>;
+    }
+}
+// NOTE(review): the assignment target on the first statement was missing in the
+// source text; restored as `Vector.new`, matching the `newVector as new` export
+// declared in the module augmentation above -- confirm against upstream.
+/** @nocollapse */
+Vector.new = newVector;
+/** @nocollapse */
+Vector.from = vectorFrom;
+/**
+ * Instantiates the vector class selected by the vector-constructor visitor for
+ * `data`, forwarding `data` and any extra constructor arguments.
+ * @ignore
+ */
+function newVector<T extends DataType>(data: Data<T>, ...args: VectorCtorArgs<V<T>>): V<T> {
+    return new (getVectorConstructor.getVisitFn<T>(data)())(data, ...args) as V<T>;
+}
+/**
+ * `IterableBuilderOptions` plus a synchronous iterable of the values to build from.
+ * @ignore
+ */
+export interface VectorBuilderOptions<T extends DataType, TNull = any> extends IterableBuilderOptions<T, TNull> { values: Iterable<T['TValue'] | TNull> }
+/**
+ * `IterableBuilderOptions` plus an async iterable of the values to build from.
+ * @ignore
+ */
+export interface VectorBuilderOptionsAsync<T extends DataType, TNull = any> extends IterableBuilderOptions<T, TNull> { values: AsyncIterable<T['TValue'] | TNull> }
+/**
+ * Builds a vector from `input`, calling `newDataType()` for the type whenever the
+ * caller did not supply one.
+ *
+ * A bare iterable or async iterable is wrapped into builder options with
+ * `nullValues` of `[null, undefined]`. An options object is forwarded to
+ * `Vector.from` with the same defaults filled in for `type` and `nullValues`.
+ *
+ * @param newDataType factory for the default DataType
+ * @param input values (sync or async) or builder options
+ * @ignore
+ */
+export function vectorFromValuesWithType<T extends DataType, TNull = any>(newDataType: () => T, input: Iterable<T['TValue'] | TNull> | AsyncIterable<T['TValue'] | TNull> | VectorBuilderOptions<T, TNull> | VectorBuilderOptionsAsync<T, TNull>) {
+    if (isIterable(input)) {
+        return Vector.from({ 'nullValues': [null, undefined], type: newDataType(), 'values': input }) as V<T>;
+    } else if (isAsyncIterable(input)) {
+        return Vector.from({ 'nullValues': [null, undefined], type: newDataType(), 'values': input }) as Promise<V<T>>;
+    }
+    // Options object: pull out the fields that need defaults.
+    const {
+        'values': values = [],
+        'type': type = newDataType(),
+        'nullValues': nullValues = [null, undefined],
+    } = { ...input };
+    // The two branches differ only in the cast, which selects the sync or async overload.
+    return isIterable(values)
+        ? Vector.from({ nullValues, ...input, type } as VectorBuilderOptions<T, TNull>)
+        : Vector.from({ nullValues, ...input, type } as VectorBuilderOptionsAsync<T, TNull>);
+}
+/**
+ * Builds a vector from builder options. Synchronous `values` are built eagerly and
+ * returned directly; otherwise the values are consumed with `for await` and a
+ * Promise is returned. In both cases a single chunk is returned as-is and several
+ * chunks are combined with `Chunked.concat`.
+ * @ignore
+ */
+function vectorFrom<T extends DataType = any, TNull = any>(input: VectorBuilderOptions<T, TNull>): Vector<T>;
+function vectorFrom<T extends DataType = any, TNull = any>(input: VectorBuilderOptionsAsync<T, TNull>): Promise<Vector<T>>;
+function vectorFrom<T extends DataType = any, TNull = any>(input: VectorBuilderOptions<T, TNull> | VectorBuilderOptionsAsync<T, TNull>) {
+    // `nullValues` defaults to [null, undefined] unless `input` carries its own.
+    const { 'values': values = [], ...options } = { 'nullValues': [null, undefined], ...input } as VectorBuilderOptions<T, TNull> | VectorBuilderOptionsAsync<T, TNull>;
+    if (isIterable<T['TValue'] | TNull>(values)) {
+        const chunks = [...Builder.throughIterable(options)(values)];
+        return (chunks.length === 1 ? chunks[0] : Chunked.concat<T>(chunks)) as Vector<T>;
+    }
+    return (async (chunks: V<T>[]) => {
+        const transform = Builder.throughAsyncIterable(options);
+        for await (const chunk of transform(values)) {
+            chunks.push(chunk);
+        }
+        return (chunks.length === 1 ? chunks[0] : Chunked.concat<T>(chunks)) as Vector<T>;
+    })([]);
+}
+// We provide the following method implementations for code navigability purposes only.
+// They're overridden at runtime below with the specific Visitor implementation for each type,
+// short-circuiting the usual Visitor traversal and reducing intermediate lookups and calls.
+// This comment is here to remind you to not set breakpoints in these function bodies, or to inform
+// you why the breakpoints you have already set are not being triggered. Have a great day!
+
+BaseVector.prototype.get = function baseVectorGet<T extends DataType>(this: BaseVector<T>, index: number): T['TValue'] | null {
+    return getVisitor.visit(this, index);
+};
+
+BaseVector.prototype.set = function baseVectorSet<T extends DataType>(this: BaseVector<T>, index: number, value: T['TValue'] | null): void {
+    return setVisitor.visit(this, index, value);
+};
+
+BaseVector.prototype.indexOf = function baseVectorIndexOf<T extends DataType>(this: BaseVector<T>, value: T['TValue'] | null, fromIndex?: number): number {
+    return indexOfVisitor.visit(this, value, fromIndex);
+};
+
+BaseVector.prototype.toArray = function baseVectorToArray<T extends DataType>(this: BaseVector<T>): T['TArray'] {
+    return toArrayVisitor.visit(this);
+};
+
+// Unlike the others, the byte-width visitor is handed the vector's type, not the vector.
+BaseVector.prototype.getByteWidth = function baseVectorGetByteWidth<T extends DataType>(this: BaseVector<T>): number {
+    return byteWidthVisitor.visit(this.type);
+};
+
+BaseVector.prototype[Symbol.iterator] = function baseVectorSymbolIterator<T extends DataType>(this: BaseVector<T>): IterableIterator<T['TValue'] | null> {
+    return iteratorVisitor.visit(this);
+};
+
+(BaseVector.prototype as any)._bindDataAccessors = bindBaseVectorDataAccessors;
+// Perf: bind and assign the operator Visitor methods to each of the Vector subclasses for each Type
+(Object.keys(Type) as any[])
+ .map((T: any) => Type[T] as any)
+ .filter((T: any): T is Type => typeof T === 'number') // keep only the members of Type whose value is a number
+ .filter((typeId) => typeId !== Type.NONE) // Type.NONE is excluded from the binding
+ .forEach((typeId) => {
+ const VectorCtor = getVectorConstructor.visit(typeId); // vector class the constructor visitor returns for this type id
+ VectorCtor.prototype['get'] = fn.partial1(getVisitor.getVisitFn(typeId));
+ VectorCtor.prototype['set'] = fn.partial2(setVisitor.getVisitFn(typeId));
+ VectorCtor.prototype['indexOf'] = fn.partial2(indexOfVisitor.getVisitFn(typeId));
+ VectorCtor.prototype['toArray'] = fn.partial0(toArrayVisitor.getVisitFn(typeId));
+ VectorCtor.prototype['getByteWidth'] = partialType0(byteWidthVisitor.getVisitFn(typeId)); // wrapped so the visitor receives `this.type`
+ VectorCtor.prototype[Symbol.iterator] = fn.partial0(iteratorVisitor.getVisitFn(typeId));
+ });
+/**
+ * Wraps a visit function that expects a DataType into a method that passes the
+ * receiver's `type` to it.
+ * @ignore
+ */
+function partialType0<T extends Vector>(visit: (node: T['type']) => any) {
+    return function(this: T) { return visit(this.type); };
+}
+/**
+ * Wraps a getter so it returns `null` for any index `i` where `this.isValid(i)` is
+ * false, and otherwise defers to the wrapped getter.
+ * @ignore
+ */
+function wrapNullableGet<T extends DataType, V extends Vector<T>, F extends (i: number) => any>(fn: F): (...args: Parameters<F>) => ReturnType<F> {
+    // NOTE(review): the callee of the valid branch was truncated in the source text
+    // (only `?, i)` survived); restored as `fn.call(this, i)` -- confirm against upstream.
+    return function(this: V, i: number) { return this.isValid(i) ? fn.call(this, i) : null; };
+}
+/**
+ * Wraps a setter so each write first updates the validity bit at `this.offset + i`
+ * in `this.nullBitmap` (set for non-nullish values, cleared for `null`/`undefined`).
+ * The wrapped setter runs only when `setBool` returns a truthy result --
+ * presumably the bit just written; confirm against `util/bit`.
+ * @ignore
+ */
+function wrapNullableSet<T extends DataType, V extends BaseVector<T>, F extends (i: number, a: any) => void>(fn: F): (...args: Parameters<F>) => void {
+    return function(this: V, i: number, a: any) {
+        if (setBool(this.nullBitmap, this.offset + i, !((a == null)))) {
+            // NOTE(review): the callee of this statement was truncated in the source text
+            // (only `, i, a);` survived); restored as `fn.call(this, i, a)` -- confirm against upstream.
+            fn.call(this, i, a);
+        }
+    };
+}
+/**
+ * Installed on the prototype as `_bindDataAccessors`. When the vector has a
+ * non-empty null bitmap, replaces its `get` and `set` with null-aware wrappers;
+ * otherwise leaves them untouched.
+ * @ignore
+ */
+function bindBaseVectorDataAccessors<T extends DataType>(this: BaseVector<T>) {
+    const nullBitmap = this.nullBitmap;
+    if (nullBitmap && nullBitmap.byteLength > 0) {
+        this.get = wrapNullableGet(this.get);
+        this.set = wrapNullableSet(this.set);
+    }
+}
diff --git a/src/arrow/js/src/vector/int.ts b/src/arrow/js/src/vector/int.ts
new file mode 100644
index 000000000..dbfba58c9
--- /dev/null
+++ b/src/arrow/js/src/vector/int.ts
@@ -0,0 +1,195 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+import { Data } from '../data';
+import { Vector } from '../vector';
+import { Chunked } from './chunked';
+import { BaseVector } from './base';
+import { VectorBuilderOptions } from './index';
+import { vectorFromValuesWithType } from './index';
+import { VectorBuilderOptionsAsync } from './index';
+import { BigInt64Array, BigUint64Array } from '../util/compat';
+import { toBigInt64Array, toBigUint64Array } from '../util/buffer';
+import { Int, Uint8, Uint16, Uint32, Uint64, Int8, Int16, Int32, Int64, IntArray } from '../type';
+import { VectorType as V, TypedArrayConstructor, BigIntArrayConstructor, BigIntArray } from '../interfaces';
+/**
+ * Union of the constructors `IntVector.from` may be invoked on (its `this` type):
+ * the base class and each signed/unsigned width-specific subclass.
+ * @ignore
+ */
+type IntVectorConstructors =
+ typeof IntVector |
+ typeof Int8Vector |
+ typeof Int16Vector |
+ typeof Int32Vector |
+ typeof Uint8Vector |
+ typeof Uint16Vector |
+ typeof Uint32Vector |
+ typeof Int64Vector |
+ typeof Uint64Vector ;
+/**
+ * Every input shape `IntVector.from` accepts: an integer or BigInt typed array, a
+ * sync or async iterable of values (or `TNull` placeholders), or builder options.
+ * @ignore
+ */
+type FromInput<T extends Int, TNull = any> =
+ IntArray | BigIntArray |
+ Iterable<T['TValue'] | TNull> |
+ AsyncIterable<T['TValue'] | TNull> |
+ VectorBuilderOptions<T, TNull> |
+ VectorBuilderOptionsAsync<T, TNull> ;
+/**
+ * Argument tuple of the `IntVector.from` implementation: the input plus an optional
+ * flag that `from` reads as `is64bit`.
+ * @ignore
+ */
+type FromArgs<T extends Int, TNull = any> = [FromInput<T, TNull>, boolean?];
+/**
+ * Constructor type of the typed arrays covered by `IntArray` or `BigIntArray`.
+ * @ignore
+ */
+export type IntArrayCtor = TypedArrayConstructor<IntArray> | BigIntArrayConstructor<BigIntArray>;
+/**
+ * Base vector class for the integer types. Its static `from` builds a vector from
+ * a typed array, a sync/async iterable of values, or builder options.
+ * @ignore
+ */
+export class IntVector<T extends Int = Int> extends BaseVector<T> {
+
+    // Guaranteed zero-copy variants
+    public static from(this: typeof IntVector, input: Int8Array): Int8Vector;
+    public static from(this: typeof IntVector, input: Int16Array): Int16Vector;
+    public static from(this: typeof IntVector, input: Int32Array): Int32Vector;
+    public static from(this: typeof IntVector, input: BigInt64Array): Int64Vector;
+    public static from(this: typeof IntVector, input: Int32Array, is64bit: true): Int64Vector;
+    public static from(this: typeof IntVector, input: Uint8Array): Uint8Vector;
+    public static from(this: typeof IntVector, input: Uint16Array): Uint16Vector;
+    public static from(this: typeof IntVector, input: Uint32Array): Uint32Vector;
+    public static from(this: typeof IntVector, input: BigUint64Array): Uint64Vector;
+    public static from(this: typeof IntVector, input: Uint32Array, is64bit: true): Uint64Vector;
+
+    // Zero-copy if input is a TypedArray of the same type as the
+    // Vector that from is called on, otherwise uses the Builders
+    public static from<TNull = any>(this: typeof Int8Vector,   input: FromInput<Int8, TNull>): Int8Vector;
+    public static from<TNull = any>(this: typeof Int16Vector,  input: FromInput<Int16, TNull>): Int16Vector;
+    public static from<TNull = any>(this: typeof Int32Vector,  input: FromInput<Int32, TNull>): Int32Vector;
+    public static from<TNull = any>(this: typeof Int64Vector,  input: FromInput<Int64, TNull>): Int64Vector;
+    public static from<TNull = any>(this: typeof Uint8Vector,  input: FromInput<Uint8, TNull>): Uint8Vector;
+    public static from<TNull = any>(this: typeof Uint16Vector, input: FromInput<Uint16, TNull>): Uint16Vector;
+    public static from<TNull = any>(this: typeof Uint32Vector, input: FromInput<Uint32, TNull>): Uint32Vector;
+    public static from<TNull = any>(this: typeof Uint64Vector, input: FromInput<Uint64, TNull>): Uint64Vector;
+
+    // Not zero-copy
+    public static from<T extends Int, TNull = any>(this: typeof IntVector, input: Iterable<T['TValue'] | TNull>): V<T>;
+    public static from<T extends Int, TNull = any>(this: typeof IntVector, input: AsyncIterable<T['TValue'] | TNull>): Promise<V<T>>;
+    public static from<T extends Int, TNull = any>(this: typeof IntVector, input: VectorBuilderOptions<T, TNull>): Chunked<T>;
+    public static from<T extends Int, TNull = any>(this: typeof IntVector, input: VectorBuilderOptionsAsync<T, TNull>): Promise<Chunked<T>>;
+    /**
+     * Implementation behind the overloads above.
+     *
+     * The target type is taken from the constructor `from` was called on; when that is
+     * the base `IntVector` and `input` is a buffer or view, it is taken from the
+     * input's constructor instead. `is64bit` (default `false`) makes 32-bit arrays and
+     * 32-bit vector classes resolve to the corresponding 64-bit type. A matching typed
+     * array is wrapped directly; every other resolvable case goes through
+     * `vectorFromValuesWithType`.
+     *
+     * @throws TypeError when no integer type can be determined for the input
+     * @nocollapse
+     */
+    public static from<T extends Int, TNull = any>(this: IntVectorConstructors, ...args: FromArgs<T, TNull>) {
+
+        const [input, is64bit = false] = args;
+        let ArrowType = vectorTypeToDataType(this, is64bit);
+
+        if ((input instanceof ArrayBuffer) || ArrayBuffer.isView(input)) {
+
+            const InputType = arrayTypeToDataType(input.constructor as IntArrayCtor, is64bit) || ArrowType;
+            // Special case, infer the Arrow DataType from the input if calling the base
+            // IntVector.from with a TypedArray, e.g. `IntVector.from(new Int32Array())`
+            if (ArrowType === null) {
+                ArrowType = InputType;
+            }
+            // If the DataType inferred from the Vector constructor matches the
+            // DataType inferred from the input arguments, return zero-copy view
+            if (ArrowType && ArrowType === InputType) {
+                const type = new ArrowType();
+                let length = input.byteLength / type.ArrayType.BYTES_PER_ELEMENT;
+                // If the ArrowType is 64bit but the input type is 32bit pairs, update the logical length
+                if (convert32To64Bit(ArrowType, input.constructor)) {
+                    length *= 0.5;
+                }
+                // NOTE(review): the callee of this statement was truncated in the source
+                // text; restored as Vector.new(Data.Int(type, ...)) -- confirm against upstream.
+                return Vector.new(Data.Int(type, 0, length, 0, null, input as IntArray));
+            }
+        }
+
+        if (ArrowType) {
+            // If the DataType inferred from the Vector constructor is different than
+            // the DataType inferred from the input TypedArray, or if input isn't a
+            // TypedArray, use the Builders to construct the result Vector
+            return vectorFromValuesWithType(() => new ArrowType!() as T, input);
+        }
+
+        if ((input instanceof DataView) || (input instanceof ArrayBuffer)) {
+            // NOTE(review): the interpolated expression was empty in the source text;
+            // restored as the input's constructor name -- confirm against upstream.
+            throw new TypeError(`Cannot infer integer type from instance of ${input.constructor.name}`);
+        }
+
+        throw new TypeError('Unrecognized IntVector input');
+    }
+}
+/**
+ * `IntVector` specialised to `Int8`; declares no members of its own.
+ * @ignore
+ */
+export class Int8Vector extends IntVector<Int8> {}
+/**
+ * `IntVector` specialised to `Int16`; declares no members of its own.
+ * @ignore
+ */
+export class Int16Vector extends IntVector<Int16> {}
+/**
+ * `IntVector` specialised to `Int32`; declares no members of its own.
+ * @ignore
+ */
+export class Int32Vector extends IntVector<Int32> {}
+/**
+ * `IntVector` specialised to `Int64`, with a `BigInt64Array` representation of the
+ * values available through `toBigInt64Array()` and the memoised `values64` getter.
+ * @ignore
+ */
+export class Int64Vector extends IntVector<Int64> {
+    /** Returns `toBigInt64Array(this.values)`. */
+    public toBigInt64Array() {
+        return toBigInt64Array(this.values);
+    }
+    // Backing field for `values64`; filled on first access.
+    private _values64!: BigInt64Array;
+    /** The result of `toBigInt64Array()`, computed once and then reused. */
+    public get values64(): BigInt64Array {
+        return this._values64 || (this._values64 = this.toBigInt64Array());
+    }
+}
+/**
+ * `IntVector` specialised to `Uint8`; declares no members of its own.
+ * @ignore
+ */
+export class Uint8Vector extends IntVector<Uint8> {}
+/**
+ * `IntVector` specialised to `Uint16`; declares no members of its own.
+ * @ignore
+ */
+export class Uint16Vector extends IntVector<Uint16> {}
+/**
+ * `IntVector` specialised to `Uint32`; declares no members of its own.
+ * @ignore
+ */
+export class Uint32Vector extends IntVector<Uint32> {}
+/**
+ * `IntVector` specialised to `Uint64`, with a `BigUint64Array` representation of the
+ * values available through `toBigUint64Array()` and the memoised `values64` getter.
+ * @ignore
+ */
+export class Uint64Vector extends IntVector<Uint64> {
+    /** Returns `toBigUint64Array(this.values)`. */
+    public toBigUint64Array() {
+        return toBigUint64Array(this.values);
+    }
+    // Backing field for `values64`; filled on first access.
+    private _values64!: BigUint64Array;
+    /** The result of `toBigUint64Array()`, computed once and then reused. */
+    public get values64(): BigUint64Array {
+        return this._values64 || (this._values64 = this.toBigUint64Array());
+    }
+}
+/**
+ * True when the target type is `Int64` or `Uint64` but the input's constructor is
+ * `Int32Array` or `Uint32Array`, i.e. the 64-bit values arrive as 32-bit words.
+ * @ignore
+ */
+const convert32To64Bit = (typeCtor: any, dataCtor: any) => {
+    return (typeCtor === Int64 || typeCtor === Uint64) &&
+           (dataCtor === Int32Array || dataCtor === Uint32Array);
+};
+/**
+ * Maps a typed-array constructor to the integer DataType class it stores, or `null`
+ * for any other constructor. With `is64bit` set, the 32-bit array constructors map
+ * to the 64-bit types of the same signedness.
+ * @ignore
+ */
+const arrayTypeToDataType = (ctor: IntArrayCtor, is64bit: boolean) => {
+    switch (ctor) {
+        case Int8Array:      return Int8;
+        case Int16Array:     return Int16;
+        case Int32Array:     return is64bit ? Int64 : Int32;
+        case BigInt64Array:  return Int64;
+        case Uint8Array:     return Uint8;
+        case Uint16Array:    return Uint16;
+        case Uint32Array:    return is64bit ? Uint64 : Uint32;
+        case BigUint64Array: return Uint64;
+        default: return null;
+    }
+};
+/**
+ * Maps a width-specific integer vector constructor to its DataType class. Returns
+ * `null` for anything else, including the base `IntVector`. With `is64bit` set,
+ * `Int32Vector` and `Uint32Vector` map to the 64-bit types of the same signedness.
+ * @ignore
+ */
+const vectorTypeToDataType = (ctor: IntVectorConstructors, is64bit: boolean) => {
+    switch (ctor) {
+        case Int8Vector:   return Int8;
+        case Int16Vector:  return Int16;
+        case Int32Vector:  return is64bit ? Int64 : Int32;
+        case Int64Vector:  return Int64;
+        case Uint8Vector:  return Uint8;
+        case Uint16Vector: return Uint16;
+        case Uint32Vector: return is64bit ? Uint64 : Uint32;
+        case Uint64Vector: return Uint64;
+        default: return null;
+    }
+};
diff --git a/src/arrow/js/src/vector/interval.ts b/src/arrow/js/src/vector/interval.ts
new file mode 100644
index 000000000..70384ab97
--- /dev/null
+++ b/src/arrow/js/src/vector/interval.ts
@@ -0,0 +1,26 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+import { BaseVector } from './base';
+import { Interval, IntervalDayTime, IntervalYearMonth } from '../type';
+/**
+ * `BaseVector` over any `Interval` type; declares no members of its own.
+ * @ignore
+ */
+export class IntervalVector<T extends Interval = Interval> extends BaseVector<T> {}
+/**
+ * `IntervalVector` specialised to `IntervalDayTime`; declares no members of its own.
+ * @ignore
+ */
+export class IntervalDayTimeVector extends IntervalVector<IntervalDayTime> {}
+/**
+ * `IntervalVector` specialised to `IntervalYearMonth`; declares no members of its own.
+ * @ignore
+ */
+export class IntervalYearMonthVector extends IntervalVector<IntervalYearMonth> {}
diff --git a/src/arrow/js/src/vector/list.ts b/src/arrow/js/src/vector/list.ts
new file mode 100644
index 000000000..6ea189044
--- /dev/null
+++ b/src/arrow/js/src/vector/list.ts
@@ -0,0 +1,22 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+import { BaseVector } from './base';
+import { DataType, List } from '../type';
+/**
+ * `BaseVector` specialised to `List<T>`; declares no members of its own.
+ * @ignore
+ */
+export class ListVector<T extends DataType = any> extends BaseVector<List<T>> {}
diff --git a/src/arrow/js/src/vector/map.ts b/src/arrow/js/src/vector/map.ts
new file mode 100644
index 000000000..9975919f7
--- /dev/null
+++ b/src/arrow/js/src/vector/map.ts
@@ -0,0 +1,35 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+import { MapRow } from './row';
+import { Field } from '../schema';
+import { Vector } from '../vector';
+import { BaseVector } from './base';
+import { DataType, Map_, Struct, List } from '../type';
+/**
+ * Vector whose data type is `Map_<K, V>`: each element is a run of key/value
+ * structs taken from the vector's single child.
+ * @ignore
+ */
+export class MapVector<K extends DataType = any, V extends DataType = any> extends BaseVector<Map_<K, V>> {
+    /**
+     * Re-wraps this vector's data as a vector of `List<Struct<{ key, value }>>`.
+     * The list's child field is the map type's first (and only) child field.
+     */
+    public asList() {
+        const child = this.type.children[0] as Field<Struct<{ key: K; value: V }>>;
+        return Vector.new(this.data.clone(new List<Struct<{ key: K; value: V }>>(child)));
+    }
+    /**
+     * Builds the value for the element at `index`.
+     * @param index Position of the map element within this vector.
+     * @returns A `MapRow` over the child slice delimited by
+     * `valueOffsets[index]` and `valueOffsets[index + 1]`.
+     */
+    public bind(index: number): Map_<K, V>['TValue'] {
+        const child = this.getChildAt<Struct<{ key: K; value: V }>>(0)!;
+        const { [index]: begin, [index + 1]: end } = this.valueOffsets;
+        return new MapRow(child.slice(begin, end));
+    }
+}
diff --git a/src/arrow/js/src/vector/null.ts b/src/arrow/js/src/vector/null.ts
new file mode 100644
index 000000000..ffa3d0576
--- /dev/null
+++ b/src/arrow/js/src/vector/null.ts
@@ -0,0 +1,22 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+import { Null } from '../type';
+import { BaseVector } from './base';
+/**
+ * Vector whose data type is `Null`. Declares no members beyond those of
+ * `BaseVector`.
+ * @ignore
+ */
+export class NullVector extends BaseVector<Null> {}
diff --git a/src/arrow/js/src/vector/row.ts b/src/arrow/js/src/vector/row.ts
new file mode 100644
index 000000000..23d1b5440
--- /dev/null
+++ b/src/arrow/js/src/vector/row.ts
@@ -0,0 +1,296 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+import { Vector } from '../vector';
+import { StructVector } from './struct';
+import { valueToString } from '../util/pretty';
+import { DataType, Struct, RowLike } from '../type';
+/** @ignore */ const kParent = Symbol.for('parent'); // key under which a Row stores the vector it reads from
+/** @ignore */ const kRowIndex = Symbol.for('rowIndex'); // key for the row position StructRow passes to its child vectors
+/** @ignore */ const kKeyToIdx = Symbol.for('keyToIdx'); // key for the lazily created Map caching key -> column index
+/** @ignore */ const kIdxToVal = Symbol.for('idxToVal'); // key for the lazily created array caching column index -> value
+/** @ignore */ const kCustomInspect = Symbol.for('nodejs.util.inspect.custom'); // Node.js util.inspect customization hook
+/**
+ * Abstract, Map-compatible view over one row of key/value data backed by a
+ * parent vector. Subclasses supply key/index/value access; this base class
+ * adds lazy caches (key -> index and index -> value) and the `Map` API on top.
+ * `clear` and `delete` are unsupported and always throw.
+ */
+abstract class Row<K extends PropertyKey = any, V = any> implements Map<K, V> {
+    public readonly size: number;
+    public readonly [Symbol.toStringTag]: string;
+    protected [kRowIndex]: number;
+    protected [kParent]: Vector<Struct>;
+    protected [kKeyToIdx]: Map<K, number>;
+    protected [kIdxToVal]: V[];
+    /**
+     * @param parent Vector the row reads its values from.
+     * @param numKeys Number of keys in the row; exposed as `size`.
+     */
+    constructor(parent: Vector<Struct>, numKeys: number) {
+        this[kParent] = parent;
+        this.size = numKeys;
+    }
+    public abstract keys(): IterableIterator<K>;
+    public abstract values(): IterableIterator<V>;
+    public abstract getKey(idx: number): K;
+    public abstract getIndex(key: K): number;
+    public abstract getValue(idx: number): V;
+    public abstract setValue(idx: number, val: V): void;
+    /** Same iterator as `[Symbol.iterator]()`: yields `[key, value]` pairs. */
+    public entries() { return this[Symbol.iterator](); }
+    /** True when `get(key)` yields anything other than `undefined`. */
+    public has(key: K) { return this.get(key) !== undefined; }
+    /**
+     * Looks up the value for `key`, filling both caches along the way.
+     * @returns The value, or `undefined` when `key` is null/undefined or unknown.
+     */
+    public get(key: K) {
+        let val = undefined;
+        if (key != null) {
+            const ktoi = this[kKeyToIdx] || (this[kKeyToIdx] = new Map());
+            let idx = ktoi.get(key);
+            if (idx !== undefined) {
+                const itov = this[kIdxToVal] || (this[kIdxToVal] = new Array(this.size));
+                // Only hit the underlying vector when the slot is not cached yet.
+                ((val = itov[idx]) !== undefined) || (itov[idx] = val = this.getValue(idx));
+            } else if ((idx = this.getIndex(key)) > -1) {
+                ktoi.set(key, idx);
+                const itov = this[kIdxToVal] || (this[kIdxToVal] = new Array(this.size));
+                ((val = itov[idx]) !== undefined) || (itov[idx] = val = this.getValue(idx));
+            }
+        }
+        return val;
+    }
+    /**
+     * Writes `val` for `key` when the key exists; unknown and null/undefined
+     * keys are ignored.
+     * @returns This row, as `Map.prototype.set` does.
+     */
+    public set(key: K, val: V) {
+        if (key != null) {
+            const ktoi = this[kKeyToIdx] || (this[kKeyToIdx] = new Map());
+            let idx = ktoi.get(key);
+            if (idx === undefined) {
+                idx = this.getIndex(key);
+                // Never cache a "not found" index: `get` treats every cached
+                // index as valid and would otherwise call `getValue(-1)`.
+                if (idx > -1) { ktoi.set(key, idx); }
+            }
+            if (idx > -1) {
+                const itov = this[kIdxToVal] || (this[kIdxToVal] = new Array(this.size));
+                // `setValue` returns void, so this resets the cached slot to
+                // undefined and the next `get` re-reads the stored value.
+                itov[idx] = <any> this.setValue(idx, val);
+            }
+        }
+        return this;
+    }
+    /** Unsupported; always throws. */
+    public clear(): void { throw new Error(`Clearing ${this[Symbol.toStringTag]} not supported.`); }
+    /** Unsupported; always throws. */
+    public delete(_: K): boolean { throw new Error(`Deleting ${this[Symbol.toStringTag]} values not supported.`); }
+    /**
+     * Walks `keys()` and `values()` in lock-step, yielding `[key, value]` pairs
+     * and populating both caches. Stops as soon as either iterator is done.
+     */
+    public *[Symbol.iterator](): IterableIterator<[K, V]> {
+        const ki = this.keys();
+        const vi = this.values();
+        const ktoi = this[kKeyToIdx] || (this[kKeyToIdx] = new Map());
+        const itov = this[kIdxToVal] || (this[kIdxToVal] = new Array(this.size));
+        for (let k: K, v: V, i = 0, kr: IteratorResult<K>, vr: IteratorResult<V>;
+            !((kr = ki.next()).done || (vr = vi.next()).done);
+            ++i
+        ) {
+            k = kr.value;
+            v = vr.value;
+            itov[i] = v;
+            ktoi.has(k) || ktoi.set(k, i);
+            yield [k, v];
+        }
+    }
+    /**
+     * Calls `callbackfn(value, key, row)` for every entry, populating both
+     * caches. When `thisArg` is given the callback is invoked with it as `this`.
+     */
+    public forEach(callbackfn: (value: V, key: K, map: Map<K, V>) => void, thisArg?: any): void {
+        const ki = this.keys();
+        const vi = this.values();
+        const callback = thisArg === undefined ? callbackfn :
+            (v: V, k: K, m: Map<K, V>) => callbackfn.call(thisArg, v, k, m);
+        const ktoi = this[kKeyToIdx] || (this[kKeyToIdx] = new Map());
+        const itov = this[kIdxToVal] || (this[kIdxToVal] = new Array(this.size));
+        for (let k: K, v: V, i = 0, kr: IteratorResult<K>, vr: IteratorResult<V>;
+            !((kr = ki.next()).done || (vr = vi.next()).done);
+            ++i
+        ) {
+            k = kr.value;
+            v = vr.value;
+            itov[i] = v;
+            ktoi.has(k) || ktoi.set(k, i);
+            callback(v, k, this);
+        }
+    }
+    /** The row's values, in `values()` order. */
+    public toArray() { return [...this.values()]; }
+    /** Plain object with one property per entry. */
+    public toJSON() {
+        const obj = {} as any;
+        this.forEach((val, key) => obj[key] = val);
+        return obj;
+    }
+    public inspect() { return this.toString(); }
+    public [kCustomInspect]() { return this.toString(); }
+    /** Renders the row as `{ key: value, ... }` using `valueToString` for both sides. */
+    public toString() {
+        const str: string[] = [];
+        this.forEach((val, key) => {
+            key = valueToString(key);
+            val = valueToString(val);
+            str.push(`${key}: ${val}`);
+        });
+        return `{ ${str.join(', ')} }`;
+    }
+    // Runs once at class-definition time: installs writable, non-enumerable
+    // defaults for `size`, the parent and the row index on the prototype, and
+    // sets the prototype's toStringTag to 'Row'.
+    protected static [Symbol.toStringTag] = ((proto: Row) => {
+        Object.defineProperties(proto, {
+            'size': { writable: true, enumerable: false, configurable: false, value: 0 },
+            [kParent]: { writable: true, enumerable: false, configurable: false, value: null },
+            [kRowIndex]: { writable: true, enumerable: false, configurable: false, value: -1 },
+        });
+        return (proto as any)[Symbol.toStringTag] = 'Row';
+    })(Row.prototype);
+}
+/**
+ * Row view over one map element. The parent is a slice of key/value structs:
+ * child 0 holds the keys and child 1 the values, and the number of entries is
+ * the slice length. The constructor returns the result of `createRowProxy`
+ * rather than the raw instance.
+ */
+export class MapRow<K extends DataType = any, V extends DataType = any> extends Row<K['TValue'], V['TValue'] | null> {
+    constructor(slice: Vector<Struct<{ key: K; value: V }>>) {
+        super(slice, slice.length);
+        return createRowProxy(this);
+    }
+    /** Iterates the key child (child 0). */
+    public keys() {
+        return this[kParent].getChildAt(0)![Symbol.iterator]();
+    }
+    /** Iterates the value child (child 1). */
+    public values() {
+        return this[kParent].getChildAt(1)![Symbol.iterator]();
+    }
+    /** Key stored at position `idx` of the key child. */
+    public getKey(idx: number): K['TValue'] {
+        return this[kParent].getChildAt(0)!.get(idx);
+    }
+    /** Position of `key` in the key child, as reported by its `indexOf`. */
+    public getIndex(key: K['TValue']): number {
+        return this[kParent].getChildAt(0)!.indexOf(key);
+    }
+    /** Value stored at position `index` of the value child. */
+    public getValue(index: number): V['TValue'] | null {
+        return this[kParent].getChildAt(1)!.get(index);
+    }
+    /** Writes `value` at position `index` of the value child. */
+    public setValue(index: number, value: V['TValue'] | null): void {
+        this[kParent].getChildAt(1)!.set(index, value);
+    }
+}
+/**
+ * Row view over one element of a struct vector. Keys are the names of the
+ * struct type's child fields; the value for a key is read from the matching
+ * child vector at this row's index. The constructor returns the result of
+ * `defineRowProxyProperties`, which installs per-field accessors.
+ */
+export class StructRow<T extends { [key: string]: DataType } = any> extends Row<keyof T, T[keyof T]['TValue'] | null> {
+    constructor(parent: StructVector<T>) {
+        super(parent, parent.type.children.length);
+        return defineRowProxyProperties(this);
+    }
+    /** Yields each child field's name, in field order. */
+    public *keys() {
+        for (const field of this[kParent].type.children) {
+            yield field.name as keyof T;
+        }
+    }
+    /** Yields each field's value by reading the row's property of that name. */
+    public *values() {
+        for (const field of this[kParent].type.children) {
+            yield (this as RowLike<T>)[field.name];
+        }
+    }
+    /** Name of the child field at position `idx`. */
+    public getKey(idx: number): keyof T {
+        return this[kParent].type.children[idx].name as keyof T;
+    }
+    /** Position of the child field named `key`, or -1 when there is none. */
+    public getIndex(key: keyof T): number {
+        return this[kParent].type.children.findIndex((f) => f.name === key);
+    }
+    /** Reads child `index` at this row's index. */
+    public getValue(index: number): T[keyof T]['TValue'] | null {
+        return this[kParent].getChildAt(index)!.get(this[kRowIndex]);
+    }
+    /** Writes `value` into child `index` at this row's index. */
+    public setValue(index: number, value: T[keyof T]['TValue'] | null): void {
+        return this[kParent].getChildAt(index)!.set(this[kRowIndex], value);
+    }
+}
+Object.setPrototypeOf(Row.prototype, Map.prototype); // put Map.prototype on the chain so rows pass `instanceof Map`
+/**
+ * Installs accessor properties on `row` for every key and for every numeric
+ * position, each forwarding to `row.get` / `row.set`. Named accessors are
+ * enumerable, positional ones are not; properties the row already owns are
+ * left untouched. Also seeds the row's key -> index cache.
+ * @returns The same `row` instance.
+ * @ignore
+ */
+const defineRowProxyProperties = (() => {
+    // One descriptor object is reused for every property to avoid reallocating it.
+    const desc = { enumerable: true, configurable: false, get: null as any, set: null as any };
+    return <T extends Row>(row: T) => {
+        let idx = -1;
+        const ktoi = row[kKeyToIdx] || (row[kKeyToIdx] = new Map());
+        const getter = (key: any) => function(this: T) { return this.get(key); };
+        const setter = (key: any) => function(this: T, val: any) { return this.set(key, val); };
+        for (const key of row.keys()) {
+            ktoi.set(key, ++idx);
+            desc.get = getter(key);
+            desc.set = setter(key);
+            Object.prototype.hasOwnProperty.call(row, key) || (desc.enumerable = true, Object.defineProperty(row, key, desc));
+            Object.prototype.hasOwnProperty.call(row, idx) || (desc.enumerable = false, Object.defineProperty(row, idx, desc));
+        }
+        // Drop the closures so the shared descriptor does not keep them alive.
+        desc.get = desc.set = null;
+        return row;
+    };
+})();
+/**
+ * Wraps a row in a `Proxy` so arbitrary property reads and writes are routed
+ * to the row's `get` / `set`. The row's own API names and internal symbols are
+ * passed through untouched. Numeric property keys that are not themselves row
+ * keys are translated to the key at that position via `getKey`. Falls back to
+ * `defineRowProxyProperties` when `Proxy` is unavailable.
+ * @ignore
+ */
+const createRowProxy = (() => {
+    if (typeof Proxy === 'undefined') {
+        return defineRowProxyProperties;
+    }
+    // Captured once so the handlers call the base implementations directly.
+    const has = Row.prototype.has;
+    const get = Row.prototype.get;
+    const set = Row.prototype.set;
+    const getKey = Row.prototype.getKey;
+    const RowProxyHandler: ProxyHandler<Row> = {
+        isExtensible() { return false; },
+        deleteProperty() { return false; },
+        preventExtensions() { return true; },
+        ownKeys(row: Row) { return [...row.keys()].map((x) => `${x}`); },
+        has(row: Row, key: PropertyKey) {
+            switch (key) {
+                // Built-in members and internal symbols always report as present.
+                case 'getKey': case 'getIndex': case 'getValue': case 'setValue': case 'toArray': case 'toJSON': case 'inspect':
+                case 'constructor': case 'isPrototypeOf': case 'propertyIsEnumerable': case 'toString': case 'toLocaleString': case 'valueOf':
+                case 'size': case 'has': case 'get': case 'set': case 'clear': case 'delete': case 'keys': case 'values': case 'entries': case 'forEach':
+                case '__proto__': case '__defineGetter__': case '__defineSetter__': case 'hasOwnProperty': case '__lookupGetter__': case '__lookupSetter__':
+                case Symbol.iterator: case Symbol.toStringTag: case kParent: case kRowIndex: case kIdxToVal: case kKeyToIdx: case kCustomInspect:
+                    return true;
+            }
+            if (typeof key === 'number' && !row.has(key)) {
+                key = row.getKey(key);
+            }
+            return row.has(key);
+        },
+        get(row: Row, key: PropertyKey, receiver: any) {
+            switch (key) {
+                // Built-in members and internal symbols are read straight off the row.
+                case 'getKey': case 'getIndex': case 'getValue': case 'setValue': case 'toArray': case 'toJSON': case 'inspect':
+                case 'constructor': case 'isPrototypeOf': case 'propertyIsEnumerable': case 'toString': case 'toLocaleString': case 'valueOf':
+                case 'size': case 'has': case 'get': case 'set': case 'clear': case 'delete': case 'keys': case 'values': case 'entries': case 'forEach':
+                case '__proto__': case '__defineGetter__': case '__defineSetter__': case 'hasOwnProperty': case '__lookupGetter__': case '__lookupSetter__':
+                case Symbol.iterator: case Symbol.toStringTag: case kParent: case kRowIndex: case kIdxToVal: case kKeyToIdx: case kCustomInspect:
+                    return Reflect.get(row, key, receiver);
+            }
+            if (typeof key === 'number' && !has.call(receiver, key)) {
+                key = getKey.call(receiver, key);
+            }
+            return get.call(receiver, key);
+        },
+        set(row: Row, key: PropertyKey, val: any, receiver: any) {
+            switch (key) {
+                // Internal bookkeeping symbols may be written directly.
+                case kParent: case kRowIndex: case kIdxToVal: case kKeyToIdx:
+                    return Reflect.set(row, key, val, receiver);
+                // Built-in members are read-only through the proxy.
+                case 'getKey': case 'getIndex': case 'getValue': case 'setValue': case 'toArray': case 'toJSON': case 'inspect':
+                case 'constructor': case 'isPrototypeOf': case 'propertyIsEnumerable': case 'toString': case 'toLocaleString': case 'valueOf':
+                case 'size': case 'has': case 'get': case 'set': case 'clear': case 'delete': case 'keys': case 'values': case 'entries': case 'forEach':
+                case '__proto__': case '__defineGetter__': case '__defineSetter__': case 'hasOwnProperty': case '__lookupGetter__': case '__lookupSetter__':
+                case Symbol.iterator: case Symbol.toStringTag:
+                    return false;
+            }
+            if (typeof key === 'number' && !has.call(receiver, key)) {
+                key = getKey.call(receiver, key);
+            }
+            // Only existing keys can be written; anything else reports failure.
+            return has.call(receiver, key) ? !!set.call(receiver, key, val) : false;
+        },
+    };
+    return <T extends Row>(row: T) => new Proxy(row, RowProxyHandler) as T;
+})();
diff --git a/src/arrow/js/src/vector/struct.ts b/src/arrow/js/src/vector/struct.ts
new file mode 100644
index 000000000..b825f092e
--- /dev/null
+++ b/src/arrow/js/src/vector/struct.ts
@@ -0,0 +1,32 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+import { StructRow } from './row';
+import { BaseVector } from './base';
+import { DataType, Struct } from '../type';
+/** @ignore */ const kRowIndex = Symbol.for('rowIndex'); // Symbol.for makes this the same symbol './row' registers under 'rowIndex'
+/**
+ * Vector whose data type is `Struct<T>`. Element values are row objects that
+ * all share one lazily created `StructRow` as their prototype.
+ * @ignore
+ */
+export class StructVector<T extends { [key: string]: DataType } = any> extends BaseVector<Struct<T>> {
+    private _row!: StructRow<T>;
+    /**
+     * Builds the row object for the element at `index`.
+     * @param index Position of the struct element within this vector.
+     * @returns An object created from the shared `StructRow` prototype with its
+     * row index set to `index`.
+     */
+    public bind(index: number): Struct<T>['TValue'] {
+        // The StructRow is created on first use and reused for every later bind.
+        const proto = this._row || (this._row = new StructRow<T>(this));
+        const bound = Object.create(proto);
+        bound[kRowIndex] = index;
+        return bound;
+    }
+}
diff --git a/src/arrow/js/src/vector/time.ts b/src/arrow/js/src/vector/time.ts
new file mode 100644
index 000000000..0abded940
--- /dev/null
+++ b/src/arrow/js/src/vector/time.ts
@@ -0,0 +1,30 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+import { BaseVector } from './base';
+import { Time, TimeSecond, TimeMillisecond, TimeMicrosecond, TimeNanosecond } from '../type';
+/**
+ * Vector over values of a `Time` data type. The class body is empty, so
+ * everything it offers comes from `BaseVector<T>`; `T` defaults to `Time`.
+ * @ignore
+ */
+export class TimeVector<T extends Time = Time> extends BaseVector<T> {}
+/**
+ * `TimeVector` with its type parameter fixed to `TimeSecond`.
+ * @ignore
+ */
+export class TimeSecondVector extends TimeVector<TimeSecond> {}
+/**
+ * `TimeVector` with its type parameter fixed to `TimeMillisecond`.
+ * @ignore
+ */
+export class TimeMillisecondVector extends TimeVector<TimeMillisecond> {}
+/**
+ * `TimeVector` with its type parameter fixed to `TimeMicrosecond`.
+ * @ignore
+ */
+export class TimeMicrosecondVector extends TimeVector<TimeMicrosecond> {}
+/**
+ * `TimeVector` with its type parameter fixed to `TimeNanosecond`.
+ * @ignore
+ */
+export class TimeNanosecondVector extends TimeVector<TimeNanosecond> {}
diff --git a/src/arrow/js/src/vector/timestamp.ts b/src/arrow/js/src/vector/timestamp.ts
new file mode 100644
index 000000000..caff0bd6f
--- /dev/null
+++ b/src/arrow/js/src/vector/timestamp.ts
@@ -0,0 +1,30 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+import { BaseVector } from './base';
+import { Timestamp, TimestampSecond, TimestampMillisecond, TimestampMicrosecond, TimestampNanosecond } from '../type';
+/**
+ * Vector over values of a `Timestamp` data type. The class body is empty, so
+ * everything it offers comes from `BaseVector<T>`; `T` defaults to `Timestamp`.
+ * @ignore
+ */
+export class TimestampVector<T extends Timestamp = Timestamp> extends BaseVector<T> {}
+/**
+ * `TimestampVector` with its type parameter fixed to `TimestampSecond`.
+ * @ignore
+ */
+export class TimestampSecondVector extends TimestampVector<TimestampSecond> {}
+/**
+ * `TimestampVector` with its type parameter fixed to `TimestampMillisecond`.
+ * @ignore
+ */
+export class TimestampMillisecondVector extends TimestampVector<TimestampMillisecond> {}
+/**
+ * `TimestampVector` with its type parameter fixed to `TimestampMicrosecond`.
+ * @ignore
+ */
+export class TimestampMicrosecondVector extends TimestampVector<TimestampMicrosecond> {}
+/**
+ * `TimestampVector` with its type parameter fixed to `TimestampNanosecond`.
+ * @ignore
+ */
+export class TimestampNanosecondVector extends TimestampVector<TimestampNanosecond> {}
diff --git a/src/arrow/js/src/vector/union.ts b/src/arrow/js/src/vector/union.ts
new file mode 100644
index 000000000..854519c57
--- /dev/null
+++ b/src/arrow/js/src/vector/union.ts
@@ -0,0 +1,32 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+import { BaseVector } from './base';
+import { Union, DenseUnion, SparseUnion} from '../type';
+/**
+ * Vector over values of a `Union` data type; `T` defaults to `Union`.
+ * @ignore
+ */
+export class UnionVector<T extends Union = Union> extends BaseVector<T> {
+    /** The `typeIdToChildIndex` lookup held by this vector's union type. */
+    public get typeIdToChildIndex() { return this.data.type.typeIdToChildIndex; }
+}
+/**
+ * `UnionVector` with its type parameter fixed to `DenseUnion`.
+ * @ignore
+ */
+export class DenseUnionVector extends UnionVector<DenseUnion> {
+    /** The value offsets stored on this vector's data; asserted to be present. */
+    public get valueOffsets() { return this.data.valueOffsets!; }
+}
+/**
+ * `UnionVector` with its type parameter fixed to `SparseUnion`.
+ * Declares no members of its own.
+ * @ignore
+ */
+export class SparseUnionVector extends UnionVector<SparseUnion> {}
diff --git a/src/arrow/js/src/vector/utf8.ts b/src/arrow/js/src/vector/utf8.ts
new file mode 100644
index 000000000..a891c0dc5
--- /dev/null
+++ b/src/arrow/js/src/vector/utf8.ts
@@ -0,0 +1,39 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+import { Vector } from '../vector';
+import { Chunked } from './chunked';
+import { BaseVector } from './base';
+import { Binary, Utf8 } from '../type';
+import { VectorBuilderOptions } from './index';
+import { vectorFromValuesWithType } from './index';
+import { VectorBuilderOptionsAsync } from './index';
+/**
+ * Vector whose data type is `Utf8`.
+ * @ignore
+ */
+export class Utf8Vector extends BaseVector<Utf8> {
+    public static from<TNull = any>(input: Iterable<string | TNull>): Utf8Vector;
+    public static from<TNull = any>(input: AsyncIterable<string | TNull>): Promise<Utf8Vector>;
+    public static from<TNull = any>(input: VectorBuilderOptions<Utf8, string | TNull>): Chunked<Utf8>;
+    public static from<TNull = any>(input: VectorBuilderOptionsAsync<Utf8, string | TNull>): Promise<Chunked<Utf8>>;
+    /**
+     * Builds a Utf8 vector from `input` by delegating to
+     * `vectorFromValuesWithType` with a factory for a fresh `Utf8` type.
+     * The overloads above give the result type for each accepted input kind.
+     * @nocollapse
+     */
+    public static from<TNull = any>(input: Iterable<string | TNull> | AsyncIterable<string | TNull> | VectorBuilderOptions<Utf8, string | TNull> | VectorBuilderOptionsAsync<Utf8, string | TNull>) {
+        return vectorFromValuesWithType(() => new Utf8(), input);
+    }
+    /** Re-wraps this vector's data as a vector of type `Binary`. */
+    public asBinary() {
+        return Vector.new(this.data.clone(new Binary()));
+    }
+}
diff --git a/src/arrow/js/src/visitor.ts b/src/arrow/js/src/visitor.ts
new file mode 100644
index 000000000..3a63c93f9
--- /dev/null
+++ b/src/arrow/js/src/visitor.ts
@@ -0,0 +1,260 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+import { Data } from './data';
+import { Vector } from './vector';
+import { Type, Precision, DateUnit, TimeUnit, IntervalUnit, UnionMode } from './enum';
+import { DataType, Float, Int, Date_, Interval, Time, Timestamp, Union, } from './type';
+/**
+ * Base class for type-dispatched visitors. `visit` picks a `visitXxx` method
+ * from its first argument (see `getVisitFn`) and calls it with all arguments.
+ * Every general-purpose `visitXxx` defaults to returning `null`; subclasses
+ * override the ones they care about.
+ */
+export abstract class Visitor {
+    /**
+     * Visits each node in turn. For node `i`, the extra arguments passed along
+     * are the `i`-th element of every array in `args`.
+     * @returns The visit results, in node order.
+     */
+    public visitMany(nodes: any[], ...args: any[][]) {
+        return nodes.map((node, i) => this.visit(node, ...args.map((x) => x[i])));
+    }
+    /**
+     * Dispatches on `args[0]` and invokes the chosen method with `args`.
+     * Lookup is non-throwing here: an unmatched node yields `null`.
+     */
+    public visit(...args: any[]) {
+        return this.getVisitFn(args[0], false).apply(this, args);
+    }
+    /**
+     * Resolves the visit method for `node`.
+     * @param throwIfNotFound When true (the default), an unmatched node throws.
+     */
+    public getVisitFn(node: any, throwIfNotFound = true) {
+        return getVisitFn(this, node, throwIfNotFound);
+    }
+    public visitNull (_node: any, ..._args: any[]): any { return null; }
+    public visitBool (_node: any, ..._args: any[]): any { return null; }
+    public visitInt (_node: any, ..._args: any[]): any { return null; }
+    public visitFloat (_node: any, ..._args: any[]): any { return null; }
+    public visitUtf8 (_node: any, ..._args: any[]): any { return null; }
+    public visitBinary (_node: any, ..._args: any[]): any { return null; }
+    public visitFixedSizeBinary (_node: any, ..._args: any[]): any { return null; }
+    public visitDate (_node: any, ..._args: any[]): any { return null; }
+    public visitTimestamp (_node: any, ..._args: any[]): any { return null; }
+    public visitTime (_node: any, ..._args: any[]): any { return null; }
+    public visitDecimal (_node: any, ..._args: any[]): any { return null; }
+    public visitList (_node: any, ..._args: any[]): any { return null; }
+    public visitStruct (_node: any, ..._args: any[]): any { return null; }
+    public visitUnion (_node: any, ..._args: any[]): any { return null; }
+    public visitDictionary (_node: any, ..._args: any[]): any { return null; }
+    public visitInterval (_node: any, ..._args: any[]): any { return null; }
+    public visitFixedSizeList (_node: any, ..._args: any[]): any { return null; }
+    public visitMap (_node: any, ..._args: any[]): any { return null; }
+}
+/**
+ * Selects the `visitXxx` method of `visitor` that matches `node`.
+ *
+ * `node` may be a `Data`, `Vector` or `DataType` (the type id is then refined
+ * by `inferDType`), a numeric `Type` value, or a `Type` member name. For the
+ * width/unit/mode-specific ids, the specific method is preferred and the
+ * general one is used when the specific one is not defined.
+ *
+ * @param throwIfNotFound When false, an unmatched node yields a function that
+ * returns `null` instead of throwing.
+ * @throws Error when nothing matches and `throwIfNotFound` is true.
+ * @ignore
+ */
+function getVisitFn<T extends DataType>(visitor: Visitor, node: any, throwIfNotFound = true) {
+    let fn: any = null;
+    let dtype: T['TType'] = Type.NONE;
+    if (node instanceof Data ) dtype = inferDType(node.type as T);
+    else if (node instanceof Vector ) dtype = inferDType(node.type as T);
+    else if (node instanceof DataType) dtype = inferDType(node as T);
+    // A number is taken as a Type id as-is; anything else is looked up by name.
+    else if (typeof (dtype = node) !== 'number') dtype = Type[node] as any as T['TType'];
+    switch (dtype) {
+        case Type.Null: fn = visitor.visitNull; break;
+        case Type.Bool: fn = visitor.visitBool; break;
+        case Type.Int: fn = visitor.visitInt; break;
+        case Type.Int8: fn = visitor.visitInt8 || visitor.visitInt; break;
+        case Type.Int16: fn = visitor.visitInt16 || visitor.visitInt; break;
+        case Type.Int32: fn = visitor.visitInt32 || visitor.visitInt; break;
+        case Type.Int64: fn = visitor.visitInt64 || visitor.visitInt; break;
+        case Type.Uint8: fn = visitor.visitUint8 || visitor.visitInt; break;
+        case Type.Uint16: fn = visitor.visitUint16 || visitor.visitInt; break;
+        case Type.Uint32: fn = visitor.visitUint32 || visitor.visitInt; break;
+        case Type.Uint64: fn = visitor.visitUint64 || visitor.visitInt; break;
+        case Type.Float: fn = visitor.visitFloat; break;
+        case Type.Float16: fn = visitor.visitFloat16 || visitor.visitFloat; break;
+        case Type.Float32: fn = visitor.visitFloat32 || visitor.visitFloat; break;
+        case Type.Float64: fn = visitor.visitFloat64 || visitor.visitFloat; break;
+        case Type.Utf8: fn = visitor.visitUtf8; break;
+        case Type.Binary: fn = visitor.visitBinary; break;
+        case Type.FixedSizeBinary: fn = visitor.visitFixedSizeBinary; break;
+        case Type.Date: fn = visitor.visitDate; break;
+        case Type.DateDay: fn = visitor.visitDateDay || visitor.visitDate; break;
+        case Type.DateMillisecond: fn = visitor.visitDateMillisecond || visitor.visitDate; break;
+        case Type.Timestamp: fn = visitor.visitTimestamp; break;
+        case Type.TimestampSecond: fn = visitor.visitTimestampSecond || visitor.visitTimestamp; break;
+        case Type.TimestampMillisecond: fn = visitor.visitTimestampMillisecond || visitor.visitTimestamp; break;
+        case Type.TimestampMicrosecond: fn = visitor.visitTimestampMicrosecond || visitor.visitTimestamp; break;
+        case Type.TimestampNanosecond: fn = visitor.visitTimestampNanosecond || visitor.visitTimestamp; break;
+        case Type.Time: fn = visitor.visitTime; break;
+        case Type.TimeSecond: fn = visitor.visitTimeSecond || visitor.visitTime; break;
+        case Type.TimeMillisecond: fn = visitor.visitTimeMillisecond || visitor.visitTime; break;
+        case Type.TimeMicrosecond: fn = visitor.visitTimeMicrosecond || visitor.visitTime; break;
+        case Type.TimeNanosecond: fn = visitor.visitTimeNanosecond || visitor.visitTime; break;
+        case Type.Decimal: fn = visitor.visitDecimal; break;
+        case Type.List: fn = visitor.visitList; break;
+        case Type.Struct: fn = visitor.visitStruct; break;
+        case Type.Union: fn = visitor.visitUnion; break;
+        case Type.DenseUnion: fn = visitor.visitDenseUnion || visitor.visitUnion; break;
+        case Type.SparseUnion: fn = visitor.visitSparseUnion || visitor.visitUnion; break;
+        case Type.Dictionary: fn = visitor.visitDictionary; break;
+        case Type.Interval: fn = visitor.visitInterval; break;
+        case Type.IntervalDayTime: fn = visitor.visitIntervalDayTime || visitor.visitInterval; break;
+        case Type.IntervalYearMonth: fn = visitor.visitIntervalYearMonth || visitor.visitInterval; break;
+        case Type.FixedSizeList: fn = visitor.visitFixedSizeList; break;
+        case Type.Map: fn = visitor.visitMap; break;
+    }
+    if (typeof fn === 'function') return fn;
+    if (!throwIfNotFound) return () => null;
+    throw new Error(`Unrecognized type '${Type[dtype]}'`);
+}
+/**
+ * Maps a data type to the most specific `Type` enum member available.
+ *
+ * Int, Float, Time, Timestamp, Date, Interval and Union are refined by their
+ * bit width/signedness, precision, unit or mode; when that attribute matches
+ * none of the known cases the general member (e.g. `Type.Int`) is returned.
+ * All other recognized type ids map to themselves.
+ *
+ * @throws Error when `type.typeId` is not one of the handled ids.
+ * @ignore
+ */
+function inferDType<T extends DataType>(type: T): Type {
+    switch (type.typeId) {
+        case Type.Null: return Type.Null;
+        case Type.Int: {
+            const { bitWidth, isSigned } = (type as any as Int);
+            switch (bitWidth) {
+                case 8: return isSigned ? Type.Int8 : Type.Uint8 ;
+                case 16: return isSigned ? Type.Int16 : Type.Uint16;
+                case 32: return isSigned ? Type.Int32 : Type.Uint32;
+                case 64: return isSigned ? Type.Int64 : Type.Uint64;
+            }
+            // @ts-ignore
+            return Type.Int;
+        }
+        case Type.Float:
+            switch((type as any as Float).precision) {
+                case Precision.HALF: return Type.Float16;
+                case Precision.SINGLE: return Type.Float32;
+                case Precision.DOUBLE: return Type.Float64;
+            }
+            // @ts-ignore
+            return Type.Float;
+        case Type.Binary: return Type.Binary;
+        case Type.Utf8: return Type.Utf8;
+        case Type.Bool: return Type.Bool;
+        case Type.Decimal: return Type.Decimal;
+        case Type.Time:
+            switch ((type as any as Time).unit) {
+                case TimeUnit.SECOND: return Type.TimeSecond;
+                case TimeUnit.MILLISECOND: return Type.TimeMillisecond;
+                case TimeUnit.MICROSECOND: return Type.TimeMicrosecond;
+                case TimeUnit.NANOSECOND: return Type.TimeNanosecond;
+            }
+            // @ts-ignore
+            return Type.Time;
+        case Type.Timestamp:
+            switch ((type as any as Timestamp).unit) {
+                case TimeUnit.SECOND: return Type.TimestampSecond;
+                case TimeUnit.MILLISECOND: return Type.TimestampMillisecond;
+                case TimeUnit.MICROSECOND: return Type.TimestampMicrosecond;
+                case TimeUnit.NANOSECOND: return Type.TimestampNanosecond;
+            }
+            // @ts-ignore
+            return Type.Timestamp;
+        case Type.Date:
+            switch ((type as any as Date_).unit) {
+                case DateUnit.DAY: return Type.DateDay;
+                case DateUnit.MILLISECOND: return Type.DateMillisecond;
+            }
+            // @ts-ignore
+            return Type.Date;
+        case Type.Interval:
+            switch ((type as any as Interval).unit) {
+                case IntervalUnit.DAY_TIME: return Type.IntervalDayTime;
+                case IntervalUnit.YEAR_MONTH: return Type.IntervalYearMonth;
+            }
+            // @ts-ignore
+            return Type.Interval;
+        case Type.Map: return Type.Map;
+        case Type.List: return Type.List;
+        case Type.Struct: return Type.Struct;
+        case Type.Union:
+            switch ((type as any as Union).mode) {
+                case UnionMode.Dense: return Type.DenseUnion;
+                case UnionMode.Sparse: return Type.SparseUnion;
+            }
+            // @ts-ignore
+            return Type.Union;
+        case Type.FixedSizeBinary: return Type.FixedSizeBinary;
+        case Type.FixedSizeList: return Type.FixedSizeList;
+        case Type.Dictionary: return Type.Dictionary;
+    }
+    throw new Error(`Unrecognized type '${Type[type.typeId]}'`);
+}
+/**
+ * Declaration-merged with the `Visitor` class to type its visit methods.
+ * Members marked optional are the width/unit/mode-specific variants; when one
+ * is not provided, `getVisitFn` uses the general method for that family.
+ */
+export interface Visitor {
+    visitNull (node: any, ...args: any[]): any;
+    visitBool (node: any, ...args: any[]): any;
+    visitInt (node: any, ...args: any[]): any;
+    visitInt8? (node: any, ...args: any[]): any;
+    visitInt16? (node: any, ...args: any[]): any;
+    visitInt32? (node: any, ...args: any[]): any;
+    visitInt64? (node: any, ...args: any[]): any;
+    visitUint8? (node: any, ...args: any[]): any;
+    visitUint16? (node: any, ...args: any[]): any;
+    visitUint32? (node: any, ...args: any[]): any;
+    visitUint64? (node: any, ...args: any[]): any;
+    visitFloat (node: any, ...args: any[]): any;
+    visitFloat16? (node: any, ...args: any[]): any;
+    visitFloat32? (node: any, ...args: any[]): any;
+    visitFloat64? (node: any, ...args: any[]): any;
+    visitUtf8 (node: any, ...args: any[]): any;
+    visitBinary (node: any, ...args: any[]): any;
+    visitFixedSizeBinary (node: any, ...args: any[]): any;
+    visitDate (node: any, ...args: any[]): any;
+    visitDateDay? (node: any, ...args: any[]): any;
+    visitDateMillisecond? (node: any, ...args: any[]): any;
+    visitTimestamp (node: any, ...args: any[]): any;
+    visitTimestampSecond? (node: any, ...args: any[]): any;
+    visitTimestampMillisecond? (node: any, ...args: any[]): any;
+    visitTimestampMicrosecond? (node: any, ...args: any[]): any;
+    visitTimestampNanosecond? (node: any, ...args: any[]): any;
+    visitTime (node: any, ...args: any[]): any;
+    visitTimeSecond? (node: any, ...args: any[]): any;
+    visitTimeMillisecond? (node: any, ...args: any[]): any;
+    visitTimeMicrosecond? (node: any, ...args: any[]): any;
+    visitTimeNanosecond? (node: any, ...args: any[]): any;
+    visitDecimal (node: any, ...args: any[]): any;
+    visitList (node: any, ...args: any[]): any;
+    visitStruct (node: any, ...args: any[]): any;
+    visitUnion (node: any, ...args: any[]): any;
+    visitDenseUnion? (node: any, ...args: any[]): any;
+    visitSparseUnion? (node: any, ...args: any[]): any;
+    visitDictionary (node: any, ...args: any[]): any;
+    visitInterval (node: any, ...args: any[]): any;
+    visitIntervalDayTime? (node: any, ...args: any[]): any;
+    visitIntervalYearMonth? (node: any, ...args: any[]): any;
+    visitFixedSizeList (node: any, ...args: any[]): any;
+    visitMap (node: any, ...args: any[]): any;
+}
+// Define the optional, specific visit methods as null placeholders on the prototype so they're picked up by the externs creator
+// in the build, and closure-compiler doesn't minify them away. While one is still null, getVisitFn's `||` uses the general method.
+(Visitor.prototype as any).visitInt8 = null;
+(Visitor.prototype as any).visitInt16 = null;
+(Visitor.prototype as any).visitInt32 = null;
+(Visitor.prototype as any).visitInt64 = null;
+(Visitor.prototype as any).visitUint8 = null;
+(Visitor.prototype as any).visitUint16 = null;
+(Visitor.prototype as any).visitUint32 = null;
+(Visitor.prototype as any).visitUint64 = null;
+(Visitor.prototype as any).visitFloat16 = null;
+(Visitor.prototype as any).visitFloat32 = null;
+(Visitor.prototype as any).visitFloat64 = null;
+(Visitor.prototype as any).visitDateDay = null;
+(Visitor.prototype as any).visitDateMillisecond = null;
+(Visitor.prototype as any).visitTimestampSecond = null;
+(Visitor.prototype as any).visitTimestampMillisecond = null;
+(Visitor.prototype as any).visitTimestampMicrosecond = null;
+(Visitor.prototype as any).visitTimestampNanosecond = null;
+(Visitor.prototype as any).visitTimeSecond = null;
+(Visitor.prototype as any).visitTimeMillisecond = null;
+(Visitor.prototype as any).visitTimeMicrosecond = null;
+(Visitor.prototype as any).visitTimeNanosecond = null;
+(Visitor.prototype as any).visitDenseUnion = null;
+(Visitor.prototype as any).visitSparseUnion = null;
+(Visitor.prototype as any).visitIntervalDayTime = null;
+(Visitor.prototype as any).visitIntervalYearMonth = null;
diff --git a/src/arrow/js/src/visitor/builderctor.ts b/src/arrow/js/src/visitor/builderctor.ts
new file mode 100644
index 000000000..ac35a9874
--- /dev/null
+++ b/src/arrow/js/src/visitor/builderctor.ts
@@ -0,0 +1,98 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+import { Data } from '../data';
+import { Type } from '../enum';
+import { DataType } from '../type';
+import { Visitor } from '../visitor';
+import { VectorType, BuilderCtor } from '../interfaces';
+import { BinaryBuilder } from '../builder/binary';
+import { BoolBuilder } from '../builder/bool';
+import { DateBuilder, DateDayBuilder, DateMillisecondBuilder } from '../builder/date';
+import { DecimalBuilder } from '../builder/decimal';
+import { DictionaryBuilder } from '../builder/dictionary';
+import { FixedSizeBinaryBuilder } from '../builder/fixedsizebinary';
+import { FixedSizeListBuilder } from '../builder/fixedsizelist';
+import { FloatBuilder, Float16Builder, Float32Builder, Float64Builder } from '../builder/float';
+import { IntervalBuilder, IntervalDayTimeBuilder, IntervalYearMonthBuilder } from '../builder/interval';
+import { IntBuilder, Int8Builder, Int16Builder, Int32Builder, Int64Builder, Uint8Builder, Uint16Builder, Uint32Builder, Uint64Builder } from '../builder/int';
+import { ListBuilder } from '../builder/list';
+import { MapBuilder } from '../builder/map';
+import { NullBuilder } from '../builder/null';
+import { StructBuilder } from '../builder/struct';
+import { TimestampBuilder, TimestampSecondBuilder, TimestampMillisecondBuilder, TimestampMicrosecondBuilder, TimestampNanosecondBuilder } from '../builder/timestamp';
+import { TimeBuilder, TimeSecondBuilder, TimeMillisecondBuilder, TimeMicrosecondBuilder, TimeNanosecondBuilder } from '../builder/time';
+import { UnionBuilder, DenseUnionBuilder, SparseUnionBuilder } from '../builder/union';
+import { Utf8Builder } from '../builder/utf8';
+/**
+ * Call signatures for the `GetBuilderCtor` visitor. The interface shares its
+ * name with the class declared right after it, so these typed signatures are
+ * merged onto that class.
+ * @ignore
+ */
+export interface GetBuilderCtor extends Visitor {
+    /** Returns the builder constructor for a single `Type` value. */
+    visit<T extends Type>(type: T): BuilderCtor<T>;
+    /** Returns one builder constructor per entry of `types`. */
+    visitMany<T extends Type>(types: T[]): BuilderCtor<T>[];
+    /** Returns a zero-argument function that yields the builder constructor for `type`. */
+    getVisitFn<T extends Type>(type: T): () => BuilderCtor<T>;
+    /** Same as above, but keyed by a vector, a `Data` instance or a `DataType`. */
+    getVisitFn<T extends DataType>(node: VectorType<T> | Data<T> | T): () => BuilderCtor<T>;
+}
+/**
+ * Visitor whose `visit*` methods take no arguments and simply return the
+ * builder class imported for the corresponding type (for example
+ * `visitInt8()` returns `Int8Builder`).
+ * @ignore
+ */
+export class GetBuilderCtor extends Visitor {
+    public visitNull() { return NullBuilder; }
+    public visitBool() { return BoolBuilder; }
+    public visitInt() { return IntBuilder; }
+    public visitInt8() { return Int8Builder; }
+    public visitInt16() { return Int16Builder; }
+    public visitInt32() { return Int32Builder; }
+    public visitInt64() { return Int64Builder; }
+    public visitUint8() { return Uint8Builder; }
+    public visitUint16() { return Uint16Builder; }
+    public visitUint32() { return Uint32Builder; }
+    public visitUint64() { return Uint64Builder; }
+    public visitFloat() { return FloatBuilder; }
+    public visitFloat16() { return Float16Builder; }
+    public visitFloat32() { return Float32Builder; }
+    public visitFloat64() { return Float64Builder; }
+    public visitUtf8() { return Utf8Builder; }
+    public visitBinary() { return BinaryBuilder; }
+    public visitFixedSizeBinary() { return FixedSizeBinaryBuilder; }
+    public visitDate() { return DateBuilder; }
+    public visitDateDay() { return DateDayBuilder; }
+    public visitDateMillisecond() { return DateMillisecondBuilder; }
+    public visitTimestamp() { return TimestampBuilder; }
+    public visitTimestampSecond() { return TimestampSecondBuilder; }
+    public visitTimestampMillisecond() { return TimestampMillisecondBuilder; }
+    public visitTimestampMicrosecond() { return TimestampMicrosecondBuilder; }
+    public visitTimestampNanosecond() { return TimestampNanosecondBuilder; }
+    public visitTime() { return TimeBuilder; }
+    public visitTimeSecond() { return TimeSecondBuilder; }
+    public visitTimeMillisecond() { return TimeMillisecondBuilder; }
+    public visitTimeMicrosecond() { return TimeMicrosecondBuilder; }
+    public visitTimeNanosecond() { return TimeNanosecondBuilder; }
+    public visitDecimal() { return DecimalBuilder; }
+    public visitList() { return ListBuilder; }
+    public visitStruct() { return StructBuilder; }
+    public visitUnion() { return UnionBuilder; }
+    public visitDenseUnion() { return DenseUnionBuilder; }
+    public visitSparseUnion() { return SparseUnionBuilder; }
+    public visitDictionary() { return DictionaryBuilder; }
+    public visitInterval() { return IntervalBuilder; }
+    public visitIntervalDayTime() { return IntervalDayTimeBuilder; }
+    public visitIntervalYearMonth() { return IntervalYearMonthBuilder; }
+    public visitFixedSizeList() { return FixedSizeListBuilder; }
+    public visitMap() { return MapBuilder; }
+}
+// Module-level GetBuilderCtor instance exported for callers of this module.
+/** @ignore */
+export const instance = new GetBuilderCtor();
diff --git a/src/arrow/js/src/visitor/bytewidth.ts b/src/arrow/js/src/visitor/bytewidth.ts
new file mode 100644
index 000000000..8be7c7a64
--- /dev/null
+++ b/src/arrow/js/src/visitor/bytewidth.ts
@@ -0,0 +1,68 @@
+/* istanbul ignore file */
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+import { Data } from '../data';
+import { Visitor } from '../visitor';
+import { VectorType } from '../interfaces';
+import { Type, TimeUnit } from '../enum';
+import { Schema, Field } from '../schema';
+import {
+ DataType, Dictionary,
+ Float, Int, Date_, Interval, Time, Timestamp,
+ Bool, Null, Utf8, Binary, Decimal, FixedSizeBinary,
+ List, FixedSizeList, Map_, Struct, Union,
+} from '../type';
+// Adds two numbers; passed as the reducer to Array.prototype.reduce by ByteWidthVisitor.
+/** @ignore */ const sum = (x: number, y: number) => x + y;
+// Builds the message of the Error thrown by ByteWidthVisitor for types it treats as variable-width.
+/** @ignore */ const variableWidthColumnErrorMessage = (type: DataType) => `Cannot compute the byte width of variable-width column ${type}`;
+/**
+ * Call signatures for the `ByteWidthVisitor`. The interface shares its name
+ * with the class declared right after it, so these typed signatures are
+ * merged onto that class. Every signature resolves to a `number`.
+ * @ignore
+ */
+export interface ByteWidthVisitor extends Visitor {
+    /** Returns the byte width computed for a single `DataType`. */
+    visit<T extends DataType>(node: T): number;
+    /** Returns one byte width per entry of `nodes`. */
+    visitMany<T extends DataType>(nodes: T[]): number[];
+    /** Returns the function that computes the byte width for the given `Type` value. */
+    getVisitFn<T extends Type>(node: T): (type: DataType<T>) => number;
+    /** Same as above, but keyed by a vector, a `Data` instance or a `DataType`. */
+    getVisitFn<T extends DataType>(node: VectorType<T> | Data<T> | T): (type: T) => number;
+}
+/**
+ * Visitor that maps a `DataType` (or a list of fields / a schema) to a byte
+ * width per value.
+ *
+ * - `visitBinary`, `visitUtf8` and `visitList` throw an `Error`, because this
+ *   visitor treats those types as variable-width.
+ * - Nested types (`Struct`, `Union`, `Map_`, `FixedSizeList`, `Schema`) are
+ *   computed by summing the widths of their child fields.
+ * @ignore
+ */
+export class ByteWidthVisitor extends Visitor {
+    public visitNull(____: Null) { return 0; }
+    public visitInt(type: Int) { return type.bitWidth / 8; }
+    public visitFloat(type: Float) { return type.ArrayType.BYTES_PER_ELEMENT; }
+    public visitBinary(type: Binary) { throw new Error(variableWidthColumnErrorMessage(type)); }
+    public visitUtf8(type: Utf8) { throw new Error(variableWidthColumnErrorMessage(type)); }
+    // Fractional result: one eighth of a byte per value.
+    public visitBool(____: Bool) { return 1 / 8; }
+    public visitDecimal(____: Decimal) { return 16; }
+    // Yields 4 when `unit` is 0 and 8 when it is 1 — presumably DateUnit.DAY
+    // and DateUnit.MILLISECOND; confirm against ../enum.
+    public visitDate(type: Date_) { return (type.unit + 1) * 4; }
+    public visitTime(type: Time) { return type.bitWidth / 8; }
+    // NOTE(review): the Timestamp getters in visitor/get.ts read two 32-bit
+    // words per value for every unit, including SECOND, which suggests
+    // 8 bytes regardless of unit. The 4-byte SECOND case below looks
+    // inconsistent with that — confirm before relying on it.
+    public visitTimestamp(type: Timestamp) { return type.unit === TimeUnit.SECOND ? 4 : 8; }
+    // Same `(unit + 1) * 4` scheme as visitDate — presumably YEAR_MONTH is 0
+    // and DAY_TIME is 1; confirm against ../enum.
+    public visitInterval(type: Interval) { return (type.unit + 1) * 4; }
+    public visitList(type: List) { throw new Error(variableWidthColumnErrorMessage(type)); }
+    public visitStruct(type: Struct) { return this.visitFields(type.children).reduce(sum, 0); }
+    public visitUnion(type: Union) { return this.visitFields(type.children).reduce(sum, 0); }
+    public visitFixedSizeBinary(type: FixedSizeBinary) { return type.byteWidth; }
+    public visitFixedSizeList(type: FixedSizeList) { return type.listSize * this.visitFields(type.children).reduce(sum, 0); }
+    public visitMap(type: Map_) { return this.visitFields(type.children).reduce(sum, 0); }
+    // A dictionary column's width is the width of its index type.
+    public visitDictionary(type: Dictionary) { return this.visit(type.indices); }
+    // Tolerates a missing field list by treating it as empty.
+    public visitFields(fields: Field[]) { return (fields || []).map((field) => this.visit(field.type)); }
+    public visitSchema(schema: Schema) { return this.visitFields(schema.fields).reduce(sum, 0); }
+}
+// Module-level ByteWidthVisitor instance exported for callers of this module.
+/** @ignore */
+export const instance = new ByteWidthVisitor();
diff --git a/src/arrow/js/src/visitor/get.ts b/src/arrow/js/src/visitor/get.ts
new file mode 100644
index 000000000..733418c0a
--- /dev/null
+++ b/src/arrow/js/src/visitor/get.ts
@@ -0,0 +1,321 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+import { Data } from '../data';
+import { BN } from '../util/bn';
+import { Visitor } from '../visitor';
+import { decodeUtf8 } from '../util/utf8';
+import { VectorType } from '../interfaces';
+import { uint16ToFloat64 } from '../util/math';
+import { Type, UnionMode, Precision, DateUnit, TimeUnit, IntervalUnit } from '../enum';
+import {
+ DataType, Dictionary,
+ Bool, Null, Utf8, Binary, Decimal, FixedSizeBinary, List, FixedSizeList, Map_, Struct,
+ Float, Float16, Float32, Float64,
+ Int, Uint8, Uint16, Uint32, Uint64, Int8, Int16, Int32, Int64,
+ Date_, DateDay, DateMillisecond,
+ Interval, IntervalDayTime, IntervalYearMonth,
+ Time, TimeSecond, TimeMillisecond, TimeMicrosecond, TimeNanosecond,
+ Timestamp, TimestampSecond, TimestampMillisecond, TimestampMicrosecond, TimestampNanosecond,
+ Union, DenseUnion, SparseUnion,
+} from '../type';
+/**
+ * Call signatures for the `GetVisitor`. The interface shares its name with
+ * the (empty) class declared right after it, so these typed signatures are
+ * merged onto that class; the implementations are assigned to
+ * `GetVisitor.prototype` further down in this file.
+ *
+ * Every `visit*` method takes a vector and an element index and returns the
+ * vector type's `TValue`.
+ * @ignore
+ */
+export interface GetVisitor extends Visitor {
+    visit<T extends VectorType>(node: T, index: number): T['TValue'];
+    visitMany<T extends VectorType>(nodes: T[], indices: number[]): T['TValue'][];
+    getVisitFn<T extends Type>(node: T): (vector: VectorType<T>, index: number) => VectorType<T>['TValue'];
+    getVisitFn<T extends DataType>(node: VectorType<T> | Data<T> | T): (vector: VectorType<T>, index: number) => VectorType<T>['TValue'];
+    visitNull<T extends Null>(vector: VectorType<T>, index: number): T['TValue'];
+    visitBool<T extends Bool>(vector: VectorType<T>, index: number): T['TValue'];
+    visitInt<T extends Int>(vector: VectorType<T>, index: number): T['TValue'];
+    visitInt8<T extends Int8>(vector: VectorType<T>, index: number): T['TValue'];
+    visitInt16<T extends Int16>(vector: VectorType<T>, index: number): T['TValue'];
+    visitInt32<T extends Int32>(vector: VectorType<T>, index: number): T['TValue'];
+    visitInt64<T extends Int64>(vector: VectorType<T>, index: number): T['TValue'];
+    visitUint8<T extends Uint8>(vector: VectorType<T>, index: number): T['TValue'];
+    visitUint16<T extends Uint16>(vector: VectorType<T>, index: number): T['TValue'];
+    visitUint32<T extends Uint32>(vector: VectorType<T>, index: number): T['TValue'];
+    visitUint64<T extends Uint64>(vector: VectorType<T>, index: number): T['TValue'];
+    visitFloat<T extends Float>(vector: VectorType<T>, index: number): T['TValue'];
+    visitFloat16<T extends Float16>(vector: VectorType<T>, index: number): T['TValue'];
+    visitFloat32<T extends Float32>(vector: VectorType<T>, index: number): T['TValue'];
+    visitFloat64<T extends Float64>(vector: VectorType<T>, index: number): T['TValue'];
+    visitUtf8<T extends Utf8>(vector: VectorType<T>, index: number): T['TValue'];
+    visitBinary<T extends Binary>(vector: VectorType<T>, index: number): T['TValue'];
+    visitFixedSizeBinary<T extends FixedSizeBinary>(vector: VectorType<T>, index: number): T['TValue'];
+    visitDate<T extends Date_>(vector: VectorType<T>, index: number): T['TValue'];
+    visitDateDay<T extends DateDay>(vector: VectorType<T>, index: number): T['TValue'];
+    visitDateMillisecond<T extends DateMillisecond>(vector: VectorType<T>, index: number): T['TValue'];
+    visitTimestamp<T extends Timestamp>(vector: VectorType<T>, index: number): T['TValue'];
+    visitTimestampSecond<T extends TimestampSecond>(vector: VectorType<T>, index: number): T['TValue'];
+    visitTimestampMillisecond<T extends TimestampMillisecond>(vector: VectorType<T>, index: number): T['TValue'];
+    visitTimestampMicrosecond<T extends TimestampMicrosecond>(vector: VectorType<T>, index: number): T['TValue'];
+    visitTimestampNanosecond<T extends TimestampNanosecond>(vector: VectorType<T>, index: number): T['TValue'];
+    visitTime<T extends Time>(vector: VectorType<T>, index: number): T['TValue'];
+    visitTimeSecond<T extends TimeSecond>(vector: VectorType<T>, index: number): T['TValue'];
+    visitTimeMillisecond<T extends TimeMillisecond>(vector: VectorType<T>, index: number): T['TValue'];
+    visitTimeMicrosecond<T extends TimeMicrosecond>(vector: VectorType<T>, index: number): T['TValue'];
+    visitTimeNanosecond<T extends TimeNanosecond>(vector: VectorType<T>, index: number): T['TValue'];
+    visitDecimal<T extends Decimal>(vector: VectorType<T>, index: number): T['TValue'];
+    visitList<T extends List>(vector: VectorType<T>, index: number): T['TValue'];
+    visitStruct<T extends Struct>(vector: VectorType<T>, index: number): T['TValue'];
+    visitUnion<T extends Union>(vector: VectorType<T>, index: number): T['TValue'];
+    visitDenseUnion<T extends DenseUnion>(vector: VectorType<T>, index: number): T['TValue'];
+    visitSparseUnion<T extends SparseUnion>(vector: VectorType<T>, index: number): T['TValue'];
+    visitDictionary<T extends Dictionary>(vector: VectorType<T>, index: number): T['TValue'];
+    visitInterval<T extends Interval>(vector: VectorType<T>, index: number): T['TValue'];
+    visitIntervalDayTime<T extends IntervalDayTime>(vector: VectorType<T>, index: number): T['TValue'];
+    visitIntervalYearMonth<T extends IntervalYearMonth>(vector: VectorType<T>, index: number): T['TValue'];
+    visitFixedSizeList<T extends FixedSizeList>(vector: VectorType<T>, index: number): T['TValue'];
+    visitMap<T extends Map_>(vector: VectorType<T>, index: number): T['TValue'];
+}
+// The class body is intentionally empty: the visit* implementations are
+// assigned onto GetVisitor.prototype further down in this file.
+/** @ignore */
+export class GetVisitor extends Visitor {}
+// Scales the day count stored at data[index] by 86400000 (milliseconds per day).
+/** @ignore */const epochDaysToMs = (data: Int32Array, index: number) => 86400000 * data[index];
+// Combines two adjacent 32-bit words into one number: data[index + 1] is scaled by 2^32 (4294967296)
+// and data[index] is added after being reinterpreted as unsigned (`>>> 0`).
+/** @ignore */const epochMillisecondsLongToMs = (data: Int32Array, index: number) => 4294967296 * (data[index + 1]) + (data[index] >>> 0);
+// Same two-word combination, with each word divided by 1000 before combining.
+/** @ignore */const epochMicrosecondsLongToMs = (data: Int32Array, index: number) => 4294967296 * (data[index + 1] / 1000) + ((data[index] >>> 0) / 1000);
+// Same two-word combination, with each word divided by 1000000 before combining.
+/** @ignore */const epochNanosecondsLongToMs = (data: Int32Array, index: number) => 4294967296 * (data[index + 1] / 1000000) + ((data[index] >>> 0) / 1000000);
+// Wraps a millisecond count in a Date.
+/** @ignore */const epochMillisecondsToDate = (epochMs: number) => new Date(epochMs);
+// Date for the day count stored at data[index].
+/** @ignore */const epochDaysToDate = (data: Int32Array, index: number) => epochMillisecondsToDate(epochDaysToMs(data, index));
+// Date for the two-word millisecond value starting at data[index].
+/** @ignore */const epochMillisecondsLongToDate = (data: Int32Array, index: number) => epochMillisecondsToDate(epochMillisecondsLongToMs(data, index));
+// Getter for Null vectors: ignores both arguments and always returns null.
+/** @ignore */
+const getNull = <T extends Null>(_vector: VectorType<T>, _index: number): T['TValue'] => null;
+/**
+ * Returns the bytes of the `index`-th variable-width value.
+ *
+ * The value spans `values[valueOffsets[index] .. valueOffsets[index + 1])`.
+ * The result is a `subarray` view over `values`, not a copy.
+ *
+ * @param values Buffer holding the bytes of every value.
+ * @param valueOffsets Start offset of each value, plus one trailing end offset.
+ * @param index Index of the value to read.
+ * @returns The byte range, or `null` when either offset is missing
+ * (for example when `index` is out of range).
+ * @ignore
+ */
+const getVariableWidthBytes = (values: Uint8Array, valueOffsets: Int32Array, index: number) => {
+    const { [index]: x, [index + 1]: y } = valueOffsets;
+    return x != null && y != null ? values.subarray(x, y) : null as any;
+};
+/**
+ * Reads one bit from a bit-packed Bool vector.
+ *
+ * The bit position is `offset + index`: the byte is selected with `>> 3`
+ * and the bit within that byte with `% 8` (lowest bit first).
+ *
+ * @returns `true` when the selected bit is set, otherwise `false`.
+ * @ignore
+ */
+const getBool = <T extends Bool>({ offset, values }: VectorType<T>, index: number): T['TValue'] => {
+    const idx = offset + index;
+    const byte = values[idx >> 3];
+    return (byte & 1 << (idx % 8)) !== 0;
+};
+// Numeric types handled by getNumeric (see getInt / getFloat): integers below 64 bits plus Float32 / Float64.
+/** @ignore */
+type Numeric1X = Int8 | Int16 | Int32 | Uint8 | Uint16 | Uint32 | Float32 | Float64;
+// 64-bit integer types, handled by getBigInts.
+/** @ignore */
+type Numeric2X = Int64 | Uint64;
+// Returns a Date built from the day count stored at values[index].
+/** @ignore */
+const getDateDay = <T extends DateDay> ({ values }: VectorType<T>, index: number): T['TValue'] => epochDaysToDate(values, index);
+// Returns a Date built from the two-word millisecond value starting at values[index * 2].
+/** @ignore */
+const getDateMillisecond = <T extends DateMillisecond>({ values }: VectorType<T>, index: number): T['TValue'] => epochMillisecondsLongToDate(values, index * 2);
+// Returns values[stride * index] unchanged.
+/** @ignore */
+const getNumeric = <T extends Numeric1X> ({ stride, values }: VectorType<T>, index: number): T['TValue'] => values[stride * index];
+// Reads values[stride * index] and converts it with uint16ToFloat64.
+/** @ignore */
+const getFloat16 = <T extends Float16> ({ stride, values }: VectorType<T>, index: number): T['TValue'] => uint16ToFloat64(values[stride * index]);
+/**
+ * Reads the `index`-th value of a 64-bit integer vector.
+ *
+ * Takes the `stride`-element window of `values` that starts at
+ * `stride * index` (a `subarray` view, not a copy) and wraps it with
+ * `BN.new`, passing `type.isSigned` as the signedness flag.
+ * @ignore
+ */
+const getBigInts = <T extends Numeric2X>({ stride, values, type }: VectorType<T>, index: number): T['TValue'] => <any> BN.new(values.subarray(stride * index, stride * (index + 1)), type.isSigned);
+// Returns the stride-element window of values starting at stride * index (a subarray view).
+/** @ignore */
+const getFixedSizeBinary = <T extends FixedSizeBinary>({ stride, values }: VectorType<T>, index: number): T['TValue'] => values.subarray(stride * index, stride * (index + 1));
+// Returns the bytes of the index-th value via getVariableWidthBytes (null when the offsets are missing).
+/** @ignore */
+const getBinary = <T extends Binary>({ values, valueOffsets }: VectorType<T>, index: number): T['TValue'] => getVariableWidthBytes(values, valueOffsets, index);
+/**
+ * Reads the `index`-th string of a Utf8 vector.
+ *
+ * Fetches the value's bytes with `getVariableWidthBytes` and decodes them
+ * with `decodeUtf8`.
+ *
+ * @returns The decoded string, or `null` when no byte range could be read.
+ * @ignore
+ */
+const getUtf8 = <T extends Utf8>({ values, valueOffsets }: VectorType<T>, index: number): T['TValue'] => {
+    const bytes = getVariableWidthBytes(values, valueOffsets, index);
+    return bytes !== null ? decodeUtf8(bytes) : null as any;
+};
+/* istanbul ignore next */
+/**
+ * Generic Int getter: dispatches on `vector.type.bitWidth`.
+ * Widths below 64 bits go to `getNumeric`; 64-bit values go to `getBigInts`.
+ * @ignore
+ */
+const getInt = <T extends Int>(vector: VectorType<T>, index: number): T['TValue'] => (
+    vector.type.bitWidth < 64
+        ? getNumeric(vector as VectorType<Numeric1X>, index)
+        : getBigInts(vector as VectorType<Numeric2X>, index)
+);
+/* istanbul ignore next */
+/**
+ * Generic Float getter: dispatches on `vector.type.precision`.
+ * `Precision.HALF` goes to `getFloat16`; every other precision goes to
+ * `getNumeric`.
+ * @ignore
+ */
+const getFloat = <T extends Float>(vector: VectorType<T>, index: number): T['TValue'] => (
+    vector.type.precision !== Precision.HALF
+        ? getNumeric(vector as VectorType<Numeric1X>, index)
+        : getFloat16(vector as VectorType<Float16>, index)
+);
+/* istanbul ignore next */
+/**
+ * Generic Date getter: dispatches on `vector.type.unit`.
+ * `DateUnit.DAY` goes to `getDateDay`; any other unit goes to
+ * `getDateMillisecond`.
+ * @ignore
+ */
+const getDate = <T extends Date_>(vector: VectorType<T>, index: number): T['TValue'] => (
+    vector.type.unit === DateUnit.DAY
+        ? getDateDay(vector as VectorType<DateDay>, index)
+        : getDateMillisecond(vector as VectorType<DateMillisecond>, index)
+);
+// Each Timestamp getter reads the two 32-bit words starting at values[index * 2].
+// Seconds: combines the two words and multiplies the result by 1000.
+/** @ignore */
+const getTimestampSecond = <T extends TimestampSecond> ({ values }: VectorType<T>, index: number): T['TValue'] => 1000 * epochMillisecondsLongToMs(values, index * 2);
+// Milliseconds: combines the two words with no scaling.
+/** @ignore */
+const getTimestampMillisecond = <T extends TimestampMillisecond>({ values }: VectorType<T>, index: number): T['TValue'] => epochMillisecondsLongToMs(values, index * 2);
+// Microseconds: combines the two words, scaled down by 1000.
+/** @ignore */
+const getTimestampMicrosecond = <T extends TimestampMicrosecond>({ values }: VectorType<T>, index: number): T['TValue'] => epochMicrosecondsLongToMs(values, index * 2);
+// Nanoseconds: combines the two words, scaled down by 1000000.
+/** @ignore */
+const getTimestampNanosecond = <T extends TimestampNanosecond> ({ values }: VectorType<T>, index: number): T['TValue'] => epochNanosecondsLongToMs(values, index * 2);
+/* istanbul ignore next */
+/**
+ * Generic Timestamp getter: dispatches on `vector.type.unit` to the getter
+ * for that `TimeUnit`. There is no `default` branch, so a unit outside the
+ * four listed cases yields `undefined`.
+ * @ignore
+ */
+const getTimestamp = <T extends Timestamp>(vector: VectorType<T>, index: number): T['TValue'] => {
+    switch (vector.type.unit) {
+        case TimeUnit.SECOND: return getTimestampSecond(vector as VectorType<TimestampSecond>, index);
+        case TimeUnit.MILLISECOND: return getTimestampMillisecond(vector as VectorType<TimestampMillisecond>, index);
+        case TimeUnit.MICROSECOND: return getTimestampMicrosecond(vector as VectorType<TimestampMicrosecond>, index);
+        case TimeUnit.NANOSECOND: return getTimestampNanosecond(vector as VectorType<TimestampNanosecond>, index);
+    }
+};
+// Seconds: returns values[stride * index] unchanged.
+/** @ignore */
+const getTimeSecond = <T extends TimeSecond> ({ values, stride }: VectorType<T>, index: number): T['TValue'] => values[stride * index];
+// Milliseconds: returns values[stride * index] unchanged.
+/** @ignore */
+const getTimeMillisecond = <T extends TimeMillisecond>({ values, stride }: VectorType<T>, index: number): T['TValue'] => values[stride * index];
+// Microseconds: wraps the two elements starting at values[2 * index] with BN.signed.
+/** @ignore */
+const getTimeMicrosecond = <T extends TimeMicrosecond>({ values }: VectorType<T>, index: number): T['TValue'] => BN.signed(values.subarray(2 * index, 2 * (index + 1)));
+// Nanoseconds: wraps the two elements starting at values[2 * index] with BN.signed.
+/** @ignore */
+const getTimeNanosecond = <T extends TimeNanosecond> ({ values }: VectorType<T>, index: number): T['TValue'] => BN.signed(values.subarray(2 * index, 2 * (index + 1)));
+/* istanbul ignore next */
+/**
+ * Generic Time getter: dispatches on `vector.type.unit` to the getter for
+ * that `TimeUnit`. There is no `default` branch, so a unit outside the four
+ * listed cases yields `undefined`.
+ * @ignore
+ */
+const getTime = <T extends Time>(vector: VectorType<T>, index: number): T['TValue'] => {
+    switch (vector.type.unit) {
+        case TimeUnit.SECOND: return getTimeSecond(vector as VectorType<TimeSecond>, index);
+        case TimeUnit.MILLISECOND: return getTimeMillisecond(vector as VectorType<TimeMillisecond>, index);
+        case TimeUnit.MICROSECOND: return getTimeMicrosecond(vector as VectorType<TimeMicrosecond>, index);
+        case TimeUnit.NANOSECOND: return getTimeNanosecond(vector as VectorType<TimeNanosecond>, index);
+    }
+};
+// Wraps the four elements starting at values[4 * index] with BN.decimal.
+/** @ignore */
+const getDecimal = <T extends Decimal>({ values }: VectorType<T>, index: number): T['TValue'] => BN.decimal(values.subarray(4 * index, 4 * (index + 1)));
+/**
+ * Reads the `index`-th list of a List vector.
+ *
+ * Slices the first child vector between the two consecutive entries of
+ * `valueOffsets` at `index * stride` and `index * stride + 1`.
+ * @ignore
+ */
+const getList = <T extends List>(vector: VectorType<T>, index: number): T['TValue'] => {
+    const child = vector.getChildAt(0)!, { valueOffsets, stride } = vector;
+    return child.slice(valueOffsets[index * stride], valueOffsets[(index * stride) + 1]) as T['TValue'];
+};
+/**
+ * Reads the `index`-th entry of a Map vector by delegating to
+ * `vector.bind(index)`.
+ * @ignore
+ */
+const getMap = <T extends Map_>(vector: VectorType<T>, index: number): T['TValue'] => {
+    return vector.bind(index) as T['TValue'];
+};
+/**
+ * Reads the `index`-th row of a Struct vector by delegating to
+ * `vector.bind(index)`.
+ * @ignore
+ */
+const getStruct = <T extends Struct>(vector: VectorType<T>, index: number): T['TValue'] => {
+    return vector.bind(index) as T['TValue'];
+};
+/* istanbul ignore next */
+/**
+ * Generic Union getter: dispatches on `vector.type.mode`.
+ * `UnionMode.Dense` goes to `getDenseUnion`; any other mode goes to
+ * `getSparseUnion`.
+ * @ignore
+ */
+const getUnion = <
+    V extends VectorType<Union> | VectorType<DenseUnion> | VectorType<SparseUnion>
+>(vector: V, index: number): V['TValue'] => {
+    return vector.type.mode === UnionMode.Dense ?
+        getDenseUnion(vector as VectorType<DenseUnion>, index) :
+        getSparseUnion(vector as VectorType<SparseUnion>, index);
+};
+/**
+ * Reads the `index`-th value of a dense union.
+ *
+ * The type id at `typeIds[index]` is mapped to a child index through
+ * `typeIdToChildIndex`. The value is then read from that child at position
+ * `valueOffsets[index]`, not at `index` itself.
+ *
+ * @returns The child's value, or `null` when no child vector is found.
+ * @ignore
+ */
+const getDenseUnion = <T extends DenseUnion>(vector: VectorType<T>, index: number): T['TValue'] => {
+    const childIndex = vector.typeIdToChildIndex[vector.typeIds[index]];
+    const child = vector.getChildAt(childIndex);
+    return child ? child.get(vector.valueOffsets[index]) : null;
+};
+/**
+ * Reads the `index`-th value of a sparse union.
+ *
+ * The type id at `typeIds[index]` is mapped to a child index through
+ * `typeIdToChildIndex`. The value is then read from that child at the same
+ * `index`.
+ *
+ * @returns The child's value, or `null` when no child vector is found.
+ * @ignore
+ */
+const getSparseUnion = <T extends SparseUnion>(vector: VectorType<T>, index: number): T['TValue'] => {
+    const childIndex = vector.typeIdToChildIndex[vector.typeIds[index]];
+    const child = vector.getChildAt(childIndex);
+    return child ? child.get(index) : null;
+};
+/**
+ * Reads the `index`-th value of a Dictionary vector: looks up the key with
+ * `vector.getKey(index)` and resolves it with `vector.getValue`.
+ * The key is asserted non-null (`!`) before the lookup.
+ * @ignore
+ */
+const getDictionary = <T extends Dictionary>(vector: VectorType<T>, index: number): T['TValue'] => {
+    return vector.getValue(vector.getKey(index)!);
+};
+/* istanbul ignore next */
+// Generic Interval getter: IntervalUnit.DAY_TIME goes to getIntervalDayTime,
+// any other unit goes to getIntervalYearMonth.
+/** @ignore */
+const getInterval = <T extends Interval>(vector: VectorType<T>, index: number): T['TValue'] =>
+ (vector.type.unit === IntervalUnit.DAY_TIME)
+ ? getIntervalDayTime(vector as VectorType<IntervalDayTime>, index)
+ : getIntervalYearMonth(vector as VectorType<IntervalYearMonth>, index);
+// Returns the two elements starting at values[2 * index] as a subarray view.
+/** @ignore */
+const getIntervalDayTime = <T extends IntervalDayTime>({ values }: VectorType<T>, index: number): T['TValue'] => values.subarray(2 * index, 2 * (index + 1));
+/**
+ * Reads the `index`-th value of a year-month Interval vector.
+ *
+ * The stored value is split into whole years (`value / 12`, truncated) and
+ * the remaining months (`value % 12`).
+ *
+ * @returns A newly allocated two-element `Int32Array`: `[years, months]`.
+ * @ignore
+ */
+const getIntervalYearMonth = <T extends IntervalYearMonth>({ values }: VectorType<T>, index: number): T['TValue'] => {
+    const interval = values[index];
+    const int32s = new Int32Array(2);
+    int32s[0] = interval / 12 | 0; /* years */
+    int32s[1] = interval % 12 | 0; /* months */
+    return int32s;
+};
+/**
+ * Reads the `index`-th list of a FixedSizeList vector.
+ *
+ * Slices the first child vector over the `stride`-element window that
+ * starts at `index * stride`.
+ * @ignore
+ */
+const getFixedSizeList = <T extends FixedSizeList>(vector: VectorType<T>, index: number): T['TValue'] => {
+    const child = vector.getChildAt(0)!, { stride } = vector;
+    return child.slice(index * stride, (index + 1) * stride) as T['TValue'];
+};
+// Install the getter functions defined above as the visit* methods of
+// GetVisitor, whose class body is empty. Several methods share one getter
+// (e.g. getNumeric serves the sub-64-bit ints and Float32/Float64, and
+// getBigInts serves Int64/Uint64).
+GetVisitor.prototype.visitNull = getNull;
+GetVisitor.prototype.visitBool = getBool;
+GetVisitor.prototype.visitInt = getInt;
+GetVisitor.prototype.visitInt8 = getNumeric;
+GetVisitor.prototype.visitInt16 = getNumeric;
+GetVisitor.prototype.visitInt32 = getNumeric;
+GetVisitor.prototype.visitInt64 = getBigInts;
+GetVisitor.prototype.visitUint8 = getNumeric;
+GetVisitor.prototype.visitUint16 = getNumeric;
+GetVisitor.prototype.visitUint32 = getNumeric;
+GetVisitor.prototype.visitUint64 = getBigInts;
+GetVisitor.prototype.visitFloat = getFloat;
+GetVisitor.prototype.visitFloat16 = getFloat16;
+GetVisitor.prototype.visitFloat32 = getNumeric;
+GetVisitor.prototype.visitFloat64 = getNumeric;
+GetVisitor.prototype.visitUtf8 = getUtf8;
+GetVisitor.prototype.visitBinary = getBinary;
+GetVisitor.prototype.visitFixedSizeBinary = getFixedSizeBinary;
+GetVisitor.prototype.visitDate = getDate;
+GetVisitor.prototype.visitDateDay = getDateDay;
+GetVisitor.prototype.visitDateMillisecond = getDateMillisecond;
+GetVisitor.prototype.visitTimestamp = getTimestamp;
+GetVisitor.prototype.visitTimestampSecond = getTimestampSecond;
+GetVisitor.prototype.visitTimestampMillisecond = getTimestampMillisecond;
+GetVisitor.prototype.visitTimestampMicrosecond = getTimestampMicrosecond;
+GetVisitor.prototype.visitTimestampNanosecond = getTimestampNanosecond;
+GetVisitor.prototype.visitTime = getTime;
+GetVisitor.prototype.visitTimeSecond = getTimeSecond;
+GetVisitor.prototype.visitTimeMillisecond = getTimeMillisecond;
+GetVisitor.prototype.visitTimeMicrosecond = getTimeMicrosecond;
+GetVisitor.prototype.visitTimeNanosecond = getTimeNanosecond;
+GetVisitor.prototype.visitDecimal = getDecimal;
+GetVisitor.prototype.visitList = getList;
+GetVisitor.prototype.visitStruct = getStruct;
+GetVisitor.prototype.visitUnion = getUnion;
+GetVisitor.prototype.visitDenseUnion = getDenseUnion;
+GetVisitor.prototype.visitSparseUnion = getSparseUnion;
+GetVisitor.prototype.visitDictionary = getDictionary;
+GetVisitor.prototype.visitInterval = getInterval;
+GetVisitor.prototype.visitIntervalDayTime = getIntervalDayTime;
+GetVisitor.prototype.visitIntervalYearMonth = getIntervalYearMonth;
+GetVisitor.prototype.visitFixedSizeList = getFixedSizeList;
+GetVisitor.prototype.visitMap = getMap;
+// Module-level GetVisitor instance exported for callers of this module.
+/** @ignore */
+export const instance = new GetVisitor();
diff --git a/src/arrow/js/src/visitor/indexof.ts b/src/arrow/js/src/visitor/indexof.ts
new file mode 100644
index 000000000..ab4678aed
--- /dev/null
+++ b/src/arrow/js/src/visitor/indexof.ts
@@ -0,0 +1,183 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+import { Data } from '../data';
+import { Type } from '../enum';
+import { Visitor } from '../visitor';
+import { VectorType } from '../interfaces';
+import { getBool, BitIterator } from '../util/bit';
+import { createElementComparator } from '../util/vector';
+import {
+ DataType, Dictionary,
+ Bool, Null, Utf8, Binary, Decimal, FixedSizeBinary, List, FixedSizeList, Map_, Struct,
+ Float, Float16, Float32, Float64,
+ Int, Uint8, Uint16, Uint32, Uint64, Int8, Int16, Int32, Int64,
+ Date_, DateDay, DateMillisecond,
+ Interval, IntervalDayTime, IntervalYearMonth,
+ Time, TimeSecond, TimeMillisecond, TimeMicrosecond, TimeNanosecond,
+ Timestamp, TimestampSecond, TimestampMillisecond, TimestampMicrosecond, TimestampNanosecond,
+ Union, DenseUnion, SparseUnion,
+} from '../type';
+/**
+ * Visitor that searches a vector for a value and reports where it was found.
+ *
+ * Every `visit*` method declared here takes the vector to search, the value
+ * to look for (or `null`) and an optional starting index, and returns a
+ * `number`. The interface merges with the `IndexOfVisitor` class declared
+ * right after it; the method implementations are assigned onto the class
+ * prototype further down in this file.
+ * @ignore
+ */
+export interface IndexOfVisitor extends Visitor {
+ visit<T extends VectorType> (node: T, value: T['TValue'] | null, index?: number): number;
+ visitMany <T extends VectorType> (nodes: T[], values: (T['TValue'] | null)[], indices: (number | undefined)[]): number[];
+ getVisitFn<T extends Type> (node: T): (vector: VectorType<T>, value: VectorType<T>['TValue'] | null, index?: number) => number;
+ getVisitFn<T extends DataType>(node: VectorType<T> | Data<T> | T): (vector: VectorType<T>, value: T['TValue'] | null, index?: number) => number;
+ visitNull <T extends Null> (vector: VectorType<T>, value: T['TValue'] | null, index?: number): number;
+ visitBool <T extends Bool> (vector: VectorType<T>, value: T['TValue'] | null, index?: number): number;
+ visitInt <T extends Int> (vector: VectorType<T>, value: T['TValue'] | null, index?: number): number;
+ visitInt8 <T extends Int8> (vector: VectorType<T>, value: T['TValue'] | null, index?: number): number;
+ visitInt16 <T extends Int16> (vector: VectorType<T>, value: T['TValue'] | null, index?: number): number;
+ visitInt32 <T extends Int32> (vector: VectorType<T>, value: T['TValue'] | null, index?: number): number;
+ visitInt64 <T extends Int64> (vector: VectorType<T>, value: T['TValue'] | null, index?: number): number;
+ visitUint8 <T extends Uint8> (vector: VectorType<T>, value: T['TValue'] | null, index?: number): number;
+ visitUint16 <T extends Uint16> (vector: VectorType<T>, value: T['TValue'] | null, index?: number): number;
+ visitUint32 <T extends Uint32> (vector: VectorType<T>, value: T['TValue'] | null, index?: number): number;
+ visitUint64 <T extends Uint64> (vector: VectorType<T>, value: T['TValue'] | null, index?: number): number;
+ visitFloat <T extends Float> (vector: VectorType<T>, value: T['TValue'] | null, index?: number): number;
+ visitFloat16 <T extends Float16> (vector: VectorType<T>, value: T['TValue'] | null, index?: number): number;
+ visitFloat32 <T extends Float32> (vector: VectorType<T>, value: T['TValue'] | null, index?: number): number;
+ visitFloat64 <T extends Float64> (vector: VectorType<T>, value: T['TValue'] | null, index?: number): number;
+ visitUtf8 <T extends Utf8> (vector: VectorType<T>, value: T['TValue'] | null, index?: number): number;
+ visitBinary <T extends Binary> (vector: VectorType<T>, value: T['TValue'] | null, index?: number): number;
+ visitFixedSizeBinary <T extends FixedSizeBinary> (vector: VectorType<T>, value: T['TValue'] | null, index?: number): number;
+ visitDate <T extends Date_> (vector: VectorType<T>, value: T['TValue'] | null, index?: number): number;
+ visitDateDay <T extends DateDay> (vector: VectorType<T>, value: T['TValue'] | null, index?: number): number;
+ visitDateMillisecond <T extends DateMillisecond> (vector: VectorType<T>, value: T['TValue'] | null, index?: number): number;
+ visitTimestamp <T extends Timestamp> (vector: VectorType<T>, value: T['TValue'] | null, index?: number): number;
+ visitTimestampSecond <T extends TimestampSecond> (vector: VectorType<T>, value: T['TValue'] | null, index?: number): number;
+ visitTimestampMillisecond <T extends TimestampMillisecond>(vector: VectorType<T>, value: T['TValue'] | null, index?: number): number;
+ visitTimestampMicrosecond <T extends TimestampMicrosecond>(vector: VectorType<T>, value: T['TValue'] | null, index?: number): number;
+ visitTimestampNanosecond <T extends TimestampNanosecond> (vector: VectorType<T>, value: T['TValue'] | null, index?: number): number;
+ visitTime <T extends Time> (vector: VectorType<T>, value: T['TValue'] | null, index?: number): number;
+ visitTimeSecond <T extends TimeSecond> (vector: VectorType<T>, value: T['TValue'] | null, index?: number): number;
+ visitTimeMillisecond <T extends TimeMillisecond> (vector: VectorType<T>, value: T['TValue'] | null, index?: number): number;
+ visitTimeMicrosecond <T extends TimeMicrosecond> (vector: VectorType<T>, value: T['TValue'] | null, index?: number): number;
+ visitTimeNanosecond <T extends TimeNanosecond> (vector: VectorType<T>, value: T['TValue'] | null, index?: number): number;
+ visitDecimal <T extends Decimal> (vector: VectorType<T>, value: T['TValue'] | null, index?: number): number;
+ visitList <T extends List> (vector: VectorType<T>, value: T['TValue'] | null, index?: number): number;
+ visitStruct <T extends Struct> (vector: VectorType<T>, value: T['TValue'] | null, index?: number): number;
+ visitUnion <T extends Union> (vector: VectorType<T>, value: T['TValue'] | null, index?: number): number;
+ visitDenseUnion <T extends DenseUnion> (vector: VectorType<T>, value: T['TValue'] | null, index?: number): number;
+ visitSparseUnion <T extends SparseUnion> (vector: VectorType<T>, value: T['TValue'] | null, index?: number): number;
+ visitDictionary <T extends Dictionary> (vector: VectorType<T>, value: T['TValue'] | null, index?: number): number;
+ visitInterval <T extends Interval> (vector: VectorType<T>, value: T['TValue'] | null, index?: number): number;
+ visitIntervalDayTime <T extends IntervalDayTime> (vector: VectorType<T>, value: T['TValue'] | null, index?: number): number;
+ visitIntervalYearMonth <T extends IntervalYearMonth> (vector: VectorType<T>, value: T['TValue'] | null, index?: number): number;
+ visitFixedSizeList <T extends FixedSizeList> (vector: VectorType<T>, value: T['TValue'] | null, index?: number): number;
+ visitMap <T extends Map_> (vector: VectorType<T>, value: T['TValue'] | null, index?: number): number;
+/**
+ * Concrete class half of the `IndexOfVisitor` declaration; its body is empty
+ * because the `visit*` implementations are attached to the prototype below.
+ * @ignore
+ */
+export class IndexOfVisitor extends Visitor {}
+/**
+ * Index lookup for Null vectors.
+ *
+ * Only two parameters are declared, so any starting index supplied by the
+ * caller is not taken into account here.
+ *
+ * @param vector The Null vector being searched.
+ * @param searchElement The value being looked for.
+ * @returns `0` when `searchElement` is strictly `null` and the vector has at
+ * least one element, otherwise `-1`.
+ * @ignore
+ */
+function nullIndexOf(vector: VectorType<Null>, searchElement?: null) {
+ // if you're looking for nulls and the vector isn't empty, we've got 'em!
+ return searchElement === null && vector.length > 0 ? 0 : -1;
+/**
+ * Finds the first null slot of `vector` at or after `fromIndex` by scanning
+ * the validity bitmap stored on `vector.data`.
+ *
+ * The returned position is counted from the start of the vector (not from
+ * `fromIndex`), matching what `indexOfValue` returns for non-null matches,
+ * and only the `vector.length - fromIndex` remaining slots are scanned so
+ * bits past the end of the vector are never reported.
+ *
+ * @param vector The vector to search.
+ * @param fromIndex Optional slot to start from; omitted or negative values start at 0.
+ * @returns The index of the first null slot found, or -1 when there is no
+ * bitmap, no nulls, or no null at or after `fromIndex`.
+ * @ignore
+ */
+function indexOfNull<T extends DataType>(vector: VectorType<T>, fromIndex?: number): number {
+ // NOTE(review): `vector.data` and `offset` restore tokens that were missing
+ // from this copy of the source lines -- confirm against the original file.
+ const { nullBitmap, offset } = vector.data;
+ if (!nullBitmap || vector.nullCount <= 0) {
+ return -1;
+ }
+ const start = Math.max(fromIndex || 0, 0);
+ if (start >= vector.length) {
+ return -1;
+ }
+ // BitIterator is called as (bytes, firstBit, bitCount, context, getter):
+ // begin at the data's own bit offset plus `start`, stop at the last slot.
+ let i = start;
+ for (const isValid of new BitIterator(nullBitmap, offset + start, vector.length - start, nullBitmap, getBool)) {
+ if (!isValid) { return i; }
+ ++i;
+ }
+ return -1;
+}
+/**
+ * Generic linear search, assigned below as the `visit*` implementation for
+ * most vector types.
+ *
+ * @param vector The vector to search.
+ * @param searchElement The value to look for; `undefined` never matches and
+ * `null` is delegated to `indexOfNull`.
+ * @param fromIndex Optional index at which the search starts (defaults to 0).
+ * @returns The index of the first element accepted by the comparator built
+ * from `searchElement`, or `-1` when nothing matches.
+ * @ignore
+ */
+function indexOfValue<T extends DataType>(vector: VectorType<T>, searchElement?: T['TValue'] | null, fromIndex?: number): number {
+ if (searchElement === undefined) { return -1; } // nothing to look for
+ if (searchElement === null) { return indexOfNull(vector, fromIndex); } // nulls are located through indexOfNull
+ const compare = createElementComparator(searchElement);
+ for (let i = (fromIndex || 0) - 1, n = vector.length; ++i < n;) { // pre-increment: the first index tested is `fromIndex || 0`
+ if (compare(vector.get(i))) {
+ return i;
+ }
+ }
+ return -1;
+/**
+ * Linear search used for dense and sparse Union vectors.
+ *
+ * Unlike `indexOfValue`, this function has no special cases for `undefined`
+ * or `null`: every search, including a search for `null`, goes through the
+ * element comparator and `vector.get(i)`.
+ *
+ * @param vector The Union vector to search.
+ * @param searchElement The value to look for.
+ * @param fromIndex Optional index at which the search starts (defaults to 0).
+ * @returns The index of the first matching element, or `-1` when nothing matches.
+ * @ignore
+ */
+function indexOfUnion<T extends DataType>(vector: VectorType<T>, searchElement?: T['TValue'] | null, fromIndex?: number): number {
+ // Unions are special -- they do have a nullBitmap, but so can their children.
+ // If the searchElement is null, we don't know whether it came from the Union's
+ // bitmap or one of its childrens'. So we don't interrogate the Union's bitmap,
+ // since that will report the wrong index if a child has a null before the Union.
+ const compare = createElementComparator(searchElement);
+ for (let i = (fromIndex || 0) - 1, n = vector.length; ++i < n;) { // pre-increment: the first index tested is `fromIndex || 0`
+ if (compare(vector.get(i))) {
+ return i;
+ }
+ }
+ return -1;
+IndexOfVisitor.prototype.visitNull = nullIndexOf; // Null vectors have their own lookup; the assignments below attach the rest
+IndexOfVisitor.prototype.visitBool = indexOfValue;
+IndexOfVisitor.prototype.visitInt = indexOfValue;
+IndexOfVisitor.prototype.visitInt8 = indexOfValue;
+IndexOfVisitor.prototype.visitInt16 = indexOfValue;
+IndexOfVisitor.prototype.visitInt32 = indexOfValue;
+IndexOfVisitor.prototype.visitInt64 = indexOfValue;
+IndexOfVisitor.prototype.visitUint8 = indexOfValue;
+IndexOfVisitor.prototype.visitUint16 = indexOfValue;
+IndexOfVisitor.prototype.visitUint32 = indexOfValue;
+IndexOfVisitor.prototype.visitUint64 = indexOfValue;
+IndexOfVisitor.prototype.visitFloat = indexOfValue;
+IndexOfVisitor.prototype.visitFloat16 = indexOfValue;
+IndexOfVisitor.prototype.visitFloat32 = indexOfValue;
+IndexOfVisitor.prototype.visitFloat64 = indexOfValue;
+IndexOfVisitor.prototype.visitUtf8 = indexOfValue;
+IndexOfVisitor.prototype.visitBinary = indexOfValue;
+IndexOfVisitor.prototype.visitFixedSizeBinary = indexOfValue;
+IndexOfVisitor.prototype.visitDate = indexOfValue;
+IndexOfVisitor.prototype.visitDateDay = indexOfValue;
+IndexOfVisitor.prototype.visitDateMillisecond = indexOfValue;
+IndexOfVisitor.prototype.visitTimestamp = indexOfValue;
+IndexOfVisitor.prototype.visitTimestampSecond = indexOfValue;
+IndexOfVisitor.prototype.visitTimestampMillisecond = indexOfValue;
+IndexOfVisitor.prototype.visitTimestampMicrosecond = indexOfValue;
+IndexOfVisitor.prototype.visitTimestampNanosecond = indexOfValue;
+IndexOfVisitor.prototype.visitTime = indexOfValue;
+IndexOfVisitor.prototype.visitTimeSecond = indexOfValue;
+IndexOfVisitor.prototype.visitTimeMillisecond = indexOfValue;
+IndexOfVisitor.prototype.visitTimeMicrosecond = indexOfValue;
+IndexOfVisitor.prototype.visitTimeNanosecond = indexOfValue;
+IndexOfVisitor.prototype.visitDecimal = indexOfValue;
+IndexOfVisitor.prototype.visitList = indexOfValue;
+IndexOfVisitor.prototype.visitStruct = indexOfValue;
+IndexOfVisitor.prototype.visitUnion = indexOfValue; // NOTE(review): generic Union uses indexOfValue while Dense/Sparse use indexOfUnion -- confirm this is intended
+IndexOfVisitor.prototype.visitDenseUnion = indexOfUnion; // unions skip the null-bitmap shortcut (see indexOfUnion)
+IndexOfVisitor.prototype.visitSparseUnion = indexOfUnion;
+IndexOfVisitor.prototype.visitDictionary = indexOfValue;
+IndexOfVisitor.prototype.visitInterval = indexOfValue;
+IndexOfVisitor.prototype.visitIntervalDayTime = indexOfValue;
+IndexOfVisitor.prototype.visitIntervalYearMonth = indexOfValue;
+IndexOfVisitor.prototype.visitFixedSizeList = indexOfValue;
+IndexOfVisitor.prototype.visitMap = indexOfValue;
+/**
+ * Module-level `IndexOfVisitor` instance exported from this file.
+ * @ignore
+ */
+export const instance = new IndexOfVisitor();
diff --git a/src/arrow/js/src/visitor/iterator.ts b/src/arrow/js/src/visitor/iterator.ts
new file mode 100644
index 000000000..4a8e6b5b6
--- /dev/null
+++ b/src/arrow/js/src/visitor/iterator.ts
@@ -0,0 +1,193 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+import { Data } from '../data';
+import { Type } from '../enum';
+import { Visitor } from '../visitor';
+import { VectorType } from '../interfaces';
+import { BitIterator } from '../util/bit';
+import { instance as getVisitor } from './get';
+import {
+ DataType, Dictionary,
+ Bool, Null, Utf8, Binary, Decimal, FixedSizeBinary, List, FixedSizeList, Map_, Struct,
+ Float, Float16, Float32, Float64,
+ Int, Uint8, Uint16, Uint32, Uint64, Int8, Int16, Int32, Int64,
+ Date_, DateDay, DateMillisecond,
+ Interval, IntervalDayTime, IntervalYearMonth,
+ Time, TimeSecond, TimeMillisecond, TimeMicrosecond, TimeNanosecond,
+ Timestamp, TimestampSecond, TimestampMillisecond, TimestampMicrosecond, TimestampNanosecond,
+ Union, DenseUnion, SparseUnion,
+} from '../type';
+/**
+ * Visitor that produces an iterator over the elements of a vector.
+ *
+ * Every `visit*` method declared here takes a vector and returns an
+ * `IterableIterator` whose items are the vector's value type or `null`.
+ * The interface merges with the `IteratorVisitor` class declared right after
+ * it; the method implementations are assigned onto the class prototype
+ * further down in this file.
+ * @ignore
+ */
+export interface IteratorVisitor extends Visitor {
+ visit<T extends VectorType>(node: T): IterableIterator<T['TValue'] | null>;
+ visitMany <T extends VectorType>(nodes: T[]): IterableIterator<T['TValue'] | null>[];
+ getVisitFn<T extends Type>(node: T): (vector: VectorType<T>) => IterableIterator<VectorType<T>['TValue'] | null>;
+ getVisitFn<T extends DataType>(node: VectorType<T> | Data<T> | T): (vector: VectorType<T>) => IterableIterator<VectorType<T>['TValue'] | null>;
+ visitNull <T extends Null> (vector: VectorType<T>): IterableIterator<T['TValue'] | null>;
+ visitBool <T extends Bool> (vector: VectorType<T>): IterableIterator<T['TValue'] | null>;
+ visitInt <T extends Int> (vector: VectorType<T>): IterableIterator<T['TValue'] | null>;
+ visitInt8 <T extends Int8> (vector: VectorType<T>): IterableIterator<T['TValue'] | null>;
+ visitInt16 <T extends Int16> (vector: VectorType<T>): IterableIterator<T['TValue'] | null>;
+ visitInt32 <T extends Int32> (vector: VectorType<T>): IterableIterator<T['TValue'] | null>;
+ visitInt64 <T extends Int64> (vector: VectorType<T>): IterableIterator<T['TValue'] | null>;
+ visitUint8 <T extends Uint8> (vector: VectorType<T>): IterableIterator<T['TValue'] | null>;
+ visitUint16 <T extends Uint16> (vector: VectorType<T>): IterableIterator<T['TValue'] | null>;
+ visitUint32 <T extends Uint32> (vector: VectorType<T>): IterableIterator<T['TValue'] | null>;
+ visitUint64 <T extends Uint64> (vector: VectorType<T>): IterableIterator<T['TValue'] | null>;
+ visitFloat <T extends Float> (vector: VectorType<T>): IterableIterator<T['TValue'] | null>;
+ visitFloat16 <T extends Float16> (vector: VectorType<T>): IterableIterator<T['TValue'] | null>;
+ visitFloat32 <T extends Float32> (vector: VectorType<T>): IterableIterator<T['TValue'] | null>;
+ visitFloat64 <T extends Float64> (vector: VectorType<T>): IterableIterator<T['TValue'] | null>;
+ visitUtf8 <T extends Utf8> (vector: VectorType<T>): IterableIterator<T['TValue'] | null>;
+ visitBinary <T extends Binary> (vector: VectorType<T>): IterableIterator<T['TValue'] | null>;
+ visitFixedSizeBinary <T extends FixedSizeBinary> (vector: VectorType<T>): IterableIterator<T['TValue'] | null>;
+ visitDate <T extends Date_> (vector: VectorType<T>): IterableIterator<T['TValue'] | null>;
+ visitDateDay <T extends DateDay> (vector: VectorType<T>): IterableIterator<T['TValue'] | null>;
+ visitDateMillisecond <T extends DateMillisecond> (vector: VectorType<T>): IterableIterator<T['TValue'] | null>;
+ visitTimestamp <T extends Timestamp> (vector: VectorType<T>): IterableIterator<T['TValue'] | null>;
+ visitTimestampSecond <T extends TimestampSecond> (vector: VectorType<T>): IterableIterator<T['TValue'] | null>;
+ visitTimestampMillisecond <T extends TimestampMillisecond> (vector: VectorType<T>): IterableIterator<T['TValue'] | null>;
+ visitTimestampMicrosecond <T extends TimestampMicrosecond> (vector: VectorType<T>): IterableIterator<T['TValue'] | null>;
+ visitTimestampNanosecond <T extends TimestampNanosecond> (vector: VectorType<T>): IterableIterator<T['TValue'] | null>;
+ visitTime <T extends Time> (vector: VectorType<T>): IterableIterator<T['TValue'] | null>;
+ visitTimeSecond <T extends TimeSecond> (vector: VectorType<T>): IterableIterator<T['TValue'] | null>;
+ visitTimeMillisecond <T extends TimeMillisecond> (vector: VectorType<T>): IterableIterator<T['TValue'] | null>;
+ visitTimeMicrosecond <T extends TimeMicrosecond> (vector: VectorType<T>): IterableIterator<T['TValue'] | null>;
+ visitTimeNanosecond <T extends TimeNanosecond> (vector: VectorType<T>): IterableIterator<T['TValue'] | null>;
+ visitDecimal <T extends Decimal> (vector: VectorType<T>): IterableIterator<T['TValue'] | null>;
+ visitList <T extends List> (vector: VectorType<T>): IterableIterator<T['TValue'] | null>;
+ visitStruct <T extends Struct> (vector: VectorType<T>): IterableIterator<T['TValue'] | null>;
+ visitUnion <T extends Union> (vector: VectorType<T>): IterableIterator<T['TValue'] | null>;
+ visitDenseUnion <T extends DenseUnion> (vector: VectorType<T>): IterableIterator<T['TValue'] | null>;
+ visitSparseUnion <T extends SparseUnion> (vector: VectorType<T>): IterableIterator<T['TValue'] | null>;
+ visitDictionary <T extends Dictionary> (vector: VectorType<T>): IterableIterator<T['TValue'] | null>;
+ visitInterval <T extends Interval> (vector: VectorType<T>): IterableIterator<T['TValue'] | null>;
+ visitIntervalDayTime <T extends IntervalDayTime> (vector: VectorType<T>): IterableIterator<T['TValue'] | null>;
+ visitIntervalYearMonth <T extends IntervalYearMonth> (vector: VectorType<T>): IterableIterator<T['TValue'] | null>;
+ visitFixedSizeList <T extends FixedSizeList> (vector: VectorType<T>): IterableIterator<T['TValue'] | null>;
+ visitMap <T extends Map_> (vector: VectorType<T>): IterableIterator<T['TValue'] | null>;
+/**
+ * Concrete class half of the `IteratorVisitor` declaration; its body is empty
+ * because the `visit*` implementations are attached to the prototype below.
+ * @ignore
+ */
+export class IteratorVisitor extends Visitor {}
+/**
+ * Builds an iterator for a vector that contains nulls.
+ *
+ * A `BitIterator` is constructed with the vector as its context and a
+ * callback that checks one validity bit per element: when bit `nullBit` of
+ * `nullByte` is set the element is read through the get-visitor function for
+ * this vector, otherwise `null` is produced.
+ *
+ * @param vector The vector to iterate.
+ * @returns An iterator yielding each element's value, or `null` for null slots.
+ * @ignore
+ */
+function nullableIterator<T extends DataType>(vector: VectorType<T>): IterableIterator<T['TValue'] | null> {
+ const getFn = getVisitor.getVisitFn(vector);
+ return new BitIterator<T['TValue'] | null>(
+,, vector.length, vector, // NOTE(review): the first two arguments are missing from this line in this copy -- restore from the original source
+ (vec: VectorType<T>, idx: number, nullByte: number, nullBit: number) =>
+ ((nullByte & 1 << nullBit) !== 0) ? getFn(vec, idx) : null // `<<` binds tighter than `&`: tests bit `nullBit` of `nullByte`
+ );
+/**
+ * Manual iterator over a vector: walks indices `0 .. vector.length - 1` and
+ * reads each element through the supplied `getFn`.
+ *
+ * The instance is its own iterable (`[Symbol.iterator]` returns `this`), so
+ * it can only be consumed once.
+ * @ignore
+ */
+class VectorIterator<T extends DataType> implements IterableIterator<T['TValue'] | null> {
+ private index = 0; // position of the next element to hand out
+ constructor(
+ private vector: VectorType<T>,
+ private getFn: (vector: VectorType<T>, index: number) => VectorType<T>['TValue']
+ ) {}
+ next(): IteratorResult<T['TValue'] | null> {
+ if (this.index < this.vector.length) {
+ return {
+ value: this.getFn(this.vector, this.index++) // read the current slot, then advance
+ };
+ }
+ return {done: true, value: null}; // exhausted
+ }
+ [Symbol.iterator]() {
+ return this;
+ }
+/**
+ * Chooses an iteration strategy for `vector`; assigned below as the
+ * implementation of every `IteratorVisitor` `visit*` method.
+ *
+ * - Vectors with `nullCount > 0` are handled by `nullableIterator`.
+ * - Vectors with `stride === 1` whose type is Timestamp, a non-64-bit Int,
+ *   a non-64-bit Time, or a Float with `precision > 0` take the fast path
+ *   and return the iterator of an array-like expression.
+ * - Everything else is walked by a `VectorIterator` using the get-visitor
+ *   function for the vector.
+ *
+ * @param vector The vector to iterate.
+ * @returns An iterator over the vector's values (`null` for null slots).
+ * @ignore
+ */
+function vectorIterator<T extends DataType>(vector: VectorType<T>): IterableIterator<T['TValue'] | null> {
+ // If nullable, iterate manually
+ if (vector.nullCount > 0) {
+ return nullableIterator<T>(vector);
+ }
+ const { type, typeId, length } = vector;
+ // Fast case, defer to native iterators if possible
+ if (vector.stride === 1 && (
+ (typeId === Type.Timestamp) ||
+ (typeId === Type.Int && (type as Int).bitWidth !== 64) ||
+ (typeId === Type.Time && (type as Time).bitWidth !== 64) ||
+ (typeId === Type.Float && (type as Float).precision > 0 /* Precision.HALF */)
+ )) {
+ return, length)[Symbol.iterator](); // NOTE(review): the expression before `, length)` is missing from this copy -- restore from the original source
+ }
+ // Otherwise, iterate manually
+ return new VectorIterator(vector, getVisitor.getVisitFn(vector));
+IteratorVisitor.prototype.visitNull = vectorIterator; // every visit* method shares the same implementation
+IteratorVisitor.prototype.visitBool = vectorIterator;
+IteratorVisitor.prototype.visitInt = vectorIterator;
+IteratorVisitor.prototype.visitInt8 = vectorIterator;
+IteratorVisitor.prototype.visitInt16 = vectorIterator;
+IteratorVisitor.prototype.visitInt32 = vectorIterator;
+IteratorVisitor.prototype.visitInt64 = vectorIterator;
+IteratorVisitor.prototype.visitUint8 = vectorIterator;
+IteratorVisitor.prototype.visitUint16 = vectorIterator;
+IteratorVisitor.prototype.visitUint32 = vectorIterator;
+IteratorVisitor.prototype.visitUint64 = vectorIterator;
+IteratorVisitor.prototype.visitFloat = vectorIterator;
+IteratorVisitor.prototype.visitFloat16 = vectorIterator;
+IteratorVisitor.prototype.visitFloat32 = vectorIterator;
+IteratorVisitor.prototype.visitFloat64 = vectorIterator;
+IteratorVisitor.prototype.visitUtf8 = vectorIterator;
+IteratorVisitor.prototype.visitBinary = vectorIterator;
+IteratorVisitor.prototype.visitFixedSizeBinary = vectorIterator;
+IteratorVisitor.prototype.visitDate = vectorIterator;
+IteratorVisitor.prototype.visitDateDay = vectorIterator;
+IteratorVisitor.prototype.visitDateMillisecond = vectorIterator;
+IteratorVisitor.prototype.visitTimestamp = vectorIterator;
+IteratorVisitor.prototype.visitTimestampSecond = vectorIterator;
+IteratorVisitor.prototype.visitTimestampMillisecond = vectorIterator;
+IteratorVisitor.prototype.visitTimestampMicrosecond = vectorIterator;
+IteratorVisitor.prototype.visitTimestampNanosecond = vectorIterator;
+IteratorVisitor.prototype.visitTime = vectorIterator;
+IteratorVisitor.prototype.visitTimeSecond = vectorIterator;
+IteratorVisitor.prototype.visitTimeMillisecond = vectorIterator;
+IteratorVisitor.prototype.visitTimeMicrosecond = vectorIterator;
+IteratorVisitor.prototype.visitTimeNanosecond = vectorIterator;
+IteratorVisitor.prototype.visitDecimal = vectorIterator;
+IteratorVisitor.prototype.visitList = vectorIterator;
+IteratorVisitor.prototype.visitStruct = vectorIterator;
+IteratorVisitor.prototype.visitUnion = vectorIterator;
+IteratorVisitor.prototype.visitDenseUnion = vectorIterator;
+IteratorVisitor.prototype.visitSparseUnion = vectorIterator;
+IteratorVisitor.prototype.visitDictionary = vectorIterator;
+IteratorVisitor.prototype.visitInterval = vectorIterator;
+IteratorVisitor.prototype.visitIntervalDayTime = vectorIterator;
+IteratorVisitor.prototype.visitIntervalYearMonth = vectorIterator;
+IteratorVisitor.prototype.visitFixedSizeList = vectorIterator;
+IteratorVisitor.prototype.visitMap = vectorIterator;
+/**
+ * Module-level `IteratorVisitor` instance exported from this file.
+ * @ignore
+ */
+export const instance = new IteratorVisitor();
diff --git a/src/arrow/js/src/visitor/jsontypeassembler.ts b/src/arrow/js/src/visitor/jsontypeassembler.ts
new file mode 100644
index 000000000..54f046f64
--- /dev/null
+++ b/src/arrow/js/src/visitor/jsontypeassembler.ts
@@ -0,0 +1,91 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+import * as type from '../type';
+import { Visitor } from '../visitor';
+import { Type as ArrowType } from '../fb/Schema';
+import { Precision, DateUnit, TimeUnit, IntervalUnit, UnionMode } from '../enum';
+/**
+ * Visitor that turns a data type into a plain object describing it.
+ * This interface narrows the signature of `visit`; it merges with the
+ * `JSONTypeAssembler` class declared right after it.
+ * @ignore
+ */
+export interface JSONTypeAssembler extends Visitor {
+ visit<T extends type.DataType>(node: T): Record<string, unknown> | undefined;
+/**
+ * Builds the description object for each data type.
+ *
+ * Every `visit*` method returns an object whose `'name'` is the lower-cased
+ * `ArrowType` entry for the type's `typeId`, plus the type-specific fields
+ * visible in each method (bit width, unit, precision, and so on). Enum-valued
+ * fields are written as the result of indexing the enum with the value
+ * (for example `TimeUnit[unit]`).
+ * @ignore
+ */
+export class JSONTypeAssembler extends Visitor {
+ public visit<T extends type.DataType>(node: T): Record<string, unknown> | undefined {
+ return node == null ? undefined : super.visit(node); // null/undefined nodes yield undefined instead of being dispatched
+ }
+ public visitNull<T extends type.Null>({ typeId }: T) {
+ return { 'name': ArrowType[typeId].toLowerCase() };
+ }
+ public visitInt<T extends type.Int>({ typeId, bitWidth, isSigned }: T) {
+ return { 'name': ArrowType[typeId].toLowerCase(), 'bitWidth': bitWidth, 'isSigned': isSigned };
+ }
+ public visitFloat<T extends type.Float>({ typeId, precision }: T) {
+ return { 'name': ArrowType[typeId].toLowerCase(), 'precision': Precision[precision] };
+ }
+ public visitBinary<T extends type.Binary>({ typeId }: T) {
+ return { 'name': ArrowType[typeId].toLowerCase() };
+ }
+ public visitBool<T extends type.Bool>({ typeId }: T) {
+ return { 'name': ArrowType[typeId].toLowerCase() };
+ }
+ public visitUtf8<T extends type.Utf8>({ typeId }: T) {
+ return { 'name': ArrowType[typeId].toLowerCase() };
+ }
+ public visitDecimal<T extends type.Decimal>({ typeId, scale, precision }: T) {
+ return { 'name': ArrowType[typeId].toLowerCase(), 'scale': scale, 'precision': precision };
+ }
+ public visitDate<T extends type.Date_>({ typeId, unit }: T) {
+ return { 'name': ArrowType[typeId].toLowerCase(), 'unit': DateUnit[unit] };
+ }
+ public visitTime<T extends type.Time>({ typeId, unit, bitWidth }: T) {
+ return { 'name': ArrowType[typeId].toLowerCase(), 'unit': TimeUnit[unit], bitWidth };
+ }
+ public visitTimestamp<T extends type.Timestamp>({ typeId, timezone, unit }: T) {
+ return { 'name': ArrowType[typeId].toLowerCase(), 'unit': TimeUnit[unit], timezone };
+ }
+ public visitInterval<T extends type.Interval>({ typeId, unit }: T) {
+ return { 'name': ArrowType[typeId].toLowerCase(), 'unit': IntervalUnit[unit] };
+ }
+ public visitList<T extends type.List>({ typeId }: T) {
+ return { 'name': ArrowType[typeId].toLowerCase() };
+ }
+ public visitStruct<T extends type.Struct>({ typeId }: T) {
+ return { 'name': ArrowType[typeId].toLowerCase() };
+ }
+ public visitUnion<T extends type.Union>({ typeId, mode, typeIds }: T) {
+ return {
+ 'name': ArrowType[typeId].toLowerCase(),
+ 'mode': UnionMode[mode],
+ 'typeIds': [...typeIds] // copied into a plain array
+ };
+ }
+ public visitDictionary<T extends type.Dictionary>(node: T) {
+ return this.visit(node.dictionary); // a dictionary type is described by the type held in `node.dictionary`
+ }
+ public visitFixedSizeBinary<T extends type.FixedSizeBinary>({ typeId, byteWidth }: T) {
+ return { 'name': ArrowType[typeId].toLowerCase(), 'byteWidth': byteWidth };
+ }
+ public visitFixedSizeList<T extends type.FixedSizeList>({ typeId, listSize }: T) {
+ return { 'name': ArrowType[typeId].toLowerCase(), 'listSize': listSize };
+ }
+ public visitMap<T extends type.Map_>({ typeId, keysSorted }: T) {
+ return { 'name': ArrowType[typeId].toLowerCase(), 'keysSorted': keysSorted };
+ }
diff --git a/src/arrow/js/src/visitor/jsonvectorassembler.ts b/src/arrow/js/src/visitor/jsonvectorassembler.ts
new file mode 100644
index 000000000..f3c013344
--- /dev/null
+++ b/src/arrow/js/src/visitor/jsonvectorassembler.ts
@@ -0,0 +1,177 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+import { BN } from '../util/bn';
+import { Column } from '../column';
+import { Vector } from '../vector';
+import { Visitor } from '../visitor';
+import { BufferType } from '../enum';
+import { RecordBatch } from '../recordbatch';
+import { VectorType as V } from '../interfaces';
+import { UnionMode, DateUnit, TimeUnit } from '../enum';
+import { BitIterator, getBit, getBool } from '../util/bit';
+import { selectColumnChildrenArgs } from '../util/args';
+import {
+ DataType,
+ Float, Int, Date_, Interval, Time, Timestamp, Union,
+ Bool, Null, Utf8, Binary, Decimal, FixedSizeBinary, List, FixedSizeList, Map_, Struct,
+} from '../type';
+/**
+ * Visitor that converts columns into plain objects.
+ *
+ * `visit`/`visitMany` accept `Column`s and return record objects; the
+ * per-type `visit*` methods accept a vector and return only the
+ * type-specific part of the record (`DATA`, `OFFSET`, `TYPE`, `children`).
+ * The interface merges with the `JSONVectorAssembler` class declared right
+ * after it.
+ * @ignore
+ */
+export interface JSONVectorAssembler extends Visitor {
+ visit <T extends Column> (node: T ): Record<string, unknown>;
+ visitMany <T extends Column> (cols: T[]): Record<string, unknown>[];
+ getVisitFn<T extends DataType>(node: Column<T>): (column: Column<T>) => { name: string; count: number; VALIDITY: (0 | 1)[]; DATA?: any[]; OFFSET?: number[]; TYPE?: number[]; children?: any[] };
+ visitNull <T extends Null> (vector: V<T>): Record<string, never>;
+ visitBool <T extends Bool> (vector: V<T>): { DATA: boolean[] };
+ visitInt <T extends Int> (vector: V<T>): { DATA: (number | string)[] };
+ visitFloat <T extends Float> (vector: V<T>): { DATA: number[] };
+ visitUtf8 <T extends Utf8> (vector: V<T>): { DATA: string[]; OFFSET: number[] };
+ visitBinary <T extends Binary> (vector: V<T>): { DATA: string[]; OFFSET: number[] };
+ visitFixedSizeBinary <T extends FixedSizeBinary> (vector: V<T>): { DATA: string[] };
+ visitDate <T extends Date_> (vector: V<T>): { DATA: number[] };
+ visitTimestamp <T extends Timestamp> (vector: V<T>): { DATA: string[] };
+ visitTime <T extends Time> (vector: V<T>): { DATA: number[] };
+ visitDecimal <T extends Decimal> (vector: V<T>): { DATA: string[] };
+ visitList <T extends List> (vector: V<T>): { children: any[]; OFFSET: number[] };
+ visitStruct <T extends Struct> (vector: V<T>): { children: any[] };
+ visitUnion <T extends Union> (vector: V<T>): { children: any[]; TYPE: number[] };
+ visitInterval <T extends Interval> (vector: V<T>): { DATA: number[] };
+ visitFixedSizeList <T extends FixedSizeList> (vector: V<T>): { children: any[] };
+ visitMap <T extends Map_> (vector: V<T>): { children: any[] };
+/**
+ * Builds, for each column, an object with `name`, `count`, `VALIDITY` and
+ * the type-specific fields produced by the matching `visit*` method.
+ *
+ * Several lines of this class are truncated in this copy of the source;
+ * they are flagged inline and must be restored from the original file.
+ * @ignore
+ */
+export class JSONVectorAssembler extends Visitor {
+ /** @nocollapse */
+ public static assemble<T extends Column | RecordBatch>(...args: (T | T[])[]) {
+ return new JSONVectorAssembler().visitMany(selectColumnChildrenArgs(RecordBatch, args)); // presumably expands RecordBatch arguments into their columns -- confirm in util/args
+ }
+ public visit<T extends Column>(column: T) {
+ const { data, name, length } = column;
+ const { offset, nullCount, nullBitmap } = data;
+ const type = DataType.isDictionary(column.type) ? column.type.indices : column.type; // dictionary columns are handled as their indices type
+ const buffers = Object.assign([], data.buffers, { [BufferType.VALIDITY]: undefined }); // copy of data.buffers with the VALIDITY slot set to undefined
+ return {
+ 'name': name,
+ 'count': length,
+ 'VALIDITY': DataType.isNull(type) ? undefined
+ : nullCount <= 0 ? Array.from({ length }, () => 1) // no nulls: every slot is reported as 1
+ : [ BitIterator(nullBitmap, offset, length, null, getBit)], // NOTE(review): looks truncated in this copy (likely a spread of `new BitIterator(...)`) -- confirm
+ ...super.visit(, offset, length, 0, buffers))) // NOTE(review): the start of the argument expression is missing from this copy -- restore from the original source
+ };
+ }
+ public visitNull() { return {}; }
+ public visitBool<T extends Bool>({ values, offset, length }: V<T>) {
+ return { 'DATA': [ BitIterator(values, offset, length, null, getBool)] }; // NOTE(review): looks truncated in this copy (likely a spread of `new BitIterator(...)`) -- confirm
+ }
+ public visitInt<T extends Int>(vector: V<T>) {
+ return {
+ 'DATA': vector.type.bitWidth < 64
+ ? [...vector.values] // narrower than 64 bits: raw values
+ : [...bigNumsToStrings(vector.values as (Int32Array | Uint32Array), 2)] // 64-bit: strings built from groups of 2 words
+ };
+ }
+ public visitFloat<T extends Float>(vector: V<T>) {
+ return { 'DATA': [...vector.values] };
+ }
+ public visitUtf8<T extends Utf8>(vector: V<T>) {
+ return { 'DATA': [...vector], 'OFFSET': [...vector.valueOffsets] };
+ }
+ public visitBinary<T extends Binary>(vector: V<T>) {
+ return { 'DATA': [...binaryToString(vector)], OFFSET: [...vector.valueOffsets] }; // values are emitted as upper-case hex strings
+ }
+ public visitFixedSizeBinary<T extends FixedSizeBinary>(vector: V<T>) {
+ return { 'DATA': [...binaryToString(vector)] };
+ }
+ public visitDate<T extends Date_>(vector: V<T>) {
+ return {
+ 'DATA': vector.type.unit === DateUnit.DAY
+ ? [...vector.values] // DAY unit: raw values
+ : [...bigNumsToStrings(vector.values, 2)] // other units: strings built from groups of 2 words
+ };
+ }
+ public visitTimestamp<T extends Timestamp>(vector: V<T>) {
+ return { 'DATA': [...bigNumsToStrings(vector.values, 2)] };
+ }
+ public visitTime<T extends Time>(vector: V<T>) {
+ return {
+ 'DATA': vector.type.unit < TimeUnit.MICROSECOND
+ ? [...vector.values] // units below MICROSECOND: raw values
+ : [...bigNumsToStrings(vector.values, 2)] // MICROSECOND and above: strings built from groups of 2 words
+ };
+ }
+ public visitDecimal<T extends Decimal>(vector: V<T>) {
+ return { 'DATA': [...bigNumsToStrings(vector.values, 4)] }; // decimals use groups of 4 words
+ }
+ public visitList<T extends List>(vector: V<T>) {
+ return {
+ 'OFFSET': [...vector.valueOffsets],
+ 'children':, i) => // NOTE(review): the mapped expression before `, i) =>` is missing from this copy -- restore from the original source
+ this.visit(new Column(f, [vector.getChildAt(i)!])))
+ };
+ }
+ public visitStruct<T extends Struct>(vector: V<T>) {
+ return {
+ 'children':, i) => // NOTE(review): truncated in this copy, see visitList
+ this.visit(new Column(f, [vector.getChildAt(i)!])))
+ };
+ }
+ public visitUnion<T extends Union>(vector: V<T>) {
+ return {
+ 'TYPE': [...vector.typeIds],
+ 'OFFSET': vector.type.mode === UnionMode.Dense ? [...vector.valueOffsets] : undefined, // offsets are emitted only for dense unions
+ 'children':, i) => this.visit(new Column(f, [vector.getChildAt(i)!]))) // NOTE(review): truncated in this copy, see visitList
+ };
+ }
+ public visitInterval<T extends Interval>(vector: V<T>) {
+ return { 'DATA': [...vector.values] };
+ }
+ public visitFixedSizeList<T extends FixedSizeList>(vector: V<T>) {
+ return {
+ 'children':, i) => // NOTE(review): truncated in this copy, see visitList
+ this.visit(new Column(f, [vector.getChildAt(i)!])))
+ };
+ }
+ public visitMap<T extends Map_>(vector: V<T>) {
+ return {
+ 'OFFSET': [...vector.valueOffsets],
+ 'children':, i) => // NOTE(review): truncated in this copy, see visitList
+ this.visit(new Column(f, [vector.getChildAt(i)!])))
+ };
+ }
+/**
+ * Generator that renders each element of a binary vector as a hex string.
+ *
+ * Every byte becomes exactly two hex digits (left-padded with `0`) and the
+ * resulting string is upper-cased; an empty element yields `''`.
+ *
+ * @param vector A Binary or FixedSizeBinary vector, iterated as `Uint8Array`s.
+ * @returns A generator of one hex string per element.
+ * @ignore
+ */
+function* binaryToString(vector: Vector<Binary> | Vector<FixedSizeBinary>) {
+ for (const octets of vector as Iterable<Uint8Array>) {
+ yield octets.reduce((str, byte) => {
+ return `${str}${('0' + (byte & 0xFF).toString(16)).slice(-2)}`; // pad to two hex digits per byte
+ }, '').toUpperCase();
+ }
+/**
+ * Generator that walks `values` in consecutive groups of `stride` words and
+ * yields one string per group (`values.length / stride` groups in total).
+ *
+ * @param values The 32-bit word array to read from.
+ * @param stride Number of words that make up one value (callers in this file pass 2 or 4).
+ * @returns A generator of one string per group.
+ * @ignore
+ */
+function* bigNumsToStrings(values: Uint32Array | Int32Array, stride: number) {
+ for (let i = -1, n = values.length / stride; ++i < n;) {
+ yield `${ + 0) * stride, (i + 1) * stride), false)}`; // NOTE(review): the start of this template expression is missing from this copy -- restore from the original source
+ }
diff --git a/src/arrow/js/src/visitor/set.ts b/src/arrow/js/src/visitor/set.ts
new file mode 100644
index 000000000..77985e5be
--- /dev/null
+++ b/src/arrow/js/src/visitor/set.ts
@@ -0,0 +1,354 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+import { Data } from '../data';
+import { Field } from '../schema';
+import { Vector } from '../vector';
+import { Visitor } from '../visitor';
+import { encodeUtf8 } from '../util/utf8';
+import { VectorType } from '../interfaces';
+import { float64ToUint16 } from '../util/math';
+import { toArrayBufferView } from '../util/buffer';
+import { Type, UnionMode, Precision, DateUnit, TimeUnit, IntervalUnit } from '../enum';
+import {
+ DataType, Dictionary,
+ Bool, Null, Utf8, Binary, Decimal, FixedSizeBinary, List, FixedSizeList, Map_, Struct,
+ Float, Float16, Float32, Float64,
+ Int, Uint8, Uint16, Uint32, Uint64, Int8, Int16, Int32, Int64,
+ Date_, DateDay, DateMillisecond,
+ Interval, IntervalDayTime, IntervalYearMonth,
+ Time, TimeSecond, TimeMillisecond, TimeMicrosecond, TimeNanosecond,
+ Timestamp, TimestampSecond, TimestampMillisecond, TimestampMicrosecond, TimestampNanosecond,
+ Union, DenseUnion, SparseUnion,
+} from '../type';
+/**
+ * Type-level declaration of the visitor used to write a single value into a
+ * vector slot. `visit`, `getVisitFn` and every per-type `visit*` handler are
+ * typed as taking (vector, index, value) and returning void.
+ * @ignore
+ */
+export interface SetVisitor extends Visitor {
+ visit<T extends VectorType>(node: T, index: number, value: T['TValue']): void;
+ visitMany<T extends VectorType>(nodes: T[], indices: number[], values: T['TValue'][]): void[];
+ getVisitFn<T extends Type>(node: T): (vector: VectorType<T>, index: number, value: VectorType<T>['TValue']) => void;
+ getVisitFn<T extends DataType>(node: VectorType<T> | Data<T> | T): (vector: VectorType<T>, index: number, value: VectorType<T>['TValue']) => void;
+ visitNull <T extends Null> (vector: VectorType<T>, index: number, value: T['TValue']): void;
+ visitBool <T extends Bool> (vector: VectorType<T>, index: number, value: T['TValue']): void;
+ visitInt <T extends Int> (vector: VectorType<T>, index: number, value: T['TValue']): void;
+ visitInt8 <T extends Int8> (vector: VectorType<T>, index: number, value: T['TValue']): void;
+ visitInt16 <T extends Int16> (vector: VectorType<T>, index: number, value: T['TValue']): void;
+ visitInt32 <T extends Int32> (vector: VectorType<T>, index: number, value: T['TValue']): void;
+ visitInt64 <T extends Int64> (vector: VectorType<T>, index: number, value: T['TValue']): void;
+ visitUint8 <T extends Uint8> (vector: VectorType<T>, index: number, value: T['TValue']): void;
+ visitUint16 <T extends Uint16> (vector: VectorType<T>, index: number, value: T['TValue']): void;
+ visitUint32 <T extends Uint32> (vector: VectorType<T>, index: number, value: T['TValue']): void;
+ visitUint64 <T extends Uint64> (vector: VectorType<T>, index: number, value: T['TValue']): void;
+ visitFloat <T extends Float> (vector: VectorType<T>, index: number, value: T['TValue']): void;
+ visitFloat16 <T extends Float16> (vector: VectorType<T>, index: number, value: T['TValue']): void;
+ visitFloat32 <T extends Float32> (vector: VectorType<T>, index: number, value: T['TValue']): void;
+ visitFloat64 <T extends Float64> (vector: VectorType<T>, index: number, value: T['TValue']): void;
+ visitUtf8 <T extends Utf8> (vector: VectorType<T>, index: number, value: T['TValue']): void;
+ visitBinary <T extends Binary> (vector: VectorType<T>, index: number, value: T['TValue']): void;
+ visitFixedSizeBinary <T extends FixedSizeBinary> (vector: VectorType<T>, index: number, value: T['TValue']): void;
+ visitDate <T extends Date_> (vector: VectorType<T>, index: number, value: T['TValue']): void;
+ visitDateDay <T extends DateDay> (vector: VectorType<T>, index: number, value: T['TValue']): void;
+ visitDateMillisecond <T extends DateMillisecond> (vector: VectorType<T>, index: number, value: T['TValue']): void;
+ visitTimestamp <T extends Timestamp> (vector: VectorType<T>, index: number, value: T['TValue']): void;
+ visitTimestampSecond <T extends TimestampSecond> (vector: VectorType<T>, index: number, value: T['TValue']): void;
+ visitTimestampMillisecond <T extends TimestampMillisecond>(vector: VectorType<T>, index: number, value: T['TValue']): void;
+ visitTimestampMicrosecond <T extends TimestampMicrosecond>(vector: VectorType<T>, index: number, value: T['TValue']): void;
+ visitTimestampNanosecond <T extends TimestampNanosecond> (vector: VectorType<T>, index: number, value: T['TValue']): void;
+ visitTime <T extends Time> (vector: VectorType<T>, index: number, value: T['TValue']): void;
+ visitTimeSecond <T extends TimeSecond> (vector: VectorType<T>, index: number, value: T['TValue']): void;
+ visitTimeMillisecond <T extends TimeMillisecond> (vector: VectorType<T>, index: number, value: T['TValue']): void;
+ visitTimeMicrosecond <T extends TimeMicrosecond> (vector: VectorType<T>, index: number, value: T['TValue']): void;
+ visitTimeNanosecond <T extends TimeNanosecond> (vector: VectorType<T>, index: number, value: T['TValue']): void;
+ visitDecimal <T extends Decimal> (vector: VectorType<T>, index: number, value: T['TValue']): void;
+ visitList <T extends List> (vector: VectorType<T>, index: number, value: T['TValue']): void;
+ visitStruct <T extends Struct> (vector: VectorType<T>, index: number, value: T['TValue']): void;
+ visitUnion <T extends Union> (vector: VectorType<T>, index: number, value: T['TValue']): void;
+ visitDenseUnion <T extends DenseUnion> (vector: VectorType<T>, index: number, value: T['TValue']): void;
+ visitSparseUnion <T extends SparseUnion> (vector: VectorType<T>, index: number, value: T['TValue']): void;
+ visitDictionary <T extends Dictionary> (vector: VectorType<T>, index: number, value: T['TValue']): void;
+ visitInterval <T extends Interval> (vector: VectorType<T>, index: number, value: T['TValue']): void;
+ visitIntervalDayTime <T extends IntervalDayTime> (vector: VectorType<T>, index: number, value: T['TValue']): void;
+ visitIntervalYearMonth <T extends IntervalYearMonth> (vector: VectorType<T>, index: number, value: T['TValue']): void;
+ visitFixedSizeList <T extends FixedSizeList> (vector: VectorType<T>, index: number, value: T['TValue']): void;
+ visitMap <T extends Map_> (vector: VectorType<T>, index: number, value: T['TValue']): void;
+/**
+ * Class counterpart of the SetVisitor interface. Its body is empty; the
+ * visit* handlers are assigned onto SetVisitor.prototype later in this file.
+ * @ignore
+ */
+export class SetVisitor extends Visitor {}
+/**
+ * Stores `epochMs` divided by 86400000 (milliseconds per day) at
+ * `data[index]`, coerced to a 32-bit integer with `| 0`.
+ * @ignore
+ */
+const setEpochMsToDays = (data: Int32Array, index: number, epochMs: number) => { data[index] = (epochMs / 86400000) | 0; };
+/**
+ * Writes `epochMs` as two 32-bit words: `data[index]` receives the value
+ * modulo 2^32 and `data[index + 1]` receives the value divided by 2^32,
+ * each coerced to a 32-bit integer with `| 0`.
+ * NOTE(review): `| 0` truncates toward zero, so for a negative `epochMs`
+ * the high word is not floored - confirm pre-epoch values round-trip.
+ * @ignore
+ */
+const setEpochMsToMillisecondsLong = (data: Int32Array, index: number, epochMs: number) => {
+ data[index] = (epochMs % 4294967296) | 0;
+ data[index + 1] = (epochMs / 4294967296) | 0;
+/**
+ * Multiplies `epochMs` by 1000 and writes the product as two 32-bit words:
+ * `data[index]` receives the product modulo 2^32 and `data[index + 1]` the
+ * product divided by 2^32, each coerced to a 32-bit integer with `| 0`.
+ * NOTE(review): `| 0` truncates toward zero, so negative inputs do not get
+ * a floored high word - confirm this is intended.
+ * @ignore
+ */
+const setEpochMsToMicrosecondsLong = (data: Int32Array, index: number, epochMs: number) => {
+ data[index] = ((epochMs * 1000) % 4294967296) | 0;
+ data[index + 1] = ((epochMs * 1000) / 4294967296) | 0;
+/**
+ * Multiplies `epochMs` by 1000000 and writes the product as two 32-bit
+ * words: `data[index]` receives the product modulo 2^32 and
+ * `data[index + 1]` the product divided by 2^32, each coerced with `| 0`.
+ * NOTE(review): the product is an ordinary JS number, so it can exceed the
+ * exactly-representable integer range for large inputs; `| 0` also
+ * truncates toward zero for negative inputs - confirm both are acceptable.
+ * @ignore
+ */
+const setEpochMsToNanosecondsLong = (data: Int32Array, index: number, epochMs: number) => {
+ data[index] = ((epochMs * 1000000) % 4294967296) | 0;
+ data[index + 1] = ((epochMs * 1000000) / 4294967296) | 0;
+/**
+ * Copies `value` into `values` starting at offset `valueOffsets[index]`.
+ * At most `valueOffsets[index + 1] - valueOffsets[index]` bytes of `value`
+ * are copied. Nothing is written when either offset is null or undefined.
+ * @ignore
+ */
+const setVariableWidthBytes = (values: Uint8Array, valueOffsets: Int32Array, index: number, value: Uint8Array) => {
+ const { [index]: x, [index + 1]: y } = valueOffsets;
+ if (x != null && y != null) {
+ values.set(value.subarray(0, y - x), x);
+ }
+/**
+ * Sets (when `val` is truthy) or clears (otherwise) a single bit in the
+ * packed `values` bytes. The bit position is `offset + index`: byte
+ * `idx >> 3`, bit `idx % 8` within that byte.
+ * @ignore
+ */
+const setBool = <T extends Bool>({ offset, values }: VectorType<T>, index: number, val: boolean) => {
+ const idx = offset + index;
+ val ? (values[idx >> 3] |= (1 << (idx % 8))) // true
+ : (values[idx >> 3] &= ~(1 << (idx % 8))); // false
+/** Type-parameter bound used by setNumeric in this file. @ignore */ type Numeric1X = Int8 | Int16 | Int32 | Uint8 | Uint16 | Uint32 | Float32 | Float64;
+/** Type-parameter bound used by setNumericX2 in this file. @ignore */ type Numeric2X = Int64 | Uint64;
+/**
+ * Passes `value.valueOf()` to setEpochMsToDays, which stores the day count
+ * at `values[index]`.
+ * @ignore
+ */
+const setDateDay = <T extends DateDay> ({ values }: VectorType<T>, index: number, value: T['TValue']): void => { setEpochMsToDays(values, index, value.valueOf()); };
+/**
+ * Passes `value.valueOf()` to setEpochMsToMillisecondsLong, which writes it
+ * as two 32-bit words beginning at `values[index * 2]`.
+ * @ignore
+ */
+const setDateMillisecond = <T extends DateMillisecond>({ values }: VectorType<T>, index: number, value: T['TValue']): void => { setEpochMsToMillisecondsLong(values, index * 2, value.valueOf()); };
+/**
+ * Assigns `value` directly to `values[stride * index]`.
+ * @ignore
+ */
+const setNumeric = <T extends Numeric1X> ({ stride, values }: VectorType<T>, index: number, value: T['TValue']): void => { values[stride * index] = value; };
+/**
+ * Converts `value` with float64ToUint16 and assigns the result to
+ * `values[stride * index]`.
+ * @ignore
+ */
+const setFloat16 = <T extends Float16> ({ stride, values }: VectorType<T>, index: number, value: T['TValue']): void => { values[stride * index] = float64ToUint16(value); };
+/**
+ * Writes a value into a vector whose type is bounded by Numeric2X,
+ * branching on the runtime type of `value`:
+ *  - bigint: assigned to `vector.values64[index]`;
+ *  - number: assigned to `vector.values[index * vector.stride]`;
+ *  - anything else: converted with toArrayBufferView to the vector's
+ *    ArrayType, then its first `stride` elements are copied into
+ *    `vector.values` at `stride * index`.
+ * @ignore
+ */
+const setNumericX2 = <T extends Numeric2X> (vector: VectorType<T>, index: number, value: T['TValue']): void => {
+ switch (typeof value) {
+ case 'bigint': vector.values64[index] = value; break;
+ case 'number': vector.values[index * vector.stride] = value; break;
+ default: {
+ const val = value as T['TArray'];
+ const { stride, ArrayType } = vector;
+ const long = toArrayBufferView<T['TArray']>(ArrayType, val);
+ vector.values.set(long.subarray(0, stride), stride * index);
+ }
+ }
+/**
+ * Copies the first `stride` bytes of `value` into `values` at
+ * `stride * index`.
+ * @ignore
+ */
+const setFixedSizeBinary = <T extends FixedSizeBinary>({ stride, values }: VectorType<T>, index: number, value: T['TValue']): void => { values.set(value.subarray(0, stride), stride * index); };
+/**
+ * Writes `value` into slot `index` by delegating to setVariableWidthBytes
+ * with the vector's `values` and `valueOffsets`.
+ * @ignore
+ */
+const setBinary = <T extends Binary>({ values, valueOffsets }: VectorType<T>, index: number, value: T['TValue']) => setVariableWidthBytes(values, valueOffsets, index, value);
+/**
+ * Encodes `value` with encodeUtf8 and writes the resulting bytes into slot
+ * `index` via setVariableWidthBytes.
+ * @ignore
+ */
+const setUtf8 = <T extends Utf8>({ values, valueOffsets }: VectorType<T>, index: number, value: T['TValue']) => {
+ setVariableWidthBytes(values, valueOffsets, index, encodeUtf8(value));
+/* istanbul ignore next */
+/**
+ * Dispatches on `vector.type.bitWidth`: widths below 64 go to setNumeric,
+ * everything else goes to setNumericX2.
+ * @ignore
+ */
+const setInt = <T extends Int>(vector: VectorType<T>, index: number, value: T['TValue']): void => {
+ vector.type.bitWidth < 64
+ ? setNumeric(vector as VectorType<Numeric1X>, index, value as Numeric1X['TValue'])
+ : setNumericX2(vector as VectorType<Numeric2X>, index, value as Numeric2X['TValue']);
+/* istanbul ignore next */
+/**
+ * Dispatches on `vector.type.precision`: Precision.HALF goes to setFloat16,
+ * every other precision goes to setNumeric.
+ * @ignore
+ */
+const setFloat = <T extends Float>(vector: VectorType<T>, index: number, value: T['TValue']): void => {
+ vector.type.precision !== Precision.HALF
+ ? setNumeric(vector as VectorType<Numeric1X>, index, value)
+ : setFloat16(vector as VectorType<Float16>, index, value);
+/* istanbul ignore next */
+const setDate = <T extends Date_> (vector: VectorType<T>, index: number, value: T['TValue']): void => { // Dispatches on vector.type.unit.
+ vector.type.unit === DateUnit.DAY
+ ? setDateDay(vector as VectorType<DateDay>, index, value) // DateUnit.DAY
+ : setDateMillisecond(vector as VectorType<DateMillisecond>, index, value); // any other unit
+/**
+ * Divides `value` by 1000 and writes the quotient as two 32-bit words
+ * beginning at `values[index * 2]` via setEpochMsToMillisecondsLong.
+ * @ignore
+ */
+const setTimestampSecond = <T extends TimestampSecond> ({ values }: VectorType<T>, index: number, value: T['TValue']): void => setEpochMsToMillisecondsLong(values, index * 2, value / 1000);
+/**
+ * Writes `value` unchanged as two 32-bit words beginning at
+ * `values[index * 2]` via setEpochMsToMillisecondsLong.
+ * @ignore
+ */
+const setTimestampMillisecond = <T extends TimestampMillisecond>({ values }: VectorType<T>, index: number, value: T['TValue']): void => setEpochMsToMillisecondsLong(values, index * 2, value);
+/**
+ * Hands `value` to setEpochMsToMicrosecondsLong, which scales it by 1000
+ * and writes two 32-bit words beginning at `values[index * 2]`.
+ * @ignore
+ */
+const setTimestampMicrosecond = <T extends TimestampMicrosecond>({ values }: VectorType<T>, index: number, value: T['TValue']): void => setEpochMsToMicrosecondsLong(values, index * 2, value);
+/**
+ * Hands `value` to setEpochMsToNanosecondsLong, which scales it by 1000000
+ * and writes two 32-bit words beginning at `values[index * 2]`.
+ * @ignore
+ */
+const setTimestampNanosecond = <T extends TimestampNanosecond> ({ values }: VectorType<T>, index: number, value: T['TValue']): void => setEpochMsToNanosecondsLong(values, index * 2, value);
+/* istanbul ignore next */
+/**
+ * Dispatches on `vector.type.unit` to the matching setTimestampSecond,
+ * setTimestampMillisecond, setTimestampMicrosecond or
+ * setTimestampNanosecond setter.
+ * @ignore
+ */
+const setTimestamp = <T extends Timestamp>(vector: VectorType<T>, index: number, value: T['TValue']): void => {
+ switch (vector.type.unit) {
+ case TimeUnit.SECOND: return setTimestampSecond(vector as VectorType<TimestampSecond>, index, value);
+ case TimeUnit.MILLISECOND: return setTimestampMillisecond(vector as VectorType<TimestampMillisecond>, index, value);
+ case TimeUnit.MICROSECOND: return setTimestampMicrosecond(vector as VectorType<TimestampMicrosecond>, index, value);
+ case TimeUnit.NANOSECOND: return setTimestampNanosecond(vector as VectorType<TimestampNanosecond>, index, value);
+ }
+/**
+ * Assigns `value` directly to `values[stride * index]`.
+ * @ignore
+ */
+const setTimeSecond = <T extends TimeSecond> ({ values, stride }: VectorType<T>, index: number, value: T['TValue']): void => { values[stride * index] = value; };
+/**
+ * Assigns `value` directly to `values[stride * index]`.
+ * @ignore
+ */
+const setTimeMillisecond = <T extends TimeMillisecond>({ values, stride }: VectorType<T>, index: number, value: T['TValue']): void => { values[stride * index] = value; };
+/**
+ * Copies the first two elements of `value` into `values` at `2 * index`.
+ * @ignore
+ */
+const setTimeMicrosecond = <T extends TimeMicrosecond>({ values }: VectorType<T>, index: number, value: T['TValue']): void => { values.set(value.subarray(0, 2), 2 * index); };
+/**
+ * Copies the first two elements of `value` into `values` at `2 * index`.
+ * @ignore
+ */
+const setTimeNanosecond = <T extends TimeNanosecond> ({ values }: VectorType<T>, index: number, value: T['TValue']): void => { values.set(value.subarray(0, 2), 2 * index); };
+/* istanbul ignore next */
+/**
+ * Dispatches on `vector.type.unit` to the matching setTimeSecond,
+ * setTimeMillisecond, setTimeMicrosecond or setTimeNanosecond setter,
+ * casting `value` to that setter's value type.
+ * @ignore
+ */
+const setTime = <T extends Time>(vector: VectorType<T>, index: number, value: T['TValue']): void => {
+ switch (vector.type.unit) {
+ case TimeUnit.SECOND: return setTimeSecond(vector as VectorType<TimeSecond>, index, value as TimeSecond['TValue']);
+ case TimeUnit.MILLISECOND: return setTimeMillisecond(vector as VectorType<TimeMillisecond>, index, value as TimeMillisecond['TValue']);
+ case TimeUnit.MICROSECOND: return setTimeMicrosecond(vector as VectorType<TimeMicrosecond>, index, value as TimeMicrosecond['TValue']);
+ case TimeUnit.NANOSECOND: return setTimeNanosecond(vector as VectorType<TimeNanosecond>, index, value as TimeNanosecond['TValue']);
+ }
+/**
+ * Copies the first four elements of `value` into `values` at `4 * index`.
+ * @ignore
+ */
+const setDecimal = <T extends Decimal>({ values }: VectorType<T>, index: number, value: T['TValue']): void => { values.set(value.subarray(0, 4), 4 * index); };
+/**
+ * Overwrites the child elements that belong to list slot `index`. For each
+ * child position from `valueOffsets[index]` up to (not including)
+ * `valueOffsets[index + 1]`, the child vector (child 0) is set to
+ * `value.get(0)`, `value.get(1)`, ... in order.
+ * @ignore
+ */
+const setList = <T extends List>(vector: VectorType<T>, index: number, value: T['TValue']): void => {
+ const values = vector.getChildAt(0)!, valueOffsets = vector.valueOffsets;
+ for (let idx = -1, itr = valueOffsets[index], end = valueOffsets[index + 1]; itr < end;) {
+ values.set(itr++, value.get(++idx));
+ }
+/**
+ * Overwrites the child elements that belong to map slot `index`. `value`
+ * is turned into an entries array (`[...value]` for a Map, otherwise
+ * `Object.entries(value)`), and for each child position from
+ * `valueOffsets[index]` up to (not including) `valueOffsets[index + 1]`
+ * the child vector (child 0) is set to the next entry in order.
+ * @ignore
+ */
+const setMap = <T extends Map_>(vector: VectorType<T>, index: number, value: T['TValue']) => {
+ const values = vector.getChildAt(0)!, valueOffsets = vector.valueOffsets;
+ const entries = value instanceof Map ? [...value] : Object.entries(value);
+ for (let idx = -1, itr = valueOffsets[index], end = valueOffsets[index + 1]; itr < end;) {
+ values.set(itr++, entries[++idx]);
+ }
+/** Returns a callback that sets child `c` at position `o` to `v[i]` (no-op when `c` is null). @ignore */ const _setStructArrayValue = (o: number, v: any[]) => (c: Vector | null, _: Field, i: number) => c?.set(o, v[i]);
+/** Returns a callback that sets child `c` at position `o` to `v.get(i)` (no-op when `c` is null). @ignore */ const _setStructVectorValue = (o: number, v: Vector) => (c: Vector | null, _: Field, i: number) => c?.set(o, v.get(i));
+/** Returns a callback that sets child `c` at position `o` to a value looked up in the Map `v`. NOTE(review): the lookup key argument is truncated in this copy of the diff. @ignore */ const _setStructMapValue = (o: number, v: Map<string, any>) => (c: Vector | null, f: Field, _: number) => c?.set(o, v.get(;
+/** Returns a callback that sets child `c` at position `o` to a property read from the object `v`. NOTE(review): the property key is truncated in this copy of the diff. @ignore */ const _setStructObjectValue = (o: number, v: { [key: string]: any }) => (c: Vector | null, f: Field, _: number) => c?.set(o, v[]);
+/**
+ * Writes a struct value into slot `index` by setting each child vector.
+ * A per-child setter is chosen from the runtime shape of `value` (Map,
+ * Vector, Array, otherwise plain object) and then invoked once for every
+ * field in `vector.type.children` with the child vector, the field and
+ * the field's position.
+ * @ignore
+ */
+const setStruct = <T extends Struct>(vector: VectorType<T>, index: number, value: T['TValue']) => {
+ const setValue = value instanceof Map ? _setStructMapValue(index, value) :
+ value instanceof Vector ? _setStructVectorValue(index, value) :
+ Array.isArray(value) ? _setStructArrayValue(index, value) :
+ _setStructObjectValue(index, value) ;
+ vector.type.children.forEach((f: Field, i: number) => setValue(vector.getChildAt(i), f, i));
+/* istanbul ignore next */
+/**
+ * Dispatches on `vector.type.mode`: UnionMode.Dense goes to setDenseUnion,
+ * any other mode goes to setSparseUnion.
+ * @ignore
+ */
+const setUnion = <
+ V extends VectorType<Union> | VectorType<DenseUnion> | VectorType<SparseUnion>
+>(vector: V, index: number, value: V['TValue']) => {
+ vector.type.mode === UnionMode.Dense ?
+ setDenseUnion(vector as VectorType<DenseUnion>, index, value) :
+ setSparseUnion(vector as VectorType<SparseUnion>, index, value);
+/**
+ * Looks up the child for slot `index` (type id -> child index via
+ * `typeIdToChildIndex`) and, when that child exists, sets it at position
+ * `vector.valueOffsets[index]` to `value`.
+ * @ignore
+ */
+const setDenseUnion = <T extends DenseUnion>(vector: VectorType<T>, index: number, value: T['TValue']): void => {
+ const childIndex = vector.typeIdToChildIndex[vector.typeIds[index]];
+ const child = vector.getChildAt(childIndex);
+ child && child.set(vector.valueOffsets[index], value);
+/**
+ * Looks up the child for slot `index` (type id -> child index via
+ * `typeIdToChildIndex`) and, when that child exists, sets it at position
+ * `index` to `value`.
+ * @ignore
+ */
+const setSparseUnion = <T extends SparseUnion>(vector: VectorType<T>, index: number, value: T['TValue']): void => {
+ const childIndex = vector.typeIdToChildIndex[vector.typeIds[index]];
+ const child = vector.getChildAt(childIndex);
+ child && child.set(index, value);
+/**
+ * Reads the key stored at `index` with `vector.getKey` and, unless it is
+ * null, calls `vector.setValue(key, value)`.
+ * @ignore
+ */
+const setDictionary = <T extends Dictionary>(vector: VectorType<T>, index: number, value: T['TValue']): void => {
+ const key = vector.getKey(index);
+ if (key !== null) {
+ vector.setValue(key, value);
+ }
+/* istanbul ignore next */
+/**
+ * Dispatches on `vector.type.unit`: IntervalUnit.DAY_TIME goes to
+ * setIntervalDayTime, any other unit goes to setIntervalYearMonth.
+ * @ignore
+ */
+const setIntervalValue = <T extends Interval>(vector: VectorType<T>, index: number, value: T['TValue']): void => {
+ (vector.type.unit === IntervalUnit.DAY_TIME)
+ ? setIntervalDayTime(vector as VectorType<IntervalDayTime>, index, value)
+ : setIntervalYearMonth(vector as VectorType<IntervalYearMonth>, index, value);
+/**
+ * Copies the first two elements of `value` into `values` at `2 * index`.
+ * @ignore
+ */
+const setIntervalDayTime = <T extends IntervalDayTime>({ values }: VectorType<T>, index: number, value: T['TValue']): void => { values.set(value.subarray(0, 2), 2 * index); };
+/**
+ * Stores `value[0] * 12 + (value[1] % 12)` at `values[index]`.
+ * NOTE(review): presumably value is [years, months] and the stored number
+ * is a month count - confirm against the matching getter.
+ * @ignore
+ */
+const setIntervalYearMonth = <T extends IntervalYearMonth>({ values }: VectorType<T>, index: number, value: T['TValue']): void => { values[index] = (value[0] * 12) + (value[1] % 12); };
+/**
+ * Overwrites the `stride` child elements that belong to slot `index`:
+ * child 0 is set at positions `index * stride + 0 .. stride - 1` to
+ * `value.get(0) .. value.get(stride - 1)`.
+ * @ignore
+ */
+const setFixedSizeList = <T extends FixedSizeList>(vector: VectorType<T>, index: number, value: T['TValue']): void => {
+ const child = vector.getChildAt(0)!, { stride } = vector;
+ for (let idx = -1, offset = index * stride; ++idx < stride;) {
+ child.set(offset + idx, value.get(idx));
+ }
+SetVisitor.prototype.visitBool = setBool; // From here on: install the module-level setters as the visit* handlers on SetVisitor.prototype. NOTE(review): no visitNull assignment appears in this list.
+SetVisitor.prototype.visitInt = setInt;
+SetVisitor.prototype.visitInt8 = setNumeric;
+SetVisitor.prototype.visitInt16 = setNumeric;
+SetVisitor.prototype.visitInt32 = setNumeric;
+SetVisitor.prototype.visitInt64 = setNumericX2;
+SetVisitor.prototype.visitUint8 = setNumeric;
+SetVisitor.prototype.visitUint16 = setNumeric;
+SetVisitor.prototype.visitUint32 = setNumeric;
+SetVisitor.prototype.visitUint64 = setNumericX2;
+SetVisitor.prototype.visitFloat = setFloat;
+SetVisitor.prototype.visitFloat16 = setFloat16;
+SetVisitor.prototype.visitFloat32 = setNumeric;
+SetVisitor.prototype.visitFloat64 = setNumeric;
+SetVisitor.prototype.visitUtf8 = setUtf8;
+SetVisitor.prototype.visitBinary = setBinary;
+SetVisitor.prototype.visitFixedSizeBinary = setFixedSizeBinary;
+SetVisitor.prototype.visitDate = setDate;
+SetVisitor.prototype.visitDateDay = setDateDay;
+SetVisitor.prototype.visitDateMillisecond = setDateMillisecond;
+SetVisitor.prototype.visitTimestamp = setTimestamp;
+SetVisitor.prototype.visitTimestampSecond = setTimestampSecond;
+SetVisitor.prototype.visitTimestampMillisecond = setTimestampMillisecond;
+SetVisitor.prototype.visitTimestampMicrosecond = setTimestampMicrosecond;
+SetVisitor.prototype.visitTimestampNanosecond = setTimestampNanosecond;
+SetVisitor.prototype.visitTime = setTime;
+SetVisitor.prototype.visitTimeSecond = setTimeSecond;
+SetVisitor.prototype.visitTimeMillisecond = setTimeMillisecond;
+SetVisitor.prototype.visitTimeMicrosecond = setTimeMicrosecond;
+SetVisitor.prototype.visitTimeNanosecond = setTimeNanosecond;
+SetVisitor.prototype.visitDecimal = setDecimal;
+SetVisitor.prototype.visitList = setList;
+SetVisitor.prototype.visitStruct = setStruct;
+SetVisitor.prototype.visitUnion = setUnion;
+SetVisitor.prototype.visitDenseUnion = setDenseUnion;
+SetVisitor.prototype.visitSparseUnion = setSparseUnion;
+SetVisitor.prototype.visitDictionary = setDictionary;
+SetVisitor.prototype.visitInterval = setIntervalValue; // note: the Interval dispatcher is named setIntervalValue
+SetVisitor.prototype.visitIntervalDayTime = setIntervalDayTime;
+SetVisitor.prototype.visitIntervalYearMonth = setIntervalYearMonth;
+SetVisitor.prototype.visitFixedSizeList = setFixedSizeList;
+SetVisitor.prototype.visitMap = setMap;
+/**
+ * SetVisitor instance created once at module load and exported for reuse.
+ * @ignore
+ */
+export const instance = new SetVisitor();
diff --git a/src/arrow/js/src/visitor/toarray.ts b/src/arrow/js/src/visitor/toarray.ts
new file mode 100644
index 000000000..395e9943c
--- /dev/null
+++ b/src/arrow/js/src/visitor/toarray.ts
@@ -0,0 +1,151 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+import { Data } from '../data';
+import { Type } from '../enum';
+import { Visitor } from '../visitor';
+import { VectorType } from '../interfaces';
+import { instance as iteratorVisitor } from './iterator';
+import {
+ DataType, Dictionary,
+ Bool, Null, Utf8, Binary, Decimal, FixedSizeBinary, List, FixedSizeList, Map_, Struct,
+ Float, Float16, Float32, Float64,
+ Int, Uint8, Uint16, Uint32, Uint64, Int8, Int16, Int32, Int64,
+ Date_, DateDay, DateMillisecond,
+ Interval, IntervalDayTime, IntervalYearMonth,
+ Time, TimeSecond, TimeMillisecond, TimeMicrosecond, TimeNanosecond,
+ Timestamp, TimestampSecond, TimestampMillisecond, TimestampMicrosecond, TimestampNanosecond,
+ Union, DenseUnion, SparseUnion,
+} from '../type';
+/**
+ * Type-level declaration of the visitor that converts a vector to an
+ * array: `visit`, `getVisitFn` and every per-type `visit*` handler are
+ * typed as taking a vector and returning that vector type's `TArray`.
+ * @ignore
+ */
+export interface ToArrayVisitor extends Visitor {
+ visit<T extends VectorType>(node: T): T['TArray'];
+ visitMany<T extends VectorType>(nodes: T[]): T['TArray'][];
+ getVisitFn<T extends Type>(node: T): (vector: VectorType<T>) => VectorType<T>['TArray'];
+ getVisitFn<T extends DataType>(node: VectorType<T> | Data<T> | T): (vector: VectorType<T>) => VectorType<T>['TArray'];
+ visitNull <T extends Null> (vector: VectorType<T>): VectorType<T>['TArray'];
+ visitBool <T extends Bool> (vector: VectorType<T>): VectorType<T>['TArray'];
+ visitInt <T extends Int> (vector: VectorType<T>): VectorType<T>['TArray'];
+ visitInt8 <T extends Int8> (vector: VectorType<T>): VectorType<T>['TArray'];
+ visitInt16 <T extends Int16> (vector: VectorType<T>): VectorType<T>['TArray'];
+ visitInt32 <T extends Int32> (vector: VectorType<T>): VectorType<T>['TArray'];
+ visitInt64 <T extends Int64> (vector: VectorType<T>): VectorType<T>['TArray'];
+ visitUint8 <T extends Uint8> (vector: VectorType<T>): VectorType<T>['TArray'];
+ visitUint16 <T extends Uint16> (vector: VectorType<T>): VectorType<T>['TArray'];
+ visitUint32 <T extends Uint32> (vector: VectorType<T>): VectorType<T>['TArray'];
+ visitUint64 <T extends Uint64> (vector: VectorType<T>): VectorType<T>['TArray'];
+ visitFloat <T extends Float> (vector: VectorType<T>): VectorType<T>['TArray'];
+ visitFloat16 <T extends Float16> (vector: VectorType<T>): VectorType<T>['TArray'];
+ visitFloat32 <T extends Float32> (vector: VectorType<T>): VectorType<T>['TArray'];
+ visitFloat64 <T extends Float64> (vector: VectorType<T>): VectorType<T>['TArray'];
+ visitUtf8 <T extends Utf8> (vector: VectorType<T>): VectorType<T>['TArray'];
+ visitBinary <T extends Binary> (vector: VectorType<T>): VectorType<T>['TArray'];
+ visitFixedSizeBinary <T extends FixedSizeBinary> (vector: VectorType<T>): VectorType<T>['TArray'];
+ visitDate <T extends Date_> (vector: VectorType<T>): VectorType<T>['TArray'];
+ visitDateDay <T extends DateDay> (vector: VectorType<T>): VectorType<T>['TArray'];
+ visitDateMillisecond <T extends DateMillisecond> (vector: VectorType<T>): VectorType<T>['TArray'];
+ visitTimestamp <T extends Timestamp> (vector: VectorType<T>): VectorType<T>['TArray'];
+ visitTimestampSecond <T extends TimestampSecond> (vector: VectorType<T>): VectorType<T>['TArray'];
+ visitTimestampMillisecond <T extends TimestampMillisecond>(vector: VectorType<T>): VectorType<T>['TArray'];
+ visitTimestampMicrosecond <T extends TimestampMicrosecond>(vector: VectorType<T>): VectorType<T>['TArray'];
+ visitTimestampNanosecond <T extends TimestampNanosecond> (vector: VectorType<T>): VectorType<T>['TArray'];
+ visitTime <T extends Time> (vector: VectorType<T>): VectorType<T>['TArray'];
+ visitTimeSecond <T extends TimeSecond> (vector: VectorType<T>): VectorType<T>['TArray'];
+ visitTimeMillisecond <T extends TimeMillisecond> (vector: VectorType<T>): VectorType<T>['TArray'];
+ visitTimeMicrosecond <T extends TimeMicrosecond> (vector: VectorType<T>): VectorType<T>['TArray'];
+ visitTimeNanosecond <T extends TimeNanosecond> (vector: VectorType<T>): VectorType<T>['TArray'];
+ visitDecimal <T extends Decimal> (vector: VectorType<T>): VectorType<T>['TArray'];
+ visitList <R extends DataType, T extends List<R>> (vector: VectorType<T>): VectorType<T>['TArray'];
+ visitStruct <T extends Struct> (vector: VectorType<T>): VectorType<T>['TArray'];
+ visitUnion <T extends Union> (vector: VectorType<T>): VectorType<T>['TArray'];
+ visitDenseUnion <T extends DenseUnion> (vector: VectorType<T>): VectorType<T>['TArray'];
+ visitSparseUnion <T extends SparseUnion> (vector: VectorType<T>): VectorType<T>['TArray'];
+ visitDictionary <R extends DataType, T extends Dictionary<R>> (vector: VectorType<T>): VectorType<T>['TArray'];
+ visitInterval <T extends Interval> (vector: VectorType<T>): VectorType<T>['TArray'];
+ visitIntervalDayTime <T extends IntervalDayTime> (vector: VectorType<T>): VectorType<T>['TArray'];
+ visitIntervalYearMonth <T extends IntervalYearMonth> (vector: VectorType<T>): VectorType<T>['TArray'];
+ visitFixedSizeList <R extends DataType, T extends FixedSizeList<R>> (vector: VectorType<T>): VectorType<T>['TArray'];
+ visitMap <T extends Map_> (vector: VectorType<T>): VectorType<T>['TArray'];
+/**
+ * Class counterpart of the ToArrayVisitor interface. Its body is empty;
+ * the visit* handlers are assigned onto ToArrayVisitor.prototype later in
+ * this file.
+ * @ignore
+ */
+export class ToArrayVisitor extends Visitor {}
+/**
+ * Converts `vector` to its array form. For Int, Float, Decimal, Time and
+ * Timestamp type ids a fast path returns early; every other type is
+ * materialised by spreading the iterator produced by
+ * `iteratorVisitor.visit(vector)` into a new array.
+ * NOTE(review): the fast-path `return` expression is truncated in this
+ * copy of the diff; only its `length * stride` argument is visible.
+ * @ignore
+ */
+function arrayOfVector<T extends DataType>(vector: VectorType<T>): T['TArray'] {
+ const { type, length, stride } = vector;
+ // Fast case, return subarray if possible
+ switch (type.typeId) {
+ case Type.Int:
+ case Type.Float: case Type.Decimal:
+ case Type.Time: case Type.Timestamp:
+ return, length * stride);
+ }
+ // Otherwise if not primitive, slow copy
+ return [...iteratorVisitor.visit(vector)] as T['TArray'];
+ToArrayVisitor.prototype.visitNull = arrayOfVector; // From here on: every visit* handler on ToArrayVisitor.prototype is the same arrayOfVector function.
+ToArrayVisitor.prototype.visitBool = arrayOfVector;
+ToArrayVisitor.prototype.visitInt = arrayOfVector;
+ToArrayVisitor.prototype.visitInt8 = arrayOfVector;
+ToArrayVisitor.prototype.visitInt16 = arrayOfVector;
+ToArrayVisitor.prototype.visitInt32 = arrayOfVector;
+ToArrayVisitor.prototype.visitInt64 = arrayOfVector;
+ToArrayVisitor.prototype.visitUint8 = arrayOfVector;
+ToArrayVisitor.prototype.visitUint16 = arrayOfVector;
+ToArrayVisitor.prototype.visitUint32 = arrayOfVector;
+ToArrayVisitor.prototype.visitUint64 = arrayOfVector;
+ToArrayVisitor.prototype.visitFloat = arrayOfVector;
+ToArrayVisitor.prototype.visitFloat16 = arrayOfVector;
+ToArrayVisitor.prototype.visitFloat32 = arrayOfVector;
+ToArrayVisitor.prototype.visitFloat64 = arrayOfVector;
+ToArrayVisitor.prototype.visitUtf8 = arrayOfVector;
+ToArrayVisitor.prototype.visitBinary = arrayOfVector;
+ToArrayVisitor.prototype.visitFixedSizeBinary = arrayOfVector;
+ToArrayVisitor.prototype.visitDate = arrayOfVector;
+ToArrayVisitor.prototype.visitDateDay = arrayOfVector;
+ToArrayVisitor.prototype.visitDateMillisecond = arrayOfVector;
+ToArrayVisitor.prototype.visitTimestamp = arrayOfVector;
+ToArrayVisitor.prototype.visitTimestampSecond = arrayOfVector;
+ToArrayVisitor.prototype.visitTimestampMillisecond = arrayOfVector;
+ToArrayVisitor.prototype.visitTimestampMicrosecond = arrayOfVector;
+ToArrayVisitor.prototype.visitTimestampNanosecond = arrayOfVector;
+ToArrayVisitor.prototype.visitTime = arrayOfVector;
+ToArrayVisitor.prototype.visitTimeSecond = arrayOfVector;
+ToArrayVisitor.prototype.visitTimeMillisecond = arrayOfVector;
+ToArrayVisitor.prototype.visitTimeMicrosecond = arrayOfVector;
+ToArrayVisitor.prototype.visitTimeNanosecond = arrayOfVector;
+ToArrayVisitor.prototype.visitDecimal = arrayOfVector;
+ToArrayVisitor.prototype.visitList = arrayOfVector;
+ToArrayVisitor.prototype.visitStruct = arrayOfVector;
+ToArrayVisitor.prototype.visitUnion = arrayOfVector;
+ToArrayVisitor.prototype.visitDenseUnion = arrayOfVector;
+ToArrayVisitor.prototype.visitSparseUnion = arrayOfVector;
+ToArrayVisitor.prototype.visitDictionary = arrayOfVector;
+ToArrayVisitor.prototype.visitInterval = arrayOfVector;
+ToArrayVisitor.prototype.visitIntervalDayTime = arrayOfVector;
+ToArrayVisitor.prototype.visitIntervalYearMonth = arrayOfVector;
+ToArrayVisitor.prototype.visitFixedSizeList = arrayOfVector;
+ToArrayVisitor.prototype.visitMap = arrayOfVector;
+/**
+ * ToArrayVisitor instance created once at module load and exported for
+ * reuse.
+ * @ignore
+ */
+export const instance = new ToArrayVisitor();
diff --git a/src/arrow/js/src/visitor/typeassembler.ts b/src/arrow/js/src/visitor/typeassembler.ts
new file mode 100644
index 000000000..4cd65d926
--- /dev/null
+++ b/src/arrow/js/src/visitor/typeassembler.ts
@@ -0,0 +1,158 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+import { flatbuffers } from 'flatbuffers';
+import Long = flatbuffers.Long;
+import Builder = flatbuffers.Builder;
+import * as type from '../type';
+import { Visitor } from '../visitor';
+import {
+ Null,
+ Int,
+ FloatingPoint,
+ Binary,
+ Bool,
+ Utf8,
+ Decimal,
+ Date,
+ Time,
+ Timestamp,
+ Interval,
+ List,
+ Struct_ as Struct,
+ Union,
+ DictionaryEncoding,
+ FixedSizeBinary,
+ FixedSizeList,
+ Map as Map_,
+} from '../fb/Schema';
+/**
+ * Interface merged with the TypeAssembler class of the same name. It narrows `visit`
+ * so that it accepts an Arrow DataType plus a flatbuffers Builder and yields a number
+ * (or undefined).
+ * @ignore
+ */
+export interface TypeAssembler extends Visitor {
+    visit<T extends type.DataType>(node: T, builder: Builder): number | undefined;
+}
+/**
+ * Visitor that writes the flatbuffers table describing an Arrow DataType.
+ *
+ * Every `visitX` method follows the same shape: `startX(b)`, add the fields that the
+ * type carries, then return whatever `endX(b)` returns (presumably the table's offset
+ * inside the builder -- the generated `fb/Schema` code is not visible here).
+ * @ignore
+ */
+export class TypeAssembler extends Visitor {
+    /**
+     * Dispatches to the matching `visitX` method.
+     * @returns undefined when either `node` or `builder` is null/undefined, otherwise
+     *          the result of the base Visitor dispatch.
+     */
+    public visit<T extends type.DataType>(node: T, builder: Builder): number | undefined {
+        return (node == null || builder == null) ? undefined : super.visit(node, builder);
+    }
+    /** Null carries no fields. */
+    public visitNull<T extends type.Null>(_node: T, b: Builder) {
+        Null.startNull(b);
+        return Null.endNull(b);
+    }
+    /** Int records its bit width and signedness. */
+    public visitInt<T extends type.Int>(node: T, b: Builder) {
+        Int.startInt(b);
+        Int.addBitWidth(b, node.bitWidth);
+        Int.addIsSigned(b, node.isSigned);
+        return Int.endInt(b);
+    }
+    /** Float is written as a FloatingPoint table holding the precision. */
+    public visitFloat<T extends type.Float>(node: T, b: Builder) {
+        FloatingPoint.startFloatingPoint(b);
+        FloatingPoint.addPrecision(b, node.precision);
+        return FloatingPoint.endFloatingPoint(b);
+    }
+    /** Binary carries no fields. */
+    public visitBinary<T extends type.Binary>(_node: T, b: Builder) {
+        Binary.startBinary(b);
+        return Binary.endBinary(b);
+    }
+    /** Bool carries no fields. */
+    public visitBool<T extends type.Bool>(_node: T, b: Builder) {
+        Bool.startBool(b);
+        return Bool.endBool(b);
+    }
+    /** Utf8 carries no fields. */
+    public visitUtf8<T extends type.Utf8>(_node: T, b: Builder) {
+        Utf8.startUtf8(b);
+        return Utf8.endUtf8(b);
+    }
+    /** Decimal records scale and precision. */
+    public visitDecimal<T extends type.Decimal>(node: T, b: Builder) {
+        Decimal.startDecimal(b);
+        Decimal.addScale(b, node.scale);
+        Decimal.addPrecision(b, node.precision);
+        return Decimal.endDecimal(b);
+    }
+    /** Date records its unit. */
+    public visitDate<T extends type.Date_>(node: T, b: Builder) {
+        Date.startDate(b);
+        Date.addUnit(b, node.unit);
+        return Date.endDate(b);
+    }
+    /** Time records its unit and bit width. */
+    public visitTime<T extends type.Time>(node: T, b: Builder) {
+        Time.startTime(b);
+        Time.addUnit(b, node.unit);
+        Time.addBitWidth(b, node.bitWidth);
+        return Time.endTime(b);
+    }
+    /** Timestamp records its unit and, when a non-empty timezone is set, the timezone string. */
+    public visitTimestamp<T extends type.Timestamp>(node: T, b: Builder) {
+        // The string is created before startTimestamp; a falsy timezone yields undefined
+        // so that the field is simply left out of the table.
+        const timezone = (node.timezone && b.createString(node.timezone)) || undefined;
+        Timestamp.startTimestamp(b);
+        Timestamp.addUnit(b, node.unit);
+        if (timezone !== undefined) {
+            Timestamp.addTimezone(b, timezone);
+        }
+        return Timestamp.endTimestamp(b);
+    }
+    /** Interval records its unit. */
+    public visitInterval<T extends type.Interval>(node: T, b: Builder) {
+        Interval.startInterval(b);
+        Interval.addUnit(b, node.unit);
+        return Interval.endInterval(b);
+    }
+    /** List carries no fields in its own table. */
+    public visitList<T extends type.List>(_node: T, b: Builder) {
+        List.startList(b);
+        return List.endList(b);
+    }
+    /** Struct carries no fields in its own table. */
+    public visitStruct<T extends type.Struct>(_node: T, b: Builder) {
+        Struct.startStruct_(b);
+        return Struct.endStruct_(b);
+    }
+    /** Union records its mode and the vector of type ids. */
+    public visitUnion<T extends type.Union>(node: T, b: Builder) {
+        // NOTE(review): createTypeIdsVector is called right after startTypeIdsVector;
+        // confirm the generated create* helper does not already start the vector itself.
+        Union.startTypeIdsVector(b, node.typeIds.length);
+        const typeIds = Union.createTypeIdsVector(b, node.typeIds);
+        Union.startUnion(b);
+        Union.addMode(b, node.mode);
+        Union.addTypeIds(b, typeIds);
+        return Union.endUnion(b);
+    }
+    /**
+     * Dictionary is written as a DictionaryEncoding table: the dictionary id, the
+     * ordered flag and, when available, the previously assembled index type.
+     */
+    public visitDictionary<T extends type.Dictionary>(node: T, b: Builder) {
+        // The index type is assembled first, before the DictionaryEncoding table is started.
+        const indexType = this.visit(node.indices, b);
+        DictionaryEncoding.startDictionaryEncoding(b);
+        // The id is passed as a flatbuffers Long built from (node.id, 0).
+        DictionaryEncoding.addId(b, new Long(node.id, 0));
+        DictionaryEncoding.addIsOrdered(b, node.isOrdered);
+        if (indexType !== undefined) {
+            DictionaryEncoding.addIndexType(b, indexType);
+        }
+        return DictionaryEncoding.endDictionaryEncoding(b);
+    }
+    /** FixedSizeBinary records its byte width. */
+    public visitFixedSizeBinary<T extends type.FixedSizeBinary>(node: T, b: Builder) {
+        FixedSizeBinary.startFixedSizeBinary(b);
+        FixedSizeBinary.addByteWidth(b, node.byteWidth);
+        return FixedSizeBinary.endFixedSizeBinary(b);
+    }
+    /** FixedSizeList records its list size. */
+    public visitFixedSizeList<T extends type.FixedSizeList>(node: T, b: Builder) {
+        FixedSizeList.startFixedSizeList(b);
+        FixedSizeList.addListSize(b, node.listSize);
+        return FixedSizeList.endFixedSizeList(b);
+    }
+    /** Map records whether its keys are sorted. */
+    public visitMap<T extends type.Map_>(node: T, b: Builder) {
+        Map_.startMap(b);
+        Map_.addKeysSorted(b, node.keysSorted);
+        return Map_.endMap(b);
+    }
+}
+/**
+ * Module-level TypeAssembler instance, constructed once and exported for callers of this module.
+ * @ignore
+ */
+export const instance = new TypeAssembler();
diff --git a/src/arrow/js/src/visitor/typecomparator.ts b/src/arrow/js/src/visitor/typecomparator.ts
new file mode 100644
index 000000000..478b505f8
--- /dev/null
+++ b/src/arrow/js/src/visitor/typecomparator.ts
@@ -0,0 +1,280 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+import { Data } from '../data';
+import { Visitor } from '../visitor';
+import { VectorType } from '../interfaces';
+import { Schema, Field } from '../schema';
+import {
+ DataType, Dictionary,
+ Bool, Null, Utf8, Binary, Decimal, FixedSizeBinary, List, FixedSizeList, Map_, Struct,
+ Float, Float16, Float32, Float64,
+ Int, Uint8, Uint16, Uint32, Uint64, Int8, Int16, Int32, Int64,
+ Date_, DateDay, DateMillisecond,
+ Interval, IntervalDayTime, IntervalYearMonth,
+ Time, TimeSecond, TimeMillisecond, TimeMicrosecond, TimeNanosecond,
+ Timestamp, TimestampSecond, TimestampMillisecond, TimestampMicrosecond, TimestampNanosecond,
+ Union, DenseUnion, SparseUnion,
+} from '../type';
+/**
+ * Interface merged with the TypeComparator class of the same name. It types every
+ * `visitX` method as a type guard: given a DataType and an optional other DataType,
+ * the method reports whether `other` is of the same type `T`.
+ * @ignore
+ */
+export interface TypeComparator extends Visitor {
+    visit<T extends DataType>(type: T, other?: DataType | null): other is T;
+    visitMany<T extends DataType>(nodes: T[], others?: DataType[] | null): boolean[];
+    getVisitFn<T extends DataType>(node: VectorType<T> | Data<T> | T): (other?: DataType | null) => other is T;
+    visitNull<T extends Null>(type: T, other?: DataType | null): other is T;
+    visitBool<T extends Bool>(type: T, other?: DataType | null): other is T;
+    visitInt<T extends Int>(type: T, other?: DataType | null): other is T;
+    visitInt8<T extends Int8>(type: T, other?: DataType | null): other is T;
+    visitInt16<T extends Int16>(type: T, other?: DataType | null): other is T;
+    visitInt32<T extends Int32>(type: T, other?: DataType | null): other is T;
+    visitInt64<T extends Int64>(type: T, other?: DataType | null): other is T;
+    visitUint8<T extends Uint8>(type: T, other?: DataType | null): other is T;
+    visitUint16<T extends Uint16>(type: T, other?: DataType | null): other is T;
+    visitUint32<T extends Uint32>(type: T, other?: DataType | null): other is T;
+    visitUint64<T extends Uint64>(type: T, other?: DataType | null): other is T;
+    visitFloat<T extends Float>(type: T, other?: DataType | null): other is T;
+    visitFloat16<T extends Float16>(type: T, other?: DataType | null): other is T;
+    visitFloat32<T extends Float32>(type: T, other?: DataType | null): other is T;
+    visitFloat64<T extends Float64>(type: T, other?: DataType | null): other is T;
+    visitUtf8<T extends Utf8>(type: T, other?: DataType | null): other is T;
+    visitBinary<T extends Binary>(type: T, other?: DataType | null): other is T;
+    visitFixedSizeBinary<T extends FixedSizeBinary>(type: T, other?: DataType | null): other is T;
+    visitDate<T extends Date_>(type: T, other?: DataType | null): other is T;
+    visitDateDay<T extends DateDay>(type: T, other?: DataType | null): other is T;
+    visitDateMillisecond<T extends DateMillisecond>(type: T, other?: DataType | null): other is T;
+    visitTimestamp<T extends Timestamp>(type: T, other?: DataType | null): other is T;
+    visitTimestampSecond<T extends TimestampSecond>(type: T, other?: DataType | null): other is T;
+    visitTimestampMillisecond<T extends TimestampMillisecond>(type: T, other?: DataType | null): other is T;
+    visitTimestampMicrosecond<T extends TimestampMicrosecond>(type: T, other?: DataType | null): other is T;
+    visitTimestampNanosecond<T extends TimestampNanosecond>(type: T, other?: DataType | null): other is T;
+    visitTime<T extends Time>(type: T, other?: DataType | null): other is T;
+    visitTimeSecond<T extends TimeSecond>(type: T, other?: DataType | null): other is T;
+    visitTimeMillisecond<T extends TimeMillisecond>(type: T, other?: DataType | null): other is T;
+    visitTimeMicrosecond<T extends TimeMicrosecond>(type: T, other?: DataType | null): other is T;
+    visitTimeNanosecond<T extends TimeNanosecond>(type: T, other?: DataType | null): other is T;
+    visitDecimal<T extends Decimal>(type: T, other?: DataType | null): other is T;
+    visitList<T extends List>(type: T, other?: DataType | null): other is T;
+    visitStruct<T extends Struct>(type: T, other?: DataType | null): other is T;
+    visitUnion<T extends Union>(type: T, other?: DataType | null): other is T;
+    visitDenseUnion<T extends DenseUnion>(type: T, other?: DataType | null): other is T;
+    visitSparseUnion<T extends SparseUnion>(type: T, other?: DataType | null): other is T;
+    visitDictionary<T extends Dictionary>(type: T, other?: DataType | null): other is T;
+    visitInterval<T extends Interval>(type: T, other?: DataType | null): other is T;
+    visitIntervalDayTime<T extends IntervalDayTime>(type: T, other?: DataType | null): other is T;
+    visitIntervalYearMonth<T extends IntervalYearMonth>(type: T, other?: DataType | null): other is T;
+    visitFixedSizeList<T extends FixedSizeList>(type: T, other?: DataType | null): other is T;
+    visitMap<T extends Map_>(type: T, other?: DataType | null): other is T;
+}
+/**
+ * Visitor that decides whether two schemas, fields or data types are structurally equal.
+ * The per-type `visitX` methods are attached to the prototype further down in this
+ * module; the class body itself only holds the schema- and field-level comparisons.
+ * @ignore
+ */
+export class TypeComparator extends Visitor {
+    /**
+     * Two schemas match when they are the same object, or when `other` is an instance
+     * of `schema`'s constructor and their field lists match pairwise.
+     */
+    compareSchemas<T extends { [key: string]: DataType }>(schema: Schema<T>, other?: Schema | null): other is Schema<T> {
+        return (schema === other) || (
+            other instanceof schema.constructor &&
+            this.compareManyFields(schema.fields, other.fields)
+        );
+    }
+    /**
+     * Two field lists match when they are the same array, or when both are arrays of
+     * equal length whose fields match position by position.
+     */
+    compareManyFields<T extends { [key: string]: DataType }>(fields: Field<T[keyof T]>[], others?: Field[] | null): others is Field<T[keyof T]>[] {
+        return (fields === others) || (
+            Array.isArray(fields) &&
+            Array.isArray(others) &&
+            fields.length === others.length &&
+            fields.every((f, i) => this.compareFields(f, others[i]))
+        );
+    }
+    /**
+     * Two fields match when they are the same object, or when `other` is an instance of
+     * `field`'s constructor with the same name, the same nullability and a matching type.
+     * Nothing else on the field is compared.
+     */
+    compareFields<T extends DataType = any>(field: Field<T>, other?: Field | null): other is Field<T> {
+        return (field === other) || (
+            other instanceof field.constructor &&
+            field.name === other.name &&
+            field.nullable === other.nullable &&
+            this.visit(field.type, other.type)
+        );
+    }
+}
+/**
+ * Reports whether `other` is an instance of the constructor that created `type`.
+ * A null or undefined `other` yields false.
+ */
+function compareConstructor<T extends DataType>(type: T, other?: DataType | null): other is T {
+    return other instanceof type.constructor;
+}
+/**
+ * Comparator for types with no parameters worth comparing: the two match when they
+ * are the same object or when `other` passes the constructor check.
+ */
+function compareAny<T extends DataType>(type: T, other?: DataType | null): other is T {
+    return (type === other) || compareConstructor(type, other);
+}
+/** Int types match on identity, or on constructor plus equal bitWidth and isSigned. */
+function compareInt<T extends Int>(type: T, other?: DataType | null): other is T {
+    return (type === other) || (
+        compareConstructor(type, other) &&
+        type.bitWidth === other.bitWidth &&
+        type.isSigned === other.isSigned
+    );
+}
+/** Float types match on identity, or on constructor plus equal precision. */
+function compareFloat<T extends Float>(type: T, other?: DataType | null): other is T {
+    return (type === other) || (
+        compareConstructor(type, other) &&
+        type.precision === other.precision
+    );
+}
+/** FixedSizeBinary types match on identity, or on constructor plus equal byteWidth. */
+function compareFixedSizeBinary<T extends FixedSizeBinary>(type: T, other?: DataType | null): other is T {
+    return (type === other) || (
+        compareConstructor(type, other) &&
+        type.byteWidth === other.byteWidth
+    );
+}
+/** Date types match on identity, or on constructor plus equal unit. */
+function compareDate<T extends Date_>(type: T, other?: DataType | null): other is T {
+    return (type === other) || (
+        compareConstructor(type, other) &&
+        type.unit === other.unit
+    );
+}
+/** Timestamp types match on identity, or on constructor plus equal unit and timezone. */
+function compareTimestamp<T extends Timestamp>(type: T, other?: DataType | null): other is T {
+    return (type === other) || (
+        compareConstructor(type, other) &&
+        type.unit === other.unit &&
+        type.timezone === other.timezone
+    );
+}
+/** Time types match on identity, or on constructor plus equal unit and bitWidth. */
+function compareTime<T extends Time>(type: T, other?: DataType | null): other is T {
+    return (type === other) || (
+        compareConstructor(type, other) &&
+        type.unit === other.unit &&
+        type.bitWidth === other.bitWidth
+    );
+}
+/**
+ * List types match on identity, or on constructor plus child fields that match
+ * pairwise (compared through the module-level `instance`).
+ */
+function compareList<T extends List>(type: T, other?: DataType | null): other is T {
+    return (type === other) || (
+        compareConstructor(type, other) &&
+        type.children.length === other.children.length &&
+        instance.compareManyFields(type.children, other.children)
+    );
+}
+/**
+ * Struct types match on identity, or on constructor plus child fields that match
+ * pairwise (compared through the module-level `instance`).
+ */
+function compareStruct<T extends Struct>(type: T, other?: DataType | null): other is T {
+    return (type === other) || (
+        compareConstructor(type, other) &&
+        type.children.length === other.children.length &&
+        instance.compareManyFields(type.children, other.children)
+    );
+}
+/**
+ * Union types match on identity, or on constructor plus equal mode, equal type ids
+ * at every position of `type.typeIds`, and child fields that match pairwise.
+ */
+function compareUnion<T extends Union>(type: T, other?: DataType | null): other is T {
+    return (type === other) || (
+        compareConstructor(type, other) &&
+        type.mode === other.mode &&
+        type.typeIds.every((x, i) => x === other.typeIds[i]) &&
+        instance.compareManyFields(type.children, other.children)
+    );
+}
+/**
+ * Dictionary types match on identity, or on constructor plus equal id, equal
+ * isOrdered flag, matching index types and matching dictionary (value) types.
+ */
+function compareDictionary<T extends Dictionary>(type: T, other?: DataType | null): other is T {
+    return (type === other) || (
+        compareConstructor(type, other) &&
+        type.id === other.id &&
+        type.isOrdered === other.isOrdered &&
+        instance.visit(<any> type.indices, other.indices) &&
+        instance.visit(type.dictionary, other.dictionary)
+    );
+}
+/** Interval types match on identity, or on constructor plus equal unit. */
+function compareInterval<T extends Interval>(type: T, other?: DataType | null): other is T {
+    return (type === other) || (
+        compareConstructor(type, other) &&
+        type.unit === other.unit
+    );
+}
+/**
+ * FixedSizeList types match on identity, or on constructor plus equal listSize and
+ * child fields that match pairwise.
+ */
+function compareFixedSizeList<T extends FixedSizeList>(type: T, other?: DataType | null): other is T {
+    return (type === other) || (
+        compareConstructor(type, other) &&
+        type.listSize === other.listSize &&
+        type.children.length === other.children.length &&
+        instance.compareManyFields(type.children, other.children)
+    );
+}
+/**
+ * Map types match on identity, or on constructor plus equal keysSorted flag and
+ * child fields that match pairwise.
+ */
+function compareMap<T extends Map_>(type: T, other?: DataType | null): other is T {
+    return (type === other) || (
+        compareConstructor(type, other) &&
+        type.keysSorted === other.keysSorted &&
+        type.children.length === other.children.length &&
+        instance.compareManyFields(type.children, other.children)
+    );
+}
+// Attach one comparison function to the prototype for every visit method.
+// Concrete subtypes (Int8..Uint64, Float16..Float64, DateDay/DateMillisecond, the
+// Timestamp*/Time* units, Dense/Sparse unions, the Interval units) reuse the
+// comparator of their family.
+// NOTE(review): visitDecimal is bound to compareAny, which only checks identity and
+// constructor, so Decimal scale and precision are not compared -- confirm this is intended.
+TypeComparator.prototype.visitNull = compareAny;
+TypeComparator.prototype.visitBool = compareAny;
+TypeComparator.prototype.visitInt = compareInt;
+TypeComparator.prototype.visitInt8 = compareInt;
+TypeComparator.prototype.visitInt16 = compareInt;
+TypeComparator.prototype.visitInt32 = compareInt;
+TypeComparator.prototype.visitInt64 = compareInt;
+TypeComparator.prototype.visitUint8 = compareInt;
+TypeComparator.prototype.visitUint16 = compareInt;
+TypeComparator.prototype.visitUint32 = compareInt;
+TypeComparator.prototype.visitUint64 = compareInt;
+TypeComparator.prototype.visitFloat = compareFloat;
+TypeComparator.prototype.visitFloat16 = compareFloat;
+TypeComparator.prototype.visitFloat32 = compareFloat;
+TypeComparator.prototype.visitFloat64 = compareFloat;
+TypeComparator.prototype.visitUtf8 = compareAny;
+TypeComparator.prototype.visitBinary = compareAny;
+TypeComparator.prototype.visitFixedSizeBinary = compareFixedSizeBinary;
+TypeComparator.prototype.visitDate = compareDate;
+TypeComparator.prototype.visitDateDay = compareDate;
+TypeComparator.prototype.visitDateMillisecond = compareDate;
+TypeComparator.prototype.visitTimestamp = compareTimestamp;
+TypeComparator.prototype.visitTimestampSecond = compareTimestamp;
+TypeComparator.prototype.visitTimestampMillisecond = compareTimestamp;
+TypeComparator.prototype.visitTimestampMicrosecond = compareTimestamp;
+TypeComparator.prototype.visitTimestampNanosecond = compareTimestamp;
+TypeComparator.prototype.visitTime = compareTime;
+TypeComparator.prototype.visitTimeSecond = compareTime;
+TypeComparator.prototype.visitTimeMillisecond = compareTime;
+TypeComparator.prototype.visitTimeMicrosecond = compareTime;
+TypeComparator.prototype.visitTimeNanosecond = compareTime;
+TypeComparator.prototype.visitDecimal = compareAny;
+TypeComparator.prototype.visitList = compareList;
+TypeComparator.prototype.visitStruct = compareStruct;
+TypeComparator.prototype.visitUnion = compareUnion;
+TypeComparator.prototype.visitDenseUnion = compareUnion;
+TypeComparator.prototype.visitSparseUnion = compareUnion;
+TypeComparator.prototype.visitDictionary = compareDictionary;
+TypeComparator.prototype.visitInterval = compareInterval;
+TypeComparator.prototype.visitIntervalDayTime = compareInterval;
+TypeComparator.prototype.visitIntervalYearMonth = compareInterval;
+TypeComparator.prototype.visitFixedSizeList = compareFixedSizeList;
+TypeComparator.prototype.visitMap = compareMap;
+/**
+ * Shared TypeComparator. The nested-type comparison functions in this module recurse
+ * through it, and the exported compareSchemas/compareFields/compareTypes functions
+ * delegate to it.
+ * @ignore
+ */
+export const instance = new TypeComparator();
+/**
+ * Reports whether two schemas are structurally equal; delegates to the shared
+ * TypeComparator instance.
+ * @param schema reference schema
+ * @param other schema to test (may be null or omitted)
+ */
+export function compareSchemas<T extends { [key: string]: DataType }>(schema: Schema<T>, other?: Schema | null): other is Schema<T> {
+    return instance.compareSchemas(schema, other);
+}
+/**
+ * Reports whether two fields are structurally equal; delegates to the shared
+ * TypeComparator instance.
+ * @param field reference field
+ * @param other field to test (may be null or omitted)
+ */
+export function compareFields<T extends DataType = any>(field: Field<T>, other?: Field | null): other is Field<T> {
+    return instance.compareFields(field, other);
+}
+/**
+ * Reports whether two data types are structurally equal by visiting `type` with the
+ * shared TypeComparator instance.
+ * @param type reference data type
+ * @param other data type to test (may be omitted)
+ */
+export function compareTypes<A extends DataType = any>(type: A, other?: DataType): other is A {
+    return instance.visit(type, other);
+}
diff --git a/src/arrow/js/src/visitor/typector.ts b/src/arrow/js/src/visitor/typector.ts
new file mode 100644
index 000000000..9d0a9f17d
--- /dev/null
+++ b/src/arrow/js/src/visitor/typector.ts
@@ -0,0 +1,82 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+import { Data } from '../data';
+import { Type } from '../enum';
+import * as type from '../type';
+import { DataType } from '../type';
+import { Visitor } from '../visitor';
+import { VectorType } from '../interfaces';
+import { DataTypeCtor } from '../interfaces';
+/**
+ * Interface merged with the GetDataTypeConstructor class of the same name. It types
+ * the visitor so that visiting a `Type` enum value (or a vector, data or data type)
+ * yields the matching DataType constructor.
+ * @ignore
+ */
+export interface GetDataTypeConstructor extends Visitor {
+    visit<T extends Type>(node: T): DataTypeCtor<T>;
+    visitMany<T extends Type>(nodes: T[]): DataTypeCtor<T>[];
+    getVisitFn<T extends Type>(node: T): () => DataTypeCtor<T>;
+    getVisitFn<T extends DataType>(node: VectorType<T> | Data<T> | T): () => DataTypeCtor<T>;
+}
+/**
+ * Visitor whose every `visitX` method returns the DataType class exported from
+ * `../type` for that kind (for example `visitInt8` returns `type.Int8`). The methods
+ * take no arguments and construct nothing.
+ * @ignore
+ */
+export class GetDataTypeConstructor extends Visitor {
+    public visitNull() { return type.Null; }
+    public visitBool() { return type.Bool; }
+    public visitInt() { return type.Int; }
+    public visitInt8() { return type.Int8; }
+    public visitInt16() { return type.Int16; }
+    public visitInt32() { return type.Int32; }
+    public visitInt64() { return type.Int64; }
+    public visitUint8() { return type.Uint8; }
+    public visitUint16() { return type.Uint16; }
+    public visitUint32() { return type.Uint32; }
+    public visitUint64() { return type.Uint64; }
+    public visitFloat() { return type.Float; }
+    public visitFloat16() { return type.Float16; }
+    public visitFloat32() { return type.Float32; }
+    public visitFloat64() { return type.Float64; }
+    public visitUtf8() { return type.Utf8; }
+    public visitBinary() { return type.Binary; }
+    public visitFixedSizeBinary() { return type.FixedSizeBinary; }
+    public visitDate() { return type.Date_; }
+    public visitDateDay() { return type.DateDay; }
+    public visitDateMillisecond() { return type.DateMillisecond; }
+    public visitTimestamp() { return type.Timestamp; }
+    public visitTimestampSecond() { return type.TimestampSecond; }
+    public visitTimestampMillisecond() { return type.TimestampMillisecond; }
+    public visitTimestampMicrosecond() { return type.TimestampMicrosecond; }
+    public visitTimestampNanosecond() { return type.TimestampNanosecond; }
+    public visitTime() { return type.Time; }
+    public visitTimeSecond() { return type.TimeSecond; }
+    public visitTimeMillisecond() { return type.TimeMillisecond; }
+    public visitTimeMicrosecond() { return type.TimeMicrosecond; }
+    public visitTimeNanosecond() { return type.TimeNanosecond; }
+    public visitDecimal() { return type.Decimal; }
+    public visitList() { return type.List; }
+    public visitStruct() { return type.Struct; }
+    public visitUnion() { return type.Union; }
+    public visitDenseUnion() { return type.DenseUnion; }
+    public visitSparseUnion() { return type.SparseUnion; }
+    public visitDictionary() { return type.Dictionary; }
+    public visitInterval() { return type.Interval; }
+    public visitIntervalDayTime() { return type.IntervalDayTime; }
+    public visitIntervalYearMonth() { return type.IntervalYearMonth; }
+    public visitFixedSizeList() { return type.FixedSizeList; }
+    public visitMap() { return type.Map_; }
+}
+/**
+ * Module-level GetDataTypeConstructor instance, constructed once and exported for callers of this module.
+ * @ignore
+ */
+export const instance = new GetDataTypeConstructor();
diff --git a/src/arrow/js/src/visitor/vectorassembler.ts b/src/arrow/js/src/visitor/vectorassembler.ts
new file mode 100644
index 000000000..e324bc02e
--- /dev/null
+++ b/src/arrow/js/src/visitor/vectorassembler.ts
@@ -0,0 +1,234 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+import { Data } from '../data';
+import { Vector } from '../vector';
+import { Visitor } from '../visitor';
+import { Type, UnionMode } from '../enum';
+import { RecordBatch } from '../recordbatch';
+import { VectorType as V } from '../interfaces';
+import { rebaseValueOffsets } from '../util/buffer';
+import { packBools, truncateBitmap } from '../util/bit';
+import { selectVectorChildrenArgs } from '../util/args';
+import { BufferRegion, FieldNode } from '../ipc/metadata/message';
+import {
+ DataType, Dictionary,
+ Float, Int, Date_, Interval, Time, Timestamp, Union,
+ Bool, Null, Utf8, Binary, Decimal, FixedSizeBinary, List, FixedSizeList, Map_, Struct,
+} from '../type';
+/**
+ * Interface merged with the VectorAssembler class of the same name. Every visit
+ * method takes a vector and returns the assembler itself (`this`).
+ * @ignore
+ */
+export interface VectorAssembler extends Visitor {
+    visit<T extends Vector>(node: T): this;
+    visitMany<T extends Vector>(nodes: T[]): this[];
+    getVisitFn<T extends Type>(node: T): (vector: V<T>) => this;
+    getVisitFn<T extends DataType>(node: V<T> | Data<T> | T): (vector: V<T>) => this;
+    visitBool<T extends Bool>(vector: V<T>): this;
+    visitInt<T extends Int>(vector: V<T>): this;
+    visitFloat<T extends Float>(vector: V<T>): this;
+    visitUtf8<T extends Utf8>(vector: V<T>): this;
+    visitBinary<T extends Binary>(vector: V<T>): this;
+    visitFixedSizeBinary<T extends FixedSizeBinary>(vector: V<T>): this;
+    visitDate<T extends Date_>(vector: V<T>): this;
+    visitTimestamp<T extends Timestamp>(vector: V<T>): this;
+    visitTime<T extends Time>(vector: V<T>): this;
+    visitDecimal<T extends Decimal>(vector: V<T>): this;
+    visitList<T extends List>(vector: V<T>): this;
+    visitStruct<T extends Struct>(vector: V<T>): this;
+    visitUnion<T extends Union>(vector: V<T>): this;
+    visitInterval<T extends Interval>(vector: V<T>): this;
+    visitFixedSizeList<T extends FixedSizeList>(vector: V<T>): this;
+    visitMap<T extends Map_>(vector: V<T>): this;
+}
+/**
+ * Visitor that walks vectors and collects what is needed to write them out: one
+ * FieldNode per visited (non-dictionary) vector, the list of buffers, the
+ * BufferRegion of each buffer, and the running total byte length.
+ *
+ * Instances are only created through the static `assemble` method; the constructor
+ * is private.
+ * @ignore
+ */
+export class VectorAssembler extends Visitor {
+    /**
+     * Creates a fresh assembler and visits the vectors selected from `args`.
+     * @param args vectors and/or record batches (or arrays of them); the vectors to
+     *             visit are picked by `selectVectorChildrenArgs`.
+     * @returns the result of the first visit, or the (empty) assembler when nothing
+     *          was visited.
+     * @nocollapse
+     */
+    public static assemble<T extends Vector | RecordBatch>(...args: (T | T[])[]) {
+        const assembler = new VectorAssembler();
+        const vectorChildren = selectVectorChildrenArgs(RecordBatch, args);
+        // visitMany returns one result per vector; default to the assembler when the list is empty.
+        const [assembleResult = assembler] = assembler.visitMany(vectorChildren);
+        return assembleResult;
+    }
+    private constructor() { super(); }
+    /**
+     * Records the validity buffer and FieldNode for `vector` (unless it is a
+     * dictionary vector), then dispatches to the type-specific visit method.
+     * @throws RangeError when the vector is longer than 2^31 - 1 elements.
+     */
+    public visit<T extends Vector>(vector: T): this {
+        if (!DataType.isDictionary(vector.type)) {
+            const { data, length, nullCount } = vector;
+            if (length > 2147483647) {
+                /* istanbul ignore next */
+                throw new RangeError('Cannot write arrays larger than 2^31 - 1 in length');
+            }
+            // Null vectors get a FieldNode but no validity buffer.
+            if (!DataType.isNull(vector.type)) {
+                addBuffer.call(this, nullCount <= 0
+                    ? new Uint8Array(0) // placeholder validity buffer
+                    : truncateBitmap(data.offset, length, data.nullBitmap)
+                );
+            }
+            this.nodes.push(new FieldNode(length, nullCount));
+        }
+        return super.visit(vector);
+    }
+    /** Null vectors contribute no buffers beyond what `visit` already recorded. */
+    public visitNull<T extends Null>(_nullV: V<T>) {
+        return this;
+    }
+    public visitDictionary<T extends Dictionary>(vector: V<T>) {
+        // Assemble the indices here, Dictionary assembled separately.
+        return this.visit(vector.indices);
+    }
+    /** FieldNodes recorded so far, in visit order. */
+    public get nodes() { return this._nodes; }
+    /** Buffers recorded so far, in the order they were added. */
+    public get buffers() { return this._buffers; }
+    /** Sum of the padded lengths of all recorded buffers. */
+    public get byteLength() { return this._byteLength; }
+    /** One BufferRegion (offset, padded length) per recorded buffer. */
+    public get bufferRegions() { return this._bufferRegions; }
+    protected _byteLength = 0;
+    protected _nodes: FieldNode[] = [];
+    protected _buffers: ArrayBufferView[] = [];
+    protected _bufferRegions: BufferRegion[] = [];
+}
+/**
+ * Appends one buffer to the assembler and records the region it occupies. The region
+ * starts at the assembler's current byte length and spans the buffer's byteLength
+ * rounded up to a multiple of 8; the running byte length advances by that padded size.
+ * Meant to be invoked with the assembler as `this`.
+ * @param values buffer to append
+ * @returns the assembler
+ * @ignore
+ */
+function addBuffer(this: VectorAssembler, values: ArrayBufferView) {
+    const byteLength = (values.byteLength + 7) & ~7; // Round up to a multiple of 8
+    this.buffers.push(values);
+    this.bufferRegions.push(new BufferRegion(this._byteLength, byteLength));
+    this._byteLength += byteLength;
+    return this;
+}
+/**
+ * Assembles a Union vector: always writes the typeIds buffer, then
+ *  - Sparse unions: visits the children like any other nested type;
+ *  - Dense unions that have not been sliced: writes valueOffsets as-is, then the children;
+ *  - Dense unions that have been sliced: writes rebased per-child offsets and visits
+ *    a slice of each child.
+ * Meant to be invoked with the assembler as `this`.
+ * @returns the assembler
+ * @ignore
+ */
+function assembleUnion<T extends Union>(this: VectorAssembler, vector: V<T>) {
+    const { type, length, typeIds, valueOffsets } = vector;
+    // All Union Vectors have a typeIds buffer
+    addBuffer.call(this, typeIds);
+    // If this is a Sparse Union, treat it like all other Nested types
+    if (type.mode === UnionMode.Sparse) {
+        return assembleNestedVector.call(this, vector);
+    } else if (type.mode === UnionMode.Dense) {
+        // If this is a Dense Union, add the valueOffsets buffer and potentially slice the children
+        if (vector.offset <= 0) {
+            // If the Vector hasn't been sliced, write the existing valueOffsets
+            addBuffer.call(this, valueOffsets);
+            // We can treat this like all other Nested types
+            return assembleNestedVector.call(this, vector);
+        } else {
+            // A sliced Dense Union is an unpleasant case. Because the offsets are different for
+            // each child vector, we need to "rebase" the valueOffsets for each child
+            // Union typeIds are not necessarily 0-indexed
+            const maxChildTypeId = typeIds.reduce((x, y) => Math.max(x, y), typeIds[0]);
+            const childLengths = new Int32Array(maxChildTypeId + 1);
+            // Set all to -1 to indicate that we haven't observed a first occurrence of a particular child yet
+            const childOffsets = new Int32Array(maxChildTypeId + 1).fill(-1);
+            const shiftedOffsets = new Int32Array(length);
+            // If we have a non-zero offset, then the value offsets do not start at
+            // zero. We must a) create a new offsets array with shifted offsets and
+            // b) slice the values array accordingly
+            const unshiftedOffsets = rebaseValueOffsets(-valueOffsets[0], length, valueOffsets);
+            for (let typeId, shift, index = -1; ++index < length;) {
+                if ((shift = childOffsets[typeId = typeIds[index]]) === -1) {
+                    // NOTE(review): this reads unshiftedOffsets[typeId] (indexed by type id,
+                    // not by row index) -- confirm that is intended.
+                    shift = childOffsets[typeId] = unshiftedOffsets[typeId];
+                }
+                shiftedOffsets[index] = unshiftedOffsets[index] - shift;
+                ++childLengths[typeId];
+            }
+            addBuffer.call(this, shiftedOffsets);
+            // Slice and visit children accordingly
+            for (let child: Vector | null, childIndex = -1, numChildren = type.children.length; ++childIndex < numChildren;) {
+                if (child = vector.getChildAt(childIndex)) {
+                    const typeId = type.typeIds[childIndex];
+                    const childLength = Math.min(length, childLengths[typeId]);
+                    this.visit(child.slice(childOffsets[typeId], childLength));
+                }
+            }
+        }
+    }
+    return this;
+}
+/**
+ * Assembles the data buffer of a Bool vector, which must stay bit-packed.
+ * Meant to be invoked with the assembler as `this`.
+ * @returns the assembler
+ * @ignore
+ */
+function assembleBoolVector<T extends Bool>(this: VectorAssembler, vector: V<T>) {
+    // Bool vector is a special case of FlatVector, as its data buffer needs to stay packed
+    let values: Uint8Array;
+    if (vector.nullCount >= vector.length) {
+        // If all values are null, just insert a placeholder empty data buffer (fastest path)
+        return addBuffer.call(this, new Uint8Array(0));
+    } else if ((values = vector.values) instanceof Uint8Array) {
+        // If values is already a Uint8Array, slice the bitmap (fast path)
+        return addBuffer.call(this, truncateBitmap(vector.offset, vector.length, values));
+    }
+    // Otherwise if the underlying data *isn't* a Uint8Array, enumerate the
+    // values as bools and re-pack them into a Uint8Array. This code isn't
+    // reachable unless you're trying to manipulate the Data internals,
+    // we're only doing this for safety.
+    /* istanbul ignore next */
+    return addBuffer.call(this, packBools(vector));
+}
+/**
+ * Assembles a fixed-width vector by adding the first `length * stride` elements of
+ * its values buffer. Meant to be invoked with the assembler as `this`.
+ * @returns the assembler
+ * @ignore
+ */
+function assembleFlatVector<T extends Int | Float | FixedSizeBinary | Date_ | Timestamp | Time | Decimal | Interval>(this: VectorAssembler, vector: V<T>) {
+    return addBuffer.call(this, vector.values.subarray(0, vector.length * vector.stride));
+}
+/**
+ * Assembles a Utf8 or Binary vector: first the value offsets rebased by the first
+ * offset, then the slice of the values buffer that those offsets cover.
+ * Meant to be invoked with the assembler as `this`.
+ * @returns the assembler
+ * @ignore
+ */
+function assembleFlatListVector<T extends Utf8 | Binary>(this: VectorAssembler, vector: V<T>) {
+    const { length, values, valueOffsets } = vector;
+    const firstOffset = valueOffsets[0];
+    const lastOffset = valueOffsets[length];
+    // Never read past the end of the values buffer, whatever the last offset claims.
+    const byteLength = Math.min(lastOffset - firstOffset, values.byteLength - firstOffset);
+    // Push in the order FlatList types read their buffers
+    addBuffer.call(this, rebaseValueOffsets(-valueOffsets[0], length, valueOffsets)); // valueOffsets buffer first
+    addBuffer.call(this, values.subarray(firstOffset, firstOffset + byteLength)); // sliced values buffer second
+    return this;
+}
+/**
+ * Assembles a Map, List or FixedSizeList vector: the value offsets buffer when the
+ * vector has one, followed by the vector's first child.
+ * Meant to be invoked with the assembler as `this`.
+ * @returns the assembler
+ * @ignore
+ */
+function assembleListVector<T extends Map_ | List | FixedSizeList>(this: VectorAssembler, vector: V<T>) {
+    const { length, valueOffsets } = vector;
+    // If we have valueOffsets (MapVector, ListVector), push that buffer first
+    if (valueOffsets) {
+        // NOTE(review): this passes +valueOffsets[0] to rebaseValueOffsets, whereas
+        // assembleFlatListVector and assembleUnion pass -valueOffsets[0] -- confirm
+        // which sign is intended for vectors whose first offset is non-zero.
+        addBuffer.call(this, rebaseValueOffsets(valueOffsets[0], length, valueOffsets));
+    }
+    // Then insert the List's values child
+    return this.visit(vector.getChildAt(0)!);
+}
+/**
+ * Assembles a Struct or Union vector by visiting each of its non-null children in
+ * order. Meant to be invoked with the assembler as `this`.
+ * @returns the result of the first child visit (undefined when there are no children).
+ * @ignore
+ */
+function assembleNestedVector<T extends Struct | Union>(this: VectorAssembler, vector: V<T>) {
+    // One child vector per child field of the type; missing children are filtered out.
+    return this.visitMany(vector.type.children.map((_, i) => vector.getChildAt(i)!).filter(Boolean))[0];
+}
+// Attach the assemble* functions to the prototype as the type-specific visit methods.
+// Each of them declares `this: VectorAssembler`, so it runs against the assembler
+// that is doing the visiting.
+VectorAssembler.prototype.visitBool = assembleBoolVector;
+VectorAssembler.prototype.visitInt = assembleFlatVector;
+VectorAssembler.prototype.visitFloat = assembleFlatVector;
+VectorAssembler.prototype.visitUtf8 = assembleFlatListVector;
+VectorAssembler.prototype.visitBinary = assembleFlatListVector;
+VectorAssembler.prototype.visitFixedSizeBinary = assembleFlatVector;
+VectorAssembler.prototype.visitDate = assembleFlatVector;
+VectorAssembler.prototype.visitTimestamp = assembleFlatVector;
+VectorAssembler.prototype.visitTime = assembleFlatVector;
+VectorAssembler.prototype.visitDecimal = assembleFlatVector;
+VectorAssembler.prototype.visitList = assembleListVector;
+VectorAssembler.prototype.visitStruct = assembleNestedVector;
+VectorAssembler.prototype.visitUnion = assembleUnion;
+VectorAssembler.prototype.visitInterval = assembleFlatVector;
+VectorAssembler.prototype.visitFixedSizeList = assembleListVector;
+VectorAssembler.prototype.visitMap = assembleListVector;
diff --git a/src/arrow/js/src/visitor/vectorctor.ts b/src/arrow/js/src/visitor/vectorctor.ts
new file mode 100644
index 000000000..5db268c00
--- /dev/null
+++ b/src/arrow/js/src/visitor/vectorctor.ts
@@ -0,0 +1,99 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+import { Data } from '../data';
+import { Type } from '../enum';
+import { DataType } from '../type';
+import { Visitor } from '../visitor';
+import { VectorType, VectorCtor } from '../interfaces';
+import { BinaryVector } from '../vector/binary';
+import { BoolVector } from '../vector/bool';
+import { DateVector, DateDayVector, DateMillisecondVector } from '../vector/date';
+import { DecimalVector } from '../vector/decimal';
+import { DictionaryVector } from '../vector/dictionary';
+import { FixedSizeBinaryVector } from '../vector/fixedsizebinary';
+import { FixedSizeListVector } from '../vector/fixedsizelist';
+import { FloatVector, Float16Vector, Float32Vector, Float64Vector } from '../vector/float';
+import { IntervalVector, IntervalDayTimeVector, IntervalYearMonthVector } from '../vector/interval';
+import { IntVector, Int8Vector, Int16Vector, Int32Vector, Int64Vector, Uint8Vector, Uint16Vector, Uint32Vector, Uint64Vector } from '../vector/int';
+import { ListVector } from '../vector/list';
+import { MapVector } from '../vector/map';
+import { NullVector } from '../vector/null';
+import { StructVector } from '../vector/struct';
+import { TimestampVector, TimestampSecondVector, TimestampMillisecondVector, TimestampMicrosecondVector, TimestampNanosecondVector } from '../vector/timestamp';
+import { TimeVector, TimeSecondVector, TimeMillisecondVector, TimeMicrosecondVector, TimeNanosecondVector } from '../vector/time';
+import { UnionVector, DenseUnionVector, SparseUnionVector } from '../vector/union';
+import { Utf8Vector } from '../vector/utf8';
+/**
+ * Typed call signatures for `GetVectorConstructor`. `visit` yields a
+ * `VectorCtor<T>`, `visitMany` yields an array of them, and `getVisitFn`
+ * yields a zero-argument function returning one; `getVisitFn` additionally
+ * accepts a `VectorType<T>`, `Data<T>` or `DataType` `T`.
+ * This interface shares its name with the class declared right after it, so
+ * TypeScript merges the two declarations.
+ * @ignore
+ */
+export interface GetVectorConstructor extends Visitor {
+ visit<T extends Type>(node: T): VectorCtor<T>;
+ visitMany <T extends Type>(nodes: T[]): VectorCtor<T>[];
+ getVisitFn<T extends Type>(node: T): () => VectorCtor<T>;
+ getVisitFn<T extends DataType>(node: VectorType<T> | Data<T> | T): () => VectorCtor<T>;
+/**
+ * Visitor whose `visit*` methods each return one vector constructor imported
+ * from `../vector/*`; for example `visitInt8` returns `Int8Vector` and
+ * `visitMap` returns `MapVector`. None of the methods takes an argument or
+ * reads instance state, so every call returns the same constant.
+ * @ignore
+ */
+export class GetVectorConstructor extends Visitor {
+ public visitNull () { return NullVector; }
+ public visitBool () { return BoolVector; }
+ public visitInt () { return IntVector; }
+ public visitInt8 () { return Int8Vector; }
+ public visitInt16 () { return Int16Vector; }
+ public visitInt32 () { return Int32Vector; }
+ public visitInt64 () { return Int64Vector; }
+ public visitUint8 () { return Uint8Vector; }
+ public visitUint16 () { return Uint16Vector; }
+ public visitUint32 () { return Uint32Vector; }
+ public visitUint64 () { return Uint64Vector; }
+ public visitFloat () { return FloatVector; }
+ public visitFloat16 () { return Float16Vector; }
+ public visitFloat32 () { return Float32Vector; }
+ public visitFloat64 () { return Float64Vector; }
+ public visitUtf8 () { return Utf8Vector; }
+ public visitBinary () { return BinaryVector; }
+ public visitFixedSizeBinary () { return FixedSizeBinaryVector; }
+ public visitDate () { return DateVector; }
+ public visitDateDay () { return DateDayVector; }
+ public visitDateMillisecond () { return DateMillisecondVector; }
+ public visitTimestamp () { return TimestampVector; }
+ public visitTimestampSecond () { return TimestampSecondVector; }
+ public visitTimestampMillisecond () { return TimestampMillisecondVector; }
+ public visitTimestampMicrosecond () { return TimestampMicrosecondVector; }
+ public visitTimestampNanosecond () { return TimestampNanosecondVector; }
+ public visitTime () { return TimeVector; }
+ public visitTimeSecond () { return TimeSecondVector; }
+ public visitTimeMillisecond () { return TimeMillisecondVector; }
+ public visitTimeMicrosecond () { return TimeMicrosecondVector; }
+ public visitTimeNanosecond () { return TimeNanosecondVector; }
+ public visitDecimal () { return DecimalVector; }
+ public visitList () { return ListVector; }
+ public visitStruct () { return StructVector; }
+ public visitUnion () { return UnionVector; }
+ public visitDenseUnion () { return DenseUnionVector; }
+ public visitSparseUnion () { return SparseUnionVector; }
+ public visitDictionary () { return DictionaryVector; }
+ public visitInterval () { return IntervalVector; }
+ public visitIntervalDayTime () { return IntervalDayTimeVector; }
+ public visitIntervalYearMonth () { return IntervalYearMonthVector; }
+ public visitFixedSizeList () { return FixedSizeListVector; }
+ public visitMap () { return MapVector; }
+/**
+ * Module-level `GetVectorConstructor` instance, exported so importers can use
+ * it without constructing their own.
+ * @ignore
+ */
+export const instance = new GetVectorConstructor();
diff --git a/src/arrow/js/src/visitor/vectorloader.ts b/src/arrow/js/src/visitor/vectorloader.ts
new file mode 100644
index 000000000..0a7bb41d8
--- /dev/null
+++ b/src/arrow/js/src/visitor/vectorloader.ts
@@ -0,0 +1,141 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+import { Data } from '../data';
+import * as type from '../type';
+import { Field } from '../schema';
+import { Vector } from '../vector';
+import { DataType } from '../type';
+import { Visitor } from '../visitor';
+import { packBools } from '../util/bit';
+import { encodeUtf8 } from '../util/utf8';
+import { Int64, Int128 } from '../util/int';
+import { UnionMode, DateUnit } from '../enum';
+import { toArrayBufferView } from '../util/buffer';
+import { BufferRegion, FieldNode } from '../ipc/metadata/message';
+/**
+ * Typed call signatures for `VectorLoader`: `visit` accepts either a
+ * `Field<T>` or a bare `DataType` `T` and is typed to return `Data<T>`;
+ * `visitMany` does the same for an array and returns `Data<T>[]`.
+ * This interface shares its name with the class declared right after it, so
+ * TypeScript merges the two declarations.
+ * @ignore
+ */
+export interface VectorLoader extends Visitor {
+ visit<T extends DataType>(node: Field<T> | T): Data<T>;
+ visitMany<T extends DataType>(nodes: (Field<T> | T)[]): Data<T>[];
+/**
+ * Visitor that builds `Data` objects for a type tree from one `bytes` array
+ * plus two parallel lists: `nodes` (`FieldNode`s supplying `length` and
+ * `nullCount`) and `buffers` (`BufferRegion`s supplying `offset`/`length`
+ * ranges into `bytes`). Both lists are read strictly in order through the
+ * `nodesIndex` / `buffersIndex` cursors.
+ *
+ * Each `visit*` method takes the next field node as the default value of its
+ * second parameter and then reads buffers in the order its arguments are
+ * written: null bitmap first, then offsets and/or type ids where the type has
+ * them, then the data buffer or the child visits.
+ * `dictionaries` is a number-keyed map consulted by `readDictionary`.
+ * @ignore
+ */
+export class VectorLoader extends Visitor {
+ private bytes: Uint8Array;
+ private nodes: FieldNode[];
+ private nodesIndex = -1; // pre-incremented by nextFieldNode(), so the first node read is index 0
+ private buffers: BufferRegion[];
+ private buffersIndex = -1; // pre-incremented by nextBufferRange(), so the first region read is index 0
+ private dictionaries: Map<number, Vector<any>>;
+ constructor(bytes: Uint8Array, nodes: FieldNode[], buffers: BufferRegion[], dictionaries: Map<number, Vector<any>>) {
+ super();
+ this.bytes = bytes;
+ this.nodes = nodes;
+ this.buffers = buffers;
+ this.dictionaries = dictionaries;
+ }
+ public visit<T extends DataType>(node: Field<T> | T): Data<T> {
+ return super.visit(node instanceof Field ? node.type : node); // a Field is unwrapped to its type before dispatch
+ }
+ public visitNull <T extends type.Null> (type: T, { length, } = this.nextFieldNode()) { return Data.Null(type, 0, length); }
+ public visitBool <T extends type.Bool> (type: T, { length, nullCount } = this.nextFieldNode()) { return Data.Bool(type, 0, length, nullCount, this.readNullBitmap(type, nullCount), this.readData(type)); }
+ public visitInt <T extends type.Int> (type: T, { length, nullCount } = this.nextFieldNode()) { return Data.Int(type, 0, length, nullCount, this.readNullBitmap(type, nullCount), this.readData(type)); }
+ public visitFloat <T extends type.Float> (type: T, { length, nullCount } = this.nextFieldNode()) { return Data.Float(type, 0, length, nullCount, this.readNullBitmap(type, nullCount), this.readData(type)); }
+ public visitUtf8 <T extends type.Utf8> (type: T, { length, nullCount } = this.nextFieldNode()) { return Data.Utf8(type, 0, length, nullCount, this.readNullBitmap(type, nullCount), this.readOffsets(type), this.readData(type)); }
+ public visitBinary <T extends type.Binary> (type: T, { length, nullCount } = this.nextFieldNode()) { return Data.Binary(type, 0, length, nullCount, this.readNullBitmap(type, nullCount), this.readOffsets(type), this.readData(type)); }
+ public visitFixedSizeBinary <T extends type.FixedSizeBinary> (type: T, { length, nullCount } = this.nextFieldNode()) { return Data.FixedSizeBinary(type, 0, length, nullCount, this.readNullBitmap(type, nullCount), this.readData(type)); }
+ public visitDate <T extends type.Date_> (type: T, { length, nullCount } = this.nextFieldNode()) { return Data.Date(type, 0, length, nullCount, this.readNullBitmap(type, nullCount), this.readData(type)); }
+ public visitTimestamp <T extends type.Timestamp> (type: T, { length, nullCount } = this.nextFieldNode()) { return Data.Timestamp(type, 0, length, nullCount, this.readNullBitmap(type, nullCount), this.readData(type)); }
+ public visitTime <T extends type.Time> (type: T, { length, nullCount } = this.nextFieldNode()) { return Data.Time(type, 0, length, nullCount, this.readNullBitmap(type, nullCount), this.readData(type)); }
+ public visitDecimal <T extends type.Decimal> (type: T, { length, nullCount } = this.nextFieldNode()) { return Data.Decimal(type, 0, length, nullCount, this.readNullBitmap(type, nullCount), this.readData(type)); }
+ public visitList <T extends type.List> (type: T, { length, nullCount } = this.nextFieldNode()) { return Data.List(type, 0, length, nullCount, this.readNullBitmap(type, nullCount), this.readOffsets(type), this.visit(type.children[0])); }
+ public visitStruct <T extends type.Struct> (type: T, { length, nullCount } = this.nextFieldNode()) { return Data.Struct(type, 0, length, nullCount, this.readNullBitmap(type, nullCount), this.visitMany(type.children)); }
+ public visitUnion <T extends type.Union> (type: T ) { return type.mode === UnionMode.Sparse ? this.visitSparseUnion(type as type.SparseUnion) : this.visitDenseUnion(type as type.DenseUnion); } // dispatches on type.mode; reads no field node itself
+ public visitDenseUnion <T extends type.DenseUnion> (type: T, { length, nullCount } = this.nextFieldNode()) { return Data.Union(type, 0, length, nullCount, this.readNullBitmap(type, nullCount), this.readTypeIds(type), this.readOffsets(type), this.visitMany(type.children)); }
+ public visitSparseUnion <T extends type.SparseUnion> (type: T, { length, nullCount } = this.nextFieldNode()) { return Data.Union(type, 0, length, nullCount, this.readNullBitmap(type, nullCount), this.readTypeIds(type), this.visitMany(type.children)); }
+ public visitDictionary <T extends type.Dictionary> (type: T, { length, nullCount } = this.nextFieldNode()) { return Data.Dictionary(type, 0, length, nullCount, this.readNullBitmap(type, nullCount), this.readData(type.indices), this.readDictionary(type)); }
+ public visitInterval <T extends type.Interval> (type: T, { length, nullCount } = this.nextFieldNode()) { return Data.Interval(type, 0, length, nullCount, this.readNullBitmap(type, nullCount), this.readData(type)); }
+ public visitFixedSizeList <T extends type.FixedSizeList> (type: T, { length, nullCount } = this.nextFieldNode()) { return Data.FixedSizeList(type, 0, length, nullCount, this.readNullBitmap(type, nullCount), this.visit(type.children[0])); }
+ public visitMap <T extends type.Map_> (type: T, { length, nullCount } = this.nextFieldNode()) { return Data.Map(type, 0, length, nullCount, this.readNullBitmap(type, nullCount), this.readOffsets(type), this.visit(type.children[0])); }
+ protected nextFieldNode() { return this.nodes[++this.nodesIndex]; }
+ protected nextBufferRange() { return this.buffers[++this.buffersIndex]; }
+ protected readNullBitmap<T extends DataType>(type: T, nullCount: number, buffer = this.nextBufferRange()) {
+ return nullCount > 0 && this.readData(type, buffer) || new Uint8Array(0); // the region is always consumed by the default parameter, but only read when nullCount > 0
+ }
+ protected readOffsets<T extends DataType>(type: T, buffer?: BufferRegion) { return this.readData(type, buffer); }
+ protected readTypeIds<T extends DataType>(type: T, buffer?: BufferRegion) { return this.readData(type, buffer); }
+ protected readData<T extends DataType>(_type: T, { length, offset } = this.nextBufferRange()) {
+ return this.bytes.subarray(offset, offset + length); // subarray: a view onto this.bytes, not a copy
+ }
+ protected readDictionary<T extends type.Dictionary>(type: T): Vector<T['dictionary']> {
+ return this.dictionaries.get(!; // NOTE(review): the lookup key inside get( is missing in this copy of the patch; restore it from the upstream file
+ }
+/**
+ * `VectorLoader` variant that reads from `sources`, an array of arrays,
+ * instead of a byte buffer (the base class is given an empty `Uint8Array`).
+ * For every buffer region only `offset` is used, as the index into `sources`;
+ * the region's `length` is not read.
+ *
+ * `readData` picks a conversion by type: Timestamp, 64-bit Int/Time and
+ * millisecond Date go through `Int64.convertArray`; Decimal through
+ * `Int128.convertArray`; Binary and FixedSizeBinary through
+ * `binaryDataFromJSON`; Bool through `packBools`; Utf8 through `encodeUtf8`;
+ * anything else is coerced entry by entry with unary plus.
+ * @ignore
+ */
+export class JSONVectorLoader extends VectorLoader {
+ private sources: any[][];
+ constructor(sources: any[][], nodes: FieldNode[], buffers: BufferRegion[], dictionaries: Map<number, Vector<any>>) {
+ super(new Uint8Array(0), nodes, buffers, dictionaries); // no byte buffer: all data comes from sources
+ this.sources = sources;
+ }
+ protected readNullBitmap<T extends DataType>(_type: T, nullCount: number, { offset } = this.nextBufferRange()) {
+ return nullCount <= 0 ? new Uint8Array(0) : packBools(this.sources[offset]); // the region is consumed either way
+ }
+ protected readOffsets<T extends DataType>(_type: T, { offset } = this.nextBufferRange()) {
+ return toArrayBufferView(Uint8Array, toArrayBufferView(Int32Array, this.sources[offset]));
+ }
+ protected readTypeIds<T extends DataType>(type: T, { offset } = this.nextBufferRange()) {
+ return toArrayBufferView(Uint8Array, toArrayBufferView(type.ArrayType, this.sources[offset]));
+ }
+ protected readData<T extends DataType>(type: T, { offset } = this.nextBufferRange()) {
+ const { sources } = this;
+ if (DataType.isTimestamp(type)) {
+ return toArrayBufferView(Uint8Array, Int64.convertArray(sources[offset] as string[]));
+ } else if ((DataType.isInt(type) || DataType.isTime(type)) && type.bitWidth === 64) {
+ return toArrayBufferView(Uint8Array, Int64.convertArray(sources[offset] as string[]));
+ } else if (DataType.isDate(type) && type.unit === DateUnit.MILLISECOND) {
+ return toArrayBufferView(Uint8Array, Int64.convertArray(sources[offset] as string[]));
+ } else if (DataType.isDecimal(type)) {
+ return toArrayBufferView(Uint8Array, Int128.convertArray(sources[offset] as string[]));
+ } else if (DataType.isBinary(type) || DataType.isFixedSizeBinary(type)) {
+ return binaryDataFromJSON(sources[offset] as string[]);
+ } else if (DataType.isBool(type)) {
+ return packBools(sources[offset] as number[]);
+ } else if (DataType.isUtf8(type)) {
+ return encodeUtf8((sources[offset] as string[]).join('')); // all strings are concatenated before encoding
+ }
+ return toArrayBufferView(Uint8Array, toArrayBufferView(type.ArrayType, sources[offset].map((x) => +x))); // fallback: unary plus on each entry, then type.ArrayType, then Uint8Array
+ }
+/**
+ * Decodes hex text into bytes: concatenates every string in `values`, then
+ * parses each consecutive two-character slice as a base-16 number and stores
+ * it in a `Uint8Array` sized `joined.length / 2`.
+ *
+ * NOTE(review): there is no validation here; this presumably relies on the
+ * joined text having an even length and containing only hex digits. Confirm
+ * against the callers.
+ * @ignore
+ */
+function binaryDataFromJSON(values: string[]) {
+ // "DATA": ["49BC7D5B6C47D2","3F5FB6D9322026"]
+ // There are definitely more efficient ways to do this... but it gets the
+ // job done.
+ const joined = values.join('');
+ const data = new Uint8Array(joined.length / 2);
+ for (let i = 0; i < joined.length; i += 2) {
+ data[i >> 1] = parseInt(joined.substr(i, 2), 16); // i >> 1 is the byte index for the pair starting at character i
+ }
+ return data;