diff options
Diffstat (limited to 'src/arrow/js/src/builder')
22 files changed, 1200 insertions, 0 deletions
diff --git a/src/arrow/js/src/builder/binary.ts b/src/arrow/js/src/builder/binary.ts new file mode 100644 index 000000000..829da5c97 --- /dev/null +++ b/src/arrow/js/src/builder/binary.ts @@ -0,0 +1,54 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 

import { Binary } from '../type';
import { toUint8Array } from '../util/buffer';
import { BufferBuilder } from './buffer';
import { VariableWidthBuilder, BuilderOptions } from '../builder';

/**
 * Builder for `Binary` columns. Values are normalized with `toUint8Array`
 * and written into a byte-oriented `BufferBuilder` when pending values
 * are flushed.
 * @ignore
 */
export class BinaryBuilder<TNull = any> extends VariableWidthBuilder<Binary, TNull> {
    constructor(opts: BuilderOptions<Binary, TNull>) {
        super(opts);
        this._values = new BufferBuilder(new Uint8Array(0));
    }

    /**
     * Size estimate in bytes: the pending (unflushed) length, plus 4 bytes
     * per element, plus the byte lengths of whichever of the offsets,
     * values and nulls builders currently exist.
     * NOTE(review): the 4 is presumably the width of one int32 offset -- confirm.
     */
    public get byteLength(): number {
        let total = this._pendingLength + (this.length * 4);
        if (this._offsets) {
            total += this._offsets.byteLength;
        }
        if (this._values) {
            total += this._values.byteLength;
        }
        if (this._nulls) {
            total += this._nulls.byteLength;
        }
        return total;
    }

    /** Converts `value` with `toUint8Array` and hands it to the base `setValue`. */
    public setValue(index: number, value: Uint8Array) {
        const bytes = toUint8Array(value);
        return super.setValue(index, bytes);
    }

    /**
     * Writes the pending values back-to-back into the values buffer and
     * records each value's length in the offsets builder. Entries whose
     * value is `undefined` only record a length of 0.
     * NOTE(review): writing starts at position 0 of the reserved buffer, so
     * this presumably always runs against an empty values buffer -- confirm
     * against VariableWidthBuilder.
     */
    protected _flushPending(pending: Map<number, Uint8Array | undefined>, pendingLength: number) {
        const offsets = this._offsets;
        const target = this._values.reserve(pendingLength).buffer;
        let cursor = 0;
        for (const [slot, bytes] of pending) {
            if (bytes === undefined) {
                offsets.set(slot, 0);
                continue;
            }
            target.set(bytes, cursor);
            offsets.set(slot, bytes.length);
            cursor += bytes.length;
        }
    }
}

// diff --git a/src/arrow/js/src/builder/bool.ts b/src/arrow/js/src/builder/bool.ts new file mode 100644 index 000000000..5c0e0950e --- /dev/null +++ b/src/arrow/js/src/builder/bool.ts @@ -0,0 +1,31 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

import { Bool } from '../type';
import { BitmapBufferBuilder } from './buffer';
import { Builder, BuilderOptions } from '../builder';

/**
 * Builder for `Bool` columns; values are stored in a `BitmapBufferBuilder`.
 * @ignore
 */
export class BoolBuilder<TNull = any> extends Builder<Bool, TNull> {
    constructor(options: BuilderOptions<Bool, TNull>) {
        super(options);
        this._values = new BitmapBufferBuilder();
    }

    /** Stores `value` at `index` after coercing the boolean to 0 or 1. */
    public setValue(index: number, value: boolean) {
        const bit = +value;
        this._values.set(index, bit);
    }
}

// diff --git a/src/arrow/js/src/builder/buffer.ts b/src/arrow/js/src/builder/buffer.ts new file mode 100644 index 000000000..3c20cc001 --- /dev/null +++ b/src/arrow/js/src/builder/buffer.ts @@ -0,0 +1,182 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

import { memcpy } from '../util/buffer';
import { BigIntAvailable, BigInt64Array, BigUint64Array } from '../util/compat';
import {
    TypedArray, TypedArrayConstructor,
    BigIntArray, BigIntArrayConstructor
} from '../interfaces';

/** @ignore */ type DataValue<T> = T extends TypedArray ? number : T extends BigIntArray ? WideValue<T> : T;
/** @ignore */ type WideValue<T extends BigIntArray> = T extends BigIntArray ? bigint | Int32Array | Uint32Array : never;
/** @ignore */ type ArrayCtor<T extends TypedArray | BigIntArray> =
    T extends TypedArray ? TypedArrayConstructor<T> :
    T extends BigIntArray ? BigIntArrayConstructor<T> :
    any;

/**
 * Rounds an element count up so that `len * BPE` bytes is a multiple of 64
 * (and never less than 64 bytes), returning the result in elements again.
 * @ignore
 */
const roundLengthUpToNearest64Bytes = (len: number, BPE: number) => {
    const alignedBytes = ((len * BPE) + 63) & ~63;
    return (alignedBytes || 64) / BPE;
};

/**
 * Returns a `subarray` view of the first `len` elements when `arr` is long
 * enough; otherwise allocates a new array of `len` elements with the same
 * constructor and copies `arr` into it via `memcpy`.
 * @ignore
 */
const sliceOrExtendArray = <T extends TypedArray | BigIntArray>(arr: T, len = 0): T => {
    if (arr.length >= len) {
        return arr.subarray(0, len) as T;
    }
    return memcpy(new (arr.constructor as any)(len), arr, 0) as T;
};

/** @ignore */
export interface BufferBuilder<T extends TypedArray | BigIntArray = any, TValue = DataValue<T>> {
    readonly offset: number;
}

/**
 * Growable wrapper around a typed array. `length` counts logical elements;
 * each logical element occupies `stride` slots of the backing `buffer`.
 * @ignore
 */
export class BufferBuilder<T extends TypedArray | BigIntArray = any, TValue = DataValue<T>> {

    public buffer: T;
    public length: number;
    public readonly stride: number;
    public readonly ArrayType: ArrayCtor<T>;
    public readonly BYTES_PER_ELEMENT: number;

    constructor(buffer: T, stride = 1) {
        this.buffer = buffer;
        this.stride = stride;
        this.BYTES_PER_ELEMENT = buffer.BYTES_PER_ELEMENT;
        this.ArrayType = buffer.constructor as ArrayCtor<T>;
        // The logical length is the slot count divided by the stride, truncated to an int32.
        this.length = buffer.length / stride | 0;
        this._resize(this.length);
    }

    /** Bytes covered by the logical length (truncated to an int32). */
    public get byteLength() { return this.length * this.stride * this.BYTES_PER_ELEMENT | 0; }
    /** Number of logical elements the backing buffer can currently hold. */
    public get reservedLength() { return this.buffer.length / this.stride; }
    /** Byte length of the backing buffer. */
    public get reservedByteLength() { return this.buffer.byteLength; }

    /** No-op in the base class; subclasses override it to write `value` at `index`. */
    // @ts-ignore
    public set(index: number, value: TValue) { return this; }

    /** Writes `value` at the current logical length via `set`. */
    public append(value: TValue) { return this.set(this.length, value); }

    /**
     * Grows the logical length by `extra` (ignored unless `extra > 0`) and
     * resizes the backing buffer once the required slot count reaches its
     * capacity: to the 64-byte-rounded requirement when the buffer is empty,
     * otherwise to the 64-byte-rounded double of the requirement.
     */
    public reserve(extra: number) {
        if (!(extra > 0)) {
            return this;
        }
        this.length += extra;
        const needed = this.length * this.stride;
        const capacity = this.buffer.length;
        if (needed >= capacity) {
            const growth = capacity === 0 ? 1 : 2;
            this._resize(roundLengthUpToNearest64Bytes(needed * growth, this.BYTES_PER_ELEMENT));
        }
        return this;
    }

    /**
     * Returns the first `length` logical elements, padded up to a 64-byte
     * boundary, as either a view of the current buffer or a padded copy,
     * then clears the builder.
     */
    public flush(length = this.length) {
        const padded = roundLengthUpToNearest64Bytes(length * this.stride, this.BYTES_PER_ELEMENT);
        const snapshot = sliceOrExtendArray<T>(this.buffer, padded);
        this.clear();
        return snapshot;
    }

    /** Resets the logical length to 0 and replaces the buffer with an empty one. */
    public clear() {
        this.length = 0;
        this._resize(0);
        return this;
    }

    /** Allocates a new backing array of `newLength` slots and `memcpy`s the old buffer into it. */
    protected _resize(newLength: number) {
        const resized = memcpy(new this.ArrayType(newLength), this.buffer) as T;
        this.buffer = resized;
        return resized;
    }
}

(BufferBuilder.prototype as any).offset = 0;

/**
 * BufferBuilder whose values are plain numbers.
 * NOTE(review): `get` indexes the raw buffer without applying `stride`,
 * while `set` writes at `index * stride`; the two agree only when the
 * stride is 1 -- confirm this is intended.
 * @ignore
 */
export class DataBufferBuilder<T extends TypedArray> extends BufferBuilder<T, number> {
    /** Value returned by `get` for the last logical index. */
    public last() { return this.get(this.length - 1); }
    public get(index: number) { return this.buffer[index]; }
    /** Reserves room up to and including `index`, then stores `value`. */
    public set(index: number, value: number) {
        this.reserve(index - this.length + 1);
        const slot = index * this.stride;
        this.buffer[slot] = value;
        return this;
    }
}

/**
 * Bit-packed builder (stride of 1/8, i.e. eight logical elements per byte)
 * that also tracks how many bits are currently set in `numValid`.
 * @ignore
 */
export class BitmapBufferBuilder extends DataBufferBuilder<Uint8Array> {

    constructor(data = new Uint8Array(0)) { super(data, 1 / 8); }

    public numValid = 0;
    /** Logical length minus the number of set bits. */
    public get numInvalid() { return this.length - this.numValid; }
    /** Reads bit `idx` as 0 or 1. */
    public get(idx: number) { return this.buffer[idx >> 3] >> idx % 8 & 1; }
    /**
     * Sets bit `idx` when `val` is truthy and clears it otherwise, adjusting
     * `numValid` only when the stored bit actually changes.
     */
    public set(idx: number, val: number) {
        const bytes = this.reserve(idx - this.length + 1).buffer;
        const byteIndex = idx >> 3;
        const bitIndex = idx % 8;
        const mask = 1 << bitIndex;
        const current = bytes[byteIndex] >> bitIndex & 1;
        if (val) {
            // Truthy value over a 0 bit: flip it on and count it.
            if (current === 0) {
                bytes[byteIndex] |= mask;
                ++this.numValid;
            }
        } else if (current === 1) {
            // Falsy value over a 1 bit: flip it off and un-count it.
            bytes[byteIndex] &= ~mask;
            --this.numValid;
        }
        return this;
    }
    public clear() {
        this.numValid = 0;
        return super.clear();
    }
}

/**
 * Builder of cumulative int32 offsets. The backing array starts with one
 * slot, and `set(index, value)` stores `offset[index] + value` in slot
 * `index + 1`.
 * @ignore
 */
export class OffsetsBufferBuilder extends DataBufferBuilder<Int32Array> {
    constructor(data = new Int32Array(1)) { super(data, 1); }
    public append(value: number) {
        return this.set(this.length - 1, value);
    }
    /**
     * Records a length of `value` for element `index`. When `index` lies
     * beyond the last written offset, the slots in between are filled with
     * that last offset first.
     */
    public set(index: number, value: number) {
        const last = this.length - 1;
        const offsets = this.reserve(index - last + 1).buffer;
        const next = index + 1;
        if (last < index) {
            offsets.fill(offsets[last], last, next);
        }
        offsets[next] = offsets[index] + value;
        return this;
    }
    /**
     * Flushes `length + 1` offsets; when `length` exceeds the current
     * logical length, a zero-length entry is written at `length - 1` first.
     */
    public flush(length = this.length - 1) {
        if (length > this.length) {
            this.set(length - 1, 0);
        }
        return super.flush(length + 1);
    }
}

/**
 * BufferBuilder that keeps a 64-bit view (`buffer64`) over the same memory
 * as its 32-bit `buffer`, refreshed on every resize when `BigIntAvailable`.
 * @ignore
 */
export class WideBufferBuilder<T extends TypedArray, R extends BigIntArray> extends BufferBuilder<T, DataValue<T>> {
    public buffer64!: R;
    protected _ArrayType64!: BigIntArrayConstructor<R>;
    /** 64-bit array constructor, chosen lazily: signed when `buffer` is an Int32Array, unsigned otherwise. */
    public get ArrayType64() {
        if (!this._ArrayType64) {
            this._ArrayType64 = (this.buffer instanceof Int32Array ? BigInt64Array : BigUint64Array) as BigIntArrayConstructor<R>;
        }
        return this._ArrayType64;
    }
    /**
     * Stores `value` at `index`: bigints go through `buffer64`, numbers go
     * into a single slot of `buffer`, and anything else is copied into
     * `buffer` as a typed array.
     */
    public set(index: number, value: DataValue<T>) {
        this.reserve(index - this.length + 1);
        if (typeof value === 'bigint') {
            this.buffer64[index] = value;
        } else if (typeof value === 'number') {
            this.buffer[index * this.stride] = value;
        } else {
            this.buffer.set(value as TypedArray, index * this.stride);
        }
        return this;
    }
    protected _resize(newLength: number) {
        const data = super._resize(newLength);
        const length = data.byteLength / (this.BYTES_PER_ELEMENT * this.stride);
        if (BigIntAvailable) {
            this.buffer64 = new this.ArrayType64(data.buffer, data.byteOffset, length);
        }
        return data;
    }
}

// diff --git a/src/arrow/js/src/builder/date.ts b/src/arrow/js/src/builder/date.ts new file mode 100644 index 000000000..e9748b58c --- /dev/null +++ b/src/arrow/js/src/builder/date.ts @@ -0,0 +1,26 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

import { FixedWidthBuilder } from '../builder';
import { Date_, DateDay, DateMillisecond } from '../type';

/**
 * Fixed-width builder for any `Date_` type; adds no behavior of its own.
 * @ignore
 */
export class DateBuilder<T extends Date_ = Date_, TNull = any> extends FixedWidthBuilder<T, TNull> {}
/**
 * `DateBuilder` specialized to `DateDay`.
 * @ignore
 */
export class DateDayBuilder<TNull = any> extends DateBuilder<DateDay, TNull> {}
/**
 * `DateBuilder` specialized to `DateMillisecond`.
 * @ignore
 */
export class DateMillisecondBuilder<TNull = any> extends DateBuilder<DateMillisecond, TNull> {}

// diff --git a/src/arrow/js/src/builder/decimal.ts b/src/arrow/js/src/builder/decimal.ts new file mode 100644 index 000000000..5814abd5b --- /dev/null +++ b/src/arrow/js/src/builder/decimal.ts @@ -0,0 +1,22 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

import { Decimal } from '../type';
// decimal.ts also imports { FixedWidthBuilder } from '../builder' (same binding as imported above).

/**
 * Fixed-width builder for `Decimal` columns; adds no behavior of its own.
 * @ignore
 */
export class DecimalBuilder<TNull = any> extends FixedWidthBuilder<Decimal, TNull> {}

// diff --git a/src/arrow/js/src/builder/dictionary.ts b/src/arrow/js/src/builder/dictionary.ts new file mode 100644 index 000000000..6602825dd --- /dev/null +++ b/src/arrow/js/src/builder/dictionary.ts @@ -0,0 +1,98 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.
// See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

import { Vector } from '../vector';
import { IntBuilder } from './int';
import { Dictionary, DataType } from '../type';
import { Builder, BuilderOptions } from '../builder';

/** Maps a dictionary value to the key used to de-duplicate it. */
type DictionaryHashFunction = (x: any) => string | number;

/** Builder options plus an optional replacement for `DictionaryBuilder.valueToKey`. */
export interface DictionaryBuilderOptions<T extends DataType = any, TNull = any> extends BuilderOptions<T, TNull> {
    dictionaryHashFunction?: DictionaryHashFunction;
}

/**
 * Builder for `Dictionary` columns. It drives two inner builders:
 * `indices` for the per-row keys and `dictionary` for the distinct values,
 * and remembers which value key maps to which dictionary index.
 * @ignore
 */
export class DictionaryBuilder<T extends Dictionary, TNull = any> extends Builder<T, TNull> {

    protected _dictionaryOffset: number;
    protected _dictionary?: Vector<T['dictionary']>;
    protected _keysToIndices: { [key: string]: number };
    public readonly indices: IntBuilder<T['indices']>;
    public readonly dictionary: Builder<T['dictionary']>;

    constructor({ 'type': type, 'nullValues': nulls, 'dictionaryHashFunction': hashFn }: DictionaryBuilderOptions<T, TNull>) {
        // The base builder gets a fresh Dictionary type built from the supplied one's fields.
        super({ type: new Dictionary(type.dictionary, type.indices, type.id, type.isOrdered) as T });
        this._nulls = null as any;
        this._dictionaryOffset = 0;
        this._keysToIndices = Object.create(null);
        // Null handling is delegated to the indices builder; the dictionary builder gets no null values.
        this.indices = Builder.new({ 'type': this.type.indices, 'nullValues': nulls }) as IntBuilder<T['indices']>;
        this.dictionary = Builder.new({ 'type': this.type.dictionary, 'nullValues': null }) as Builder<T['dictionary']>;
        if (typeof hashFn === 'function') {
            this.valueToKey = hashFn;
        }
    }

    public get values() { return this.indices.values; }
    public get nullCount() { return this.indices.nullCount; }
    public get nullBitmap() { return this.indices.nullBitmap; }
    /** Combined byte length of the indices and dictionary builders. */
    public get byteLength() { return this.indices.byteLength + this.dictionary.byteLength; }
    /** Combined reserved length of the indices and dictionary builders. */
    public get reservedLength() { return this.indices.reservedLength + this.dictionary.reservedLength; }
    /** Combined reserved byte length of the indices and dictionary builders. */
    public get reservedByteLength() { return this.indices.reservedByteLength + this.dictionary.reservedByteLength; }
    public isValid(value: T['TValue'] | TNull) { return this.indices.isValid(value); }

    /** Forwards to the indices builder and mirrors its length onto this builder. */
    public setValid(index: number, valid: boolean) {
        const keys = this.indices;
        valid = keys.setValid(index, valid);
        this.length = keys.length;
        return valid;
    }

    /**
     * Looks `value` up by its key; on first sight, appends it to the
     * dictionary builder and remembers its index (offset by the number of
     * entries already flushed). The index is then written to `indices`.
     */
    public setValue(index: number, value: T['TValue']) {
        const known = this._keysToIndices;
        const key = this.valueToKey(value);
        let dictIndex = known[key];
        if (dictIndex === undefined) {
            dictIndex = this._dictionaryOffset + this.dictionary.append(value).length - 1;
            known[key] = dictIndex;
        }
        return this.indices.setValue(index, dictIndex);
    }

    /**
     * Flushes the indices, attaches the dictionary accumulated so far
     * (previously flushed entries concatenated with the current ones) and
     * clears the inner builders. Unless the builder is finished, the
     * dictionary offset advances by the number of newly flushed entries.
     */
    public flush() {
        const type = this.type;
        const previous = this._dictionary;
        const current = this.dictionary.toVector();
        const data = this.indices.flush().clone(type);
        data.dictionary = previous ? previous.concat(current) : current;
        if (!this.finished) {
            this._dictionaryOffset += current.length;
        }
        this._dictionary = data.dictionary as Vector<T['dictionary']>;
        this.clear();
        return data;
    }

    /** Finishes both inner builders and forgets the key-to-index mapping. */
    public finish() {
        this.indices.finish();
        this.dictionary.finish();
        this._dictionaryOffset = 0;
        this._keysToIndices = Object.create(null);
        return super.finish();
    }

    public clear() {
        this.indices.clear();
        this.dictionary.clear();
        return super.clear();
    }

    /** Default key function: strings are used as-is, everything else is stringified. */
    public valueToKey(val: any): string | number {
        if (typeof val === 'string') {
            return val;
        }
        return `${val}`;
    }
}

// diff --git a/src/arrow/js/src/builder/fixedsizebinary.ts b/src/arrow/js/src/builder/fixedsizebinary.ts new file mode 100644 index 000000000..99aaf46a3 --- /dev/null +++ b/src/arrow/js/src/builder/fixedsizebinary.ts @@ -0,0 +1,22 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

import { FixedSizeBinary } from '../type';
import { FixedWidthBuilder } from '../builder';

/**
 * Fixed-width builder for `FixedSizeBinary` columns; adds no behavior of its own.
 * @ignore
 */
export class FixedSizeBinaryBuilder<TNull = any> extends FixedWidthBuilder<FixedSizeBinary, TNull> {}

// diff --git a/src/arrow/js/src/builder/fixedsizelist.ts b/src/arrow/js/src/builder/fixedsizelist.ts new file mode 100644 index 000000000..cc20f5ba2 --- /dev/null +++ b/src/arrow/js/src/builder/fixedsizelist.ts @@ -0,0 +1,41 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

import { Run } from './run';
import { Field } from '../schema';
import { Builder } from '../builder';
import { DataType, FixedSizeList } from '../type';

/**
 * Builder for `FixedSizeList` columns. Holds at most one child builder and
 * wraps each incoming value with a shared `Run` before handing it to the
 * base `setValue`.
 * @ignore
 */
export class FixedSizeListBuilder<T extends DataType = any, TNull = any> extends Builder<FixedSizeList<T>, TNull> {
    protected _run = new Run<T, TNull>();

    /** Binds `value` to the shared run and stores the result at `index`. */
    public setValue(index: number, value: T['TValue']) {
        super.setValue(index, this._run.bind(value));
    }

    /**
     * Registers the single child builder and rebuilds this builder's type
     * around a nullable field named `name` with the child's type.
     *
     * @param child Builder for the list elements.
     * @param name Name given to the child field (defaults to '0').
     * @returns The zero-based index of the added child.
     * @throws Error when a child has already been added.
     */
    public addChild(child: Builder<T>, name = '0') {
        if (this.numChildren > 0) {
            throw new Error('FixedSizeListBuilder can only have one child.');
        }
        // Array.prototype.push returns the new length, so the index of the
        // element just pushed is one less than that.
        const childIndex = this.children.push(child) - 1;
        this.type = new FixedSizeList(this.type.listSize, new Field(name, child.type, true));
        return childIndex;
    }

    /** Clears the shared run, then the base builder state. */
    public clear() {
        this._run.clear();
        return super.clear();
    }
}

// diff --git a/src/arrow/js/src/builder/float.ts b/src/arrow/js/src/builder/float.ts new file mode 100644 index 000000000..dbf4c0d06 --- /dev/null +++ b/src/arrow/js/src/builder/float.ts @@ -0,0 +1,45 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

import { float64ToUint16 } from '../util/math';
import { FixedWidthBuilder } from '../builder';
import { Float, Float16, Float32, Float64 } from '../type';

/**
 * Fixed-width builder for any `Float` type; adds no behavior of its own.
 * @ignore
 */
export class FloatBuilder<T extends Float = Float, TNull = any> extends FixedWidthBuilder<T, TNull> {}

/**
 * Builder for `Float16` columns.
 * @ignore
 */
export class Float16Builder<TNull = any> extends FloatBuilder<Float16, TNull> {
    /** Converts the JS number with `float64ToUint16` and stores the result. */
    public setValue(index: number, value: number) {
        const half = float64ToUint16(value);
        this._values.set(index, half);
    }
}

/**
 * Builder for `Float32` columns.
 * @ignore
 */
export class Float32Builder<TNull = any> extends FloatBuilder<Float32, TNull> {
    /** Stores `value` in the values buffer at `index`. */
    public setValue(index: number, value: number) {
        const values = this._values;
        values.set(index, value);
    }
}

/**
 * Builder for `Float64` columns.
 * @ignore
 */
export class Float64Builder<TNull = any> extends FloatBuilder<Float64, TNull> {
    /** Stores `value` in the values buffer at `index`. */
    public setValue(index: number, value: number) {
        const values = this._values;
        values.set(index, value);
    }
}

// diff --git a/src/arrow/js/src/builder/index.ts b/src/arrow/js/src/builder/index.ts new file mode 100644 index 000000000..dfd9d54f1 --- /dev/null +++ b/src/arrow/js/src/builder/index.ts @@ -0,0 +1,82 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

/** @ignore */
export { Builder, BuilderOptions } from '../builder';
export { BoolBuilder } from './bool';
export { NullBuilder } from './null';
export { DateBuilder, DateDayBuilder, DateMillisecondBuilder } from './date';
export { DecimalBuilder } from './decimal';
export { DictionaryBuilder } from './dictionary';
export { FixedSizeBinaryBuilder } from './fixedsizebinary';
export { FloatBuilder, Float16Builder, Float32Builder, Float64Builder } from './float';
export { IntBuilder, Int8Builder, Int16Builder, Int32Builder, Int64Builder, Uint8Builder, Uint16Builder, Uint32Builder, Uint64Builder } from './int';
export { TimeBuilder, TimeSecondBuilder, TimeMillisecondBuilder, TimeMicrosecondBuilder, TimeNanosecondBuilder } from './time';
export { TimestampBuilder, TimestampSecondBuilder, TimestampMillisecondBuilder, TimestampMicrosecondBuilder, TimestampNanosecondBuilder } from './timestamp';
export { IntervalBuilder, IntervalDayTimeBuilder, IntervalYearMonthBuilder } from './interval';
export { Utf8Builder } from './utf8';
export { BinaryBuilder } from './binary';
export { ListBuilder } from './list';
export { FixedSizeListBuilder } from './fixedsizelist';
export { MapBuilder } from './map';
export { StructBuilder } from './struct';
export { UnionBuilder, SparseUnionBuilder, DenseUnionBuilder } from './union';

import { Type } from '../enum';
import { Field } from '../schema';
import { DataType } from '../type';
import { Utf8Builder } from './utf8';
import { BuilderType as B } from '../interfaces';
import { Builder, BuilderOptions } from '../builder';
import { instance as setVisitor } from '../visitor/set';
import { instance as getBuilderConstructor } from '../visitor/builderctor';

/** @nocollapse */
Builder.new = newBuilder;

/**
 * Creates the builder registered for `options.type` and, when the type has
 * children, one child builder per child field (recursively).
 *
 * Child options come from `options['children']`: looked up by position
 * when it is an array, otherwise by field name. A child with no explicit
 * entry inherits only the parent's `nullValues`.
 */
function newBuilder<T extends DataType = any, TNull = any>(options: BuilderOptions<T, TNull>): B<T, TNull> {

    const type = options.type;
    const BuilderCtor = getBuilderConstructor.getVisitFn<T>(type)();
    const builder = new BuilderCtor(options) as Builder<T, TNull>;

    if (type.children && type.children.length > 0) {

        const children = options['children'] || [] as BuilderOptions[];
        const defaultOptions = { 'nullValues': options['nullValues'] };
        const childrenByPosition = Array.isArray(children);
        const getChildOptions = (field: Field, position: number) => {
            const explicit = childrenByPosition ? children[position] : children[field.name];
            return explicit || defaultOptions;
        };

        type.children.forEach((field, index) => {
            const childOptions = getChildOptions(field, index);
            // The field's own type always wins over whatever `type` the options carried.
            builder.children.push(newBuilder({ ...childOptions, type: field.type }));
        });
    }

    return builder as B<T, TNull>;
}

// Install the matching `set` visitor function as `_setValue` on the builder
// class of every numeric `Type` enum member except `Type.NONE`.
for (const key of Object.keys(Type) as any[]) {
    const typeId = Type[key] as any;
    if (typeof typeId === 'number' && typeId !== Type.NONE) {
        const BuilderCtor = getBuilderConstructor.visit(typeId);
        BuilderCtor.prototype._setValue = setVisitor.getVisitFn(typeId);
    }
}

// Utf8Builder is then pointed at the Binary setter instead.
(Utf8Builder.prototype as any)._setValue = setVisitor.visitBinary;

// diff --git a/src/arrow/js/src/builder/int.ts b/src/arrow/js/src/builder/int.ts new file mode 100644 index 000000000..5777bd125 --- /dev/null +++ b/src/arrow/js/src/builder/int.ts @@ -0,0 +1,80 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

import { bignumToBigInt } from '../util/bn';
import { WideBufferBuilder } from './buffer';
import { BigInt64Array } from '../util/compat';
import { FixedWidthBuilder, BuilderOptions } from '../builder';
import { Int, Int8, Int16, Int32, Int64, Uint8, Uint16, Uint32, Uint64 } from '../type';

/**
 * Fixed-width builder for any `Int` type.
 * @ignore
 */
export class IntBuilder<T extends Int = Int, TNull = any> extends FixedWidthBuilder<T, TNull> {
    /** Stores `value` in the values buffer at `index`. */
    public setValue(index: number, value: T['TValue']) {
        const values = this._values;
        values.set(index, value);
    }
}

/** `IntBuilder` specialized to `Int8`. @ignore */
export class Int8Builder<TNull = any> extends IntBuilder<Int8, TNull> {}
/** `IntBuilder` specialized to `Int16`. @ignore */
export class Int16Builder<TNull = any> extends IntBuilder<Int16, TNull> {}
/** `IntBuilder` specialized to `Int32`. @ignore */
export class Int32Builder<TNull = any> extends IntBuilder<Int32, TNull> {}
/**
 * Builder for `Int64` columns, backed by a `WideBufferBuilder` over an
 * Int32Array with a stride of 2.
 * @ignore
 */
export class Int64Builder<TNull = any> extends IntBuilder<Int64, TNull> {
    protected _values: WideBufferBuilder<Int32Array, BigInt64Array>;
    /**
     * Note: when `options['nullValues']` is set, it is replaced on the
     * caller-supplied `options` object with a copy mapped through `toBigInt`.
     */
    constructor(options: BuilderOptions<Int64, TNull>) {
        const nullValues = options['nullValues'];
        if (nullValues) {
            options['nullValues'] = (nullValues as TNull[]).map(toBigInt);
        }
        super(options);
        this._values = new WideBufferBuilder(new Int32Array(0), 2);
    }
    /** The 64-bit view of the values buffer. */
    public get values64() { return this._values.buffer64; }
    /** Normalizes `value` with `toBigInt` before the base validity check. */
    public isValid(value: Int32Array | bigint | TNull) {
        const normalized = toBigInt(value);
        return super.isValid(normalized);
    }
}

/** `IntBuilder` specialized to `Uint8`. @ignore */
export class Uint8Builder<TNull = any> extends IntBuilder<Uint8, TNull> {}
/** `IntBuilder` specialized to `Uint16`. @ignore */
export class Uint16Builder<TNull = any> extends IntBuilder<Uint16, TNull> {}
/** `IntBuilder` specialized to `Uint32`. @ignore */
export class Uint32Builder<TNull = any> extends IntBuilder<Uint32, TNull> {}
/**
 * Builder for `Uint64` columns, backed by a `WideBufferBuilder` over a
 * Uint32Array with a stride of 2.
 * @ignore
 */
export class Uint64Builder<TNull = any> extends IntBuilder<Uint64, TNull> {
    protected _values: WideBufferBuilder<Uint32Array, BigUint64Array>;
    /**
     * Note: when `options['nullValues']` is set, it is replaced on the
     * caller-supplied `options` object with a copy mapped through `toBigInt`.
     */
    constructor(options: BuilderOptions<Uint64, TNull>) {
        const nullValues = options['nullValues'];
        if (nullValues) {
            options['nullValues'] = (nullValues as TNull[]).map(toBigInt);
        }
        super(options);
        this._values = new WideBufferBuilder(new Uint32Array(0), 2);
    }
    /** The 64-bit view of the values buffer. */
    public get values64() { return this._values.buffer64; }
    /** Normalizes `value` with `toBigInt` before the base validity check. */
    public isValid(value: Uint32Array | bigint | TNull) {
        const normalized = toBigInt(value);
        return super.isValid(normalized);
    }
}

/**
 * Converts ArrayBuffer views to a bigint via `bignumToBigInt`, reusing one
 * scratch descriptor object across calls; any other value is returned
 * unchanged.
 * NOTE(review): the scratch object always carries `BigInt64Array`, also when
 * called from `Uint64Builder` -- confirm that is intended for unsigned values.
 */
const toBigInt = ((scratch: any) => (value: any) => {
    if (!ArrayBuffer.isView(value)) {
        return value;
    }
    scratch.buffer = value.buffer;
    scratch.byteOffset = value.byteOffset;
    scratch.byteLength = value.byteLength;
    const converted = bignumToBigInt(scratch);
    // Drop the reference so the scratch object does not keep the caller's buffer alive.
    scratch.buffer = null;
    return converted;
})({ 'BigIntArray': BigInt64Array });

// diff --git a/src/arrow/js/src/builder/interval.ts b/src/arrow/js/src/builder/interval.ts new file mode 100644 index 000000000..374228215 --- /dev/null +++ b/src/arrow/js/src/builder/interval.ts @@ -0,0 +1,26 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

import { FixedWidthBuilder } from '../builder';
import { Interval, IntervalDayTime, IntervalYearMonth } from '../type';

/**
 * Fixed-width builder for any `Interval` type; adds no behavior of its own.
 * @ignore
 */
export class IntervalBuilder<T extends Interval = Interval, TNull = any> extends FixedWidthBuilder<T, TNull> {}
/**
 * `IntervalBuilder` specialized to `IntervalDayTime`.
 * @ignore
 */
export class IntervalDayTimeBuilder<TNull = any> extends IntervalBuilder<IntervalDayTime, TNull> {}
/**
 * `IntervalBuilder` specialized to `IntervalYearMonth`.
 * @ignore
 */
export class IntervalYearMonthBuilder<TNull = any> extends IntervalBuilder<IntervalYearMonth, TNull> {}

// diff --git a/src/arrow/js/src/builder/list.ts b/src/arrow/js/src/builder/list.ts new file mode 100644 index 000000000..844681eae --- /dev/null +++ b/src/arrow/js/src/builder/list.ts @@ -0,0 +1,58 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
import { Run } from './run';
import { Field } from '../schema';
import { DataType, List } from '../type';
import { OffsetsBufferBuilder } from './buffer';
import { Builder, BuilderOptions, VariableWidthBuilder } from '../builder';

/**
 * Variable-width builder for `List<T>` columns. Holds at most one child
 * builder, which receives the list elements.
 *
 * @ignore
 */
export class ListBuilder<T extends DataType = any, TNull = any> extends VariableWidthBuilder<List<T>, TNull> {
    /** Reusable wrapper handed to `_setValue` in place of raw array-like values. */
    protected _run = new Run<T, TNull>();
    protected _offsets: OffsetsBufferBuilder;

    constructor(opts: BuilderOptions<List<T>, TNull>) {
        super(opts);
        this._offsets = new OffsetsBufferBuilder();
    }

    /**
     * Installs the single child builder and rebuilds `this.type` as a `List`
     * of a nullable field with the child's type.
     *
     * @param child Builder for the list elements.
     * @param name Name of the child field.
     * @returns `this.numChildren - 1`, evaluated after the child is stored.
     * @throws Error if a child has already been added.
     */
    public addChild(child: Builder<T>, name = '0') {
        const alreadyHasChild = this.numChildren > 0;
        if (alreadyHasChild) {
            throw new Error('ListBuilder can only have one child.');
        }
        this.children[this.numChildren] = child;
        const elementField = new Field(name, child.type, true);
        this.type = new List(elementField);
        return this.numChildren - 1;
    }

    /** Clears the reusable run wrapper, then defers to the base class `clear()`. */
    public clear() {
        this._run.clear();
        return super.clear();
    }

    /**
     * Writes every pending entry: an `undefined` entry records 0 in the
     * offsets builder; any other entry records its `length` and is forwarded
     * to `_setValue` wrapped by `this._run.bind()`.
     *
     * @param pending Map from row index to the pending value for that row.
     */
    protected _flushPending(pending: Map<number, T['TValue'] | undefined>) {
        const offsets = this._offsets;
        const run = this._run;
        // `_setValue` is invoked as a plain function with the builder passed explicitly.
        const writeValue = this._setValue;
        for (const [row, entry] of pending) {
            if (entry !== undefined) {
                offsets.set(row, entry.length);
                writeValue(this, row, run.bind(entry));
            } else {
                offsets.set(row, 0);
            }
        }
    }
}
// diff --git a/src/arrow/js/src/builder/map.ts b/src/arrow/js/src/builder/map.ts
// new file mode 100644
// index 000000000..25affef2c
// --- /dev/null
// +++ b/src/arrow/js/src/builder/map.ts
// @@ -0,0 +1,64 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

import { Field } from '../schema';
import { DataType, Map_, Struct } from '../type';
import { Builder, VariableWidthBuilder } from '../builder';

/** @ignore */ type MapValue<K extends DataType = any, V extends DataType = any> = Map_<K, V>['TValue'];
/** @ignore */ type MapValues<K extends DataType = any, V extends DataType = any> = Map<number, MapValue<K, V> | undefined>;
/** @ignore */ type MapValueExt<K extends DataType = any, V extends DataType = any> = MapValue<K, V> | { [key: string]: V } | { [key: number]: V } ;

/**
 * Variable-width builder for `Map_<K, V>` columns. Accepts either `Map`
 * instances or plain objects as values, and holds at most one child builder.
 *
 * @ignore
 */
export class MapBuilder<K extends DataType = any, V extends DataType = any, TNull = any> extends VariableWidthBuilder<Map_<K, V>, TNull> {

    /** Values written since the last flush, keyed by row index. */
    protected _pending: MapValues<K, V> | undefined;

    /**
     * Same as the base class `set()`, with the parameter type widened so that
     * plain objects are accepted in addition to `Map`s.
     *
     * @param index Row index to write.
     * @param value A Map, a plain object, or a null sentinel.
     */
    public set(index: number, value: MapValueExt<K, V> | TNull) {
        return super.set(index, value as MapValue<K, V> | TNull);
    }

    /**
     * Queues `value` for row `index`. Plain objects are converted to a `Map`
     * via `Object.entries`. `_pendingLength` is kept in step with the sizes of
     * the queued Maps, so replacing an already-pending row first subtracts the
     * size of the value being replaced.
     *
     * @param index Row index to write.
     * @param value A Map or a plain object of key/value pairs.
     */
    public setValue(index: number, value: MapValueExt<K, V>) {
        value = value instanceof Map ? value : new Map(Object.entries(value));
        const pending = this._pending || (this._pending = new Map() as MapValues<K, V>);
        const current = pending.get(index);
        if (current) {
            this._pendingLength -= current.size;
        }
        this._pendingLength += value.size;
        pending.set(index, value);
    }

    /**
     * Installs the single child builder and rebuilds `this.type` as a `Map_`
     * over a nullable field with the child's type, keeping the current
     * `keysSorted` flag.
     *
     * @param child Builder for the key/value struct entries.
     * @param name Name of the child field; defaults to the current child count.
     * @returns `this.numChildren - 1`, evaluated after the child is stored.
     * @throws Error if a child has already been added.
     */
    public addChild(child: Builder<Struct<{ key: K; value: V }>>, name = `${this.numChildren}`) {
        if (this.numChildren > 0) {
            // The message previously named ListBuilder, which pointed users at
            // the wrong class when this guard fired.
            throw new Error('MapBuilder can only have one child.');
        }
        this.children[this.numChildren] = child;
        this.type = new Map_<K, V>(new Field(name, child.type, true), this.type.keysSorted);
        return this.numChildren - 1;
    }

    /**
     * Writes every pending entry: an `undefined` entry records 0 in the
     * offsets builder; any other entry records its `size` and is forwarded to
     * `_setValue`.
     *
     * @param pending Map from row index to the pending value for that row.
     */
    protected _flushPending(pending: MapValues<K, V>) {
        const offsets = this._offsets;
        // `_setValue` is invoked as a plain function with the builder passed explicitly.
        const setValue = this._setValue;
        pending.forEach((value, index) => {
            if (value === undefined) {
                offsets.set(index, 0);
            } else {
                offsets.set(index, value.size);
                setValue(this, index, value);
            }
        });
    }
}
// diff --git a/src/arrow/js/src/builder/null.ts b/src/arrow/js/src/builder/null.ts
// new file mode 100644
// index 000000000..4be3f063b
// --- /dev/null
// +++ b/src/arrow/js/src/builder/null.ts
// @@ -0,0 +1,29 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
import { Null } from '../type';
import { Builder } from '../builder';

/**
 * Builder for `Null` columns. It stores no values; it only tracks how many
 * rows have been written.
 *
 * @ignore
 */
export class NullBuilder<TNull = any> extends Builder<Null, TNull> {
    /** Intentionally does nothing: a Null column has no value to store. */
    // @ts-ignore
    public setValue(index: number, value: null) {}

    /**
     * Grows `length` so that it covers `index`, without ever shrinking it.
     *
     * @param index Row index being written.
     * @param valid Validity flag for the row.
     * @returns `valid`, unchanged.
     */
    public setValid(index: number, valid: boolean) {
        const lengthCoveringIndex = index + 1;
        this.length = Math.max(lengthCoveringIndex, this.length);
        return valid;
    }
}
// diff --git a/src/arrow/js/src/builder/run.ts b/src/arrow/js/src/builder/run.ts
// new file mode 100644
// index 000000000..5239f51f2
// --- /dev/null
// +++ b/src/arrow/js/src/builder/run.ts
// @@ -0,0 +1,34 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
import { Vector } from '../vector';
import { DataType } from '../type';

/**
 * Thin, re-bindable wrapper that exposes `length` and `get(index)` over an
 * array-like sequence of values.
 *
 * @ignore
 */
export class Run<T extends DataType = any, TNull = any> {
    /** The currently bound sequence; reset to `null` by `clear()`. */
    protected _values!: ArrayLike<T['TValue'] | TNull>;

    /** Number of elements in the bound sequence. */
    public get length() {
        return this._values.length;
    }

    /** Element of the bound sequence at `index`. */
    public get(index: number) {
        return this._values[index];
    }

    /** Drops the bound sequence and returns this wrapper. */
    public clear() {
        this._values = null as any;
        return this;
    }

    /**
     * Binds this wrapper to `values`, unless `values` is already a `Vector`.
     *
     * @param values A Vector or any array-like sequence.
     * @returns The Vector itself when one is given (this wrapper is left
     * untouched); otherwise this wrapper, now bound to `values`.
     */
    public bind(values: Vector<T> | ArrayLike<T['TValue'] | TNull>) {
        if (!(values instanceof Vector)) {
            this._values = values;
            return this as any;
        }
        return values;
    }
}
// diff --git a/src/arrow/js/src/builder/struct.ts b/src/arrow/js/src/builder/struct.ts
// new file mode 100644
// index 000000000..4d12336ce
// --- /dev/null
// +++ b/src/arrow/js/src/builder/struct.ts
// @@ -0,0 +1,29 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
import { Field } from '../schema';
import { Builder } from '../builder';
import { DataType, Struct } from '../type';

/**
 * Builder for `Struct` columns; each child builder corresponds to one field.
 *
 * @ignore
 */
export class StructBuilder<T extends { [key: string]: DataType } = any, TNull = any> extends Builder<Struct<T>, TNull> {
    /**
     * Appends a child builder and rebuilds `this.type` with one more nullable
     * field of the child's type.
     *
     * @param child Builder for the new field.
     * @param name Name of the new field; defaults to the child count before
     * the child is added.
     * @returns The zero-based index of the newly added child.
     */
    public addChild(child: Builder, name = `${this.numChildren}`) {
        // Array#push returns the new length, so the index of the element just
        // pushed is one less. Returning the raw push() result was off by one.
        const childIndex = this.children.push(child) - 1;
        this.type = new Struct([...this.type.children, new Field(name, child.type, true)]);
        return childIndex;
    }
}
// diff --git a/src/arrow/js/src/builder/time.ts b/src/arrow/js/src/builder/time.ts
// new file mode 100644
// index 000000000..bfa71d2b5
// --- /dev/null
// +++ b/src/arrow/js/src/builder/time.ts
// @@ -0,0 +1,30 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
import { FixedWidthBuilder } from '../builder';
import { Time, TimeSecond, TimeMillisecond, TimeMicrosecond, TimeNanosecond } from '../type';

/**
 * Fixed-width builder for any `Time` type. Adds no members of its own; all
 * behaviour comes from `FixedWidthBuilder`.
 *
 * @ignore
 */
export class TimeBuilder<T extends Time = Time, TNull = any>
    extends FixedWidthBuilder<T, TNull> {}

/**
 * `TimeBuilder` with its type parameter fixed to `TimeSecond`.
 *
 * @ignore
 */
export class TimeSecondBuilder<TNull = any>
    extends TimeBuilder<TimeSecond, TNull> {}

/**
 * `TimeBuilder` with its type parameter fixed to `TimeMillisecond`.
 *
 * @ignore
 */
export class TimeMillisecondBuilder<TNull = any>
    extends TimeBuilder<TimeMillisecond, TNull> {}

/**
 * `TimeBuilder` with its type parameter fixed to `TimeMicrosecond`.
 *
 * @ignore
 */
export class TimeMicrosecondBuilder<TNull = any>
    extends TimeBuilder<TimeMicrosecond, TNull> {}

/**
 * `TimeBuilder` with its type parameter fixed to `TimeNanosecond`.
 *
 * @ignore
 */
export class TimeNanosecondBuilder<TNull = any>
    extends TimeBuilder<TimeNanosecond, TNull> {}
// diff --git a/src/arrow/js/src/builder/timestamp.ts b/src/arrow/js/src/builder/timestamp.ts
// new file mode 100644
// index 000000000..49741e9ba
// --- /dev/null
// +++ b/src/arrow/js/src/builder/timestamp.ts
// @@ -0,0 +1,30 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
import { FixedWidthBuilder } from '../builder';
import { Timestamp, TimestampSecond, TimestampMillisecond, TimestampMicrosecond, TimestampNanosecond } from '../type';

/**
 * Fixed-width builder for any `Timestamp` type. Adds no members of its own;
 * all behaviour comes from `FixedWidthBuilder`.
 *
 * @ignore
 */
export class TimestampBuilder<T extends Timestamp = Timestamp, TNull = any>
    extends FixedWidthBuilder<T, TNull> {}

/**
 * `TimestampBuilder` with its type parameter fixed to `TimestampSecond`.
 *
 * @ignore
 */
export class TimestampSecondBuilder<TNull = any>
    extends TimestampBuilder<TimestampSecond, TNull> {}

/**
 * `TimestampBuilder` with its type parameter fixed to `TimestampMillisecond`.
 *
 * @ignore
 */
export class TimestampMillisecondBuilder<TNull = any>
    extends TimestampBuilder<TimestampMillisecond, TNull> {}

/**
 * `TimestampBuilder` with its type parameter fixed to `TimestampMicrosecond`.
 *
 * @ignore
 */
export class TimestampMicrosecondBuilder<TNull = any>
    extends TimestampBuilder<TimestampMicrosecond, TNull> {}

/**
 * `TimestampBuilder` with its type parameter fixed to `TimestampNanosecond`.
 *
 * @ignore
 */
export class TimestampNanosecondBuilder<TNull = any>
    extends TimestampBuilder<TimestampNanosecond, TNull> {}
// diff --git a/src/arrow/js/src/builder/union.ts b/src/arrow/js/src/builder/union.ts
// new file mode 100644
// index 000000000..18ac05bf6
// --- /dev/null
// +++ b/src/arrow/js/src/builder/union.ts
// @@ -0,0 +1,96 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
import { Field } from '../schema';
import { DataBufferBuilder } from './buffer';
import { Builder, BuilderOptions } from '../builder';
import { Union, SparseUnion, DenseUnion } from '../type';

/**
 * Builder options for unions. `valueToChildTypeId`, when supplied, is used to
 * pick the child type id for a value whenever the caller does not pass one.
 */
export interface UnionBuilderOptions<T extends Union = any, TNull = any> extends BuilderOptions<T, TNull> {
    valueToChildTypeId?: (builder: UnionBuilder<T, TNull>, value: any, offset: number) => number;
}

/**
 * Shared base for the sparse and dense union builders. Records one child type
 * id per row alongside the value itself.
 *
 * @ignore
 */
export abstract class UnionBuilder<T extends Union, TNull = any> extends Builder<T, TNull> {

    /** Per-row child type ids, stored as Int8. */
    protected _typeIds: DataBufferBuilder<Int8Array>;

    /**
     * @param options Builder options; a function-valued `valueToChildTypeId`
     * replaces the default resolver, which always throws.
     */
    constructor(options: UnionBuilderOptions<T, TNull>) {
        super(options);
        this._typeIds = new DataBufferBuilder(new Int8Array(0), 1);
        const resolver = options['valueToChildTypeId'];
        if (typeof resolver === 'function') {
            this._valueToChildTypeId = resolver;
        }
    }

    /** The `typeIdToChildIndex` lookup of the current union type. */
    public get typeIdToChildIndex() {
        return this.type.typeIdToChildIndex;
    }

    /**
     * Writes `value` at the current end of the builder.
     *
     * @param value Value or null sentinel to append.
     * @param childTypeId Optional child type id; resolved from the value when omitted.
     */
    public append(value: T['TValue'] | TNull, childTypeId?: number) {
        return this.set(this.length, value, childTypeId);
    }

    /**
     * Writes `value` at `index`. The child type id is resolved first (so the
     * resolver runs even for null sentinels); the value itself is stored only
     * when `setValid` reports the row as valid.
     *
     * @param index Row index to write.
     * @param value Value or null sentinel.
     * @param childTypeId Optional child type id; resolved from the value when omitted.
     * @returns This builder.
     */
    public set(index: number, value: T['TValue'] | TNull, childTypeId?: number) {
        const resolvedTypeId = childTypeId === undefined
            ? this._valueToChildTypeId(this, value, index)
            : childTypeId;
        const rowIsValid = this.setValid(index, this.isValid(value));
        if (rowIsValid) {
            this.setValue(index, value, resolvedTypeId);
        }
        return this;
    }

    /**
     * Records the child type id for the row, then stores the value through the
     * base class.
     */
    public setValue(index: number, value: T['TValue'], childTypeId?: number) {
        this._typeIds.set(index, childTypeId!);
        super.setValue(index, value);
    }

    /**
     * Appends a child builder and rebuilds `this.type` as a `Union` with one
     * more field and one more type id.
     *
     * @param child Builder for the new union member.
     * @param name Field name; defaults to the child count before the child is added.
     * @returns The type id assigned to the child, which is the value returned
     * by `children.push()` (the child count after insertion).
     */
    public addChild(child: Builder, name = `${this.children.length}`) {
        const childTypeId = this.children.push(child);
        const currentType = this.type;
        const fields = [...currentType.children, new Field(name, child.type)];
        const typeIds = [...currentType.typeIds, childTypeId];
        this.type = <T> new Union(currentType.mode, typeIds, fields);
        return childTypeId;
    }

    /**
     * Default resolver: always throws, because a type id cannot be derived
     * from a value without help from the caller.
     *
     * @ignore
     */
    // @ts-ignore
    protected _valueToChildTypeId(builder: UnionBuilder<T, TNull>, value: any, offset: number): number {
        throw new Error(`Cannot map UnionBuilder value to child typeId. \
Pass the \`childTypeId\` as the second argument to unionBuilder.append(), \
or supply a \`valueToChildTypeId\` function as part of the UnionBuilder constructor options.`);
    }
}

/**
 * Union builder for `SparseUnion` types; adds nothing to `UnionBuilder`.
 *
 * @ignore
 */
export class SparseUnionBuilder<T extends SparseUnion, TNull = any> extends UnionBuilder<T, TNull> {}

/**
 * Union builder for `DenseUnion` types. In addition to the type id, it records
 * a per-row offset into the selected child.
 *
 * @ignore
 */
export class DenseUnionBuilder<T extends DenseUnion, TNull = any> extends UnionBuilder<T, TNull> {

    /** Per-row offsets, stored as Int32. */
    protected _offsets: DataBufferBuilder<Int32Array>;

    constructor(options: UnionBuilderOptions<T, TNull>) {
        super(options);
        this._offsets = new DataBufferBuilder(new Int32Array(0));
    }

    /**
     * Records the selected child's current length as the row's offset, then
     * stores the type id and value through the base class.
     *
     * @ignore
     */
    public setValue(index: number, value: T['TValue'], childTypeId?: number) {
        const childIndex = this.type.typeIdToChildIndex[childTypeId!];
        const targetChild = this.getChildAt(childIndex)!;
        this._offsets.set(index, targetChild.length);
        return super.setValue(index, value, childTypeId);
    }
}
// diff --git a/src/arrow/js/src/builder/utf8.ts b/src/arrow/js/src/builder/utf8.ts
// new file mode 100644
// index 000000000..7564cdad6
// --- /dev/null
// +++ b/src/arrow/js/src/builder/utf8.ts
// @@ -0,0 +1,44 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
import { Utf8 } from '../type';
import { encodeUtf8 } from '../util/utf8';
import { BinaryBuilder } from './binary';
import { BufferBuilder } from './buffer';
import { VariableWidthBuilder, BuilderOptions } from '../builder';

/**
 * Variable-width builder for `Utf8` columns. Strings are encoded with
 * `encodeUtf8` on the way in; flushing reuses `BinaryBuilder`'s
 * implementation, which is installed on the prototype after the class body.
 *
 * @ignore
 */
export class Utf8Builder<TNull = any> extends VariableWidthBuilder<Utf8, TNull> {
    constructor(opts: BuilderOptions<Utf8, TNull>) {
        super(opts);
        this._values = new BufferBuilder(new Uint8Array(0));
    }

    /**
     * Size figure made up of the pending length, four bytes per row, and the
     * `byteLength` of whichever of the offsets, values and nulls buffers exist.
     */
    public get byteLength(): number {
        let total = this._pendingLength + (this.length * 4);
        if (this._offsets) {
            total += this._offsets.byteLength;
        }
        if (this._values) {
            total += this._values.byteLength;
        }
        if (this._nulls) {
            total += this._nulls.byteLength;
        }
        return total;
    }

    /**
     * Encodes `value` with `encodeUtf8` and stores the result through the base class.
     *
     * @param index Row index to write.
     * @param value String to store.
     */
    public setValue(index: number, value: string) {
        const encoded = encodeUtf8(value) as any;
        return super.setValue(index, encoded);
    }

    /** Placeholder only; replaced on the prototype below. */
    // @ts-ignore
    protected _flushPending(pending: Map<number, Uint8Array | undefined>, pendingLength: number): void {}
}

// Share BinaryBuilder's flush implementation instead of duplicating it here.
(Utf8Builder.prototype as any)._flushPending = (BinaryBuilder.prototype as any)._flushPending;
// diff --git a/src/arrow/js/src/builder/valid.ts b/src/arrow/js/src/builder/valid.ts
// new file mode 100644
// index 000000000..ae5b799fb
// --- /dev/null
// +++ b/src/arrow/js/src/builder/valid.ts
// @@ -0,0 +1,77 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.
// See the License for the
// specific language governing permissions and limitations
// under the License.

import { DataType } from '../type';
import { valueToString } from '../util/pretty';
import { BigIntAvailable } from '../util/compat';

/**
 * Compiles a list of null sentinels into an `isValid()` function whose body
 * is a `switch` statement. The original authors report that microbenchmarks
 * in v8 showed this approach to be 25% faster than using an ES6 Map.
 *
 * With no sentinels (missing or empty list) the returned function accepts
 * every value.
 *
 * @example
 * console.log(createIsValidFunction([null, 'N/A', NaN]));
 * `function (x) {
 *     if (x !== x) return false;
 *     switch (x) {
 *         case null:
 *         case "N/A":
 *             return false;
 *     }
 *     return true;
 * }`
 *
 * @ignore
 * @param nullValues Values that should be treated as null.
 * @returns A predicate that is `false` for the sentinels and `true` otherwise.
 */
export function createIsValidFunction<T extends DataType = any, TNull = any>(nullValues?: ReadonlyArray<TNull>) {

    if (!nullValues || nullValues.length <= 0) {
        // @ts-ignore
        return function isValid(value: any) { return true; };
    }

    // `x === x` is false only for NaN, so this keeps every sentinel except NaN.
    const noNaNs = nullValues.filter((x) => x === x);
    const hasNaN = nullValues.length !== noNaNs.length;

    let fnBody = '';
    if (noNaNs.length > 0) {
        const caseLabels = noNaNs.map((x) => `
        case ${valueToCase(x)}:`).join('');
        fnBody = `
    switch (x) {${caseLabels}
            return false;
    }`;
    }

    // NaN doesn't equal anything including itself, so it can't be matched by a
    // switch case; it gets an explicit self-inequality check ahead of the switch.
    if (hasNaN) {
        fnBody = `if (x !== x) return false;\n${fnBody}`;
    }

    return new Function(`x`, `${fnBody}\nreturn true;`) as (value: T['TValue'] | TNull) => boolean;
}

/**
 * Renders one sentinel as the source text of a `case` label. Non-bigint values
 * use `valueToString` as-is; bigints get an `n` suffix when `BigIntAvailable`
 * is set and are wrapped in double quotes otherwise.
 *
 * @ignore
 */
function valueToCase(x: any) {
    if (typeof x === 'bigint') {
        return BigIntAvailable ? `${valueToString(x)}n` : `"${valueToString(x)}"`;
    }
    return valueToString(x);
}