diff options
Diffstat (limited to 'src/arrow/js/test/unit/table')
-rw-r--r-- | src/arrow/js/test/unit/table/assign-tests.ts | 80 | ||||
-rw-r--r-- | src/arrow/js/test/unit/table/serialize-tests.ts | 167 |
2 files changed, 247 insertions, 0 deletions
diff --git a/src/arrow/js/test/unit/table/assign-tests.ts b/src/arrow/js/test/unit/table/assign-tests.ts new file mode 100644 index 000000000..fa1dacbc6 --- /dev/null +++ b/src/arrow/js/test/unit/table/assign-tests.ts @@ -0,0 +1,80 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
/* eslint-disable jest/no-standalone-expect */

import '../../jest-extensions';
import { zip } from 'ix/iterable';
import * as generate from '../../generate-test-data';
import { validateTable } from '../generated-data-validators';
import {
    Schema, Field, DataType, Int32, Float32, Utf8
} from 'apache-arrow';

/** Builds a Schema holding one Field per `[name, type]` pair, in the order given. */
const toSchema = (...pairs: [string, DataType][]) => {
    const fields = pairs.map(([name, type]) => new Field(name, type));
    return new Schema(fields);
};

const schema1 = toSchema(['a', new Int32()], ['b', new Float32()], ['c', new Utf8()]);
// Shares the field names 'a' and 'b' with schema1; 'f' only appears here (and in schema2).
const partialOverlapWith1 = toSchema(['a', new Int32()], ['b', new Float32()], ['f', new Utf8()]);
const schema2 = toSchema(['d', new Int32()], ['e', new Float32()], ['f', new Utf8()]);

// One entry per scenario. `lhsIndices[j]` / `rhsIndices[j]` give the position in the
// assigned table where column j of that side is expected to land; a hole means the
// column is not taken from that side.
/* eslint-disable no-sparse-arrays */
const assignCases = [
    {
        title: `should assign non-overlapping fields`,
        lhsSchema: schema1,
        rhsSchema: schema2,
        names: ['a', 'b', 'c', 'd', 'e', 'f'],
        lhsIndices: [0, 1, 2],
        rhsIndices: [3, 4, 5],
    },
    {
        title: `should assign partially-overlapping fields`,
        lhsSchema: schema1,
        rhsSchema: partialOverlapWith1,
        names: ['a', 'b', 'c', 'f'],
        lhsIndices: [ , , 2],
        rhsIndices: [0, 1, 3],
    },
    {
        title: `should assign completely-overlapping fields`,
        lhsSchema: schema2,
        rhsSchema: schema2,
        names: ['d', 'e', 'f'],
        lhsIndices: [ , , ],
        rhsIndices: [0, 1, 2],
    },
];
/* eslint-enable no-sparse-arrays */

describe('Table.assign()', () => {
    for (const { title, lhsSchema, rhsSchema, names, lhsIndices, rhsIndices } of assignCases) {
        describe(title, () => {
            const lhs = generate.table([20], lhsSchema);
            const rhs = generate.table([20], rhsSchema);
            const table = lhs.table.assign(rhs.table);
            const buildExpected = assignGeneratedTables(lhs, rhs);
            // Checked directly in the describe body (hence the file-level eslint-disable).
            expect(table.schema.fields.map((field) => field.name)).toEqual(names);
            validateTable({ ...buildExpected(lhsIndices, rhsIndices), table }).run();
        });
    }
});

/**
 * Returns a factory that merges the generated data of `lhs` and `rhs` into the
 * layout expected for the assigned table.
 *
 * The factory takes two index arrays: entry j of each array is the position in
 * the merged output that element j of that side is written to; `undefined`
 * entries and holes are skipped. The rhs values are placed first and the lhs
 * values are then written into the same array.
 */
function assignGeneratedTables(lhs: generate.GeneratedTable, rhs: generate.GeneratedTable) {
    return function createAssignedTestData(lhsIndices: any[], rhsIndices: any[]) {
        // Creates a "scatter" function for one side: copies xs[j] into out[indices[j]].
        // forEach, like reduce, never visits holes of a sparse index array.
        const pluckWith = (indices: any[]) => (xs: any[], out: any[] = []) => {
            indices.forEach((target, j) => {
                if (target !== undefined) { out[target] = xs ? xs[j] : null; }
            });
            return out;
        };
        const pluckLhs = pluckWith(lhsIndices);
        const pluckRhs = pluckWith(rhsIndices);
        // Spreading turns any remaining holes into explicit `undefined` entries.
        const merge = (left: any[], right: any[]) => [...pluckLhs(left, pluckRhs(right))];

        const cols = () => merge(lhs.cols(), rhs.cols());
        const keys = () => merge(lhs.keys(), rhs.keys());
        const rows = () => {
            const pairs = [...zip(lhs.rows(), rhs.rows())];
            return pairs.map(([l, r]) => merge(l, r));
        };
        const colBatches = [...zip(lhs.colBatches, rhs.colBatches)]
            .map(([l, r]) => () => merge(l(), r()));
        const keyBatches = [...zip(lhs.keyBatches, rhs.keyBatches)]
            .map(([l, r]) => () => merge(l(), r()));
        const rowBatches = [...zip(lhs.rowBatches, rhs.rowBatches)]
            .map(([l, r]) => () => {
                const pairs = [...zip(l(), r())];
                return pairs.map(([lRow, rRow]) => merge(lRow, rRow));
            });
        return { cols, keys, rows, colBatches, keyBatches, rowBatches };
    };
}
diff --git a/src/arrow/js/test/unit/table/serialize-tests.ts b/src/arrow/js/test/unit/table/serialize-tests.ts new file mode 100644 index 000000000..5eb211763 --- /dev/null +++ b/src/arrow/js/test/unit/table/serialize-tests.ts @@ -0,0 +1,167 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License.

import '../../jest-extensions';
import * as generate from '../../generate-test-data';
import {
    Table, Schema, Field, DataType, Dictionary, Int32, Float32, Utf8, Null, Int32Vector
} from 'apache-arrow';

/** Builds a Schema holding one Field per `[name, type]` pair, in the order given. */
const toSchema = (...xs: [string, DataType][]) => new Schema(xs.map((x) => new Field(...x)));
const schema1 = toSchema(['a', new Int32()], ['b', new Float32()], ['c', new Dictionary(new Utf8(), new Int32())]);
const schema2 = toSchema(['d', new Int32()], ['e', new Float32()], ['f', new Utf8()]);
const nullSchema = new Schema([new Field('null', new Null())]);

// Several tests below assert that this metadata entry survives a serialize/deserialize round trip.
schema1.metadata.set('foo', 'bar');

/**
 * Generates test data for `schema` through `generate.table` and returns the
 * resulting table.
 *
 * @param schema schema of the table to generate
 * @param chunkLengths forwarded unchanged to `generate.table`
 *                     (presumably the length of each chunk — confirm against generate-test-data)
 */
function createTable<T extends { [key: string]: DataType } = any>(schema: Schema<T>, chunkLengths: number[]) {
    return generate.table(chunkLengths, schema).table;
}

describe('Table#serialize()', () => {

    test(`doesn't swap the order of buffers that share the same underlying ArrayBuffer but are in a different order`, () => {
        const values = new Int32Array([0, 1, 2, 3, 4, 5, 6, 7]);
        // Independent copy, so the expectations do not alias the buffer under test.
        const expected = values.slice();
        // Both vectors are views over `values`; 'x' deliberately comes first but sits at the back.
        const x = Int32Vector.from(values.subarray(4, 8)); // back
        const y = Int32Vector.from(values.subarray(0, 4)); // front
        const source = Table.new([x, y], ['x', 'y']);
        const table = Table.from(source.serialize());
        expect(table.getColumn('x').toArray()).toEqual(expected.subarray(4, 8));
        expect(table.getColumn('y').toArray()).toEqual(expected.subarray(0, 4));
    });

    test(`Table#empty round-trips through serialization`, () => {
        const source = Table.empty();
        source.schema.metadata.set('foo', 'bar');
        expect(source).toHaveLength(0);
        expect(source.numCols).toBe(0);
        const result = Table.from(source.serialize());
        expect(result).toEqualTable(source);
        expect(result.schema.metadata.get('foo')).toEqual('bar');
    });

    test(`Schema metadata round-trips through serialization`, () => {
        const source = createTable(schema1, [20]);
        expect(source).toHaveLength(20);
        expect(source.numCols).toBe(3);
        const result = Table.from(source.serialize());
        expect(result).toEqualTable(source);
        expect(result.schema.metadata.get('foo')).toEqual('bar');
    });

    test(`Table#assign an empty Table to a Table with a zero-length Null column round-trips through serialization`, () => {
        const table1 = new Table(nullSchema);
        const table2 = Table.empty();
        const source = table1.assign(table2);
        expect(source).toHaveLength(0);
        expect(source.numCols).toBe(1);
        const result = Table.from(source.serialize());
        expect(result).toEqualTable(source);
    });

    // Each pass appends one random entry in [0, 99] followed by a 0 entry, so the
    // three passes are meant to run the tests below against 2-, 4- and 6-entry layouts.
    const chunkLengths = [] as number[];
    for (let i = -1; ++i < 3;) {
        chunkLengths[i * 2] = (Math.random() * 100) | 0;
        chunkLengths[i * 2 + 1] = 0;
        // Snapshot the layout for this pass. Jest only runs the test callbacks after the
        // describe body (and therefore this whole loop) has finished, so closing over
        // `chunkLengths` itself would make all three passes use the final 6-entry array.
        const passChunkLengths = chunkLengths.slice();
        const table = <T extends { [key: string]: DataType } = any>(schema: Schema<T>) => createTable(schema, passChunkLengths);
        test(`Table#select round-trips through serialization`, () => {
            const source = table(schema1).select('a', 'c');
            expect(source.numCols).toBe(2);
            const result = Table.from(source.serialize());
            expect(result).toEqualTable(source);
        });
        test(`Table#selectAt round-trips through serialization`, () => {
            const source = table(schema1).selectAt(0, 2);
            expect(source.numCols).toBe(2);
            const result = Table.from(source.serialize());
            expect(result).toEqualTable(source);
        });
        test(`Table#assign round-trips through serialization`, () => {
            const source = table(schema1).assign(table(schema2));
            expect(source.numCols).toBe(6);
            const result = Table.from(source.serialize());
            expect(result).toEqualTable(source);
            expect(result.schema.metadata.get('foo')).toEqual('bar');
        });
        test(`Table#assign with an empty table round-trips through serialization`, () => {
            const table1 = table(schema1);
            const source = table1.assign(Table.empty());
            expect(source.numCols).toBe(table1.numCols);
            expect(source).toHaveLength(table1.length);
            const result = Table.from(source.serialize());
            expect(result).toEqualTable(source);
            expect(result.schema.metadata.get('foo')).toEqual('bar');
        });
        test(`Table#assign with a zero-length Null column round-trips through serialization`, () => {
            const table1 = new Table(nullSchema);
            const table2 = table(schema1);
            const source = table1.assign(table2);
            expect(source).toHaveLength(table2.length);
            expect(source.numCols).toBe(4);
            const result = Table.from(source.serialize());
            expect(result).toEqualTable(source);
            expect(result.schema.metadata.get('foo')).toEqual('bar');
        });
        test(`Table#assign with different lengths and number of chunks round-trips through serialization`, () => {
            const table1 = table(schema1);
            // Fixed layout that differs from the per-pass one used for table1.
            const table2 = createTable(schema2, [102, 4, 10, 97, 10, 2, 4]);
            const source = table1.assign(table2);
            expect(source.numCols).toBe(6);
            expect(source).toHaveLength(Math.max(table1.length, table2.length));
            const result = Table.from(source.serialize());
            expect(result).toEqualTable(source);
            expect(result.schema.metadata.get('foo')).toEqual('bar');
        });
        test(`Table#select with Table#assign the result of Table#selectAt round-trips through serialization`, () => {
            const table1 = table(schema1);
            const table2 = table(schema2);
            const source = table1.select('a', 'c').assign(table2.selectAt(2));
            expect(source.numCols).toBe(3);
            const result = Table.from(source.serialize());
            expect(result).toEqualTable(source);
            expect(result.schema.metadata.get('foo')).toEqual('bar');
        });
        test(`Table#slice round-trips through serialization`, () => {
            const table1 = table(schema1);
            const length = table1.length;
            // Middle half of the table; `| 0` truncates the bounds to integers.
            const [begin, end] = [length * .25, length * .75].map((x) => x | 0);
            const source = table1.slice(begin, end);
            expect(source.numCols).toBe(3);
            expect(source).toHaveLength(end - begin);
            const result = Table.from(source.serialize());
            expect(result).toEqualTable(source);
            expect(result.schema.metadata.get('foo')).toEqual('bar');
        });
        test(`Table#concat of two slices round-trips through serialization`, () => {
            const table1 = table(schema1);
            const length = table1.length;
            // Two disjoint windows (10%-20% and 80%-90%), truncated to integer bounds.
            const [begin1, end1] = [length * .10, length * .20].map((x) => x | 0);
            const [begin2, end2] = [length * .80, length * .90].map((x) => x | 0);
            const slice1 = table1.slice(begin1, end1);
            const slice2 = table1.slice(begin2, end2);
            const source = slice1.concat(slice2);
            expect(slice1).toHaveLength(end1 - begin1);
            expect(slice2).toHaveLength(end2 - begin2);
            expect(source).toHaveLength((end1 - begin1) + (end2 - begin2));
            [slice1, slice2, source].forEach((x) => expect(x.numCols).toBe(3));
            const result = Table.from(source.serialize());
            expect(result).toEqualTable(source);
            expect(result.schema.metadata.get('foo')).toEqual('bar');
        });
    }
});