summary refs log tree commit diff stats
path: root/src/arrow/js/perf
diff options
context:
space:
mode:
Diffstat (limited to 'src/arrow/js/perf')
-rw-r--r-- src/arrow/js/perf/config.ts 76
-rw-r--r-- src/arrow/js/perf/index.ts 234
2 files changed, 310 insertions, 0 deletions
diff --git a/src/arrow/js/perf/config.ts b/src/arrow/js/perf/config.ts
new file mode 100644
index 000000000..08ea9ecc1
--- /dev/null
+++ b/src/arrow/js/perf/config.ts
@@ -0,0 +1,76 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import * as Arrow from '../src/Arrow.dom';
+
+// Seeded generator adapted from https://stackoverflow.com/a/19303725/214950
+let seed = 1;
+
+/**
+ * Returns the fractional part of `sin(seed) * 10000` and then advances the
+ * module-level `seed` by one, so successive calls walk a fixed, repeatable
+ * sequence.
+ */
+function random() {
+    const scaled = Math.sin(seed) * 10000;
+    seed += 1;
+    return scaled - Math.floor(scaled);
+}
+
+console.time('Prepare Data');
+
+// Rows per record batch and number of batches in the benchmark table.
+const LENGTH = 100000;
+const NUM_BATCHES = 10;
+
+// Dictionary values shared by the `origin` and `destination` columns of every batch.
+const values = Arrow.Utf8Vector.from(['Charlottesville', 'New York', 'San Francisco', 'Seattle', 'Terre Haute', 'Washington, DC']);
+
+// Build NUM_BATCHES record batches of LENGTH rows each. The four columns are
+// generated in a fixed order (lat, lng, origin, destination) so the seeded
+// `random()` sequence is consumed identically on every run.
+const batches = Array.from({ length: NUM_BATCHES }, () => {
+    // Latitude in degrees: random() is in [0, 1), so this spans [-90, 90).
+    const lat = Float32Array.from(
+        { length: LENGTH },
+        () => ((random() - 0.5) * 2 * 90));
+    // Longitude in degrees spans [-180, 180); the original reused the
+    // latitude range of 90 here.
+    const lng = Float32Array.from(
+        { length: LENGTH },
+        () => ((random() - 0.5) * 2 * 180));
+
+    // Dictionary indices into `values`: random() * 6 truncates to 0..5 when
+    // stored in the integer array. Int8Array matches the Arrow.Int8 index type
+    // declared on the dictionary types below (the original passed a Uint8Array).
+    const origin = Int8Array.from(
+        { length: LENGTH },
+        () => (random() * 6));
+    const destination = Int8Array.from(
+        { length: LENGTH },
+        () => (random() * 6));
+
+    const originType = new Arrow.Dictionary(values.type, new Arrow.Int8, 0, false);
+    const destinationType = new Arrow.Dictionary(values.type, new Arrow.Int8, 0, false);
+
+    return Arrow.RecordBatch.new({
+        'lat': Arrow.Float32Vector.from(lat),
+        'lng': Arrow.Float32Vector.from(lng),
+        'origin': Arrow.Vector.new(Arrow.Data.Dictionary(originType, 0, origin.length, 0, null, origin, values)),
+        'destination': Arrow.Vector.new(Arrow.Data.Dictionary(destinationType, 0, destination.length, 0, null, destination, values)),
+    });
+});
+
+// All batches share one schema, so the first batch's schema describes the frame.
+const tracks = new Arrow.DataFrame(batches[0].schema, batches);
+
+console.timeEnd('Prepare Data');
+
+// Comparison kinds understood by the filter benchmarks.
+type CountTest = 'gt' | 'eq';
+
+// The single benchmark dataset: the in-memory frame, its serialized form,
+// the columns to group by, and the filter cases to count.
+const tracksDataset = {
+    name: 'tracks',
+    df: tracks,
+    ipc: tracks.serialize(),
+    countBys: ['origin', 'destination'],
+    counts: [
+        { column: 'lat', test: 'gt' as CountTest, value: 0 },
+        { column: 'lng', test: 'gt' as CountTest, value: 0 },
+        { column: 'origin', test: 'eq' as CountTest, value: 'Seattle' },
+    ],
+};
+
+export default [tracksDataset];
diff --git a/src/arrow/js/perf/index.ts b/src/arrow/js/perf/index.ts
new file mode 100644
index 000000000..9f6cb8f79
--- /dev/null
+++ b/src/arrow/js/perf/index.ts
@@ -0,0 +1,234 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Alternatively, use bundles for performance tests
+// import * as Arrow from '../targets/es5/umd';
+// import * as Arrow from '../targets/es5/cjs';
+// import * as Arrow from '../targets/es2015/umd';
+// import * as Arrow from '../targets/es2015/cjs';
+
+import * as Arrow from '../src/Arrow';
+
+import config from './config';
+import b from 'benny';
+import { CaseResult, Summary } from 'benny/lib/internal/common-types';
+import kleur from 'kleur';
+
+// Local bindings for the Arrow pieces exercised by the benchmarks below.
+const predicate = Arrow.predicate;
+const Table = Arrow.Table;
+const RecordBatchReader = Arrow.RecordBatchReader;
+const col = predicate.col;
+
+
+// Everything on the command line after the first two argv entries.
+const args = process.argv.slice(2);
+// Result collection is switched on only when `--json` is the first argument.
+const json = '--json' === args[0];
+
+const formatter = new Intl.NumberFormat();
+
+/**
+ * Formats a number for display using the default-locale `Intl.NumberFormat`.
+ *
+ * @param number the value to format
+ * @param precision number of significant digits to keep for small values;
+ *        values greater than `precision * 10` are rounded to an integer instead.
+ *        A precision below 1 (including the default of 0) always rounds to an integer.
+ * @returns the locale-formatted string
+ */
+function formatNumber(number: number, precision = 0) {
+    // Number.prototype.toPrecision() throws a RangeError for arguments below 1.
+    // With the default precision of 0 the original reached that call for any
+    // number <= 0 (e.g. the length of an empty frame), so guard it explicitly.
+    const roundToInteger = precision < 1 || number > precision * 10;
+    const rounded = roundToInteger
+        ? Math.round(number)
+        : parseFloat(number.toPrecision(precision));
+    return formatter.format(rounded);
+}
+
+// Benchmark results gathered for the JSON report; filled only when `json` is set.
+const results: CaseResult[] = [];
+
+/**
+ * Per-benchmark callback handed to `b.cycle`: optionally records the result
+ * (tagged with the suite name from the summary) and prints a one-line report.
+ *
+ * @param result the finished benchmark case
+ * @param _summary the suite summary; only its `name` is read
+ */
+function cycle(result: CaseResult, _summary: Summary) {
+    // Median scaled by 1000 and printed with an `ms` suffix below
+    // (presumably seconds -> milliseconds; confirm against benny's docs).
+    const duration = 1000 * result.details.median;
+
+    if (json) {
+        result.suite = _summary.name;
+        results.push(result);
+    }
+
+    const report = [
+        `${kleur.cyan(result.name)} ${formatNumber(result.ops, 3)} ops/s ±${result.margin.toPrecision(2)}%`,
+        `${formatNumber(duration, 2)} ms`,
+        kleur.gray(result.samples + ' samples'),
+    ].join(', ');
+
+    console.log(report);
+}
+
+// For every configured dataset, register the IPC parse/serialize suite and
+// then one suite per column-access pattern, with one case per column.
+for (const dataset of config) {
+    const { name, ipc, df } = dataset;
+
+    b.suite(
+        `Parse`,
+
+        b.add(`dataset: ${name}, function: Table.from`, () => {
+            Table.from(ipc);
+        }),
+
+        b.add(`dataset: ${name}, function: readBatches`, () => {
+            // Drain the reader; the batches themselves are discarded.
+            for (const _recordBatch of RecordBatchReader.from(ipc)) {}
+        }),
+
+        b.add(`dataset: ${name}, function: serialize`, () => {
+            df.serialize();
+        }),
+
+        b.cycle(cycle)
+    );
+
+    const fields = df.schema.fields;
+
+    // Each entry pairs a suite title with the operation applied to every column.
+    const columnBenchmarks: { title: string; run: (vector: Arrow.Column<any>) => void }[] = [
+        {
+            title: `Get values by index`,
+            run: (vector) => {
+                const n = vector.length;
+                for (let i = 0; i < n; i++) {
+                    vector.get(i);
+                }
+            },
+        },
+        {
+            title: `Iterate vectors`,
+            run: (vector) => { for (const _value of vector) {} },
+        },
+        {
+            title: `Slice toArray vectors`,
+            run: (vector) => { vector.slice().toArray(); },
+        },
+        {
+            title: `Slice vectors`,
+            run: (vector) => { vector.slice(); },
+        },
+    ];
+
+    for (const { title, run } of columnBenchmarks) {
+        // One benchmark case per schema field, looked up by position.
+        const cases = fields.map((field, index) => {
+            const vector = df.getColumnAt(index)!;
+            const label = `dataset: ${name}, column: ${field.name}, length: ${formatNumber(vector.length)}, type: ${vector.type}`;
+            return b.add(label, () => {
+                run(vector);
+            });
+        });
+
+        b.suite(title, ...cases, b.cycle(cycle));
+    }
+}
+
+
+/** One filter case from a dataset's `counts` list. */
+type CountCase = { column: string; test: 'gt' | 'eq'; value: number | string };
+
+// For every configured dataset, register the DataFrame-level suites:
+// iteration, countBy, predicate filtering (scan-count and iterate), and a
+// hand-written counting loop that serves as a baseline for the filters.
+for (const { name, df, countBys, counts } of config) {
+    // Type of the named column, as printed in benchmark labels.
+    const columnType = (column: string) => df.schema.fields.find((c) => c.name === column)!.type;
+
+    // Label shared by the three suites that run over `counts`.
+    const caseLabel = ({ column, test, value }: CountCase) =>
+        `dataset: ${name}, column: ${column}, length: ${formatNumber(df.length)}, type: ${columnType(column)}, test: ${test}, value: ${value}`;
+
+    // Builds the filtered frame for a case. Called from the setup function
+    // passed to `b.add`, not from the inner function that setup returns.
+    const applyFilter = ({ column, test, value }: CountCase): Arrow.FilteredDataFrame => {
+        if (test === 'gt') {
+            return df.filter(col(column).gt(value));
+        }
+        if (test === 'eq') {
+            return df.filter(col(column).eq(value));
+        }
+        throw new Error(`Unrecognized test "${test}"`);
+    };
+
+    b.suite(
+        `DataFrame Iterate`,
+
+        b.add(`dataset: ${name}, length: ${formatNumber(df.length)}`, () => {
+            for (const _value of df) {}
+        }),
+
+        b.cycle(cycle)
+    );
+
+    b.suite(
+        `DataFrame Count By`,
+
+        ...countBys.map((column: string) => b.add(
+            `dataset: ${name}, column: ${column}, length: ${formatNumber(df.length)}, type: ${columnType(column)}`,
+            () => df.countBy(column)
+        )),
+
+        b.cycle(cycle)
+    );
+
+    b.suite(
+        `DataFrame Filter-Scan Count`,
+
+        ...counts.map((spec: CountCase) => b.add(
+            caseLabel(spec),
+            () => {
+                const filteredDf = applyFilter(spec);
+
+                return () => filteredDf.count();
+            }
+        )),
+
+        b.cycle(cycle)
+    );
+
+    b.suite(
+        `DataFrame Filter-Iterate`,
+
+        ...counts.map((spec: CountCase) => b.add(
+            caseLabel(spec),
+            () => {
+                const filteredDf = applyFilter(spec);
+
+                return () => {
+                    for (const _value of filteredDf) {}
+                };
+            }
+        )),
+
+        b.cycle(cycle)
+    );
+
+    b.suite(
+        `DataFrame Direct Count`,
+
+        ...counts.map((spec: CountCase) => b.add(
+            caseLabel(spec),
+            () => {
+                const { column, test, value } = spec;
+                const colidx = df.schema.fields.findIndex((c) => c.name === column);
+
+                if (test === 'gt') {
+                    return () => {
+                        let sum = 0;
+                        const batches = df.chunks;
+                        const numBatches = batches.length;
+                        for (let batchIndex = -1; ++batchIndex < numBatches;) {
+                            // load batches
+                            const batch = batches[batchIndex];
+                            const vector = batch.getChildAt(colidx)!;
+                            // yield all indices
+                            for (let index = -1, length = batch.length; ++index < length;) {
+                                // Strict comparison, to match the `gt` predicate used
+                                // by the filter suites (the original used `>=`).
+                                sum += (vector.get(index) > value) ? 1 : 0;
+                            }
+                        }
+                        return sum;
+                    };
+                }
+
+                if (test === 'eq') {
+                    return () => {
+                        let sum = 0;
+                        const batches = df.chunks;
+                        const numBatches = batches.length;
+                        for (let batchIndex = -1; ++batchIndex < numBatches;) {
+                            // load batches
+                            const batch = batches[batchIndex];
+                            const vector = batch.getChildAt(colidx)!;
+                            // yield all indices
+                            for (let index = -1, length = batch.length; ++index < length;) {
+                                sum += (vector.get(index) === value) ? 1 : 0;
+                            }
+                        }
+                        return sum;
+                    };
+                }
+
+                throw new Error(`Unrecognized test "${test}"`);
+            }
+        )),
+
+        b.cycle(cycle),
+
+        b.complete(() => {
+            // last benchmark finished
+            json && process.stderr.write(JSON.stringify(results, null, 2));
+        })
+    );
+}