summaryrefslogtreecommitdiffstats
path: root/src/arrow/dev/benchmarking
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-21 11:54:28 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-21 11:54:28 +0000
commite6918187568dbd01842d8d1d2c808ce16a894239 (patch)
tree64f88b554b444a49f656b6c656111a145cbbaa28 /src/arrow/dev/benchmarking
parentInitial commit. (diff)
downloadceph-upstream/18.2.2.tar.xz
ceph-upstream/18.2.2.zip
Adding upstream version 18.2.2.upstream/18.2.2
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
-rw-r--r--src/arrow/dev/benchmarking/.env18
-rw-r--r--src/arrow/dev/benchmarking/.gitignore1
-rw-r--r--src/arrow/dev/benchmarking/Dockerfile23
-rw-r--r--src/arrow/dev/benchmarking/README.md255
-rw-r--r--src/arrow/dev/benchmarking/data_model.dot219
-rw-r--r--src/arrow/dev/benchmarking/data_model.rst373
-rw-r--r--src/arrow/dev/benchmarking/ddl/0_setup.sql23
-rw-r--r--src/arrow/dev/benchmarking/ddl/1_00_table_public_project.sql45
-rw-r--r--src/arrow/dev/benchmarking/ddl/1_01_table_public_cpu.sql63
-rw-r--r--src/arrow/dev/benchmarking/ddl/1_02_table_public_gpu.sql43
-rw-r--r--src/arrow/dev/benchmarking/ddl/1_03_table_public_os.sql57
-rw-r--r--src/arrow/dev/benchmarking/ddl/1_04_table_public_benchmark_language.sql35
-rw-r--r--src/arrow/dev/benchmarking/ddl/1_05_table_public_dependencies.sql31
-rw-r--r--src/arrow/dev/benchmarking/ddl/1_06_table_public_language_implementation_version.sql46
-rw-r--r--src/arrow/dev/benchmarking/ddl/1_07_table_public_benchmark_type.sql39
-rw-r--r--src/arrow/dev/benchmarking/ddl/1_08_table_public_machine.sql69
-rw-r--r--src/arrow/dev/benchmarking/ddl/1_09_table_public_unit.sql37
-rw-r--r--src/arrow/dev/benchmarking/ddl/1_10_table_public_environment.sql51
-rw-r--r--src/arrow/dev/benchmarking/ddl/1_11_table_public_benchmark.sql54
-rw-r--r--src/arrow/dev/benchmarking/ddl/1_12_table_public_benchmark_run.sql112
-rw-r--r--src/arrow/dev/benchmarking/ddl/2_00_views.sql324
-rw-r--r--src/arrow/dev/benchmarking/ddl/3_00_functions_helpers.sql643
-rw-r--r--src/arrow/dev/benchmarking/ddl/3_01_functions_triggers.sql574
-rw-r--r--src/arrow/dev/benchmarking/ddl/3_02_functions_ingestion.sql323
-rw-r--r--src/arrow/dev/benchmarking/ddl/3_10_functions_documentation.sql395
-rw-r--r--src/arrow/dev/benchmarking/ddl/4_00_triggers.sql61
-rw-r--r--src/arrow/dev/benchmarking/ddl/5_00_permissions.sql73
-rw-r--r--src/arrow/dev/benchmarking/docker-compose.yml43
-rw-r--r--src/arrow/dev/benchmarking/examples/benchmark_example.json32
-rw-r--r--src/arrow/dev/benchmarking/examples/benchmark_run_example.csv6
-rw-r--r--src/arrow/dev/benchmarking/examples/benchmark_run_example.json97
-rw-r--r--src/arrow/dev/benchmarking/examples/benchmark_with_context_example.json73
-rw-r--r--src/arrow/dev/benchmarking/examples/example.sql232
-rw-r--r--src/arrow/dev/benchmarking/examples/example_graphql_mutation.json12
-rw-r--r--src/arrow/dev/benchmarking/examples/graphql_query_environment_view.json3
-rw-r--r--src/arrow/dev/benchmarking/examples/machine.json22
-rwxr-xr-xsrc/arrow/dev/benchmarking/graphql_submit.sh75
-rwxr-xr-xsrc/arrow/dev/benchmarking/make_data_model_rst.sh69
-rwxr-xr-xsrc/arrow/dev/benchmarking/make_dotfile.sh70
-rwxr-xr-xsrc/arrow/dev/benchmarking/make_machine_json.sh55
40 files changed, 4776 insertions, 0 deletions
diff --git a/src/arrow/dev/benchmarking/.env b/src/arrow/dev/benchmarking/.env
new file mode 100644
index 000000000..7485f5866
--- /dev/null
+++ b/src/arrow/dev/benchmarking/.env
@@ -0,0 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+PG_USER=benchmark
+PG_PASS=benchmark
diff --git a/src/arrow/dev/benchmarking/.gitignore b/src/arrow/dev/benchmarking/.gitignore
new file mode 100644
index 000000000..cda00d658
--- /dev/null
+++ b/src/arrow/dev/benchmarking/.gitignore
@@ -0,0 +1 @@
+/machine.json
diff --git a/src/arrow/dev/benchmarking/Dockerfile b/src/arrow/dev/benchmarking/Dockerfile
new file mode 100644
index 000000000..f47033397
--- /dev/null
+++ b/src/arrow/dev/benchmarking/Dockerfile
@@ -0,0 +1,23 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+FROM postgres:11-alpine
+
+# Any `.sh` and `.sql` files copied to the entrypoint directory
+# will be run during startup. See `docker-entrypoint.sh` in
+# https://github.com/docker-library/postgres/blob/master/11/alpine/
+COPY ddl/* /docker-entrypoint-initdb.d/
diff --git a/src/arrow/dev/benchmarking/README.md b/src/arrow/dev/benchmarking/README.md
new file mode 100644
index 000000000..c5ddd62e0
--- /dev/null
+++ b/src/arrow/dev/benchmarking/README.md
@@ -0,0 +1,255 @@
+<!--
+ ~ Licensed to the Apache Software Foundation (ASF) under one
+ ~ or more contributor license agreements. See the NOTICE file
+ ~ distributed with this work for additional information
+ ~ regarding copyright ownership. The ASF licenses this file
+ ~ to you under the Apache License, Version 2.0 (the
+ ~ "License"); you may not use this file except in compliance
+ ~ with the License. You may obtain a copy of the License at
+ ~
+ ~ http://www.apache.org/licenses/LICENSE-2.0
+ ~
+ ~ Unless required by applicable law or agreed to in writing,
+ ~ software distributed under the License is distributed on an
+ ~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ ~ KIND, either express or implied. See the License for the
+ ~ specific language governing permissions and limitations
+ ~ under the License.
+ -->
+
+
+> NOTE: For those deploying this database, Postgres does not by default use
+> UTF-8, however it is [required for the jsonb][pg-jsonb] format used in
+> some columns to always work. This [stackoverflow post][so-utf8] describes
+> how to do it for Amazon RDS. This [section of the docs][pg-charset]
+> states how to do it in general, i.e.: `initdb -E UTF8`.
+
+# Benchmark database
+
+This directory contains files related to the benchmark database.
+
+- 'ddl/\*.sql' contains the database definition.
+- 'examples/' contain code to test the database and demonstrate its use.
+- 'Dockerfile' and 'docker-compose.yml' are for developing benchmarks
+ against a testing database.
+- An auto-generated summary of views in the [Data model][./data_model.rst].
+
+## Setup
+
+To create a 'machine.json' file that will uniquely identify a computer for
+benchmark submission, run the provided shell script and fill in the prompts
+to identify the GPU.
+
+> NOTE: this does not work on VMs or Windows.
+
+```shell
+./make_machine_json.sh
+```
+
+Submit the machine details via http using the command
+
+> NOTE: This will only work if we have selected graphql as a client
+> and have it running in production or if during development
+> you have run `docker-compose up` to create and run both a
+> database Docker container and graphql client Docker container.
+
+```shell
+./graphql_submit.sh machine machine.json localhost:5000/graphql
+```
+
+or submit after starting up the psql client from this directory, using
+
+```
+\set content `cat machine.json`
+SELECT ingest_machine_view(:'content'::jsonb);
+```
+
+> NOTE: If you don't have a "machine.json" file generated,
+> use the example file "examples/machine.json" instead.
+
+## Local testing
+
+There is a file named "[.env][.env]" in this directory that is used by
+`docker-compose` to set up the postgres user and password for the
+local containers. Currently the name and password are both
+`benchmark`. This will be the password for the psql client as well.
+
+The Postgres Alpine image runs any added '\*.sql' and '\*.sh' scripts placed
+in '/docker-entrypoint-initdb.d/' during its startup script, so the local
+database will be set up automatically once the container is running.
+
+To start the containers, be sure to have [Docker installed][docker],
+and then run the following from this directory (arrow/dev/benchmarking).
+
+
+```
+docker-compose up
+```
+
+This will start a process that will show logs from both the running
+Postgres container and the running GraphQL container.
+To stop the running containers gracefully, background the process
+and run
+
+```
+docker-compose down
+fg # To re-foreground the backgrounded process while it exits
+```
+
+You will still have the container images "benchmarking_pg",
+"graphile/postgraphile", and "postgres:11-alpine" on your
+computer. You should keep them if you want to run this again.
+If you don't, then remove them with the command:
+
+```
+docker rmi benchmarking_pg postgres:11-alpine graphile/postgraphile
+```
+
+### Postgres client
+
+The `psql` shell client is bundled with the PostgreSQL core distribution
+available from the [Postgres download page][postgres-downloads].
+Using the `PG_USER` defined in the `.env` file (currently "benchmark"),
+the command to connect to the container is:
+```shell
+psql -h localhost -p 5432 -U benchmark
+```
+There is an example script in [examples/example.sql](examples/example.sql) that
+runs some queries against the database. To run it in the psql client, type
+the following in the psql command-line interface:
+
+```
+\i examples/example.sql
+```
+
+#### Bulk ingestion using CSV
+
+An example CSV file for bulk ingestion is in
+[examples/benchmark_run_example.csv](examples/benchmark_run_example.csv).
+The columns are listed in the same order as they are defined, to avoid having
+to explicitly name every column in ingestion. The "id" column is left empty
+and will be automatically assigned on insert.
+
+To ingest the example CSV file from the command line,
+use the command below:
+
+```shell
+CSV='examples/benchmark_run_example.csv' && \
+psql -U benchmark -h localhost -p 5432 \
+ -c "\copy benchmark_run_view FROM '${CSV}' WITH (FORMAT csv, HEADER);"
+```
+
+#### Bulk ingestion using JSON
+
+To ingest the example JSON file using the psql client, use the command below.
+
+```
+\set content `cat examples/benchmark_example.json`
+SELECT ingest_benchmark_view(:'content'::jsonb);
+```
+
+### HTTP client
+
+This section requires an actual HTTP client to be up, either
+for the production database or via the testing setup.
+(See the [local testing section](#local-testing) for how to set it up).
+
+The 'graphile/postgraphile' container provides an HTTP interface
+to the database via two url routes:
+
+- A GraphiQL page ([localhost:5000/graphiql][graphiql])
+ to aid visual exploration of the data model.
+ (The `--watch` flag on the command line. Not recommended for production.)
+- An endpoint that receives POST requests only (localhost:5000/graphql).
+
+#### Ingestion
+
+The script [graphql_submit.sh](./graphql_submit.sh) simplifies submission
+to the database via curl. Examples:
+
+```shell
+./graphql_submit.sh benchmarks examples/benchmark_example.json
+./graphql_submit.sh runs examples/benchmark_run_example.json
+```
+
+#### Querying
+
+The output of the query is a JSON object that is hard to read on the command line.
+Here is an example query in the shell:
+```shell
+curl -X POST \
+ -H "Content-Type: application/json" \
+ --data '{"query": "{projectDetails{ projectName }}"}' \
+ localhost:5000/graphql
+```
+
+which (if you have previously run the "examples.sql" command) yields
+
+```
+{"data":{"projectDetails":{"projectName":"Apache Arrow"}}}
+```
+
+Here is an example query using Python:
+```python
+import json
+import requests
+
+uri = "http://localhost:5000/graphql"
+query = json.load(open("examples/graphql_query_environment_view.json"))
+response = requests.post(uri, json=query)
+message = "{benchmarkLanguage}: {languageImplementationVersion}, {dependencies}"
+
+for row in response.json()['data']['allEnvironmentViews']['edges']:
+ print(message.format(**row['node']))
+
+# result:
+#
+# Python: CPython 2.7, {"six":"","numpy":"1.14","other_lib":"1.0"}
+# Python: CPython 2.7, {"six":"","numpy":"1.15","other_lib":"1.0"}
+# Python: CPython 3.6, {"boost":"1.42","numpy":"1.15"}
+```
+
+## Deployment
+
+(work in progress).
+
+> NOTE: For those deploying this database, Postgres does not by default use
+> UTF-8, however it is [required for the jsonb][pg-jsonb] format used in
+> some columns to always work. This [stackoverflow post][so-utf8] describes
+> how to do it for Amazon RDS. This [section of the docs][pg-charset]
+> states how to do it in general, i.e.: `initdb -E UTF8`.
+
+
+## Quick reference
+
+- String variables `'have single quotes'`
+- Arrays `'{"have", "curly", "braces"}'::text[]` or `'{1, 2, 3}'::integer[]`
+- JSONb `'{"has":"this", "format":42}'::jsonb`
+- Elements inserted using JSON-formatted strings can use standard
+ JSON-formatted arrays (`[1, 2, 3]`) and do not have to use the above
+ string formats.
+- When comparing nullable values use `x IS NOT DISTINCT FROM y` rather than `x = y`
+- An auto-generated summary of the [Data model][./data_model.rst].
+
+## Data model documentation
+
+To recreate the data model documentation,
+(1) install the [psql client][postgres-downloads]
+(sorry you need to download the whole thing),
+(2) start the docker container using `docker-compose up`,
+(3) and then run these scripts:
+
+```
+./make_dotfile.sh
+./make_data_model_rst.sh
+```
+
+[pg-jsonb]: https://www.postgresql.org/docs/11/datatype-json.html#id-1.5.7.22.3
+[so-utf8]: https://stackoverflow.com/a/33557023
+[pg-charset]: https://www.postgresql.org/docs/9.3/multibyte.html#AEN34424
+[docker]: https://www.docker.com/get-started
+[citext-limitations]: https://www.postgresql.org/docs/11/citext.html#id-1.11.7.17.7
+[postgres-downloads]: https://www.postgresql.org/download/
+[graphiql]: http://localhost:5000/graphiql
+[postgraphile-lambda]: https://github.com/graphile/postgraphile-lambda-example
+[postgraphile-cli]: https://www.graphile.org/postgraphile/usage-cli/
diff --git a/src/arrow/dev/benchmarking/data_model.dot b/src/arrow/dev/benchmarking/data_model.dot
new file mode 100644
index 000000000..d311acd4e
--- /dev/null
+++ b/src/arrow/dev/benchmarking/data_model.dot
@@ -0,0 +1,219 @@
+/*
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements.See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership.The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License.You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied.See the License for the
+ specific language governing permissions and limitations
+ under the License.
+*/
+
+/*
+ WARNING
+ This is an auto-generated file. Please do not edit.
+
+ To reproduce, please run :code:`./make_data_model_rst.sh`.
+ (This requires you have the
+ `psql client <https://www.postgresql.org/download/>`_
+ and have started the docker containers using
+ :code:`docker-compose up`).
+*/
+digraph database {
+ concentrate = true;
+ rankdir = LR;
+ ratio = ".75";
+ node [shape = none, fontsize="11", fontname="Helvetica"];
+ edge [fontsize="8", fontname="Helvetica"];
+legend
+[fontsize = "14"
+label =
+<<table border="0" cellpadding="0">
+ <tr><td align="left"><font point-size="16">Legend</font></td></tr>
+ <tr><td align="left">pk = primary key</td></tr>
+ <tr><td align="left">fk = foreign key</td></tr>
+ <tr><td align="left">u = unique*</td></tr>
+ <tr><td align="left">o = optional</td></tr>
+ <tr><td align="left">* multiple uniques in the same table are a unique group</td></tr>
+</table>>
+];
+benchmark
+[label =
+ <<table border="0" cellborder="1" cellspacing="0" cellpadding="2">
+ <tr><td border="0"><font point-size="14">benchmark</font></td></tr>
+ <tr><td port="benchmark_id"><b>benchmark_id (pk)</b></td></tr>
+ <tr><td port="benchmark_language_id"><b>benchmark_language_id (pk)</b></td></tr>
+ <tr><td>benchmark_name (u)</td></tr>
+ <tr><td>parameter_names (o)</td></tr>
+ <tr><td>benchmark_description</td></tr>
+ <tr><td>benchmark_version (u)</td></tr>
+ <tr><td port="unit_id">unit_id (fk) </td></tr>
+ </table>>
+];
+benchmark_language
+[label =
+ <<table border="0" cellborder="1" cellspacing="0" cellpadding="2">
+ <tr><td border="0"><font point-size="14">benchmark_language</font></td></tr>
+ <tr><td port="benchmark_language_id"><b>benchmark_language_id (pk)</b></td></tr>
+ <tr><td>benchmark_language (u)</td></tr>
+ </table>>
+];
+benchmark_run
+[label =
+ <<table border="0" cellborder="1" cellspacing="0" cellpadding="2">
+ <tr><td border="0"><font point-size="14">benchmark_run</font></td></tr>
+ <tr><td port="benchmark_run_id"><b>benchmark_run_id (pk)</b></td></tr>
+ <tr><td>parameter_values (u)</td></tr>
+ <tr><td>value</td></tr>
+ <tr><td>git_commit_timestamp (u)</td></tr>
+ <tr><td>git_hash</td></tr>
+ <tr><td>val_min (o)</td></tr>
+ <tr><td>val_q1 (o)</td></tr>
+ <tr><td>val_q3 (o)</td></tr>
+ <tr><td>val_max (o)</td></tr>
+ <tr><td>std_dev</td></tr>
+ <tr><td>n_obs</td></tr>
+ <tr><td>run_timestamp (u)</td></tr>
+ <tr><td>run_metadata (o)</td></tr>
+ <tr><td>run_notes (o)</td></tr>
+ <tr><td port="machine_id">machine_id (u) (fk) </td></tr>
+ <tr><td port="environment_id">environment_id (u) (fk) </td></tr>
+ <tr><td port="language_implementation_version_id">language_implementation_version_id (fk) </td></tr>
+ <tr><td port="benchmark_language_id">benchmark_language_id (fk) </td></tr>
+ <tr><td port="benchmark_id">benchmark_id (u) (fk) </td></tr>
+ </table>>
+];
+benchmark_type
+[label =
+ <<table border="0" cellborder="1" cellspacing="0" cellpadding="2">
+ <tr><td border="0"><font point-size="14">benchmark_type</font></td></tr>
+ <tr><td port="benchmark_type_id"><b>benchmark_type_id (pk)</b></td></tr>
+ <tr><td>benchmark_type (u)</td></tr>
+ <tr><td>lessisbetter</td></tr>
+ </table>>
+];
+cpu
+[label =
+ <<table border="0" cellborder="1" cellspacing="0" cellpadding="2">
+ <tr><td border="0"><font point-size="14">cpu</font></td></tr>
+ <tr><td port="cpu_id"><b>cpu_id (pk)</b></td></tr>
+ <tr><td>cpu_model_name (u)</td></tr>
+ <tr><td>cpu_core_count</td></tr>
+ <tr><td>cpu_thread_count</td></tr>
+ <tr><td>cpu_frequency_max_hz</td></tr>
+ <tr><td>cpu_frequency_min_hz</td></tr>
+ <tr><td>cpu_l1d_cache_bytes</td></tr>
+ <tr><td>cpu_l1i_cache_bytes</td></tr>
+ <tr><td>cpu_l2_cache_bytes</td></tr>
+ <tr><td>cpu_l3_cache_bytes</td></tr>
+ </table>>
+];
+dependencies
+[label =
+ <<table border="0" cellborder="1" cellspacing="0" cellpadding="2">
+ <tr><td border="0"><font point-size="14">dependencies</font></td></tr>
+ <tr><td port="dependencies_id"><b>dependencies_id (pk)</b></td></tr>
+ <tr><td>dependencies (u)</td></tr>
+ </table>>
+];
+gpu
+[label =
+ <<table border="0" cellborder="1" cellspacing="0" cellpadding="2">
+ <tr><td border="0"><font point-size="14">gpu</font></td></tr>
+ <tr><td port="gpu_id"><b>gpu_id (pk)</b></td></tr>
+ <tr><td>gpu_information (u)</td></tr>
+ <tr><td>gpu_part_number</td></tr>
+ <tr><td>gpu_product_name</td></tr>
+ </table>>
+];
+language_implementation_version
+[label =
+ <<table border="0" cellborder="1" cellspacing="0" cellpadding="2">
+ <tr><td border="0"><font point-size="14">language_implementation_version</font></td></tr>
+ <tr><td port="language_implementation_version_id"><b>language_implementation_version_id (pk)</b></td></tr>
+ <tr><td port="benchmark_language_id"><b>benchmark_language_id (pk)</b></td></tr>
+ <tr><td>language_implementation_version (u)</td></tr>
+ </table>>
+];
+machine
+[label =
+ <<table border="0" cellborder="1" cellspacing="0" cellpadding="2">
+ <tr><td border="0"><font point-size="14">machine</font></td></tr>
+ <tr><td port="machine_id"><b>machine_id (pk)</b></td></tr>
+ <tr><td>machine_name</td></tr>
+ <tr><td>mac_address (u)</td></tr>
+ <tr><td>memory_bytes</td></tr>
+ <tr><td>cpu_actual_frequency_hz</td></tr>
+ <tr><td>machine_other_attributes (o)</td></tr>
+ <tr><td port="cpu_id">cpu_id (fk) </td></tr>
+ <tr><td port="gpu_id">gpu_id (fk) </td></tr>
+ <tr><td port="os_id">os_id (fk) </td></tr>
+ </table>>
+];
+os
+[label =
+ <<table border="0" cellborder="1" cellspacing="0" cellpadding="2">
+ <tr><td border="0"><font point-size="14">os</font></td></tr>
+ <tr><td port="os_id"><b>os_id (pk)</b></td></tr>
+ <tr><td>os_name (u)</td></tr>
+ <tr><td>architecture_name (u)</td></tr>
+ <tr><td>kernel_name (u)</td></tr>
+ </table>>
+];
+project
+[label =
+ <<table border="0" cellborder="1" cellspacing="0" cellpadding="2">
+ <tr><td border="0"><font point-size="14">project</font></td></tr>
+ <tr><td port="project_id"><b>project_id (pk)</b></td></tr>
+ <tr><td>project_name (u)</td></tr>
+ <tr><td>project_url (u)</td></tr>
+ <tr><td>repo_url (u)</td></tr>
+ <tr><td>last_changed</td></tr>
+ </table>>
+];
+unit
+[label =
+ <<table border="0" cellborder="1" cellspacing="0" cellpadding="2">
+ <tr><td border="0"><font point-size="14">unit</font></td></tr>
+ <tr><td port="unit_id"><b>unit_id (pk)</b></td></tr>
+ <tr><td>units (u)</td></tr>
+ <tr><td port="benchmark_type_id">benchmark_type_id (fk) </td></tr>
+ </table>>
+];
+environment
+[label =
+ <<table border="0" cellborder="1" cellspacing="0" cellpadding="2">
+ <tr><td border="0"><font point-size="14">environment</font></td></tr>
+ <tr><td port="environment_id"><b>environment_id (pk)</b></td></tr>
+ <tr><td port="language_implementation_version_id"><b>language_implementation_version_id (pk)</b></td></tr>
+ <tr><td port="benchmark_language_id"><b>benchmark_language_id (pk)</b></td></tr>
+ <tr><td port="dependencies_id">dependencies_id (u) (fk) </td></tr>
+ </table>>
+];
+machine:cpu_id -> cpu:cpu_id;
+machine:gpu_id -> gpu:gpu_id;
+machine:os_id -> os:os_id;
+benchmark:benchmark_language_id -> benchmark_language:benchmark_language_id;
+environment:benchmark_language_id -> benchmark_language:benchmark_language_id;
+language_implementation_version:benchmark_language_id -> benchmark_language:benchmark_language_id;
+environment:dependencies_id -> dependencies:dependencies_id;
+environment:benchmark_language_id -> language_implementation_version:benchmark_language_id;
+environment:language_implementation_version_id -> language_implementation_version:language_implementation_version_id;
+unit:benchmark_type_id -> benchmark_type:benchmark_type_id;
+benchmark_run:machine_id -> machine:machine_id;
+benchmark:unit_id -> unit:unit_id;
+benchmark_run:language_implementation_version_id -> environment:language_implementation_version_id;
+benchmark_run:benchmark_language_id -> environment:benchmark_language_id;
+benchmark_run:environment_id -> environment:environment_id;
+benchmark_run:benchmark_language_id -> benchmark:benchmark_language_id;
+benchmark_run:benchmark_id -> benchmark:benchmark_id;
+}
+
diff --git a/src/arrow/dev/benchmarking/data_model.rst b/src/arrow/dev/benchmarking/data_model.rst
new file mode 100644
index 000000000..d0f3dc7fc
--- /dev/null
+++ b/src/arrow/dev/benchmarking/data_model.rst
@@ -0,0 +1,373 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+.. or more contributor license agreements. See the NOTICE file
+.. distributed with this work for additional information
+.. regarding copyright ownership. The ASF licenses this file
+.. to you under the Apache License, Version 2.0 (the
+.. "License"); you may not use this file except in compliance
+.. with the License. You may obtain a copy of the License at
+
+.. http://www.apache.org/licenses/LICENSE-2.0
+
+.. Unless required by applicable law or agreed to in writing,
+.. software distributed under the License is distributed on an
+.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+.. KIND, either express or implied. See the License for the
+.. specific language governing permissions and limitations
+.. under the License.
+
+
+.. WARNING
+.. This is an auto-generated file. Please do not edit.
+
+.. To reproduce, please run :code:`./make_data_model_rst.sh`.
+.. (This requires you have the
+.. `psql client <https://www.postgresql.org/download/>`_
+.. and have started the docker containers using
+.. :code:`docker-compose up`).
+
+
+.. _benchmark-data-model:
+
+Benchmark data model
+====================
+
+
+.. graphviz:: data_model.dot
+
+
+.. _benchmark-ingestion:
+
+Benchmark ingestion helper functions
+====================================
+
+ingest_benchmark_run_view
+-------------------------
+
+:code:`ingest_benchmark_run_view(from_jsonb jsonb)`
+
+The argument is a JSON object. NOTE: key names must be entirely
+lowercase, or the insert will fail. Extra key-value pairs are ignored.
+Example::
+
+ [
+ {
+ "benchmark_name": "Benchmark 2",
+ "benchmark_version": "version 0",
+ "parameter_values": {"arg0": 100, "arg1": 5},
+ "value": 2.5,
+ "git_commit_timestamp": "2019-02-08 22:35:53 +0100",
+ "git_hash": "324d3cf198444a",
+ "val_min": 1,
+ "val_q1": 2,
+ "val_q3": 3,
+ "val_max": 4,
+ "std_dev": 1.41,
+ "n_obs": 8,
+ "run_timestamp": "2019-02-14 03:00:05 -0600",
+ "mac_address": "08:00:2b:01:02:03",
+ "benchmark_language": "Python",
+ "language_implementation_version": "CPython 2.7",
+ "dependencies": {"six": "", "numpy": "1.14", "other_lib": "1.0"}
+ },
+ {
+ "benchmark_name": "Benchmark 2",
+ "benchmark_version": "version 0",
+ "parameter_values": {"arg0": 1000, "arg1": 5},
+ "value": 5,
+ "git_commit_timestamp": "2019-02-08 22:35:53 +0100",
+ "git_hash": "324d3cf198444a",
+ "std_dev": 3.14,
+ "n_obs": 8,
+ "run_timestamp": "2019-02-14 03:00:10 -0600",
+ "mac_address": "08:00:2b:01:02:03",
+ "benchmark_language": "Python",
+ "language_implementation_version": "CPython 2.7",
+ "dependencies": {"six": "", "numpy": "1.14", "other_lib": "1.0"}
+ }
+ ]
+To identify which columns in "benchmark_run_view" are required,
+please see the view documentation in :ref:`benchmark-data-model`.
+
+
+
+back to `Benchmark data model <benchmark-data-model>`_
+
+
+ingest_benchmark_view
+---------------------
+
+:code:`ingest_benchmark_view(from_jsonb jsonb)`
+
+The argument is a JSON object. NOTE: key names must be entirely
+lowercase, or the insert will fail. Extra key-value pairs are ignored.
+Example::
+
+ [
+ {
+ "benchmark_name": "Benchmark 1",
+ "parameter_names": ["arg0", "arg1", "arg2"],
+ "benchmark_description": "First benchmark",
+ "benchmark_type": "Time",
+ "units": "miliseconds",
+ "lessisbetter": true,
+ "benchmark_version": "second version",
+ "benchmark_language": "Python"
+ },
+ {
+ "benchmark_name": "Benchmark 2",
+ "parameter_names": ["arg0", "arg1"],
+ "benchmark_description": "Description 2.",
+ "benchmark_type": "Time",
+ "units": "nanoseconds",
+ "lessisbetter": true,
+ "benchmark_version": "second version",
+ "benchmark_language": "Python"
+ }
+ ]
+
+To identify which columns in "benchmark_view" are required,
+please see the view documentation in :ref:`benchmark-data-model`.
+
+
+
+back to `Benchmark data model <benchmark-data-model>`_
+
+
+ingest_benchmark_runs_with_context
+----------------------------------
+
+:code:`ingest_benchmark_runs_with_context(from_jsonb jsonb)`
+
+The argument is a JSON object. NOTE: key names must be entirely
+lowercase, or the insert will fail. Extra key-value pairs are ignored.
+The object contains three key-value pairs::
+
+ {"context": {
+ "mac_address": "08:00:2b:01:02:03",
+ "benchmark_language": "Python",
+ "language_implementation_version": "CPython 3.6",
+ "dependencies": {"six": "", "numpy": "1.14", "other_lib": "1.0"},
+ "git_commit_timestamp": "2019-02-14 22:42:22 +0100",
+ "git_hash": "123456789abcde",
+ "run_timestamp": "2019-02-14 03:00:40 -0600",
+ "extra stuff": "does not hurt anything and will not be added."
+ },
+ "benchmark_version": {
+ "Benchmark Name 1": "Any string can be a version.",
+ "Benchmark Name 2": "A git hash can be a version.",
+ "An Unused Benchmark Name": "Will be ignored."
+ },
+ "benchmarks": [
+ {
+ "benchmark_name": "Benchmark Name 1",
+ "parameter_values": {"argument1": 1, "argument2": "value2"},
+ "value": 42,
+ "val_min": 41.2,
+ "val_q1": 41.5,
+ "val_q3": 42.5,
+ "val_max": 42.8,
+ "std_dev": 0.5,
+ "n_obs": 100,
+ "run_metadata": {"any": "key-value pairs"},
+ "run_notes": "Any relevant notes."
+ },
+ {
+ "benchmark_name": "Benchmark Name 2",
+ "parameter_values": {"not nullable": "Use {} if no params."},
+ "value": 8,
+ "std_dev": 1,
+ "n_obs": 2,
+ }
+ ]
+ }
+
+- The entry for "context" contains the machine, environment, and timestamp
+ information common to all of the runs
+- The entry for "benchmark_version" maps benchmark
+ names to their version strings. (Which can be a git hash,
+ the entire code string, a number, or any other string of your choice.)
+- The entry for "benchmarks" is a list of benchmark run data
+ for the given context and benchmark versions. The first example
+ benchmark run entry contains all possible values, even
+ nullable ones, and the second entry omits all nullable values.
+
+
+
+
+back to `Benchmark data model <benchmark-data-model>`_
+
+
+ingest_machine_view
+-------------------
+
+:code:`ingest_machine_view(from_jsonb jsonb)`
+
+The argument is a JSON object. NOTE: key names must be entirely
+lowercase, or the insert will fail. Extra key-value pairs are ignored.
+Example::
+
+ {
+ "mac_address": "0a:00:2d:01:02:03",
+ "machine_name": "Yet-Another-Machine-Name",
+ "memory_bytes": 8589934592,
+ "cpu_actual_frequency_hz": 2300000000,
+ "os_name": "OSX",
+ "architecture_name": "x86_64",
+ "kernel_name": "18.2.0",
+ "cpu_model_name": "Intel(R) Core(TM) i5-7360U CPU @ 2.30GHz",
+ "cpu_core_count": 2,
+ "cpu_thread_count": 4,
+ "cpu_frequency_max_hz": 2300000000,
+ "cpu_frequency_min_hz": 2300000000,
+ "cpu_l1d_cache_bytes": 32768,
+ "cpu_l1i_cache_bytes": 32768,
+ "cpu_l2_cache_bytes": 262144,
+ "cpu_l3_cache_bytes": 4194304,
+ "machine_other_attributes": {"just": "an example"},
+ "gpu_information": "",
+ "gpu_part_number": "",
+ "gpu_product_name": ""
+ }
+
+To identify which columns in "machine_view" are required,
+please see the view documentation in :ref:`benchmark-data-model`.
+
+
+
+back to `Benchmark data model <benchmark-data-model>`_
+
+
+
+.. _benchmark-views:
+
+Benchmark views
+===============
+
+
+benchmark_run_view
+------------------
+
+Each benchmark run.
+
+- Each entry is unique on the machine, environment, benchmark,
+ and git commit timestamp.
+
+=============================== =========== ======== =========== ===========
+Column Type Nullable Default Description
+=============================== =========== ======== =========== ===========
+benchmark_run_id int8 not null serial primary key
+benchmark_name citext not null unique
+benchmark_version citext not null unique
+parameter_values jsonb not null '{}'::jsonb unique
+value numeric not null
+git_commit_timestamp timestamptz not null unique
+git_hash text not null
+val_min numeric
+val_q1 numeric
+val_q3 numeric
+val_max numeric
+std_dev numeric not null
+n_obs int4 not null
+run_timestamp timestamptz not null unique
+run_metadata jsonb
+run_notes text
+mac_address macaddr not null unique
+benchmark_language citext not null unique
+language_implementation_version citext not null ''::citext unique
+dependencies jsonb not null '{}'::jsonb unique
+=============================== =========== ======== =========== ===========
+
+back to `Benchmark data model <benchmark-data-model>`_
+
+benchmark_view
+--------------
+
+The details about a particular benchmark.
+
+- "benchmark_name" is unique for a given "benchmark_language"
+- Each entry is unique on
+ ("benchmark_language", "benchmark_name", "benchmark_version")
+
+===================== ====== ======== ======= ===========
+Column Type Nullable Default Description
+===================== ====== ======== ======= ===========
+benchmark_id int4 not null serial primary key
+benchmark_name citext not null unique
+parameter_names _text
+benchmark_description text not null
+benchmark_type citext not null unique
+units citext not null unique
+lessisbetter bool not null
+benchmark_version citext not null unique
+benchmark_language citext not null unique
+===================== ====== ======== ======= ===========
+
+back to `Benchmark data model <benchmark-data-model>`_
+
+environment_view
+----------------
+
+The build environment used for a reported benchmark run.
+(Will be inferred from each "benchmark_run" if not explicitly added).
+
+- Each entry is unique on
+ ("benchmark_language", "language_implementation_version", "dependencies")
+- "benchmark_language" is unique in the "benchmark_language" table
+- "benchmark_language" plus "language_implementation_version" is unique in
+ the "language_implementation_version" table
+- "dependencies" is unique in the "dependencies" table
+
+=============================== ====== ======== =========== ===========
+Column Type Nullable Default Description
+=============================== ====== ======== =========== ===========
+environment_id int4 not null serial primary key
+benchmark_language citext not null unique
+language_implementation_version citext not null ''::citext unique
+dependencies jsonb not null '{}'::jsonb unique
+=============================== ====== ======== =========== ===========
+
+back to `Benchmark data model <benchmark-data-model>`_
+
+machine_view
+------------
+
+The machine environment (CPU, GPU, OS) used for each benchmark run.
+
+- "mac_address" is unique in the "machine" table
+- "gpu_part_number" is unique in the "gpu" (graphics processing unit) table
+ Empty string (''), not null, is used for machines that won't use the GPU
+- "cpu_model_name" is unique in the "cpu" (central processing unit) table
+- "os_name", "os_architecture_name", and "os_kernel_name"
+ are unique in the "os" (operating system) table
+- "machine_other_attributes" is a key-value store for any other relevant
+ data, e.g. '{"hard_disk_type": "solid state"}'
+
+======================== ======= ======== ========== ===========
+Column Type Nullable Default Description
+======================== ======= ======== ========== ===========
+machine_id int4 not null serial primary key
+mac_address macaddr not null unique
+machine_name citext not null
+memory_bytes int8 not null
+cpu_actual_frequency_hz int8 not null
+os_name citext not null unique
+architecture_name citext not null unique
+kernel_name citext not null ''::citext unique
+cpu_model_name citext not null unique
+cpu_core_count int4 not null
+cpu_thread_count int4 not null
+cpu_frequency_max_hz int8 not null
+cpu_frequency_min_hz int8 not null
+cpu_l1d_cache_bytes int4 not null
+cpu_l1i_cache_bytes int4 not null
+cpu_l2_cache_bytes int4 not null
+cpu_l3_cache_bytes int4 not null
+gpu_information citext not null ''::citext unique
+gpu_part_number citext not null ''::citext
+gpu_product_name citext not null ''::citext
+machine_other_attributes jsonb
+======================== ======= ======== ========== ===========
+
+back to `Benchmark data model <benchmark-data-model>`_
+
+
diff --git a/src/arrow/dev/benchmarking/ddl/0_setup.sql b/src/arrow/dev/benchmarking/ddl/0_setup.sql
new file mode 100644
index 000000000..ec1044641
--- /dev/null
+++ b/src/arrow/dev/benchmarking/ddl/0_setup.sql
@@ -0,0 +1,23 @@
+/*
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+*/
+
+CREATE EXTENSION IF NOT EXISTS "citext"; -- type for case-insensitive text
+
+-- For future fine-grained control over function execution by user group.
+ALTER DEFAULT PRIVILEGES REVOKE EXECUTE ON functions FROM public;
diff --git a/src/arrow/dev/benchmarking/ddl/1_00_table_public_project.sql b/src/arrow/dev/benchmarking/ddl/1_00_table_public_project.sql
new file mode 100644
index 000000000..c52d66cfd
--- /dev/null
+++ b/src/arrow/dev/benchmarking/ddl/1_00_table_public_project.sql
@@ -0,0 +1,45 @@
+/*
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+*/
+
+
+-- PROJECT
+CREATE TABLE IF NOT EXISTS public.project
+(
+ project_id SERIAL PRIMARY KEY
+ , project_name citext NOT NULL
+ , project_url text NOT NULL
+ , repo_url text NOT NULL
+ , last_changed timestamp (0) without time zone NOT NULL DEFAULT now()
+);
+COMMENT ON TABLE public.project
+ IS 'Project name and relevant URLs.';
+COMMENT ON COLUMN public.project.project_url
+ IS 'Homepage URL.';
+COMMENT ON COLUMN public.project.repo_url
+ IS 'Git repo URL to link stored commit hashes to code in a webpage.';
+COMMENT ON COLUMN public.project.last_changed
+ IS 'New project details are added with a new timestamp. '
+ 'The project details with the newest timestamp will be used.';
+
+-- CONSTRAINTS
+CREATE UNIQUE INDEX project_unique_index_on_project_name_urls
+ ON public.project(project_name, project_url, repo_url);
+COMMENT ON INDEX
+ public.project_unique_index_on_project_name_urls
+ IS 'Enforce uniqueness of project name and urls.';
diff --git a/src/arrow/dev/benchmarking/ddl/1_01_table_public_cpu.sql b/src/arrow/dev/benchmarking/ddl/1_01_table_public_cpu.sql
new file mode 100644
index 000000000..df1a9e757
--- /dev/null
+++ b/src/arrow/dev/benchmarking/ddl/1_01_table_public_cpu.sql
@@ -0,0 +1,63 @@
+/*
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+*/
+
+
+-- CPU
+CREATE TABLE IF NOT EXISTS public.cpu
+(
+ cpu_id SERIAL PRIMARY KEY
+ , cpu_model_name citext NOT NULL UNIQUE
+ , cpu_core_count integer NOT NULL
+ , cpu_thread_count integer NOT NULL
+ , cpu_frequency_max_Hz bigint NOT NULL
+ , cpu_frequency_min_Hz bigint NOT NULL
+ , cpu_L1d_cache_bytes integer NOT NULL
+ , cpu_L1i_cache_bytes integer NOT NULL
+ , cpu_L2_cache_bytes integer NOT NULL
+ , cpu_L3_cache_bytes integer NOT NULL
+);
+COMMENT ON TABLE public.cpu
+ IS 'CPU model and its specifications.';
+COMMENT ON COLUMN public.cpu.cpu_id
+ IS 'The primary key for the CPU table. '
+ 'NOTE: This is a synthetic primary key and not meant to represent a '
+ 'processor instruction to read capabilities.';
+COMMENT ON COLUMN public.cpu.cpu_model_name
+ IS 'The output of `sysctl -n machdep.cpu.brand_stringp`.';
+COMMENT ON COLUMN public.cpu.cpu_core_count
+ IS 'The output of `sysctl -n hw.physicalcpu`.';
+COMMENT ON COLUMN public.cpu.cpu_thread_count
+ IS 'The output of `sysctl -n hw.logicalcpu`.';
+COMMENT ON COLUMN public.cpu.cpu_frequency_max_Hz
+ IS 'The output of `sysctl -n hw.cpufrequency_max`.';
+COMMENT ON COLUMN public.cpu.cpu_frequency_min_Hz
+ IS 'The output of `sysctl -n hw.cpufrequency_min`.';
+COMMENT ON COLUMN public.cpu.cpu_L1d_cache_bytes
+ IS 'The output of `sysctl -n hw.l1dcachesize`.';
+COMMENT ON COLUMN public.cpu.cpu_L1i_cache_bytes
+ IS 'The output of `sysctl -n hw.l1icachesize`.';
+COMMENT ON COLUMN public.cpu.cpu_L2_cache_bytes
+ IS 'The output of `sysctl -n hw.l2cachesize`.';
+COMMENT ON COLUMN public.cpu.cpu_L3_cache_bytes
+ IS 'The output of `sysctl -n hw.l3cachesize`.';
+
+-- CONSTRAINTS
+ALTER TABLE public.cpu
+ ADD CONSTRAINT cpu_check_cpu_model_name_length
+ CHECK (char_length(cpu_model_name) < 255);
diff --git a/src/arrow/dev/benchmarking/ddl/1_02_table_public_gpu.sql b/src/arrow/dev/benchmarking/ddl/1_02_table_public_gpu.sql
new file mode 100644
index 000000000..564af19de
--- /dev/null
+++ b/src/arrow/dev/benchmarking/ddl/1_02_table_public_gpu.sql
@@ -0,0 +1,43 @@
+/*
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+*/
+
+
+-- GPU
+CREATE TABLE IF NOT EXISTS public.gpu
+(
+ gpu_id SERIAL PRIMARY KEY
+ , gpu_information citext UNIQUE NOT NULL DEFAULT ''
+ , gpu_part_number citext NOT NULL DEFAULT ''
+ , gpu_product_name citext NOT NULL DEFAULT ''
+);
+COMMENT ON TABLE public.gpu IS 'GPU specifications.';
+COMMENT ON COLUMN public.gpu.gpu_information
+ IS 'The output of `nvidia-smi -q` (on Linux or Windows), or `cuda-smi` '
+ 'or `kextstat | grep -i cuda` on OSX, or another command; anything '
+ 'that gets a string to uniquely identify the GPU.';
+
+-- CONSTRAINTS
+CREATE INDEX gpu_index_on_part_number
+ ON public.gpu (gpu_part_number);
+
+CREATE INDEX gpu_index_on_product_name
+ ON public.gpu (gpu_product_name);
+
+CREATE INDEX gpu_index_on_product_name_and_part_number
+ ON public.gpu (gpu_product_name, gpu_part_number);
diff --git a/src/arrow/dev/benchmarking/ddl/1_03_table_public_os.sql b/src/arrow/dev/benchmarking/ddl/1_03_table_public_os.sql
new file mode 100644
index 000000000..7b03d82f4
--- /dev/null
+++ b/src/arrow/dev/benchmarking/ddl/1_03_table_public_os.sql
@@ -0,0 +1,57 @@
+/*
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+*/
+
+
+-- OS
+CREATE TABLE IF NOT EXISTS public.os
+(
+ os_id SERIAL PRIMARY KEY
+ , os_name citext NOT NULL
+ , architecture_name citext NOT NULL
+ , kernel_name citext NOT NULL DEFAULT ''
+);
+-- @name os. forces retention of an 's' in the Graphile GraphQL api.
+COMMENT ON TABLE public.os
+ IS E'@name os.\nOperating system name and kernel (version).';
+COMMENT ON COLUMN public.os.os_name
+ IS 'Operating system name. For example, OSX, Ubuntu, Windows`.';
+COMMENT ON COLUMN public.os.architecture_name
+ IS 'Operating system architecture; the output of `uname -m`.';
+COMMENT ON COLUMN public.os.kernel_name
+ IS 'Operating system kernel, or NULL. '
+ 'On Linux/OSX, the output of `uname -r`. '
+ 'On Windows, the output of `ver`.';
+
+-- CONSTRAINTS
+ALTER TABLE public.os
+ ADD CONSTRAINT os_check_os_name_length
+ CHECK (char_length(os_name) < 63);
+
+ALTER TABLE public.os
+ ADD CONSTRAINT os_check_architecture_name_length
+ CHECK (char_length(architecture_name) < 63);
+
+ALTER TABLE public.os
+ ADD CONSTRAINT os_check_kernel_name_length
+ CHECK (char_length(kernel_name) < 63);
+
+CREATE UNIQUE INDEX os_unique_index
+ ON public.os(os_name, architecture_name, kernel_name);
+COMMENT ON INDEX public.os_unique_index
+ IS 'Enforce uniqueness of os, architecture, and kernel names.';
diff --git a/src/arrow/dev/benchmarking/ddl/1_04_table_public_benchmark_language.sql b/src/arrow/dev/benchmarking/ddl/1_04_table_public_benchmark_language.sql
new file mode 100644
index 000000000..2e3553677
--- /dev/null
+++ b/src/arrow/dev/benchmarking/ddl/1_04_table_public_benchmark_language.sql
@@ -0,0 +1,35 @@
+/*
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+*/
+
+
+-- BENCHMARK_LANGUAGE
+CREATE TABLE IF NOT EXISTS public.benchmark_language
+(
+ benchmark_language_id SERIAL PRIMARY KEY
+ , benchmark_language citext NOT NULL UNIQUE
+);
+COMMENT ON TABLE public.benchmark_language
+ IS 'The language the benchmark was written in (and presumably for).';
+COMMENT ON COLUMN public.benchmark_language.benchmark_language
+ IS 'The benchmark language. For example: Python';
+
+-- CONSTRAINTS
+ALTER TABLE public.benchmark_language
+ ADD CONSTRAINT benchmark_language_check_language_length
+ CHECK (char_length(benchmark_language) < 63);
diff --git a/src/arrow/dev/benchmarking/ddl/1_05_table_public_dependencies.sql b/src/arrow/dev/benchmarking/ddl/1_05_table_public_dependencies.sql
new file mode 100644
index 000000000..3744a0c35
--- /dev/null
+++ b/src/arrow/dev/benchmarking/ddl/1_05_table_public_dependencies.sql
@@ -0,0 +1,31 @@
+/*
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+*/
+
+
+-- DEPENDENCIES
+CREATE TABLE IF NOT EXISTS public.dependencies
+(
+ dependencies_id SERIAL PRIMARY KEY
+ , dependencies jsonb UNIQUE NOT NULL DEFAULT '{}'::jsonb
+);
+COMMENT ON TABLE public.dependencies
+ IS E'@name dependencies.\n'
+ 'A JSON object mapping dependencies to their versions.';
+COMMENT ON COLUMN public.dependencies.dependencies
+ IS 'For example: ''{"boost": "1.69", "conda": "", "numpy": "1.15"}''.';
diff --git a/src/arrow/dev/benchmarking/ddl/1_06_table_public_language_implementation_version.sql b/src/arrow/dev/benchmarking/ddl/1_06_table_public_language_implementation_version.sql
new file mode 100644
index 000000000..f7d26e4e2
--- /dev/null
+++ b/src/arrow/dev/benchmarking/ddl/1_06_table_public_language_implementation_version.sql
@@ -0,0 +1,46 @@
+/*
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+*/
+
+
+-- LANGUAGE_IMPLEMENTATION_VERSION
+CREATE TABLE IF NOT EXISTS public.language_implementation_version
+(
+ language_implementation_version_id SERIAL
+ , language_implementation_version citext NOT NULL DEFAULT ''
+ , benchmark_language_id integer NOT NULL
+ , PRIMARY KEY (language_implementation_version_id, benchmark_language_id)
+ , FOREIGN KEY (benchmark_language_id) REFERENCES public.benchmark_language
+);
+COMMENT ON TABLE public.language_implementation_version
+ IS 'The benchmark language implementation or compiler version, e.g. '
+ '''CPython 2.7'' or ''PyPy x.y'' or ''gcc 7.3.0'' or '
+ '''gcc (Ubuntu 7.3.0-27ubuntu1~18.04) 7.3.0''.';
+COMMENT ON COLUMN public.language_implementation_version.language_implementation_version
+ IS 'The version number used in the benchmark environment (e.g. ''2.7'').';
+
+-- CONSTRAINTS
+ALTER TABLE public.language_implementation_version
+ ADD CONSTRAINT language_implementation_version_check_version_length
+ CHECK (char_length(language_implementation_version) < 255);
+
+CREATE UNIQUE INDEX language_implementation_version_unique_index
+ ON public.language_implementation_version
+ (benchmark_language_id, language_implementation_version);
+COMMENT ON INDEX language_implementation_version_unique_index
+ IS 'Enforce unique implementation versions for the languages.';
diff --git a/src/arrow/dev/benchmarking/ddl/1_07_table_public_benchmark_type.sql b/src/arrow/dev/benchmarking/ddl/1_07_table_public_benchmark_type.sql
new file mode 100644
index 000000000..1143cdb00
--- /dev/null
+++ b/src/arrow/dev/benchmarking/ddl/1_07_table_public_benchmark_type.sql
@@ -0,0 +1,39 @@
+/*
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+*/
+
+
+-- BENCHMARK_TYPE
+CREATE TABLE IF NOT EXISTS public.benchmark_type
+(
+ benchmark_type_id SERIAL PRIMARY KEY
+ , benchmark_type citext NOT NULL UNIQUE
+ , lessisbetter boolean NOT NULL
+);
+COMMENT ON TABLE public.benchmark_type
+ IS 'The type of benchmark. For example "time", "mem", "peakmem", "track"';
+COMMENT ON COLUMN public.benchmark_type.benchmark_type
+ IS 'The type of units, so ''time'' for seconds, miliseconds, or '
+ '''mem'' for kilobytes, megabytes.';
+COMMENT ON COLUMN public.benchmark_type.lessisbetter
+ IS 'True if a smaller benchmark value is better.';
+
+-- CONSTRAINTS
+ALTER TABLE public.benchmark_type
+ ADD CONSTRAINT benchmark_type_check_benchmark_type_char_length
+ CHECK (char_length(benchmark_type) < 63);
diff --git a/src/arrow/dev/benchmarking/ddl/1_08_table_public_machine.sql b/src/arrow/dev/benchmarking/ddl/1_08_table_public_machine.sql
new file mode 100644
index 000000000..8f219d3e0
--- /dev/null
+++ b/src/arrow/dev/benchmarking/ddl/1_08_table_public_machine.sql
@@ -0,0 +1,69 @@
+/*
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+*/
+
+
+-- MACHINE
+CREATE TABLE IF NOT EXISTS public.machine
+(
+ machine_id SERIAL PRIMARY KEY
+ , machine_name citext NOT NULL
+ , mac_address macaddr NOT NULL
+ , memory_bytes bigint NOT NULL
+ , cpu_actual_frequency_Hz bigint NOT NULL
+ , machine_other_attributes jsonb
+ , cpu_id integer NOT NULL
+ , gpu_id integer NOT NULL
+ , os_id integer NOT NULL
+ , FOREIGN KEY (cpu_id) REFERENCES public.cpu
+ , FOREIGN KEY (gpu_id) REFERENCES public.gpu
+ , FOREIGN KEY (os_id) REFERENCES public.os
+);
+COMMENT ON TABLE public.machine
+ IS 'Unique identifiers for a machine.';
+COMMENT ON COLUMN public.machine.machine_name
+ IS 'A machine name of your choice.';
+COMMENT ON COLUMN public.machine.mac_address
+ IS 'The mac_address of a physical network interface to uniquely '
+ 'identify a computer. Postgres accepts standard formats, including '
+ '''08:00:2b:01:02:03'', ''08-00-2b-01-02-03'', ''08002b:010203''';
+COMMENT ON COLUMN public.machine.memory_bytes
+ IS 'The output of `sysctl -n hw.memsize`.';
+COMMENT ON COLUMN public.machine.cpu_actual_frequency_Hz
+ IS 'The output of `sysctl -n hw.cpufrequency`.';
+COMMENT ON COLUMN public.machine.machine_other_attributes
+ IS 'Additional attributes of interest, as a JSON object. '
+ 'For example: ''{"hard_disk_type": "solid state"}''::jsonb.';
+
+-- CONSTRAINTS
+CREATE UNIQUE INDEX machine_index_on_mac_address
+ ON public.machine(mac_address);
+COMMENT ON INDEX machine_index_on_mac_address
+ IS 'Enforce unique mac address';
+
+CREATE INDEX machine_index_on_cpu_id
+ ON public.machine(cpu_id);
+
+CREATE INDEX machine_index_on_gpu_id
+ ON public.machine(gpu_id);
+
+CREATE INDEX machine_index_on_os_id
+ ON public.machine(os_id);
+
+CREATE INDEX machine_index_on_cpu_gpu_os_id
+ ON public.machine(cpu_id, gpu_id, os_id);
diff --git a/src/arrow/dev/benchmarking/ddl/1_09_table_public_unit.sql b/src/arrow/dev/benchmarking/ddl/1_09_table_public_unit.sql
new file mode 100644
index 000000000..a8cf57669
--- /dev/null
+++ b/src/arrow/dev/benchmarking/ddl/1_09_table_public_unit.sql
@@ -0,0 +1,37 @@
+/*
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+*/
+
+
+-- UNIT
+CREATE TABLE IF NOT EXISTS public.unit
+(
+ unit_id SERIAL PRIMARY KEY
+ , units citext NOT NULL UNIQUE
+ , benchmark_type_id integer NOT NULL
+ , FOREIGN KEY (benchmark_type_id)
+ REFERENCES public.benchmark_type(benchmark_type_id)
+);
+COMMENT ON TABLE public.unit IS 'The actual units for a reported benchmark.';
+COMMENT ON COLUMN public.unit.units
+ IS 'For example: nanoseconds, microseconds, bytes, megabytes.';
+
+-- CONSTRAINTS
+ALTER TABLE public.unit
+ ADD CONSTRAINT unit_check_units_string_length
+ CHECK (char_length(units) < 63);
diff --git a/src/arrow/dev/benchmarking/ddl/1_10_table_public_environment.sql b/src/arrow/dev/benchmarking/ddl/1_10_table_public_environment.sql
new file mode 100644
index 000000000..e3a6d2395
--- /dev/null
+++ b/src/arrow/dev/benchmarking/ddl/1_10_table_public_environment.sql
@@ -0,0 +1,51 @@
+/*
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+*/
+
+
+-- ENVIRONMENT
+CREATE TABLE IF NOT EXISTS public.environment
+(
+ environment_id SERIAL
+ , language_implementation_version_id integer NOT NULL
+ , benchmark_language_id integer NOT NULL
+ , dependencies_id integer NOT NULL
+ , PRIMARY KEY
+ (environment_id, benchmark_language_id, language_implementation_version_id)
+ , FOREIGN KEY
+ (benchmark_language_id)
+ REFERENCES public.benchmark_language
+ , FOREIGN KEY
+ (language_implementation_version_id, benchmark_language_id)
+ REFERENCES public.language_implementation_version(
+ language_implementation_version_id
+ , benchmark_language_id
+ )
+ , FOREIGN KEY
+ (dependencies_id)
+ REFERENCES public.dependencies
+);
+COMMENT ON TABLE public.environment
+ IS 'Identifies a build environment for a specific suite of benchmarks.';
+
+-- CONSTRAINTS
+CREATE UNIQUE INDEX environment_unique_index
+ ON public.environment
+ (benchmark_language_id, language_implementation_version_id, dependencies_id);
+COMMENT ON INDEX environment_unique_index
+ IS 'Enforce unique combinations of language version and dependencies.';
diff --git a/src/arrow/dev/benchmarking/ddl/1_11_table_public_benchmark.sql b/src/arrow/dev/benchmarking/ddl/1_11_table_public_benchmark.sql
new file mode 100644
index 000000000..18895823d
--- /dev/null
+++ b/src/arrow/dev/benchmarking/ddl/1_11_table_public_benchmark.sql
@@ -0,0 +1,54 @@
+/*
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+*/
+
+
+-- BENCHMARK
+CREATE TABLE IF NOT EXISTS public.benchmark
+(
+ benchmark_id SERIAL
+ , benchmark_name citext NOT NULL
+ , parameter_names text[]
+ , benchmark_description text NOT NULL
+ , benchmark_version citext NOT NULL
+ , unit_id integer NOT NULL
+ , benchmark_language_id integer NOT NULL
+ , PRIMARY KEY (benchmark_id, benchmark_language_id)
+ , FOREIGN KEY (benchmark_language_id) REFERENCES public.benchmark_language
+ , FOREIGN KEY (unit_id) REFERENCES public.unit
+);
+COMMENT ON TABLE public.benchmark
+ IS 'Identifies an individual benchmark.';
+COMMENT ON COLUMN public.benchmark.parameter_names
+ IS 'A list of strings identifying the parameter names in the benchmark.';
+COMMENT ON COLUMN public.benchmark.benchmark_version
+ IS 'Can be any string. In Airspeed Velocity, the version is '
+ 'by default the hash of the entire code string for the benchmark.';
+
+-- CONSTRAINTS
+CREATE INDEX benchmark_index_on_benchmark_language_id
+ ON public.benchmark(benchmark_language_id);
+
+CREATE INDEX benchmark_index_on_unit_id
+ ON public.benchmark(unit_id);
+
+CREATE UNIQUE INDEX benchmark_unique_index_on_language_benchmark_version
+ ON public.benchmark
+ (benchmark_language_id, benchmark_name, benchmark_version);
+COMMENT ON INDEX public.benchmark_unique_index_on_language_benchmark_version
+ IS 'Enforce uniqueness of benchmark name and version for a given language.';
diff --git a/src/arrow/dev/benchmarking/ddl/1_12_table_public_benchmark_run.sql b/src/arrow/dev/benchmarking/ddl/1_12_table_public_benchmark_run.sql
new file mode 100644
index 000000000..20b9ef0bb
--- /dev/null
+++ b/src/arrow/dev/benchmarking/ddl/1_12_table_public_benchmark_run.sql
@@ -0,0 +1,112 @@
+/*
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+*/
+
+
+-- BENCHMARK_RUN
+CREATE TABLE IF NOT EXISTS public.benchmark_run
+(
+ benchmark_run_id BIGSERIAL PRIMARY KEY
+ , parameter_values jsonb NOT NULL DEFAULT '{}'::jsonb
+ , value numeric NOT NULL
+ , git_commit_timestamp timestamp (0) with time zone NOT NULL
+ , git_hash text NOT NULL
+ , val_min numeric
+ , val_q1 numeric
+ , val_q3 numeric
+ , val_max numeric
+ , std_dev numeric NOT NULL
+ , n_obs integer NOT NULL
+ , run_timestamp timestamp (0) with time zone NOT NULL
+ , run_metadata jsonb
+ , run_notes text
+ , machine_id integer NOT NULL
+ , environment_id integer NOT NULL
+ , language_implementation_version_id integer NOT NULL
+ , benchmark_language_id integer NOT NULL
+ , benchmark_id integer NOT NULL
+ , FOREIGN KEY (machine_id) REFERENCES public.machine
+ , FOREIGN KEY
+ (environment_id, benchmark_language_id, language_implementation_version_id)
+ REFERENCES public.environment
+ , FOREIGN KEY (benchmark_id, benchmark_language_id)
+ REFERENCES public.benchmark(benchmark_id, benchmark_language_id)
+);
+COMMENT ON TABLE public.benchmark_run
+ IS 'One run per benchmark run.';
+COMMENT ON COLUMN public.benchmark_run.parameter_values
+ IS 'A JSON object mapping the parameter names from '
+ '"benchmark.parameter_names" to values.';
+COMMENT ON COLUMN public.benchmark_run.value
+ IS 'The average value from the benchmark run.';
+COMMENT ON COLUMN public.benchmark_run.git_commit_timestamp
+ IS 'Get this using `git show -s --date=local --format="%ci" <hash>`. '
+ 'ISO 8601 is recommended, e.g. ''2019-01-30 03:12 -0600''.';
+COMMENT ON COLUMN public.benchmark_run.git_hash
+ IS 'The commit has of the codebase currently being benchmarked.';
+COMMENT ON COLUMN public.benchmark_run.val_min
+ IS 'The smallest benchmark run value for this run.';
+COMMENT ON COLUMN public.benchmark_run.val_q1
+ IS 'The first quartile of the benchmark run values for this run.';
+COMMENT ON COLUMN public.benchmark_run.val_q3
+ IS 'The third quartile of the benchmark run values for this run.';
+COMMENT ON COLUMN public.benchmark_run.val_max
+ IS 'The largest benchmark run value for this run.';
+COMMENT ON COLUMN public.benchmark_run.std_dev
+ IS 'The standard deviation of the run values for this benchmark run.';
+COMMENT ON COLUMN public.benchmark_run.n_obs
+ IS 'The number of observations for this benchmark run.';
+COMMENT ON COLUMN public.benchmark_run.run_metadata
+ IS 'Additional metadata of interest, as a JSON object. '
+ 'For example: ''{"ci_99": [2.7e-06, 3.1e-06]}''::jsonb.';
+COMMENT ON COLUMN public.benchmark_run.run_notes
+ IS 'Additional notes of interest, as a text string. ';
+
+-- CONSTRAINTS
+ALTER TABLE public.benchmark_run
+ ADD CONSTRAINT benchmark_run_check_std_dev_nonnegative
+ CHECK (std_dev >= 0);
+
+ALTER TABLE public.benchmark_run
+ ADD CONSTRAINT benchmark_run_check_n_obs_positive
+ CHECK (n_obs > 0);
+
+CREATE INDEX benchmark_run_index_on_environment_id
+ ON public.benchmark_run(environment_id);
+
+CREATE INDEX benchmark_run_index_on_machine_id
+ ON public.benchmark_run(machine_id);
+
+CREATE INDEX benchmark_run_index_on_benchmark_id
+ ON public.benchmark_run(benchmark_id, benchmark_language_id);
+
+CREATE INDEX benchmark_run_index_on_benchmark_environment_time
+ ON public.benchmark_run
+ (benchmark_id, environment_id, git_commit_timestamp);
+COMMENT ON INDEX
+ public.benchmark_run_index_on_benchmark_environment_time
+ IS 'Index to improve sorting by benchmark, environment, and timestamp.';
+
+CREATE UNIQUE INDEX
+ benchmark_run_unique_index_on_env_benchmark_timestamp_params
+ ON public.benchmark_run
+ (machine_id, environment_id, benchmark_id, git_commit_timestamp, parameter_values, run_timestamp);
+COMMENT ON INDEX
+ public.benchmark_run_unique_index_on_env_benchmark_timestamp_params
+ IS 'Enforce uniqueness of benchmark run for a given machine, '
+ 'environment, benchmark, git commit timestamp, and parameter values.';
diff --git a/src/arrow/dev/benchmarking/ddl/2_00_views.sql b/src/arrow/dev/benchmarking/ddl/2_00_views.sql
new file mode 100644
index 000000000..cbd295e50
--- /dev/null
+++ b/src/arrow/dev/benchmarking/ddl/2_00_views.sql
@@ -0,0 +1,324 @@
+/*
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+*/
+
+-- NOTE:
+-- The function for documentation depends on view columns
+-- being named exactly the same as in the table view.
+
+-- MACHINE_VIEW
+CREATE OR REPLACE VIEW public.machine_view AS
+ SELECT
+ machine.machine_id
+ , mac_address
+ , machine_name
+ , memory_bytes
+ , cpu_actual_frequency_Hz
+ , os_name
+ , architecture_name
+ , kernel_name
+ , cpu_model_name
+ , cpu_core_count
+ , cpu_thread_count
+ , cpu_frequency_max_Hz
+ , cpu_frequency_min_Hz
+ , cpu_L1d_cache_bytes
+ , cpu_L1i_cache_bytes
+ , cpu_L2_cache_bytes
+ , cpu_L3_cache_bytes
+ , gpu_information
+ , gpu_part_number
+ , gpu_product_name
+ , machine_other_attributes
+ FROM public.machine AS machine
+ JOIN public.cpu AS cpu ON machine.cpu_id = cpu.cpu_id
+ JOIN public.gpu AS gpu ON machine.gpu_id = gpu.gpu_id
+ JOIN public.os AS os ON machine.os_id = os.os_id;
+COMMENT ON VIEW public.machine_view IS
+E'The machine environment (CPU, GPU, OS) used for each benchmark run.\n\n'
+ '- "mac_address" is unique in the "machine" table\n'
+ '- "gpu_part_number" is unique in the "gpu" (graphics processing unit) table\n'
+ ' Empty string (''''), not null, is used for machines that won''t use the GPU\n'
+ '- "cpu_model_name" is unique in the "cpu" (central processing unit) table\n'
+ '- "os_name", "os_architecture_name", and "os_kernel_name"\n'
+ ' are unique in the "os" (operating system) table\n'
+ '- "machine_other_attributes" is a key-value store for any other relevant\n'
+ ' data, e.g. ''{"hard_disk_type": "solid state"}''';
+
+
+-- LANGUAGE_IMPLEMENTATION_VERSION_VIEW
+CREATE OR REPLACE VIEW public.language_implementation_version_view AS
+ SELECT
+ lv.language_implementation_version_id
+ , bl.benchmark_language
+ , lv.language_implementation_version
+ FROM public.language_implementation_version AS lv
+ JOIN public.benchmark_language AS bl
+ ON lv.benchmark_language_id = bl.benchmark_language_id;
+
+-- ENVIRONMENT_VIEW
+CREATE OR REPLACE VIEW public.environment_view AS
+ SELECT
+ env.environment_id
+ , benchmark_language
+ , language_implementation_version
+ , dependencies
+ FROM public.environment AS env
+ JOIN public.benchmark_language AS language
+ ON env.benchmark_language_id = language.benchmark_language_id
+ JOIN public.language_implementation_version AS version
+ ON env.language_implementation_version_id = version.language_implementation_version_id
+ JOIN public.dependencies AS deps
+ ON env.dependencies_id = deps.dependencies_id;
+COMMENT ON VIEW public.environment_view IS
+E'The build environment used for a reported benchmark run.\n'
+ '(Will be inferred from each "benchmark_run" if not explicitly added).\n\n'
+ '- Each entry is unique on\n'
+ ' ("benchmark_language", "language_implementation_version", "dependencies")\n'
+ '- "benchmark_language" is unique in the "benchmark_language" table\n'
+ '- "benchmark_language" plus "language_implementation_version" is unique in\n'
+ ' the "language_implementation_version" table\n'
+ '- "dependencies" is unique in the "dependencies" table';
+
+-- UNIT_VIEW
+CREATE OR REPLACE VIEW public.unit_view AS
+ SELECT
+ unit.unit_id
+ , units
+ , benchmark_type
+ , lessisbetter
+ FROM public.unit AS unit
+ JOIN public.benchmark_type AS bt
+ ON unit.benchmark_type_id = bt.benchmark_type_id;
+
+-- BENCHMARK_VIEW
+CREATE OR REPLACE VIEW public.benchmark_view AS
+ SELECT
+ b.benchmark_id
+ , benchmark_name
+ , parameter_names
+ , benchmark_description
+ , benchmark_type
+ , units
+ , lessisbetter
+ , benchmark_version
+ , benchmark_language
+ FROM public.benchmark AS b
+ JOIN public.benchmark_language AS benchmark_language
+ ON b.benchmark_language_id = benchmark_language.benchmark_language_id
+ JOIN public.unit AS unit
+ ON b.unit_id = unit.unit_id
+ JOIN public.benchmark_type AS benchmark_type
+ ON unit.benchmark_type_id = benchmark_type.benchmark_type_id;
+COMMENT ON VIEW public.benchmark_view IS
+E'The details about a particular benchmark.\n\n'
+ '- "benchmark_name" is unique for a given "benchmark_language"\n'
+ '- Each entry is unique on\n'
+ ' ("benchmark_language", "benchmark_name", "benchmark_version")';
+
+-- BENCHMARK_RUN_VIEW
+CREATE OR REPLACE VIEW public.benchmark_run_view AS
+ SELECT
+ run.benchmark_run_id
+ -- benchmark_view (name, version, language only)
+ , benchmark_name
+ , benchmark_version
+ -- datum
+ , parameter_values
+ , value
+ , git_commit_timestamp
+ , git_hash
+ , val_min
+ , val_q1
+ , val_q3
+ , val_max
+ , std_dev
+ , n_obs
+ , run_timestamp
+ , run_metadata
+ , run_notes
+ -- machine_view (mac address only)
+ , mac_address
+ -- environment_view
+ , env.benchmark_language
+ , language_implementation_version
+ , dependencies
+ FROM public.benchmark_run AS run
+ JOIN public.benchmark_view AS benchmark
+ ON run.benchmark_id = benchmark.benchmark_id
+ JOIN public.machine_view AS machine
+ ON run.machine_id = machine.machine_id
+ JOIN public.environment_view AS env
+ ON run.environment_id = env.environment_id;
+COMMENT ON VIEW public.benchmark_run_view IS
+E'Each benchmark run.\n\n'
+ '- Each entry is unique on the machine, environment, benchmark,\n'
+ ' and git commit timestamp.';
+
+-- FULL_BENCHMARK_RUN_VIEW
+CREATE OR REPLACE VIEW public.full_benchmark_run_view AS
+ SELECT
+ run.benchmark_run_id
+ -- benchmark_view
+ , benchmark_name
+ , parameter_names
+ , benchmark_description
+ , benchmark_type
+ , units
+ , lessisbetter
+ , benchmark_version
+ -- datum
+ , parameter_values
+ , value
+ , git_commit_timestamp
+ , git_hash
+ , val_min
+ , val_q1
+ , val_q3
+ , val_max
+ , std_dev
+ , n_obs
+ , run_timestamp
+ , run_metadata
+ , run_notes
+ -- machine_view
+ , machine_name
+ , mac_address
+ , memory_bytes
+ , cpu_actual_frequency_Hz
+ , os_name
+ , architecture_name
+ , kernel_name
+ , cpu_model_name
+ , cpu_core_count
+ , cpu_thread_count
+ , cpu_frequency_max_Hz
+ , cpu_frequency_min_Hz
+ , cpu_L1d_cache_bytes
+ , cpu_L1i_cache_bytes
+ , cpu_L2_cache_bytes
+ , cpu_L3_cache_bytes
+ , gpu_information
+ , gpu_part_number
+ , gpu_product_name
+ , machine_other_attributes
+ -- environment_view
+ , env.benchmark_language
+ , env.language_implementation_version
+ , dependencies
+ FROM public.benchmark_run AS run
+ JOIN public.benchmark_view AS benchmark
+ ON run.benchmark_id = benchmark.benchmark_id
+ JOIN public.machine_view AS machine
+ ON run.machine_id = machine.machine_id
+ JOIN public.environment_view AS env
+ ON run.environment_id = env.environment_id;
+
+-- SUMMARIZED_TABLES_VIEW
+CREATE VIEW public.summarized_tables_view AS
+ WITH chosen AS (
+ SELECT
+ cls.oid AS id
+ , cls.relname as tbl_name
+ FROM pg_catalog.pg_class AS cls
+ JOIN pg_catalog.pg_namespace AS ns ON cls.relnamespace = ns.oid
+ WHERE
+ cls.relkind = 'r'
+ AND ns.nspname = 'public'
+ ), all_constraints AS (
+ SELECT
+ chosen.id AS tbl_id
+ , chosen.tbl_name
+ , unnest(conkey) AS col_id
+ , 'foreign key' AS col_constraint
+ FROM pg_catalog.pg_constraint
+ JOIN chosen ON chosen.id = conrelid
+ WHERE contype = 'f'
+
+ UNION
+
+ SELECT
+ chosen.id
+ , chosen.tbl_name
+ , unnest(indkey)
+ , 'unique'
+ FROM pg_catalog.pg_index i
+ JOIN chosen ON chosen.id = i.indrelid
+ WHERE i.indisunique AND NOT i.indisprimary
+
+ UNION
+
+ SELECT
+ chosen.id
+ , chosen.tbl_name
+ , unnest(indkey)
+ , 'primary key'
+ FROM pg_catalog.pg_index i
+ JOIN chosen on chosen.id = i.indrelid
+ WHERE i.indisprimary
+ ), gathered_constraints AS (
+ SELECT
+ tbl_id
+ , tbl_name
+ , col_id
+ , string_agg(col_constraint, ', ' ORDER BY col_constraint)
+ AS col_constraint
+ FROM all_constraints
+ GROUP BY tbl_id, tbl_name, col_id
+ )
+ SELECT
+ chosen.tbl_name AS table_name
+ , columns.attnum AS column_number
+ , columns.attname AS column_name
+ , typ.typname AS type_name
+ , CASE
+ WHEN columns.attnotnull
+ THEN 'not null'
+ ELSE ''
+ END AS nullable
+ , CASE
+ WHEN defaults.adsrc like 'nextval%'
+ THEN 'serial'
+ ELSE defaults.adsrc
+ END AS default_value
+ , CASE
+ WHEN gc.col_constraint = '' OR gc.col_constraint IS NULL
+ THEN cnstrnt.consrc
+ WHEN cnstrnt.consrc IS NULL
+ THEN gc.col_constraint
+ ELSE gc.col_constraint || ', ' || cnstrnt.consrc
+ END AS description
+ FROM pg_catalog.pg_attribute AS columns
+ JOIN chosen ON columns.attrelid = chosen.id
+ JOIN pg_catalog.pg_type AS typ
+ ON typ.oid = columns.atttypid
+ LEFT JOIN gathered_constraints AS gc
+ ON gc.col_id = columns.attnum
+ AND gc.tbl_id = columns.attrelid
+ LEFT JOIN pg_attrdef AS defaults
+ ON defaults.adrelid = chosen.id
+ AND defaults.adnum = columns.attnum
+ LEFT JOIN pg_catalog.pg_constraint AS cnstrnt
+ ON cnstrnt.conrelid = columns.attrelid
+ AND columns.attrelid = ANY(cnstrnt.conkey)
+ WHERE
+ columns.attnum > 0
+ ORDER BY table_name, column_number;
+COMMENT ON VIEW public.summarized_tables_view
+ IS 'A summary of all columns from all tables in the public schema, '
+ ' identifying nullability, primary/foreign keys, and data type.';
diff --git a/src/arrow/dev/benchmarking/ddl/3_00_functions_helpers.sql b/src/arrow/dev/benchmarking/ddl/3_00_functions_helpers.sql
new file mode 100644
index 000000000..b10b69a4e
--- /dev/null
+++ b/src/arrow/dev/benchmarking/ddl/3_00_functions_helpers.sql
@@ -0,0 +1,643 @@
+/*
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+*/
+
+
+-- PROJECT_DETAILS
+CREATE TYPE public.type_project_details AS (
+ project_name text
+ , project_url text
+ , repo_url text
+);
+
+CREATE OR REPLACE FUNCTION public.project_details()
+RETURNS public.type_project_details AS
+$$
+ SELECT project_name, project_url, repo_url
+ FROM public.project
+ ORDER BY last_changed DESC
+ LIMIT 1
+$$
+LANGUAGE sql STABLE;
+COMMENT ON FUNCTION public.project_details()
+IS 'Get the current project name, url, and repo url.';
+
+
+-------------------------- GET-OR-SET FUNCTIONS --------------------------
+-- The following functions have the naming convention "get_<tablename>_id".
+-- All of them attempt to SELECT the desired row given the column
+-- values, and if it does not exist will INSERT it.
+--
+-- When functions are overloaded with fewer columns, it is to allow
+-- selection only, given columns that comprise a unique index.
+
+-- GET_CPU_ID
+CREATE OR REPLACE FUNCTION public.get_cpu_id(
+ cpu_model_name citext
+ , cpu_core_count integer
+ , cpu_thread_count integer
+ , cpu_frequency_max_Hz bigint
+ , cpu_frequency_min_Hz bigint
+ , cpu_L1d_cache_bytes integer
+ , cpu_L1i_cache_bytes integer
+ , cpu_L2_cache_bytes integer
+ , cpu_L3_cache_bytes integer
+)
+RETURNS integer AS
+$$
+ DECLARE
+ result integer;
+ BEGIN
+ SELECT cpu_id INTO result FROM public.cpu AS cpu
+ WHERE cpu.cpu_model_name = $1
+ AND cpu.cpu_core_count = $2
+ AND cpu.cpu_thread_count = $3
+ AND cpu.cpu_frequency_max_Hz = $4
+ AND cpu.cpu_frequency_min_Hz = $5
+ AND cpu.cpu_L1d_cache_bytes = $6
+ AND cpu.cpu_L1i_cache_bytes = $7
+ AND cpu.cpu_L2_cache_bytes = $8
+ AND cpu.cpu_L3_cache_bytes = $9;
+
+ IF result IS NULL THEN
+ INSERT INTO public.cpu(
+ cpu_model_name
+ , cpu_core_count
+ , cpu_thread_count
+ , cpu_frequency_max_Hz
+ , cpu_frequency_min_Hz
+ , cpu_L1d_cache_bytes
+ , cpu_L1i_cache_bytes
+ , cpu_L2_cache_bytes
+ , cpu_L3_cache_bytes
+ )
+ VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)
+ RETURNING cpu_id INTO result;
+ END IF;
+
+ RETURN result;
+ END
+$$
+LANGUAGE plpgsql;
+COMMENT ON FUNCTION public.get_cpu_id(
+ citext
+ , integer
+ , integer
+ , bigint -- cpu_frequency_max_Hz
+ , bigint -- cpu_frequency_min_Hz
+ , integer
+ , integer
+ , integer
+ , integer
+)
+IS 'Insert or select CPU data, returning "cpu.cpu_id".';
+
+-- GET_GPU_ID
+CREATE OR REPLACE FUNCTION public.get_gpu_id(
+ gpu_information citext DEFAULT NULL
+ , gpu_part_number citext DEFAULT NULL
+ , gpu_product_name citext DEFAULT NULL
+)
+RETURNS integer AS
+$$
+ DECLARE
+ result integer;
+ BEGIN
+ SELECT gpu_id INTO result FROM public.gpu AS gpu
+ WHERE
+ gpu.gpu_information = COALESCE($1, '')
+ AND gpu.gpu_part_number = COALESCE($2, '')
+ AND gpu.gpu_product_name = COALESCE($3, '');
+
+ IF result IS NULL THEN
+ INSERT INTO public.gpu(
+ gpu_information
+ , gpu_part_number
+ , gpu_product_name
+ )
+ VALUES (COALESCE($1, ''), COALESCE($2, ''), COALESCE($3, ''))
+ RETURNING gpu_id INTO result;
+ END IF;
+
+ RETURN result;
+ END
+$$
+LANGUAGE plpgsql;
+COMMENT ON FUNCTION public.get_gpu_id(citext, citext, citext)
+IS 'Insert or select GPU data, returning "gpu.gpu_id".';
+
+-- GET_OS_ID
+CREATE OR REPLACE FUNCTION public.get_os_id(
+ os_name citext
+ , architecture_name citext
+ , kernel_name citext DEFAULT ''
+)
+RETURNS integer AS
+$$
+ DECLARE
+ result integer;
+ BEGIN
+ SELECT os_id INTO result FROM public.os AS os
+ WHERE os.os_name = $1
+ AND os.architecture_name = $2
+ AND os.kernel_name = COALESCE($3, '');
+
+ IF result is NULL THEN
+ INSERT INTO public.os(os_name, architecture_name, kernel_name)
+ VALUES ($1, $2, COALESCE($3, ''))
+ RETURNING os_id INTO result;
+ END IF;
+
+ RETURN result;
+ END
+$$
+LANGUAGE plpgsql;
+COMMENT ON FUNCTION public.get_os_id(citext, citext, citext)
+IS 'Insert or select OS data, returning "os.os_id".';
+
+-- GET_MACHINE_ID (full signature)
+CREATE OR REPLACE FUNCTION public.get_machine_id(
+ mac_address macaddr
+ , machine_name citext
+ , memory_bytes bigint
+ , cpu_actual_frequency_Hz bigint
+ -- os
+ , os_name citext
+ , architecture_name citext
+ , kernel_name citext
+ -- cpu
+ , cpu_model_name citext
+ , cpu_core_count integer
+ , cpu_thread_count integer
+ , cpu_frequency_max_Hz bigint
+ , cpu_frequency_min_Hz bigint
+ , L1d_cache_bytes integer
+ , L1i_cache_bytes integer
+ , L2_cache_bytes integer
+ , L3_cache_bytes integer
+ -- gpu
+ , gpu_information citext DEFAULT ''
+ , gpu_part_number citext DEFAULT NULL
+ , gpu_product_name citext DEFAULT NULL
+ -- nullable machine attributes
+ , machine_other_attributes jsonb DEFAULT NULL
+)
+RETURNS integer AS
+$$
+ DECLARE
+ found_cpu_id integer;
+ found_gpu_id integer;
+ found_os_id integer;
+ result integer;
+ BEGIN
+ -- Can't bypass looking up all the values because of unique constraint.
+ SELECT public.get_cpu_id(
+ cpu_model_name
+ , cpu_core_count
+ , cpu_thread_count
+ , cpu_frequency_max_Hz
+ , cpu_frequency_min_Hz
+ , L1d_cache_bytes
+ , L1i_cache_bytes
+ , L2_cache_bytes
+ , L3_cache_bytes
+ ) INTO found_cpu_id;
+
+ SELECT public.get_gpu_id(
+ gpu_information
+ , gpu_part_number
+ , gpu_product_name
+ ) INTO found_gpu_id;
+
+ SELECT public.get_os_id(
+ os_name
+ , architecture_name
+ , kernel_name
+ ) INTO found_os_id;
+
+ SELECT machine_id INTO result FROM public.machine AS m
+ WHERE m.os_id = found_os_id
+ AND m.cpu_id = found_cpu_id
+ AND m.gpu_id = found_gpu_id
+ AND m.mac_address = $1
+ AND m.machine_name = $2
+ AND m.memory_bytes = $3
+ AND m.cpu_actual_frequency_Hz = $4;
+
+ IF result IS NULL THEN
+ INSERT INTO public.machine(
+ os_id
+ , cpu_id
+ , gpu_id
+ , mac_address
+ , machine_name
+ , memory_bytes
+ , cpu_actual_frequency_Hz
+ , machine_other_attributes
+ )
+ VALUES (found_os_id, found_cpu_id, found_gpu_id, $1, $2, $3, $4, $20)
+ RETURNING machine_id INTO result;
+ END IF;
+
+ RETURN result;
+ END
+$$
+LANGUAGE plpgsql;
+COMMENT ON FUNCTION public.get_machine_id(
+ macaddr
+ , citext
+ , bigint -- memory_bytes
+ , bigint -- cpu_frequency_actual_Hz
+ -- os
+ , citext
+ , citext
+ , citext
+ -- cpu
+ , citext
+ , integer
+ , integer
+ , bigint -- cpu_frequency_max_Hz
+ , bigint -- cpu_frequency_min_Hz
+ , integer
+ , integer
+ , integer
+ , integer
+ -- gpu
+ , citext
+ , citext
+ , citext
+ -- nullable machine attributes
+ , jsonb
+)
+IS 'Insert or select machine data, returning "machine.machine_id".';
+
+-- GET_MACHINE_ID (given unique mac_address)
+CREATE OR REPLACE FUNCTION public.get_machine_id(mac_address macaddr)
+RETURNS integer AS
+$$
+ SELECT machine_id FROM public.machine AS m
+ WHERE m.mac_address = $1;
+$$
+LANGUAGE sql STABLE;
+COMMENT ON FUNCTION public.get_machine_id(macaddr)
+IS 'Select machine_id given its mac address, returning "machine.machine_id".';
+
+-- GET_BENCHMARK_LANGUAGE_ID
+CREATE OR REPLACE FUNCTION public.get_benchmark_language_id(language citext)
+RETURNS integer AS
+$$
+ DECLARE
+ result integer;
+ BEGIN
+ SELECT benchmark_language_id INTO result
+ FROM public.benchmark_language AS bl
+ WHERE bl.benchmark_language = language;
+
+ IF result IS NULL THEN
+ INSERT INTO public.benchmark_language(benchmark_language)
+ VALUES (language)
+ RETURNING benchmark_language_id INTO result;
+ END IF;
+
+ RETURN result;
+ END
+$$
+LANGUAGE plpgsql;
+COMMENT ON FUNCTION public.get_benchmark_language_id(citext)
+IS 'Insert or select benchmark_language returning '
+ '"benchmark_language.benchmark_language_id".';
+
+-- GET_LANGUAGE_IMPLEMENTATION_VERSION_ID
+CREATE OR REPLACE FUNCTION public.get_language_implementation_version_id(
+ language citext
+ , language_implementation_version citext DEFAULT ''
+)
+RETURNS integer AS
+$$
+ DECLARE
+ language_id integer;
+ result integer;
+ BEGIN
+ SELECT public.get_benchmark_language_id($1) INTO language_id;
+
+ SELECT language_implementation_version_id INTO result FROM public.language_implementation_version AS lv
+ WHERE lv.benchmark_language_id = language_id
+ AND lv.language_implementation_version = COALESCE($2, '');
+
+ IF result IS NULL THEN
+ INSERT INTO
+ public.language_implementation_version(benchmark_language_id, language_implementation_version)
+ VALUES (language_id, COALESCE($2, ''))
+ RETURNING language_implementation_version_id INTO result;
+ END IF;
+
+ RETURN result;
+ END
+$$
+LANGUAGE plpgsql;
+COMMENT ON FUNCTION public.get_language_implementation_version_id(citext, citext)
+IS 'Insert or select language and version data, '
+ 'returning "language_implementation_version.language_implementation_version_id".';
+
+CREATE OR REPLACE FUNCTION public.get_language_implementation_version_id(
+ -- overload for when language_id is known
+ language_id integer
+ , language_implementation_version citext DEFAULT ''
+)
+RETURNS integer AS
+$$
+ DECLARE
+ result integer;
+ BEGIN
+ SELECT language_implementation_version_id INTO result FROM public.language_implementation_version AS lv
+ WHERE lv.benchmark_language_id = language_id
+ AND lv.language_implementation_version = COALESCE($2, '');
+
+ IF result IS NULL THEN
+ INSERT INTO
+ public.language_implementation_version(benchmark_language_id, language_implementation_version)
+ VALUES (language_id, COALESCE($2, ''))
+ RETURNING language_implementation_version_id INTO result;
+ END IF;
+
+ RETURN result;
+ END
+$$
+LANGUAGE plpgsql;
+
+-- GET_LANGUAGE_DEPENDENCY_LOOKUP_ID
+CREATE OR REPLACE FUNCTION public.get_dependencies_id(
+ dependencies jsonb DEFAULT '{}'::jsonb
+)
+RETURNS integer AS
+$$
+ DECLARE
+ result integer;
+ BEGIN
+ SELECT dependencies_id INTO result
+ FROM public.dependencies AS ldl
+ WHERE ldl.dependencies = COALESCE($1, '{}'::jsonb);
+
+ IF result IS NULL THEN
+ INSERT INTO
+ public.dependencies(dependencies)
+ VALUES (COALESCE($1, '{}'::jsonb))
+ RETURNING dependencies_id INTO result;
+ END IF;
+
+ RETURN result;
+ END
+$$
+LANGUAGE plpgsql;
+COMMENT ON FUNCTION public.get_dependencies_id(jsonb)
+IS 'Insert or select dependencies, returning "dependencies.dependencies_id".';
+
+-- GET_ENVIRONMENT_ID
+CREATE OR REPLACE FUNCTION public.get_environment_id(
+ language citext,
+ language_implementation_version citext DEFAULT '',
+ dependencies jsonb DEFAULT '{}'::jsonb
+)
+RETURNS integer AS
+$$
+ DECLARE
+ found_language_id integer;
+ found_version_id integer;
+ found_dependencies_id integer;
+ result integer;
+ BEGIN
+ SELECT public.get_benchmark_language_id($1) INTO found_language_id;
+ SELECT
+ public.get_language_implementation_version_id(found_language_id, $2)
+ INTO found_version_id;
+ SELECT
+ public.get_dependencies_id ($3)
+ INTO found_dependencies_id;
+
+ SELECT environment_id INTO result FROM public.environment AS e
+ WHERE e.benchmark_language_id = found_language_id
+ AND e.language_implementation_version_id = found_version_id
+ AND e.dependencies_id = found_dependencies_id;
+
+ IF result IS NULL THEN
+ INSERT INTO
+ public.environment(
+ benchmark_language_id
+ , language_implementation_version_id
+ , dependencies_id
+ )
+ VALUES (found_language_id, found_version_id, found_dependencies_id)
+ RETURNING environment_id INTO result;
+ END IF;
+
+ RETURN result;
+ END
+$$
+LANGUAGE plpgsql;
+COMMENT ON FUNCTION public.get_environment_id(citext, citext, jsonb)
+IS 'Insert or select language, language version, and dependencies, '
+ 'returning "environment.environment_id".';
+
+-- GET_BENCHMARK_TYPE_ID (full signature)
+CREATE OR REPLACE FUNCTION public.get_benchmark_type_id(
+ benchmark_type citext
+ , lessisbetter boolean
+)
+RETURNS integer AS
+$$
+ DECLARE
+ result integer;
+ BEGIN
+ SELECT benchmark_type_id INTO result FROM public.benchmark_type AS bt
+ WHERE bt.benchmark_type = $1
+ AND bt.lessisbetter = $2;
+
+ IF result IS NULL THEN
+ INSERT INTO public.benchmark_type(benchmark_type, lessisbetter)
+ VALUES($1, $2)
+ RETURNING benchmark_type_id INTO result;
+ END IF;
+
+ RETURN result;
+ END
+$$
+LANGUAGE plpgsql;
+COMMENT ON FUNCTION public.get_benchmark_type_id(citext, boolean)
+IS 'Insert or select benchmark type and lessisbetter, '
+ 'returning "benchmark_type.benchmark_type_id".';
+
+-- GET_BENCHMARK_TYPE_ID (given unique benchmark_type string only)
+CREATE OR REPLACE FUNCTION public.get_benchmark_type_id(
+ benchmark_type citext
+)
+RETURNS integer AS
+$$
+ DECLARE
+ result integer;
+ BEGIN
+ SELECT benchmark_type_id INTO result FROM public.benchmark_type AS bt
+ WHERE bt.benchmark_type = $1;
+
+ RETURN result;
+ END
+$$
+LANGUAGE plpgsql;
+COMMENT ON FUNCTION public.get_benchmark_type_id(citext)
+IS 'Select benchmark_type_id given benchmark type (e.g. ''time''), '
+ 'returning "benchmark_type.benchmark_type_id".';
+
+-- GET_UNIT_ID (full signature)
+CREATE OR REPLACE FUNCTION public.get_unit_id(
+ benchmark_type citext
+ , units citext
+ , lessisbetter boolean DEFAULT NULL
+)
+RETURNS integer AS
+$$
+ DECLARE
+ found_benchmark_type_id integer;
+ result integer;
+ BEGIN
+
+ IF ($3 IS NOT NULL) -- if lessisbetter is not null
+ THEN
+ SELECT public.get_benchmark_type_id($1, $3)
+ INTO found_benchmark_type_id;
+ ELSE
+ SELECT public.get_benchmark_type_id($1)
+ INTO found_benchmark_type_id;
+ END IF;
+
+ SELECT unit_id INTO result FROM public.unit AS u
+ WHERE u.benchmark_type_id = found_benchmark_type_id
+ AND u.units = $2;
+
+ IF result IS NULL THEN
+ INSERT INTO public.unit(benchmark_type_id, units)
+ VALUES(found_benchmark_type_id, $2)
+ RETURNING unit_id INTO result;
+ END IF;
+
+ RETURN result;
+ END
+$$
+LANGUAGE plpgsql;
+COMMENT ON FUNCTION public.get_unit_id(citext, citext, boolean)
+IS 'Insert or select benchmark type (e.g. ''time''), '
+ 'units string (e.g. ''miliseconds''), '
+ 'and "lessisbetter" (true if smaller benchmark values are better), '
+ 'returning "unit.unit_id".';
+
+-- GET_UNIT_ID (given unique units string only)
+CREATE OR REPLACE FUNCTION public.get_unit_id(units citext)
+RETURNS integer AS
+$$
+ SELECT unit_id FROM public.unit AS u
+ WHERE u.units = units;
+$$
+LANGUAGE sql STABLE;
+COMMENT ON FUNCTION public.get_unit_id(citext)
+IS 'Select unit_id given unit name, returning "unit.unit_id".';
+
+-- GET_BENCHMARK_ID (full signature)
+CREATE OR REPLACE FUNCTION public.get_benchmark_id(
+ benchmark_language citext
+ , benchmark_name citext
+ , parameter_names text[]
+ , benchmark_description text
+ , benchmark_version citext
+ , benchmark_type citext
+ , units citext
+ , lessisbetter boolean
+)
+RETURNS integer AS
+$$
+ DECLARE
+ found_benchmark_language_id integer;
+ found_unit_id integer;
+ result integer;
+ BEGIN
+ SELECT public.get_benchmark_language_id(
+ benchmark_language
+ ) INTO found_benchmark_language_id;
+
+ SELECT public.get_unit_id(
+ benchmark_type
+ , units
+ , lessisbetter
+ ) INTO found_unit_id;
+
+ SELECT benchmark_id INTO result FROM public.benchmark AS b
+ WHERE b.benchmark_language_id = found_benchmark_language_id
+ AND b.benchmark_name = $2
+ -- handle nullable "parameter_names"
+ AND b.parameter_names IS NOT DISTINCT FROM $3
+ AND b.benchmark_description = $4
+ AND b.benchmark_version = $5
+ AND b.unit_id = found_unit_id;
+
+ IF result IS NULL THEN
+ INSERT INTO public.benchmark(
+ benchmark_language_id
+ , benchmark_name
+ , parameter_names
+ , benchmark_description
+ , benchmark_version
+ , unit_id
+ )
+ VALUES (found_benchmark_language_id, $2, $3, $4, $5, found_unit_id)
+ RETURNING benchmark_id INTO result;
+ END IF;
+
+ RETURN result;
+ END
+$$
+LANGUAGE plpgsql;
+COMMENT ON FUNCTION public.get_benchmark_id(
+ citext
+ , citext
+ , text[]
+ , text
+ , citext
+ , citext
+ , citext
+ , boolean
+)
+IS 'Insert/select benchmark given data, returning "benchmark.benchmark_id".';
+
+-- GET_BENCHMARK_ID (by unique columns)
+CREATE OR REPLACE FUNCTION public.get_benchmark_id(
+ benchmark_language citext
+ , benchmark_name citext
+ , benchmark_version citext
+)
+RETURNS integer AS
+$$
+ WITH language AS (
+ SELECT public.get_benchmark_language_id(benchmark_language) AS id
+ )
+ SELECT b.benchmark_id
+ FROM public.benchmark AS b
+ JOIN language ON b.benchmark_language_id = language.id
+ WHERE b.benchmark_name = benchmark_name
+ AND benchmark_version = benchmark_version
+$$
+LANGUAGE sql STABLE;
+COMMENT ON FUNCTION public.get_benchmark_id(citext, citext, citext)
+IS 'Select existing benchmark given unique columns, '
+ 'returning "benchmark.benchmark_id".';
diff --git a/src/arrow/dev/benchmarking/ddl/3_01_functions_triggers.sql b/src/arrow/dev/benchmarking/ddl/3_01_functions_triggers.sql
new file mode 100644
index 000000000..b6ce4741a
--- /dev/null
+++ b/src/arrow/dev/benchmarking/ddl/3_01_functions_triggers.sql
@@ -0,0 +1,574 @@
+/*
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+*/
+
+
+-------------------------- TRIGGER FUNCTIONS --------------------------
+-- Views that do not select from a single table or view are not
+-- automatically updatable. These trigger functions are intended
+-- to be run instead of INSERT into the complicated views.
+
+
+-- LANGUAGE_IMPLEMENTATION_VERSION_VIEW_INSERT_ROW
+CREATE OR REPLACE FUNCTION public.language_implementation_version_view_insert_row()
+RETURNS trigger AS
+$$
+ DECLARE
+ language_id integer;
+ result integer;
+ BEGIN
+ IF NEW.benchmark_language IS NULL THEN
+ RAISE EXCEPTION 'Column "benchmark_language" cannot be NULL.';
+ END IF;
+ IF NEW.language_implementation_version IS NULL THEN
+ RAISE EXCEPTION
+ 'Column "language_implementation_version" cannot be NULL (use '''' instead).';
+ END IF;
+
+ SELECT public.get_benchmark_language_id(NEW.benchmark_language)
+ INTO language_id;
+
+ SELECT language_implementation_version_id INTO result FROM public.language_implementation_version AS lv
+ WHERE lv.benchmark_language_id = language_id
+ AND lv.language_implementation_version = NEW.language_implementation_version;
+
+ IF result IS NOT NULL THEN
+ -- row already exists
+ RETURN NULL;
+ ELSE
+ INSERT INTO
+ public.language_implementation_version(
+ benchmark_language_id
+ , language_implementation_version
+ )
+ VALUES (language_id, NEW.language_implementation_version)
+ RETURNING language_implementation_version_id INTO NEW.language_implementation_version_id;
+ END IF;
+
+ RETURN NEW;
+ END
+$$
+LANGUAGE plpgsql;
+
+-- ENVIRONMENT_VIEW_INSERT_ROW
+CREATE OR REPLACE FUNCTION public.environment_view_insert_row()
+RETURNS trigger AS
+$$
+ DECLARE
+ found_language_id integer;
+ found_version_id integer;
+ found_dependencies_id integer;
+ result integer;
+ BEGIN
+ IF NEW.benchmark_language IS NULL
+ THEN
+ RAISE EXCEPTION 'Column "benchmark_language" cannot be NULL.';
+ END IF;
+ IF NEW.language_implementation_version IS NULL THEN
+ RAISE EXCEPTION
+ 'Column "language_implementation_version" cannot be NULL (use '''' instead).';
+ END IF;
+
+ SELECT public.get_benchmark_language_id(NEW.benchmark_language)
+ INTO found_language_id;
+
+ SELECT public.get_language_implementation_version_id(
+ found_language_id
+ , NEW.language_implementation_version
+ )
+ INTO found_version_id;
+
+ SELECT public.get_dependencies_id(NEW.dependencies)
+ INTO found_dependencies_id;
+
+ SELECT environment_id INTO result FROM public.environment AS e
+ WHERE e.benchmark_language_id = found_language_id
+ AND e.language_implementation_version_id = found_version_id
+ AND e.dependencies_id = found_dependencies_id;
+
+ IF result IS NOT NULL THEN
+ -- row already exists
+ RETURN NULL;
+ ELSE
+ INSERT INTO
+ public.environment(
+ benchmark_language_id
+ , language_implementation_version_id
+ , dependencies_id
+ )
+ VALUES (found_language_id, found_version_id, found_dependencies_id)
+ RETURNING environment_id INTO NEW.environment_id;
+ END IF;
+
+ RETURN NEW;
+ END
+$$
+LANGUAGE plpgsql;
+
+-- MACHINE_VIEW_INSERT_ROW
+CREATE OR REPLACE FUNCTION public.machine_view_insert_row()
+RETURNS trigger AS
+$$
+ DECLARE
+ found_cpu_id integer;
+ found_gpu_id integer;
+ found_os_id integer;
+ result integer;
+ BEGIN
+ IF (
+ NEW.machine_name IS NULL
+ OR NEW.memory_bytes IS NULL
+ OR NEW.cpu_model_name IS NULL
+ OR NEW.cpu_core_count IS NULL
+ OR NEW.cpu_thread_count IS NULL
+ OR NEW.cpu_frequency_max_Hz IS NULL
+ OR NEW.cpu_frequency_min_Hz IS NULL
+ OR NEW.cpu_L1d_cache_bytes IS NULL
+ OR NEW.cpu_L1i_cache_bytes IS NULL
+ OR NEW.cpu_L2_cache_bytes IS NULL
+ OR NEW.cpu_L3_cache_bytes IS NULL
+ OR NEW.os_name IS NULL
+ OR NEW.architecture_name IS NULL
+ )
+ THEN
+ RAISE EXCEPTION 'None of the columns in "machine_view" can be NULL. '
+ 'all columns in table "gpu" will default to the empty string '''', '
+ 'as will blank "os.kernel_name". This is to allow uniqueness '
+ 'constraints to work. Thank you!.';
+ END IF;
+
+ SELECT public.get_cpu_id(
+ NEW.cpu_model_name
+ , NEW.cpu_core_count
+ , NEW.cpu_thread_count
+ , NEW.cpu_frequency_max_Hz
+ , NEW.cpu_frequency_min_Hz
+ , NEW.cpu_L1d_cache_bytes
+ , NEW.cpu_L1i_cache_bytes
+ , NEW.cpu_L2_cache_bytes
+ , NEW.cpu_L3_cache_bytes
+ ) INTO found_cpu_id;
+
+ SELECT public.get_gpu_id(
+ NEW.gpu_information
+ , NEW.gpu_part_number
+ , NEW.gpu_product_name
+ ) INTO found_gpu_id;
+
+ SELECT public.get_os_id(
+ NEW.os_name
+ , NEW.architecture_name
+ , NEW.kernel_name
+ ) INTO found_os_id;
+
+ SELECT machine_id INTO result FROM public.machine AS m
+ WHERE m.os_id = found_os_id
+ AND m.cpu_id = found_cpu_id
+ AND m.gpu_id = found_gpu_id
+ AND m.machine_name = NEW.machine_name
+ AND m.memory_bytes = NEW.memory_bytes
+ AND m.cpu_actual_frequency_Hz = NEW.cpu_actual_frequency_Hz;
+
+ IF result IS NOT NULL THEN
+ -- row already exists
+ RETURN NULL;
+ ELSE
+ INSERT INTO public.machine(
+ os_id
+ , cpu_id
+ , gpu_id
+ , machine_name
+ , mac_address
+ , memory_bytes
+ , cpu_actual_frequency_Hz
+ , machine_other_attributes
+ )
+ VALUES (
+ found_os_id
+ , found_cpu_id
+ , found_gpu_id
+ , NEW.machine_name
+ , NEW.mac_address
+ , NEW.memory_bytes
+ , NEW.cpu_actual_frequency_Hz
+ , NEW.machine_other_attributes
+ )
+ RETURNING machine_id INTO NEW.machine_id;
+ END IF;
+
+ RETURN NEW;
+ END
+$$
+LANGUAGE plpgsql;
+
+-- UNIT_VIEW_INSERT_ROW
+CREATE OR REPLACE FUNCTION public.unit_view_insert_row()
+RETURNS trigger AS
+$$
+ DECLARE
+ found_benchmark_type_id integer;
+ result integer;
+ BEGIN
+ IF (NEW.benchmark_type IS NULL OR NEW.units IS NULL)
+ THEN
+ RAISE EXCEPTION E'"benchmark_type" and "units" cannot be NULL.\n'
+ 'Further, if the "benchmark_type" has never been defined, '
+ '"lessisbetter" must be defined or there will be an error.';
+ END IF;
+
+ -- It's OK for "lessisbetter" = NULL if "benchmark_type" already exists.
+ SELECT public.get_benchmark_type_id(NEW.benchmark_type, NEW.lessisbetter)
+ INTO found_benchmark_type_id;
+
+ SELECT unit_id INTO result FROM public.unit AS u
+ WHERE u.benchmark_type_id = found_benchmark_type_id
+ AND u.units = NEW.units;
+
+ IF result IS NOT NULL THEN
+ -- row already exists
+ RETURN NULL;
+ ELSE
+ INSERT INTO public.unit (
+ benchmark_type_id
+ , units
+ )
+ VALUES (
+ found_benchmark_type_id
+ , NEW.units
+ )
+ RETURNING unit_id INTO NEW.unit_id;
+ END IF;
+
+ RETURN NEW;
+ END
+$$
+LANGUAGE plpgsql;
+
+-- BENCHMARK_VIEW_INSERT_ROW
+CREATE OR REPLACE FUNCTION public.benchmark_view_insert_row()
+RETURNS trigger AS
+$$
+ DECLARE
+ found_benchmark_language_id integer;
+ found_units_id integer;
+ result integer;
+ BEGIN
+ IF (
+ NEW.benchmark_name IS NULL
+ OR NEW.benchmark_version IS NULL
+ OR NEW.benchmark_language IS NULL
+ OR NEW.benchmark_type IS NULL
+ OR NEW.benchmark_description IS NULL
+ OR NEW.units IS NULL
+ )
+ THEN
+ RAISE EXCEPTION 'The only nullable column in this view is '
+ '"benchmark.parameter_names".';
+ END IF;
+
+ SELECT public.get_benchmark_language_id(
+ NEW.benchmark_language
+ ) INTO found_benchmark_language_id;
+
+ SELECT public.get_unit_id(NEW.units) INTO found_units_id;
+
+ SELECT benchmark_id INTO result FROM public.benchmark AS b
+ WHERE b.benchmark_language_id = found_benchmark_language_id
+ AND b.benchmark_name = NEW.benchmark_name
+ -- handle nullable "parameter_names"
+ AND b.parameter_names IS NOT DISTINCT FROM NEW.parameter_names
+ AND b.benchmark_description = NEW.benchmark_description
+ AND b.benchmark_version = NEW.benchmark_version
+ AND b.unit_id = found_units_id;
+
+ IF result IS NOT NULL THEN
+ -- row already exists
+ RETURN NULL;
+ ELSE
+ INSERT INTO public.benchmark(
+ benchmark_language_id
+ , benchmark_name
+ , parameter_names
+ , benchmark_description
+ , benchmark_version
+ , unit_id
+ )
+ VALUES (
+ found_benchmark_language_id
+ , NEW.benchmark_name
+ , NEW.parameter_names
+ , NEW.benchmark_description
+ , NEW.benchmark_version
+ , found_units_id
+ )
+ RETURNING benchmark_id INTO NEW.benchmark_id;
+ END IF;
+
+ RETURN NEW;
+ END
+$$
+LANGUAGE plpgsql;
+
+-- BENCHMARK_RUN_VIEW_INSERT_ROW
+CREATE OR REPLACE FUNCTION public.benchmark_run_view_insert_row()
+RETURNS trigger AS
+$$
+ DECLARE
+ found_benchmark_id integer;
+ found_benchmark_language_id integer;
+ found_machine_id integer;
+ found_environment_id integer;
+ found_language_implementation_version_id integer;
+ BEGIN
+ IF (
+ NEW.benchmark_name IS NULL
+ OR NEW.benchmark_version IS NULL
+ OR NEW.benchmark_language IS NULL
+ OR NEW.value IS NULL
+ OR NEW.run_timestamp IS NULL
+ OR NEW.git_commit_timestamp IS NULL
+ OR NEW.git_hash IS NULL
+ OR NEW.language_implementation_version IS NULL
+ OR NEW.mac_address IS NULL
+ )
+ THEN
+ RAISE EXCEPTION 'Only the following columns can be NULL: '
+ '"parameter_names", "val_min", "val_q1", "val_q3", "val_max".';
+ END IF;
+
+ SELECT public.get_benchmark_id(
+ NEW.benchmark_language
+ , NEW.benchmark_name
+ , NEW.benchmark_version
+ ) INTO found_benchmark_id;
+
+ SELECT public.get_benchmark_language_id(
+ NEW.benchmark_language
+ ) INTO found_benchmark_language_id;
+
+ SELECT public.get_machine_id(
+ NEW.mac_address
+ ) INTO found_machine_id;
+
+ SELECT public.get_environment_id(
+ NEW.benchmark_language
+ , NEW.language_implementation_version
+ , NEW.dependencies
+ ) INTO found_environment_id;
+
+ SELECT public.get_language_implementation_version_id(
+ found_benchmark_language_id,
+ NEW.language_implementation_version
+ ) INTO found_language_implementation_version_id;
+
+ INSERT INTO public.benchmark_run (
+ parameter_values
+ , value
+ , git_commit_timestamp
+ , git_hash
+ , val_min
+ , val_q1
+ , val_q3
+ , val_max
+ , std_dev
+ , n_obs
+ , run_timestamp
+ , run_metadata
+ , run_notes
+ , machine_id
+ , benchmark_language_id
+ , language_implementation_version_id
+ , environment_id
+ , benchmark_id
+ )
+ VALUES (
+ COALESCE(NEW.parameter_values, '{}'::jsonb)
+ , NEW.value
+ , NEW.git_commit_timestamp
+ , NEW.git_hash
+ , NEW.val_min
+ , NEW.val_q1
+ , NEW.val_q3
+ , NEW.val_max
+ , NEW.std_dev
+ , NEW.n_obs
+ , NEW.run_timestamp
+ , NEW.run_metadata
+ , NEW.run_notes
+ , found_machine_id
+ , found_benchmark_language_id
+ , found_language_implementation_version_id
+ , found_environment_id
+ , found_benchmark_id
+ ) returning benchmark_run_id INTO NEW.benchmark_run_id;
+
+ RETURN NEW;
+ END
+$$
+LANGUAGE plpgsql;
+
+-- FULL_BENCHMARK_RUN_VIEW_INSERT_ROW
+CREATE OR REPLACE FUNCTION public.full_benchmark_run_view_insert_row()
+RETURNS trigger AS
+$$
+ DECLARE
+ found_benchmark_id integer;
+ found_benchmark_language_id integer;
+ found_machine_id integer;
+ found_environment_id integer;
+ found_language_implementation_version_id integer;
+ BEGIN
+ IF (
+ NEW.value IS NULL
+ OR NEW.git_hash IS NULL
+ OR NEW.git_commit_timestamp IS NULL
+ OR NEW.run_timestamp IS NULL
+ -- benchmark
+ OR NEW.benchmark_name IS NULL
+ OR NEW.benchmark_description IS NULL
+ OR NEW.benchmark_version IS NULL
+ OR NEW.benchmark_language IS NULL
+ -- unit
+ OR NEW.benchmark_type IS NULL
+ OR NEW.units IS NULL
+ OR NEW.lessisbetter IS NULL
+ -- machine
+ OR NEW.machine_name IS NULL
+ OR NEW.memory_bytes IS NULL
+ OR NEW.cpu_model_name IS NULL
+ OR NEW.cpu_core_count IS NULL
+ OR NEW.os_name IS NULL
+ OR NEW.architecture_name IS NULL
+ OR NEW.kernel_name IS NULL
+ OR NEW.cpu_model_name IS NULL
+ OR NEW.cpu_core_count IS NULL
+ OR NEW.cpu_thread_count IS NULL
+ OR NEW.cpu_frequency_max_Hz IS NULL
+ OR NEW.cpu_frequency_min_Hz IS NULL
+ OR NEW.cpu_L1d_cache_bytes IS NULL
+ OR NEW.cpu_L1i_cache_bytes IS NULL
+ OR NEW.cpu_L2_cache_bytes IS NULL
+ OR NEW.cpu_L3_cache_bytes IS NULL
+ )
+ THEN
+ RAISE EXCEPTION 'Only the following columns can be NULL: '
+ '"machine_other_attributes", "parameter_names", "val_min", '
+ '"val_q1", "val_q3", "val_max", "run_metadata", "run_notes". '
+ 'If "gpu_information", "gpu_part_number", "gpu_product_name", or '
+ '"kernel_name" are null, they will be silently turned into an '
+ 'empty string ('''').';
+ END IF;
+
+ SELECT public.get_benchmark_id(
+ NEW.benchmark_language
+ , NEW.benchmark_name
+ , NEW.parameter_names
+ , NEW.benchmark_description
+ , NEW.benchmark_version
+ , NEW.benchmark_type
+ , NEW.units
+ , NEW.lessisbetter
+ ) INTO found_benchmark_id;
+
+ SELECT public.get_benchmark_language_id(
+ NEW.benchmark_language
+ ) INTO found_benchmark_language_id;
+
+ SELECT public.get_machine_id(
+ NEW.mac_address
+ , NEW.machine_name
+ , NEW.memory_bytes
+ , NEW.cpu_actual_frequency_Hz
+ -- os
+ , NEW.os_name
+ , NEW.architecture_name
+ , NEW.kernel_name
+ -- cpu
+ , NEW.cpu_model_name
+ , NEW.cpu_core_count
+ , NEW.cpu_thread_count
+ , NEW.cpu_frequency_max_Hz
+ , NEW.cpu_frequency_min_Hz
+ , NEW.cpu_L1d_cache_bytes
+ , NEW.cpu_L1i_cache_bytes
+ , NEW.cpu_L2_cache_bytes
+ , NEW.cpu_L3_cache_bytes
+ -- gpu
+ , NEW.gpu_information
+ , NEW.gpu_part_number
+ , NEW.gpu_product_name
+ -- nullable machine attributes
+ , NEW.machine_other_attributes
+ ) INTO found_machine_id;
+
+ SELECT public.get_environment_id(
+ NEW.benchmark_language
+ , NEW.language_implementation_version
+ , NEW.dependencies
+ ) INTO found_environment_id;
+
+ SELECT public.get_language_implementation_version_id(
+ found_benchmark_language_id,
+ NEW.language_implementation_version
+ ) INTO found_language_implementation_version_id;
+
+ INSERT INTO public.benchmark_run (
+ parameter_values
+ , value
+ , git_commit_timestamp
+ , git_hash
+ , val_min
+ , val_q1
+ , val_q3
+ , val_max
+ , std_dev
+ , n_obs
+ , run_timestamp
+ , run_metadata
+ , run_notes
+ , machine_id
+ , benchmark_language_id
+ , language_implementation_version_id
+ , environment_id
+ , benchmark_id
+ )
+ VALUES (
+ NEW.parameter_values
+ , NEW.value
+ , NEW.git_commit_timestamp
+ , NEW.git_hash
+ , NEW.val_min
+ , NEW.val_q1
+ , NEW.val_q3
+ , NEW.val_max
+ , NEW.std_dev
+ , NEW.n_obs
+ , NEW.run_timestamp
+ , NEW.run_metadata
+ , NEW.run_notes
+ , found_machine_id
+ , found_benchmark_language_id
+ , found_language_implementation_version_id
+ , found_environment_id
+ , found_benchmark_id
+ ) returning benchmark_run_id INTO NEW.benchmark_run_id;
+
+ RETURN NEW;
+ END
+$$
+LANGUAGE plpgsql;
diff --git a/src/arrow/dev/benchmarking/ddl/3_02_functions_ingestion.sql b/src/arrow/dev/benchmarking/ddl/3_02_functions_ingestion.sql
new file mode 100644
index 000000000..000c61d00
--- /dev/null
+++ b/src/arrow/dev/benchmarking/ddl/3_02_functions_ingestion.sql
@@ -0,0 +1,323 @@
+/*
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+*/
+
+
+-------------------------- IMPORT HELPERS --------------------------
+-- Load from JSON (from https://stackoverflow.com/a/48396608)
+-- How to use it in the psql client:
+-- \set content `cat /examples/machine.json`
+-- select ingest_machine(:'content'::jsonb);
+-- INGEST_MACHINE_VIEW
+CREATE OR REPLACE FUNCTION public.ingest_machine_view(from_jsonb jsonb)
+RETURNS integer AS
+$$
+ DECLARE
+ result integer;
+ BEGIN
+ INSERT INTO public.machine_view
+ SELECT * FROM jsonb_populate_record(null::public.machine_view, from_jsonb)
+ RETURNING machine_id INTO result;
+ RETURN result;
+ END
+$$
+LANGUAGE plpgsql;
+COMMENT ON FUNCTION public.ingest_machine_view(jsonb) IS
+ E'The argument is a JSON object. NOTE: key names must be entirely\n'
+ 'lowercase, or the insert will fail. Extra key-value pairs are ignored.\n'
+ 'Example::\n\n'
+ ' {\n'
+ ' "mac_address": "0a:00:2d:01:02:03",\n'
+ ' "machine_name": "Yet-Another-Machine-Name",\n'
+ ' "memory_bytes": 8589934592,\n'
+ ' "cpu_actual_frequency_hz": 2300000000,\n'
+ ' "os_name": "OSX",\n'
+ ' "architecture_name": "x86_64",\n'
+ ' "kernel_name": "18.2.0",\n'
+ ' "cpu_model_name": "Intel(R) Core(TM) i5-7360U CPU @ 2.30GHz",\n'
+ ' "cpu_core_count": 2,\n'
+ ' "cpu_thread_count": 4,\n'
+ ' "cpu_frequency_max_hz": 2300000000,\n'
+ ' "cpu_frequency_min_hz": 2300000000,\n'
+ ' "cpu_l1d_cache_bytes": 32768,\n'
+ ' "cpu_l1i_cache_bytes": 32768,\n'
+ ' "cpu_l2_cache_bytes": 262144,\n'
+ ' "cpu_l3_cache_bytes": 4194304,\n'
+ ' "machine_other_attributes": {"just": "an example"},\n'
+ ' "gpu_information": "",\n'
+ ' "gpu_part_number": "",\n'
+ ' "gpu_product_name": ""\n'
+ ' }\n\n'
+ 'To identify which columns in "machine_view" are required,\n'
+ 'please see the view documentation in :ref:`benchmark-data-model`.\n';
+
+-- INGEST_BENCHMARK_VIEW
+CREATE OR REPLACE FUNCTION public.ingest_benchmark_view(from_jsonb jsonb)
+RETURNS setof integer AS
+$$
+ BEGIN
+ RETURN QUERY
+ INSERT INTO public.benchmark_view
+ SELECT * FROM jsonb_populate_recordset(
+ null::public.benchmark_view
+ , from_jsonb
+ )
+ RETURNING benchmark_id;
+ END
+$$
+LANGUAGE plpgsql;
+COMMENT ON FUNCTION public.ingest_benchmark_view(jsonb) IS
+ E'The argument is a JSON object. NOTE: key names must be entirely\n'
+ 'lowercase, or the insert will fail. Extra key-value pairs are ignored.\n'
+ 'Example::\n\n'
+ ' [\n'
+ ' {\n'
+ ' "benchmark_name": "Benchmark 1",\n'
+ ' "parameter_names": ["arg0", "arg1", "arg2"],\n'
+ ' "benchmark_description": "First benchmark",\n'
+ ' "benchmark_type": "Time",\n'
+ ' "units": "miliseconds",\n'
+ ' "lessisbetter": true,\n'
+ ' "benchmark_version": "second version",\n'
+ ' "benchmark_language": "Python"\n'
+ ' },\n'
+ ' {\n'
+ ' "benchmark_name": "Benchmark 2",\n'
+ ' "parameter_names": ["arg0", "arg1"],\n'
+ ' "benchmark_description": "Description 2.",\n'
+ ' "benchmark_type": "Time",\n'
+ ' "units": "nanoseconds",\n'
+ ' "lessisbetter": true,\n'
+ ' "benchmark_version": "second version",\n'
+ ' "benchmark_language": "Python"\n'
+ ' }\n'
+ ' ]\n\n'
+ 'To identify which columns in "benchmark_view" are required,\n'
+ 'please see the view documentation in :ref:`benchmark-data-model`.\n';
+
+-- INGEST_BENCHMARK_RUN_VIEW
+CREATE OR REPLACE FUNCTION public.ingest_benchmark_run_view(from_jsonb jsonb)
+RETURNS setof bigint AS
+$$
+ BEGIN
+ RETURN QUERY
+ INSERT INTO public.benchmark_run_view
+ SELECT * FROM
+ jsonb_populate_recordset(null::public.benchmark_run_view, from_jsonb)
+ RETURNING benchmark_run_id;
+ END
+$$
+LANGUAGE plpgsql;
+COMMENT ON FUNCTION public.ingest_benchmark_run_view(jsonb) IS
+ E'The argument is a JSON object. NOTE: key names must be entirely\n'
+ 'lowercase, or the insert will fail. Extra key-value pairs are ignored.\n'
+ 'Example::\n\n'
+ ' [\n'
+ ' {\n'
+ ' "benchmark_name": "Benchmark 2",\n'
+ ' "benchmark_version": "version 0",\n'
+ ' "parameter_values": {"arg0": 100, "arg1": 5},\n'
+ ' "value": 2.5,\n'
+ ' "git_commit_timestamp": "2019-02-08 22:35:53 +0100",\n'
+ ' "git_hash": "324d3cf198444a",\n'
+ ' "val_min": 1,\n'
+ ' "val_q1": 2,\n'
+ ' "val_q3": 3,\n'
+ ' "val_max": 4,\n'
+ ' "std_dev": 1.41,\n'
+ ' "n_obs": 8,\n'
+ ' "run_timestamp": "2019-02-14 03:00:05 -0600",\n'
+ ' "mac_address": "08:00:2b:01:02:03",\n'
+ ' "benchmark_language": "Python",\n'
+ ' "language_implementation_version": "CPython 2.7",\n'
+ ' "dependencies": {"six": "", "numpy": "1.14", "other_lib": "1.0"}\n'
+ ' },\n'
+ ' {\n'
+ ' "benchmark_name": "Benchmark 2",\n'
+ ' "benchmark_version": "version 0",\n'
+ ' "parameter_values": {"arg0": 1000, "arg1": 5},\n'
+ ' "value": 5,\n'
+ ' "git_commit_timestamp": "2019-02-08 22:35:53 +0100",\n'
+ ' "git_hash": "324d3cf198444a",\n'
+ ' "std_dev": 3.14,\n'
+ ' "n_obs": 8,\n'
+ ' "run_timestamp": "2019-02-14 03:00:10 -0600",\n'
+ ' "mac_address": "08:00:2b:01:02:03",\n'
+ ' "benchmark_language": "Python",\n'
+ ' "language_implementation_version": "CPython 2.7",\n'
+ ' "dependencies": {"six": "", "numpy": "1.14", "other_lib": "1.0"}\n'
+ ' }\n'
+ ' ]\n'
+ 'To identify which columns in "benchmark_run_view" are required,\n'
+ 'please see the view documentation in :ref:`benchmark-data-model`.\n';
+
+-- INGEST_BENCHMARK_RUNS_WITH_CONTEXT
+CREATE OR REPLACE FUNCTION public.ingest_benchmark_runs_with_context(from_jsonb jsonb)
+RETURNS setof bigint AS
+$$
+ DECLARE
+ context_jsonb jsonb;
+ found_environment_id integer;
+ found_machine_id integer;
+ BEGIN
+ SELECT from_jsonb -> 'context' INTO context_jsonb;
+
+ SELECT public.get_machine_id((context_jsonb ->> 'mac_address')::macaddr)
+ INTO found_machine_id;
+
+ SELECT get_environment_id(
+ (context_jsonb ->> 'benchmark_language')::citext
+ , (context_jsonb ->> 'language_implementation_version')::citext
+ , context_jsonb -> 'dependencies'
+ ) INTO found_environment_id;
+
+ RETURN QUERY
+ WITH run_datum AS (
+ SELECT *
+ FROM jsonb_to_recordset(from_jsonb -> 'benchmarks')
+ AS x(
+ benchmark_name citext
+ , parameter_values jsonb
+ , value numeric
+ , val_min numeric
+ , val_q1 numeric
+ , val_q3 numeric
+ , val_max numeric
+ , std_dev numeric
+ , n_obs integer
+ , run_timestamp timestamp (0) with time zone
+ , run_metadata jsonb
+ , run_notes text
+ )
+ ), benchmark_name_and_id AS (
+ SELECT
+ key AS benchmark_name
+ , public.get_benchmark_id(
+ (context_jsonb ->> 'benchmark_language')::citext
+ , key::citext -- benchmark_name
+ , value::citext -- benchmark_version
+ ) AS benchmark_id
+ FROM jsonb_each_text(from_jsonb -> 'benchmark_version')
+ )
+ INSERT INTO public.benchmark_run (
+ benchmark_id
+ -- run_datum
+ , parameter_values
+ , value
+ , val_min
+ , val_q1
+ , val_q3
+ , val_max
+ , std_dev
+ , n_obs
+ , run_metadata
+ , run_notes
+ -- additional context information
+ , git_commit_timestamp
+ , git_hash
+ , run_timestamp
+ -- machine
+ , machine_id
+ -- environment
+ , environment_id
+ , language_implementation_version_id
+ , benchmark_language_id
+ )
+ SELECT
+ b.benchmark_id
+ -- run_datum
+ , run_datum.parameter_values
+ , run_datum.value
+ , run_datum.val_min
+ , run_datum.val_q1
+ , run_datum.val_q3
+ , run_datum.val_max
+ , run_datum.std_dev
+ , run_datum.n_obs
+ , run_datum.run_metadata
+ , run_datum.run_notes
+ -- additional context information
+ , (context_jsonb ->> 'git_commit_timestamp')::timestamp (0) with time zone
+ , context_jsonb ->> 'git_hash'
+ , (context_jsonb ->> 'run_timestamp')::timestamp (0) with time zone
+ -- machine
+ , found_machine_id
+ -- environment
+ , e.environment_id
+ , e.language_implementation_version_id
+ , e.benchmark_language_id
+ FROM run_datum
+ JOIN public.environment AS e
+ ON e.environment_id = found_environment_id
+ JOIN benchmark_name_and_id AS b
+ ON b.benchmark_name = run_datum.benchmark_name
+ RETURNING benchmark_run_id;
+ END
+$$
+LANGUAGE plpgsql;
+COMMENT ON FUNCTION public.ingest_benchmark_runs_with_context(jsonb) IS
+ E'The argument is a JSON object. NOTE: key names must be entirely\n'
+ 'lowercase, or the insert will fail. Extra key-value pairs are ignored.\n'
+ 'The object contains three key-value pairs::\n\n'
+ ' {"context": {\n'
+ ' "mac_address": "08:00:2b:01:02:03",\n'
+ ' "benchmark_language": "Python",\n'
+ ' "language_implementation_version": "CPython 3.6",\n'
+ ' "dependencies": {"six": "", "numpy": "1.14", "other_lib": "1.0"},\n'
+ ' "git_commit_timestamp": "2019-02-14 22:42:22 +0100",\n'
+ ' "git_hash": "123456789abcde",\n'
+ ' "run_timestamp": "2019-02-14 03:00:40 -0600",\n'
+ ' "extra stuff": "does not hurt anything and will not be added."\n'
+ ' },\n'
+ ' "benchmark_version": {\n'
+ ' "Benchmark Name 1": "Any string can be a version.",\n'
+ ' "Benchmark Name 2": "A git hash can be a version.",\n'
+ ' "An Unused Benchmark Name": "Will be ignored."\n'
+ ' },\n'
+ ' "benchmarks": [\n'
+ ' {\n'
+ ' "benchmark_name": "Benchmark Name 1",\n'
+ ' "parameter_values": {"argument1": 1, "argument2": "value2"},\n'
+ ' "value": 42,\n'
+ ' "val_min": 41.2,\n'
+ ' "val_q1": 41.5,\n'
+ ' "val_q3": 42.5,\n'
+ ' "val_max": 42.8,\n'
+ ' "std_dev": 0.5,\n'
+ ' "n_obs": 100,\n'
+ ' "run_metadata": {"any": "key-value pairs"},\n'
+ ' "run_notes": "Any relevant notes."\n'
+ ' },\n'
+ ' {\n'
+ ' "benchmark_name": "Benchmark Name 2",\n'
+ ' "parameter_values": {"not nullable": "Use {} if no params."},\n'
+ ' "value": 8,\n'
+ ' "std_dev": 1,\n'
+ ' "n_obs": 2,\n'
+ ' }\n'
+ ' ]\n'
+ ' }\n\n'
+ '- The entry for "context" contains the machine, environment, and timestamp\n'
+ ' information common to all of the runs\n'
+ '- The entry for "benchmark_version" maps benchmark\n'
+ ' names to their version strings. (Which can be a git hash,\n'
+ ' the entire code string, a number, or any other string of your choice.)\n'
+ '- The entry for "benchmarks" is a list of benchmark run data\n'
+ ' for the given context and benchmark versions. The first example\n'
+ ' benchmark run entry contains all possible values, even\n'
+ ' nullable ones, and the second entry omits all nullable values.\n\n';
diff --git a/src/arrow/dev/benchmarking/ddl/3_10_functions_documentation.sql b/src/arrow/dev/benchmarking/ddl/3_10_functions_documentation.sql
new file mode 100644
index 000000000..6b2a05790
--- /dev/null
+++ b/src/arrow/dev/benchmarking/ddl/3_10_functions_documentation.sql
@@ -0,0 +1,395 @@
+/*
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+*/
+
+
+-- _DOCUMENTATION_INGESTION
+CREATE OR REPLACE FUNCTION public._documentation_ingestion()
+RETURNS text AS
+$$
+ WITH ingestion_docs AS (
+ SELECT
+ proname || E'\n'
+ || rpad('', character_length(proname), '-')
+ || E'\n\n:code:`'
+ || proname || '('
+ || string_agg(a.argname || ' ' || typname , ', ')
+ || E')`\n\n'
+ || description
+ || E'\n\n\nback to `Benchmark data model <benchmark-data-model>`_\n'
+ AS docs
+ FROM pg_catalog.pg_proc
+ JOIN pg_catalog.pg_namespace
+ ON nspname='public'
+ AND pg_namespace.oid = pronamespace
+ AND proname LIKE '%ingest%'
+ JOIN pg_catalog.pg_description
+ ON pg_description.objoid=pg_proc.oid,
+ LATERAL unnest(proargnames, proargtypes) AS a(argname, argtype)
+ JOIN pg_catalog.pg_type
+ ON pg_type.oid = a.argtype
+ GROUP BY proname, description
+ )
+ SELECT
+ string_agg(docs, E'\n\n') AS docs
+ FROM ingestion_docs;
+$$
+LANGUAGE sql STABLE;
+
+-- _DOCUMENTATION_VIEW_DETAILS
+CREATE OR REPLACE FUNCTION public._documentation_view_details(view_name citext)
+RETURNS TABLE(
+ column_name name
+ , type_name name
+ , nullable text
+ , default_value text
+ , description text
+) AS
+$$
+ WITH view_columns AS (
+ SELECT
+ attname AS column_name
+ , attnum AS column_order
+ FROM pg_catalog.pg_attribute
+ WHERE attrelid=view_name::regclass
+ )
+ SELECT
+ t.column_name
+ , type_name
+ , coalesce(nullable, '')
+ , coalesce(default_value, '')
+ , coalesce(description, '')
+ FROM public.summarized_tables_view AS t
+ JOIN view_columns AS v ON v.column_name = t.column_name
+ WHERE t.table_name || '_view' = view_name OR t.column_name NOT LIKE '%_id'
+ ORDER BY column_order;
+$$
+LANGUAGE sql STABLE;
+
+
+-- _DOCUMENTATION_VIEW_PIECES
+CREATE OR REPLACE FUNCTION public._documentation_view_pieces(view_name citext)
+RETURNS TABLE (rst_formatted text)
+AS
+$$
+DECLARE
+ column_length integer;
+ type_length integer;
+ nullable_length integer;
+ default_length integer;
+ description_length integer;
+ sep text;
+ border text;
+BEGIN
+
+ -- All of the hard-coded constants here are the string length of the table
+ -- column headers: 'Column', 'Type', 'Nullable', 'Default', 'Description'
+ SELECT greatest(6, max(character_length(column_name)))
+ FROM public._documentation_view_details(view_name) INTO column_length;
+
+ SELECT greatest(4, max(character_length(type_name)))
+ FROM public._documentation_view_details(view_name) INTO type_length;
+
+ SELECT greatest(8, max(character_length(nullable)))
+ FROM public._documentation_view_details(view_name) INTO nullable_length;
+
+ SELECT greatest(7, max(character_length(default_value)))
+ FROM public._documentation_view_details(view_name) INTO default_length;
+
+ SELECT greatest(11, max(character_length(description)))
+ FROM public._documentation_view_details(view_name) INTO description_length;
+
+ SELECT ' ' INTO sep;
+
+ SELECT
+ concat_ws(sep
+ , rpad('', column_length, '=')
+ , rpad('', type_length, '=')
+ , rpad('', nullable_length, '=')
+ , rpad('', default_length, '=')
+ , rpad('', description_length, '=')
+ )
+ INTO border;
+
+ RETURN QUERY
+ SELECT
+ border
+ UNION ALL
+ SELECT
+ concat_ws(sep
+ , rpad('Column', column_length, ' ')
+ , rpad('Type', type_length, ' ')
+ , rpad('Nullable', nullable_length, ' ')
+ , rpad('Default', default_length, ' ')
+ , rpad('Description', description_length, ' ')
+ )
+ UNION ALL
+ SELECT border
+ UNION ALL
+ SELECT
+ concat_ws(sep
+ , rpad(v.column_name, column_length, ' ')
+ , rpad(v.type_name, type_length, ' ')
+ , rpad(v.nullable, nullable_length, ' ')
+ , rpad(v.default_value, default_length, ' ')
+ , rpad(v.description, description_length, ' ')
+ )
+ FROM public._documentation_view_details(view_name) AS v
+ UNION ALL
+ SELECT border;
+
+END
+$$
+LANGUAGE plpgsql STABLE;
+
+
+-- DOCUMENTATION_FOR
+CREATE OR REPLACE FUNCTION public.documentation_for(view_name citext)
+RETURNS text AS
+$$
+ DECLARE
+ view_description text;
+ view_table_markup text;
+ BEGIN
+ SELECT description FROM pg_catalog.pg_description
+ WHERE pg_description.objoid = view_name::regclass
+ INTO view_description;
+
+ SELECT
+ view_name || E'\n' || rpad('', length(view_name), '-') || E'\n\n' ||
+ view_description || E'\n\n' ||
+ string_agg(rst_formatted, E'\n')
+ INTO view_table_markup
+ FROM public._documentation_view_pieces(view_name);
+
+ RETURN view_table_markup;
+ END
+$$
+LANGUAGE plpgsql STABLE;
+COMMENT ON FUNCTION public.documentation_for(citext)
+IS E'Create an ".rst"-formatted table describing a specific view.\n'
+ 'Example: SELECT public.documentation_for(''endpoint'');';
+
+
+-- DOCUMENTATION
+CREATE OR REPLACE FUNCTION public.documentation(dotfile_name text)
+RETURNS TABLE (full_text text) AS
+$$
+ WITH v AS (
+ SELECT
+ public.documentation_for(relname::citext)
+ || E'\n\nback to `Benchmark data model <benchmark-data-model>`_\n'
+ AS view_documentation
+ FROM pg_catalog.pg_trigger
+ JOIN pg_catalog.pg_class ON pg_trigger.tgrelid = pg_class.oid
+ WHERE NOT tgisinternal
+ )
+ SELECT
+ E'\n.. _benchmark-data-model:\n\n'
+ 'Benchmark data model\n'
+ '====================\n\n\n'
+ '.. graphviz:: '
+ || dotfile_name
+ || E'\n\n\n.. _benchmark-ingestion:\n\n'
+ 'Benchmark ingestion helper functions\n'
+ '====================================\n\n'
+ || public._documentation_ingestion()
+ || E'\n\n\n.. _benchmark-views:\n\n'
+ 'Benchmark views\n'
+ '===============\n\n\n'
+ || string_agg(v.view_documentation, E'\n')
+ FROM v
+ GROUP BY True;
+$$
+LANGUAGE sql STABLE;
+COMMENT ON FUNCTION public.documentation(text)
+IS E'Create an ".rst"-formatted file that shows the columns in '
+ 'every insertable view in the "public" schema.\n'
+ 'The text argument is the name of the generated dotfile to be included.\n'
+ 'Example: SELECT public.documentation(''data_model.dot'');';
+
+
+-- _DOCUMENTATION_DOTFILE_NODE_FOR
+CREATE OR REPLACE FUNCTION public._documentation_dotfile_node_for(tablename name)
+RETURNS text AS
+$$
+DECLARE
+ result text;
+BEGIN
+ WITH node AS (
+ SELECT
+ tablename::text AS lines
+ UNION ALL
+ SELECT
+ E'[label = \n'
+ ' <<table border="0" cellborder="1" cellspacing="0" cellpadding="2">'
+ UNION ALL
+ -- table name
+ SELECT
+ ' <tr><td border="0"><font point-size="14">'
+ || tablename
+ || '</font></td></tr>'
+ UNION ALL
+ -- primary keys
+ SELECT
+ ' <tr><td port="' || column_name || '"><b>'
+ || column_name
+ || ' (pk)</b></td></tr>'
+ FROM public.summarized_tables_view
+ WHERE table_name = tablename
+ AND description LIKE '%primary key%'
+ UNION ALL
+ -- columns
+ SELECT
+ ' <tr><td>'
+ || column_name
+ || CASE WHEN description LIKE '%unique' THEN ' (u)' ELSE '' END
+ || CASE WHEN nullable <> 'not null' THEN ' (o)' ELSE '' END
+ || '</td></tr>'
+ FROM public.summarized_tables_view
+ WHERE table_name = tablename
+ AND (description IS NULL OR description not like '%key%')
+ UNION ALL
+ -- foreign keys
+ SELECT
+ ' <tr><td port="' || column_name || '">'
+ || column_name
+ || CASE WHEN description LIKE '%unique' THEN ' (u)' ELSE '' END
+ || ' (fk) </td></tr>'
+ FROM public.summarized_tables_view
+ WHERE table_name = tablename
+ AND description LIKE '%foreign key%'
+ AND description NOT LIKE '%primary key%'
+ UNION ALL
+ SELECT
+ E' </table>>\n];'
+ )
+ SELECT
+ string_agg(lines, E'\n')
+ INTO result
+ FROM node;
+
+ RETURN result;
+END
+$$
+LANGUAGE plpgsql STABLE;
+
+
+-- _DOCUMENTATION_DOTFILE_EDGES
+CREATE OR REPLACE FUNCTION public._documentation_dotfile_edges()
+RETURNS text AS
+$$
+DECLARE
+ result text;
+BEGIN
+ WITH relationship AS (
+ SELECT
+ conrelid AS fk_table_id
+ , confrelid AS pk_table_id
+ , unnest(conkey) AS fk_colnum
+ , unnest(confkey) AS pk_colnum
+ FROM pg_catalog.pg_constraint
+ WHERE confkey IS NOT NULL
+ AND connamespace='public'::regnamespace
+ ), all_edges AS (
+ SELECT
+ fk_tbl.relname || ':' || fk_col.attname
+ || ' -> '
+ || pk_tbl.relname || ':' || pk_col.attname
+ || ';' AS lines
+ FROM relationship
+ -- foreign key table + column
+ JOIN pg_catalog.pg_attribute AS fk_col
+ ON fk_col.attrelid = relationship.fk_table_id
+ AND fk_col.attnum = relationship.fk_colnum
+ JOIN pg_catalog.pg_class AS fk_tbl
+ ON fk_tbl.oid = relationship.fk_table_id
+ -- primary key table + column
+ JOIN pg_catalog.pg_attribute AS pk_col
+ ON pk_col.attrelid = relationship.pk_table_id
+ AND pk_col.attnum = relationship.pk_colnum
+ JOIN pg_catalog.pg_class AS pk_tbl
+ ON pk_tbl.oid = relationship.pk_table_id
+ )
+ SELECT
+ string_agg(lines, E'\n')
+ INTO result
+ FROM all_edges;
+
+ RETURN result;
+END
+$$
+LANGUAGE plpgsql STABLE;
+
+
+-- DOCUMENTATION_DOTFILE
+CREATE OR REPLACE FUNCTION public.documentation_dotfile()
+RETURNS text AS
+$$
+DECLARE
+ schemaname name := 'public';
+ result text;
+BEGIN
+ WITH file_contents AS (
+ SELECT
+ E'digraph database {\n concentrate = true;\n'
+ ' rankdir = LR;\n'
+ ' ratio = ".75";\n'
+ ' node [shape = none, fontsize="11", fontname="Helvetica"];\n'
+ ' edge [fontsize="8", fontname="Helvetica"];'
+ AS lines
+ UNION ALL
+ SELECT
+ E'legend\n[fontsize = "14"\nlabel =\n'
+ '<<table border="0" cellpadding="0">\n'
+ ' <tr><td align="left"><font point-size="16">Legend</font></td></tr>\n'
+ ' <tr><td align="left">pk = primary key</td></tr>\n'
+ ' <tr><td align="left">fk = foreign key</td></tr>\n'
+ ' <tr><td align="left">u = unique*</td></tr>\n'
+ ' <tr><td align="left">o = optional</td></tr>\n'
+ ' <tr><td align="left">'
+ '* multiple uniques in the same table are a unique group</td></tr>\n'
+ '</table>>\n];'
+ UNION ALL
+ SELECT
+ string_agg(
+ public._documentation_dotfile_node_for(relname),
+ E'\n' -- Forcing the 'env' table to the end makes a better image
+ ORDER BY (CASE WHEN relname LIKE 'env%' THEN 'z' ELSE relname END)
+ )
+ FROM pg_catalog.pg_class
+ WHERE relkind='r' AND relnamespace = schemaname::regnamespace
+ UNION ALL
+ SELECT
+ public._documentation_dotfile_edges()
+ UNION ALL
+ SELECT
+ '}'
+ )
+ SELECT
+ string_agg(lines, E'\n') AS dotfile
+ INTO result
+ FROM file_contents;
+ RETURN result;
+END
+$$
+LANGUAGE plpgsql STABLE;
+COMMENT ON FUNCTION public.documentation_dotfile()
+IS E'Create a Graphviz dotfile of the data model: '
+ 'every table in the "public" schema.\n'
+ 'Example: SELECT public.documentation_dotfile();';
diff --git a/src/arrow/dev/benchmarking/ddl/4_00_triggers.sql b/src/arrow/dev/benchmarking/ddl/4_00_triggers.sql
new file mode 100644
index 000000000..5fb0e5018
--- /dev/null
+++ b/src/arrow/dev/benchmarking/ddl/4_00_triggers.sql
@@ -0,0 +1,61 @@
+/*
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+*/
+
+
+-- LANGUAGE_IMPLEMENTATION_VERSION_VIEW_TRIGGER_INSERT
+CREATE TRIGGER language_implementation_version_view_trigger_insert
+ INSTEAD OF INSERT ON public.language_implementation_version_view
+ FOR EACH ROW
+ EXECUTE FUNCTION public.language_implementation_version_view_insert_row();
+
+-- ENVIRONMENT_VIEW_TRIGGER_INSERT
+CREATE TRIGGER environment_view_trigger_insert
+ INSTEAD OF INSERT ON public.environment_view
+ FOR EACH ROW
+ EXECUTE FUNCTION public.environment_view_insert_row();
+
+-- MACHINE_VIEW_TRIGGER_INSERT
+CREATE TRIGGER machine_view_trigger_insert
+ INSTEAD OF INSERT ON public.machine_view
+ FOR EACH ROW
+ EXECUTE FUNCTION public.machine_view_insert_row();
+
+-- UNIT_VIEW_TRIGGER_INSERT
+CREATE TRIGGER unit_view_trigger_insert
+ INSTEAD OF INSERT ON public.unit_view
+ FOR EACH ROW
+ EXECUTE FUNCTION public.unit_view_insert_row();
+
+-- BENCHMARK_VIEW_TRIGGER_INSERT
+CREATE TRIGGER benchmark_view_trigger_insert
+ INSTEAD OF INSERT ON public.benchmark_view
+ FOR EACH ROW
+ EXECUTE FUNCTION public.benchmark_view_insert_row();
+
+-- BENCHMARK_RUN_VIEW_TRIGGER_INSERT
+CREATE TRIGGER benchmark_run_view_trigger_insert
+ INSTEAD OF INSERT ON public.benchmark_run_view
+ FOR EACH ROW
+ EXECUTE FUNCTION public.benchmark_run_view_insert_row();
+
+-- FULL_BENCHMARK_RUN_VIEW_TRIGGER_INSERT
+CREATE TRIGGER full_benchmark_run_view_trigger_insert
+ INSTEAD OF INSERT ON public.full_benchmark_run_view
+ FOR EACH ROW
+ EXECUTE FUNCTION public.full_benchmark_run_view_insert_row();
diff --git a/src/arrow/dev/benchmarking/ddl/5_00_permissions.sql b/src/arrow/dev/benchmarking/ddl/5_00_permissions.sql
new file mode 100644
index 000000000..dd72c40db
--- /dev/null
+++ b/src/arrow/dev/benchmarking/ddl/5_00_permissions.sql
@@ -0,0 +1,73 @@
+/*
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+*/
+---------------------------- ROLES ----------------------------
+-- ARROW_WEB
+CREATE ROLE arrow_web login password 'arrow';
+COMMENT ON ROLE arrow_web IS 'Anonymous login user.';
+
+-- ARROW_ADMIN
+CREATE ROLE arrow_admin;
+COMMENT ON ROLE arrow_admin
+ IS 'Can select, insert, update, and delete on all public tables.';
+
+-- ARROW_ANONYMOUS
+CREATE ROLE arrow_anonymous;
+COMMENT ON ROLE arrow_anonymous
+ IS 'Can insert and select on all public tables.';
+
+GRANT arrow_anonymous TO arrow_web;
+
+
+---------------------------- PRIVILEGES ----------------------------
+GRANT USAGE ON SCHEMA public TO arrow_anonymous, arrow_admin;
+
+-- ARROW_ADMIN
+GRANT EXECUTE ON ALL FUNCTIONS IN SCHEMA public TO arrow_admin;
+GRANT USAGE, SELECT ON ALL SEQUENCES IN SCHEMA public to arrow_admin;
+GRANT SELECT, UPDATE, INSERT, DELETE ON ALL TABLES IN SCHEMA public
+ TO arrow_admin;
+
+-- ARROW_ANONYMOUS
+GRANT EXECUTE ON ALL FUNCTIONS IN SCHEMA public TO arrow_anonymous;
+GRANT SELECT ON ALL TABLES IN SCHEMA public TO arrow_anonymous;
+GRANT USAGE, SELECT ON ALL SEQUENCES IN SCHEMA public to arrow_anonymous;
+GRANT INSERT ON
+ public.benchmark
+ , public.benchmark_language
+ , public.dependencies
+ , public.language_implementation_version
+ , public.benchmark_run
+ , public.benchmark_type
+ , public.cpu
+ , public.environment
+ , public.environment_view
+ , public.gpu
+ , public.machine
+ , public.machine_view
+ , public.os
+ , public.unit
+ --, public.project -- The only disallowed table is `project`.
+ , public.benchmark_run_view
+ , public.benchmark_view
+ , public.environment_view
+ , public.full_benchmark_run_view
+ , public.language_implementation_version_view
+ , public.machine_view
+ , public.unit_view
+TO arrow_anonymous;
diff --git a/src/arrow/dev/benchmarking/docker-compose.yml b/src/arrow/dev/benchmarking/docker-compose.yml
new file mode 100644
index 000000000..ca60206bf
--- /dev/null
+++ b/src/arrow/dev/benchmarking/docker-compose.yml
@@ -0,0 +1,43 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+version: '3'
+services:
+
+ pg:
+ build:
+ context: .
+ dockerfile: Dockerfile
+ restart: always
+ ports:
+ - '5432:5432'
+ environment:
+ - POSTGRES_PASSWORD=${PG_PASS}
+ - POSTGRES_USER=${PG_USER}
+
+ graphile:
+ image: graphile/postgraphile
+ restart: always
+ ports:
+ - 5000:5000
+ depends_on:
+ - pg
+ command:
+ - --connection
+ - postgres://${PG_USER}:${PG_PASS}@pg:5432/${PG_USER}
+ - --schema
+ - public
+ - --watch
diff --git a/src/arrow/dev/benchmarking/examples/benchmark_example.json b/src/arrow/dev/benchmarking/examples/benchmark_example.json
new file mode 100644
index 000000000..d6f58c286
--- /dev/null
+++ b/src/arrow/dev/benchmarking/examples/benchmark_example.json
@@ -0,0 +1,32 @@
+[
+ {
+ "benchmark_name": "Benchmark 1",
+ "parameter_names": ["arg0", "arg1", "arg2"],
+ "benchmark_description": "First benchmark",
+ "benchmark_type": "Time",
+ "units": "miliseconds",
+ "lessisbetter": true,
+ "benchmark_version": "second version",
+ "benchmark_language": "Python"
+ },
+ {
+ "benchmark_name": "Benchmark 2",
+ "parameter_names": ["arg0", "arg1"],
+ "benchmark_description": "Description 2.",
+ "benchmark_type": "Time",
+ "units": "nanoseconds",
+ "lessisbetter": true,
+ "benchmark_version": "second version",
+ "benchmark_language": "Python"
+ },
+ {
+ "benchmark_name": "Benchmark 3",
+ "parameter_names": ["arg0"],
+ "benchmark_description": "Third benchmark",
+ "benchmark_type": "Memory",
+ "units": "kilobytes",
+ "lessisbetter": true,
+ "benchmark_version": "1",
+ "benchmark_language": "Python"
+ }
+]
diff --git a/src/arrow/dev/benchmarking/examples/benchmark_run_example.csv b/src/arrow/dev/benchmarking/examples/benchmark_run_example.csv
new file mode 100644
index 000000000..eab208a1c
--- /dev/null
+++ b/src/arrow/dev/benchmarking/examples/benchmark_run_example.csv
@@ -0,0 +1,6 @@
+benchmark_run_id,benchmark_name,benchmark_version,parameter_values,value,git_commit_timestamp,git_hash,val_min,val_q1,val_q3,val_max,std_dev,n_obs,run_timestamp,run_metadata,run_notes,mac_address,benchmark_language,language_implementation_version,dependencies
+,Benchmark 2,version 0,"{""arg0"": 100, ""arg1"": 5}",2.5,2019-01-31 14:31:10 -0600,8136c46d5c60fb,1,2,3,4,1.41,8,2019-02-14 02:00:00 -0600,,,08:00:2b:01:02:03,Python,CPython 2.7,"{""six"": """", ""numpy"": ""1.14"", ""other_lib"": ""1.0""}"
+,Benchmark 2,version 0,"{""arg0"": 1000, ""arg1"": 5}",5,2019-01-31 14:31:10 -0600,8136c46d5c60fb,2,4,6,8,3.14,8,2019-02-14 02:01:00 -0600,,,08:00:2b:01:02:03,Python,CPython 2.7,"{""six"": """", ""numpy"": ""1.14"", ""other_lib"": ""1.0""}"
+,Benchmark 2,version 0,"{""arg0"": 100, ""arg1"": 5}",2.5,2019-01-31 14:31:10 -0600,8136c46d5c60fb,0.5,1,3,5,3,8,2019-02-14 02:02:00 -0600,,,08:00:2b:01:02:03,Python,CPython 3.6,"{""boost"": ""1.42"", ""numpy"": ""1.15""}"
+,Benchmark 2,version 0,"{""arg0"": 1000, ""arg1"": 5}",3,2019-01-31 14:31:10 -0600,8136c46d5c60fb,2,2.5,4,4.5,1.5,8,2019-02-14 02:03:00 -0600,,,08:00:2b:01:02:03,Python,CPython 3.6,"{""boost"": ""1.42"", ""numpy"": ""1.15""}"
+,Benchmark 2,version 0,"{""arg0"": 1000, ""arg1"": 10}",3,2019-01-31 14:31:10 -0600,8136c46d5c60fb,1,2,4,5,2,8,2019-02-14 02:03:30 -0600,,,08:00:2b:01:02:03,Python,CPython 2.7,"{""six"": """", ""numpy"": ""1.15"", ""other_lib"": ""1.0""}"
diff --git a/src/arrow/dev/benchmarking/examples/benchmark_run_example.json b/src/arrow/dev/benchmarking/examples/benchmark_run_example.json
new file mode 100644
index 000000000..2ded776c9
--- /dev/null
+++ b/src/arrow/dev/benchmarking/examples/benchmark_run_example.json
@@ -0,0 +1,97 @@
+[
+ {
+ "benchmark_name": "Benchmark 2",
+ "benchmark_version": "version 0",
+ "parameter_values": {"arg0": 100, "arg1": 5},
+ "value": 2.5,
+ "git_commit_timestamp": "2019-02-08 22:35:53 +0100",
+ "git_hash": "324d3cf198444a",
+ "val_min": 1,
+ "val_q1": 2,
+ "val_q3": 3,
+ "val_max": 4,
+ "std_dev": 1.41,
+ "n_obs": 8,
+ "run_timestamp": "2019-02-14 03:00:05 -0600",
+ "mac_address": "08:00:2b:01:02:03",
+ "benchmark_language": "Python",
+ "language_implementation_version": "CPython 2.7",
+ "dependencies": {"six": "", "numpy": "1.14", "other_lib": "1.0"}
+ },
+ {
+ "benchmark_name": "Benchmark 2",
+ "benchmark_version": "version 0",
+ "parameter_values": {"arg0": 1000, "arg1": 5},
+ "value": 5,
+ "git_commit_timestamp": "2019-02-08 22:35:53 +0100",
+ "git_hash": "324d3cf198444a",
+ "val_min": 2,
+ "val_q1": 4,
+ "val_q3": 6,
+ "val_max": 8,
+ "std_dev": 3.14,
+ "n_obs": 8,
+ "run_timestamp": "2019-02-14 03:00:10 -0600",
+ "mac_address": "08:00:2b:01:02:03",
+ "benchmark_language": "Python",
+ "language_implementation_version": "CPython 2.7",
+ "dependencies": {"six": "", "numpy": "1.14", "other_lib": "1.0"}
+ },
+ {
+ "benchmark_name": "Benchmark 2",
+ "benchmark_version": "version 0",
+ "parameter_values": {"arg0": 100, "arg1": 5},
+ "value": 2.5,
+ "git_commit_timestamp": "2019-02-08 22:35:53 +0100",
+ "git_hash": "324d3cf198444a",
+ "val_min": 0.5,
+ "val_q1": 1,
+ "val_q3": 3,
+ "val_max": 5,
+ "std_dev": 3,
+ "n_obs": 8,
+ "run_timestamp": "2019-02-14 03:00:20 -0600",
+ "mac_address": "08:00:2b:01:02:03",
+ "benchmark_language": "Python",
+ "language_implementation_version": "CPython 2.7",
+ "dependencies": {"boost": "1.42", "numpy": "1.15"}
+ },
+ {
+ "benchmark_name": "Benchmark 2",
+ "benchmark_version": "version 0",
+ "parameter_values": {"arg0": 1000, "arg1": 5},
+ "value": 3,
+ "git_commit_timestamp": "2019-02-08 22:35:53 +0100",
+ "git_hash": "324d3cf198444a",
+ "val_min": 2,
+ "val_q1": 2.5,
+ "val_q3": 4,
+ "val_max": 4.5,
+ "std_dev": 1.5,
+ "n_obs": 8,
+ "run_timestamp": "2019-02-14 03:00:30 -0600",
+ "mac_address": "08:00:2b:01:02:03",
+ "benchmark_language": "Python",
+ "language_implementation_version": "CPython 2.7",
+ "dependencies": {"boost": "1.42", "numpy": "1.15"}
+ },
+ {
+ "benchmark_name": "Benchmark 2",
+ "benchmark_version": "version 0",
+ "parameter_values": {"arg0": 1000, "arg1": 10},
+ "value": 3,
+ "git_commit_timestamp": "2019-02-08 22:35:53 +0100",
+ "git_hash": "324d3cf198444a",
+ "val_min": 1,
+ "val_q1": 2,
+ "val_q3": 4,
+ "val_max": 5,
+ "std_dev": 2,
+ "n_obs": 8,
+ "run_timestamp": "2019-02-14 03:00:40 -0600",
+ "mac_address": "08:00:2b:01:02:03",
+ "benchmark_language": "Python",
+ "language_implementation_version": "CPython 2.7",
+ "dependencies": {"six": "", "numpy": "1.15", "other_lib": "1.0"}
+ }
+]
diff --git a/src/arrow/dev/benchmarking/examples/benchmark_with_context_example.json b/src/arrow/dev/benchmarking/examples/benchmark_with_context_example.json
new file mode 100644
index 000000000..f9e6e3130
--- /dev/null
+++ b/src/arrow/dev/benchmarking/examples/benchmark_with_context_example.json
@@ -0,0 +1,73 @@
+{
+ "context": {
+ "mac_address": "08:00:2b:01:02:03",
+ "benchmark_language": "Python",
+ "language_implementation_version": "CPython 2.7",
+ "dependencies": {"six": "", "numpy": "1.14", "other_lib": "1.0"},
+ "git_commit_timestamp": "2019-02-14 22:42:22 +0100",
+ "git_hash": "123456789abcde",
+ "run_timestamp": "2019-02-25 03:00:40 -0600",
+ "Extra stuff": "does not hurt anything and won't be added.",
+ "However": "all of the entries above 'Extra stuff' are required."
+ },
+ "benchmark_version": {
+ "Benchmark 2": "version 0",
+ "Benchmark 3": "any string is a version. (Benchmark 3 not actually used)"
+ },
+ "benchmarks": [
+ {
+ "benchmark_name": "Benchmark 2",
+ "parameter_values": {"arg0": 1, "arg1": 5},
+ "value": 2.5,
+ "val_min": 1,
+ "val_q1": 2,
+ "val_q3": 3,
+ "val_max": 4,
+ "std_dev": 1.41,
+ "n_obs": 8,
+ "run_metadata": {"any": "json object is admissible"},
+ "run_notes": "This value is an arbitrary-length string."
+ },
+ {
+ "benchmark_name": "Benchmark 2",
+ "parameter_values": {"arg0": 2, "arg1": 5},
+ "value": 5,
+ "std_dev": 3.14,
+ "n_obs": 8
+ },
+ {
+ "benchmark_name": "Benchmark 2",
+ "parameter_values": {"arg0": 3, "arg1": 5},
+ "value": 2.5,
+ "val_min": 0.5,
+ "val_q1": 1,
+ "val_q3": 3,
+ "val_max": 5,
+ "std_dev": 3,
+ "n_obs": 8,
+ "run_notes": "The previous run in this list has the minimal set of keys."
+ },
+ {
+ "benchmark_name": "Benchmark 2",
+ "parameter_values": {"arg0": 4, "arg1": 5},
+ "value": 3,
+ "val_min": 2,
+ "val_q1": 2.5,
+ "val_q3": 4,
+ "val_max": 4.5,
+ "std_dev": 1.5,
+ "n_obs": 8
+ },
+ {
+ "benchmark_name": "Benchmark 2",
+ "parameter_values": {"arg0": 5, "arg1": 5},
+ "value": 3,
+ "val_min": 1,
+ "val_q1": 2,
+ "val_q3": 4,
+ "val_max": 5,
+ "std_dev": 2,
+ "n_obs": 8
+ }
+ ]
+}
diff --git a/src/arrow/dev/benchmarking/examples/example.sql b/src/arrow/dev/benchmarking/examples/example.sql
new file mode 100644
index 000000000..e93269af7
--- /dev/null
+++ b/src/arrow/dev/benchmarking/examples/example.sql
@@ -0,0 +1,232 @@
+/*
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+*/
+
+
+-- Example insert into each of the views:
+INSERT INTO public.project(project_name, project_url, repo_url)
+VALUES (
+ 'Apache Arrow'
+ , 'https://arrow.apache.org/'
+ , 'https://github.com/apache/arrow');
+
+INSERT INTO public.environment_view
+ (benchmark_language, language_implementation_version, dependencies)
+VALUES
+ ('Python', 'CPython 2.7', '{"six": "", "numpy": "1.14", "other_lib": "1.0"}'),
+ ('Python', 'CPython 3.6', '{"boost": "1.42", "numpy": "1.15"}');
+
+INSERT INTO public.dependencies(dependencies)
+VALUES
+ ('{"boost": "1.68", "numpy": "1.14"}'),
+ ('{"boost": "1.42", "numpy": "1.16"}');
+
+INSERT INTO public.language_implementation_version_view
+ (benchmark_language, language_implementation_version)
+VALUES
+ ('Python', 'CPython 2.7'),
+ ('Python', 'CPython 3.6');
+
+INSERT INTO public.unit_view
+ (benchmark_type, units, lessisbetter)
+VALUES
+ ('Memory', 'gigabytes', True),
+ ('Memory', 'kilobytes', True);
+
+
+\echo 'use \\dv to list the views views';
+\dv
+
+
+SELECT * FROM environment_view;
+SELECT * FROM unit_view;
+
+
+INSERT INTO public.machine_view (
+ mac_address
+ , machine_name
+ , memory_bytes
+ , cpu_actual_frequency_hz
+ , os_name
+ , architecture_name
+ , kernel_name
+ , cpu_model_name
+ , cpu_core_count
+ , cpu_thread_count
+ , cpu_frequency_max_hz
+ , cpu_frequency_min_hz
+ , cpu_l1d_cache_bytes
+ , cpu_l1i_cache_bytes
+ , cpu_l2_cache_bytes
+ , cpu_l3_cache_bytes
+ , machine_other_attributes
+) VALUES (
+ '08:00:2b:01:02:03' -- mac_address
+ , 'My-Machine-Name' -- machine_name
+ , 8589934592 -- memory_bytes
+ -- All (?) standard mac address formats are allowable:
+ -- https://www.postgresql.org/docs/11/datatype-net-types.html
+ , 2300000000 -- cpu_actual_frequency_Hz
+ , 'OSX' -- os_name
+ , 'x86_64' -- architecture_name
+ , '18.2.0' -- kernel
+ , 'Intel(R) Core(TM) i5-7360U CPU @ 2.30GHz' -- cpu_model_name
+ , 2 -- cpu_core_count
+ , 4 -- cpu_thread_count
+ , 2300000000 -- cpu_frequency_max_Hz
+ , 2300000000 -- cpu_frequency_min_Hz
+ , 32768 -- cpu_l1d_cache_bytes
+ , 32768 -- cpu_l1i_cache_bytes
+ , 262144 -- cpu_l2_cache_bytes
+ , 4194304 -- cpu_l3_cache_bytes
+ , '{"example": "for machine_other_attributes"}'::jsonb
+);
+
+
+INSERT INTO public.full_benchmark_run_view (
+ benchmark_name
+ , parameter_names
+ , benchmark_description
+ , benchmark_type
+ , units
+ , lessisbetter
+ , benchmark_version
+ -- datum
+ , parameter_values
+ , value
+ , git_commit_timestamp
+ , git_hash
+ , val_min
+ , val_q1
+ , val_q3
+ , val_max
+ , std_dev
+ , n_obs
+ , run_timestamp
+ , run_metadata
+ , run_notes
+ -- machine_view
+ , machine_name
+ , mac_address
+ , memory_bytes
+ , cpu_actual_frequency_hz
+ , os_name
+ , architecture_name
+ , kernel_name
+ , cpu_model_name
+ , cpu_core_count
+ , cpu_thread_count
+ , cpu_frequency_max_hz
+ , cpu_frequency_min_hz
+ , cpu_l1d_cache_bytes
+ , cpu_l1i_cache_bytes
+ , cpu_l2_cache_bytes
+ , cpu_l3_cache_bytes
+ , machine_other_attributes
+ -- environment_view
+ , benchmark_language
+ , language_implementation_version
+ , dependencies
+) VALUES (
+ 'Benchmark 3'
+ , '{"arg0"}'::text[]
+ , 'Third benchmark'
+ , 'Memory'
+ , 'kilobytes'
+ , TRUE
+ , '0'
+ -- datum
+ , '{"arg0": 10}'::jsonb
+ , 0.5
+ , '2019-01-31 14:31:10 -0600'
+ , '8136c46d5c60fb'
+ , 0.5
+ , 0.5
+ , 0.5
+ , 0.5
+ , 0
+ , 2
+ , '2019-02-14 14:00:00 -0600'
+ , '{"ci_99": [2.7e-06, 3.1e-06]}'::jsonb
+ , 'Additional run_notes.'
+ -- machine_view
+ , 'My-Machine-Name'
+ , '09-00-2c-01-02-03'
+ , 8589934592
+ , 2300000000
+ , 'OSX'
+ , 'x86_64'
+ , '18.2.0'
+ , 'Intel(R) Core(TM) i5-7360U CPU @ 2.30GHz'
+ , 2
+ , 4
+ , 2300000000
+ , 2300000000
+ , 32768
+ , 32768
+ , 262144
+ , 4194304
+ , '{"example": "for machine_other_attributes"}'::jsonb
+ -- environment_view
+ , 'Python'
+ , 'CPython 2.7'
+ , '{"six": "", "numpy": "1.15", "other_lib": "1.0"}'::jsonb
+);
+
+
+-- Bulk load from CSV. First column is empty; serial "benchmark_run_id" will be assigned.
+--\copy benchmark_run_view FROM 'examples/benchmark_run_example.csv' WITH (FORMAT csv, HEADER);
+
+-- Load from JSON
+--\set content `cat examples/benchmark_example.json`
+--SELECT ingest_benchmark_view(:'content'::jsonb);
+
+INSERT INTO public.benchmark_view (
+ benchmark_name
+ , parameter_names
+ , benchmark_description
+ , benchmark_type
+ , units
+ , lessisbetter
+ , benchmark_version
+ , benchmark_language
+ ) VALUES (
+ 'Benchmark 1'
+ , '{"arg0", "arg1", "arg2"}'::text[]
+ , E'Description.\nNewlines are OK in a string escaped with leading "E".'
+ , 'Time'
+ , 'miliseconds'
+ , TRUE
+ , 'Hash of code or other way to identify distinct benchmark versions.'
+ , 'Python'
+ ), (
+ 'Benchmark 2'
+ , '{"arg0", "arg1"}'::text[]
+ , 'Description 2.'
+ , 'Time'
+ , 'nanoseconds'
+ , TRUE
+ , 'version 0'
+ , 'Python'
+ );
+
+
+\x
+SELECT * from benchmark_run_view;
+
+\x
diff --git a/src/arrow/dev/benchmarking/examples/example_graphql_mutation.json b/src/arrow/dev/benchmarking/examples/example_graphql_mutation.json
new file mode 100644
index 000000000..fec5eed0a
--- /dev/null
+++ b/src/arrow/dev/benchmarking/examples/example_graphql_mutation.json
@@ -0,0 +1,12 @@
+{
+ "query": "mutation ($p: CreateProjectInput!){createProject(input:$p){project{id}}}",
+ "variables": {
+ "p": {
+ "project": {
+ "projectName": "Apache Arrow",
+ "projectUrl": "https://www.arrow.apache.org",
+ "repoUrl": "https://www.github.com/apache/arrow"
+ }
+ }
+ }
+}
diff --git a/src/arrow/dev/benchmarking/examples/graphql_query_environment_view.json b/src/arrow/dev/benchmarking/examples/graphql_query_environment_view.json
new file mode 100644
index 000000000..78804fa91
--- /dev/null
+++ b/src/arrow/dev/benchmarking/examples/graphql_query_environment_view.json
@@ -0,0 +1,3 @@
+{
+ "query": "{allEnvironmentViews(orderBy: [BENCHMARK_LANGUAGE_ASC, LANGUAGE_IMPLEMENTATION_VERSION_ASC, DEPENDENCIES_ASC]) {edges {node {environmentId, benchmarkLanguage, languageImplementationVersion, dependencies}}}}"
+}
diff --git a/src/arrow/dev/benchmarking/examples/machine.json b/src/arrow/dev/benchmarking/examples/machine.json
new file mode 100644
index 000000000..2485e2bc1
--- /dev/null
+++ b/src/arrow/dev/benchmarking/examples/machine.json
@@ -0,0 +1,22 @@
+{
+ "mac_address": "0a:00:2d:01:02:03",
+ "machine_name": "Yet-Another-Machine-Name",
+ "memory_bytes": 8589934592,
+ "cpu_actual_frequency_hz": 2300000000,
+ "os_name": "OSX",
+ "architecture_name": "x86_64",
+ "kernel_name": "18.2.0",
+ "cpu_model_name": "Intel(R) Core(TM) i5-7360U CPU @ 2.30GHz",
+ "cpu_core_count": 2,
+ "cpu_thread_count": 4,
+ "cpu_frequency_max_hz": 2300000000,
+ "cpu_frequency_min_hz": 2300000000,
+ "cpu_l1d_cache_bytes": 32768,
+ "cpu_l1i_cache_bytes": 32768,
+ "cpu_l2_cache_bytes": 262144,
+ "cpu_l3_cache_bytes": 4194304,
+ "machine_other_attributes": {"just": "an example"},
+ "gpu_information": "",
+ "gpu_part_number": "",
+ "gpu_product_name": ""
+}
diff --git a/src/arrow/dev/benchmarking/graphql_submit.sh b/src/arrow/dev/benchmarking/graphql_submit.sh
new file mode 100755
index 000000000..2eaab9cdf
--- /dev/null
+++ b/src/arrow/dev/benchmarking/graphql_submit.sh
@@ -0,0 +1,75 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+OPTIONS=("machine" "benchmarks" "runs")
+
+option=${1-help}
+datafile=${2-machine.json}
+uri=${3-localhost:5000/graphql}
+
+help() {
+ cat <<HELP
+ Submit data via GraphQL
+
+ Usage:
+ ${0} [option] [JSON_file] [URI]
+
+ Arguments:
+ option - $(echo ${OPTIONS[@]} | sed 's/ /|/g')
+ JSON_file - path to the submission file (default 'machine.json')
+ URI - URI to submit to (default 'localhost:5000/graphql')
+HELP
+}
+
+escape_quote() { sed 's/"/\\"/g'; }
+
+template() {
+ cat <<TEMPLATE
+ {
+ "query": "mutation (\$jsonb: JSON!){${1}(input:{fromJsonb:\$jsonb}){${2}}}",
+ "variables": {
+ "jsonb": "$(echo $(cat ${datafile}) | escape_quote )"
+ }
+ }
+TEMPLATE
+}
+
+submit () {
+ curl -X POST -H "Content-Type: application/json" --data @<(template $1 $2) ${uri}
+}
+
+
+case "$1" in
+ machine)
+ submit ingestMachineView integer;;
+
+ benchmarks)
+ submit ingestBenchmarkView integers;;
+
+ runs)
+ if grep -q context <(head -n2 ${2})
+ then
+ submit ingestBenchmarkRunsWithContext bigInts
+ else
+ submit ingestBenchmarkRunView bigInts
+ fi;;
+
+ *)
+ help
+ exit 1
+esac
diff --git a/src/arrow/dev/benchmarking/make_data_model_rst.sh b/src/arrow/dev/benchmarking/make_data_model_rst.sh
new file mode 100755
index 000000000..6a4f5f5b6
--- /dev/null
+++ b/src/arrow/dev/benchmarking/make_data_model_rst.sh
@@ -0,0 +1,69 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+set -e
+DOTFILE=data_model.dot
+OUTFILE=data_model.rst
+
+license() {
+ cat <<'LICENSE' > ${1}
+.. Licensed to the Apache Software Foundation (ASF) under one
+.. or more contributor license agreements. See the NOTICE file
+.. distributed with this work for additional information
+.. regarding copyright ownership. The ASF licenses this file
+.. to you under the Apache License, Version 2.0 (the
+.. "License"); you may not use this file except in compliance
+.. with the License. You may obtain a copy of the License at
+
+.. http://www.apache.org/licenses/LICENSE-2.0
+
+.. Unless required by applicable law or agreed to in writing,
+.. software distributed under the License is distributed on an
+.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+.. KIND, either express or implied. See the License for the
+.. specific language governing permissions and limitations
+.. under the License.
+
+
+LICENSE
+}
+
+warning() {
+ cat <<'WARNING' >> ${1}
+.. WARNING
+.. This is an auto-generated file. Please do not edit.
+
+.. To reproduce, please run :code:`./make_data_model_rst.sh`.
+.. (This requires you have the
+.. `psql client <https://www.postgresql.org/download/>`_
+.. and have started the docker containers using
+.. :code:`docker-compose up`).
+
+WARNING
+}
+
+echo "Making ${OUTFILE}"
+
+license ${OUTFILE}
+warning ${OUTFILE}
+
+PGPASSWORD=arrow \
+ psql --tuples-only --username=arrow_web \
+ --dbname=benchmark --port=5432 --host=localhost \
+ --command="select public.documentation('${DOTFILE}');" \
+ | sed "s/ *+$//" | sed "s/^ //" >> ${OUTFILE}
diff --git a/src/arrow/dev/benchmarking/make_dotfile.sh b/src/arrow/dev/benchmarking/make_dotfile.sh
new file mode 100755
index 000000000..b86dc3eb3
--- /dev/null
+++ b/src/arrow/dev/benchmarking/make_dotfile.sh
@@ -0,0 +1,70 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+set -e
+OUTFILE=data_model.dot
+
+license() {
+ cat <<'LICENSE' > ${1}
+/*
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements.See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership.The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License.You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied.See the License for the
+ specific language governing permissions and limitations
+ under the License.
+*/
+
+LICENSE
+}
+
+warning() {
+ cat <<'WARNING' >> ${1}
+/*
+ WARNING
+ This is an auto-generated file. Please do not edit.
+
+ To reproduce, please run :code:`./make_data_model_rst.sh`.
+ (This requires you have the
+ `psql client <https://www.postgresql.org/download/>`_
+ and have started the docker containers using
+ :code:`docker-compose up`).
+*/
+WARNING
+}
+
+echo "Making ${OUTFILE}"
+
+license ${OUTFILE}
+warning ${OUTFILE}
+
+PGPASSWORD=arrow \
+ psql --tuples-only --username=arrow_web \
+ --dbname=benchmark --port=5432 --host=localhost \
+ --command="select public.documentation_dotfile();" \
+ | sed "s/ *+$//" | sed "s/^ //" >> ${OUTFILE}
diff --git a/src/arrow/dev/benchmarking/make_machine_json.sh b/src/arrow/dev/benchmarking/make_machine_json.sh
new file mode 100755
index 000000000..09bf0ea2d
--- /dev/null
+++ b/src/arrow/dev/benchmarking/make_machine_json.sh
@@ -0,0 +1,55 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+set -e
+OUTFILE=machine.json
+
+echo "Making ${OUTFILE}"
+echo "** NOTE: This command fails on everything but OSX right now. **"
+echo "* also, the intent is to make this script not suck, just not now. *"
+echo "Please type GPU details here (or manually modify ${OUTFILE} later)."
+read -p "GPU information string (or <enter>): " gpu_information
+read -p "GPU part number (or <enter>): " gpu_part_number
+read -p "GPU product name (or <enter>): " gpu_product_name
+
+
+cat <<MACHINE_JSON > ${OUTFILE}
+{
+ "mac_address": "$(ifconfig en1 | awk '/ether/{print $2}')",
+ "machine_name": "$(uname -n)",
+ "memory_bytes": $(sysctl -n hw.memsize),
+ "cpu_actual_frequency_hz": $(sysctl -n hw.cpufrequency),
+ "os_name": "$(uname -s)",
+ "architecture_name": "$(uname -m)",
+ "kernel_name": "$(uname -r)",
+ "cpu_model_name": "$(sysctl -n machdep.cpu.brand_string)",
+ "cpu_core_count": $(sysctl -n hw.physicalcpu),
+ "cpu_thread_count": $(sysctl -n hw.logicalcpu),
+ "cpu_frequency_max_hz": $(sysctl -n hw.cpufrequency_max),
+ "cpu_frequency_min_hz": $(sysctl -n hw.cpufrequency_min),
+ "cpu_l1d_cache_bytes": $(sysctl -n hw.l1dcachesize),
+ "cpu_l1i_cache_bytes": $(sysctl -n hw.l1icachesize),
+ "cpu_l2_cache_bytes": $(sysctl -n hw.l2cachesize),
+ "cpu_l3_cache_bytes": $(sysctl -n hw.l3cachesize),
+ "gpu_information": "${gpu_information}",
+ "gpu_part_number": "${gpu_part_number}",
+ "gpu_product_name": "${gpu_product_name}"
+}
+MACHINE_JSON
+
+echo "Machine details saved in ${OUTFILE}"