diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000 |
commit | e6918187568dbd01842d8d1d2c808ce16a894239 (patch) | |
tree | 64f88b554b444a49f656b6c656111a145cbbaa28 /src/arrow/dev/benchmarking | |
parent | Initial commit. (diff) | |
download | ceph-upstream/18.2.2.tar.xz ceph-upstream/18.2.2.zip |
Adding upstream version 18.2.2.upstream/18.2.2
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
40 files changed, 4776 insertions, 0 deletions
diff --git a/src/arrow/dev/benchmarking/.env b/src/arrow/dev/benchmarking/.env new file mode 100644 index 000000000..7485f5866 --- /dev/null +++ b/src/arrow/dev/benchmarking/.env @@ -0,0 +1,18 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +PG_USER=benchmark +PG_PASS=benchmark diff --git a/src/arrow/dev/benchmarking/.gitignore b/src/arrow/dev/benchmarking/.gitignore new file mode 100644 index 000000000..cda00d658 --- /dev/null +++ b/src/arrow/dev/benchmarking/.gitignore @@ -0,0 +1 @@ +/machine.json diff --git a/src/arrow/dev/benchmarking/Dockerfile b/src/arrow/dev/benchmarking/Dockerfile new file mode 100644 index 000000000..f47033397 --- /dev/null +++ b/src/arrow/dev/benchmarking/Dockerfile @@ -0,0 +1,23 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +FROM postgres:11-alpine + +# Any `.sh` and `.sql` files copied to the entrypoint directory +# will be run during startup. See `docker-entrypoint.sh` in +# https://github.com/docker-library/postgres/blob/master/11/alpine/ +COPY ddl/* /docker-entrypoint-initdb.d/ diff --git a/src/arrow/dev/benchmarking/README.md b/src/arrow/dev/benchmarking/README.md new file mode 100644 index 000000000..c5ddd62e0 --- /dev/null +++ b/src/arrow/dev/benchmarking/README.md @@ -0,0 +1,255 @@ +<!-- + ~ Licensed to the Apache Software Foundation (ASF) under one + ~ or more contributor license agreements. See the NOTICE file + ~ distributed with this work for additional information + ~ regarding copyright ownership. The ASF licenses this file + ~ to you under the Apache License, Version 2.0 (the + ~ "License"); you may not use this file except in compliance + ~ with the License. You may obtain a copy of the License at + ~ + ~ http://www.apache.org/licenses/LICENSE-2.0 + ~ + ~ Unless required by applicable law or agreed to in writing, + ~ software distributed under the License is distributed on an + ~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + ~ KIND, either express or implied. See the License for the + ~ specific language governing permissions and limitations + ~ under the License. + --> + + +> NOTE: For those deploying this database, Postgres does not by default use +> UTF-8, however it is [required for the jsonb][pg-jsonb] format used in +> some columns to always work. This [stackoverflow post][so-utf8] describes +> how to do it for Amazon RDS. This [section of the docs][pg-charset] +> states how to do it in general, i.e.: `initdb -E UTF8`. + +# Benchmark database + +This directory contains files related to the benchmark database. + +- 'ddl/\*.sql' contains the database definition. +- 'examples/' contain code to test the database and demonstrate its use. +- 'Dockerfile' and 'docker-compose.yml' are for developing benchmarks + against a testing database. +- An auto-generated summary of views in the [Data model][./data_model.rst]. + +## Setup + +To create a 'machine.json' file that will uniquely identify a computer for +benchmark submission, run the provided shell script and fill in the prompts +to identify the GPU. + +> NOTE: this does not work on VMs or Windows. + +```shell +./make_machine_json.sh +``` + +Submit the machine details via http using the command + +> NOTE: This will only work if we have selected graphql as a client +> and have it running in production or if during development +> you have run `docker-compose up` to create and run both a +> database Docker container and graphql client Docker container. + +```shell +./graphql_submit.sh machine machine.json localhost:5000/graphql +``` + +or submit after starting up the psql client from this directory, using + +``` +\set content `cat machine.json` +SELECT ingest_machine_view(:'content'::jsonb); +``` + +> NOTE: If you don't have a "machine.json" file generated, +> use the example file "examples/machine.json" instead. + +## Local testing + +There is a file named "[.env][.env]" in this directory that is used by +`docker-compose` to set up the postgres user and password for the +local containers. Currently the name and password are both +`benchmark`. This will be the password for the psql client as well. + +The Postgres Alpine image runs any added '\*.sql' and '\*.sh' scripts placed +in '/docker-entrypoint-initdb.d/' during its startup script, so the local +database will be set up automatically once the container is running. + +To start the containers, be sure to have [Docker installed][docker], +and then run the following from this directory (arrow/dev/benchmarking). + + +``` +docker-compose up +``` + +This will start a process that will show logs from both the running +Postgres container and the running GraphQL container. +To stop the running containers gracefully, background the process +and run + +``` +docker-compose down +fg # To re-foreground the backgrounded process while it exits +``` + +You will still have the container images "benchmarking_pg", +"graphile/postgraphile", and "postgres:11-alpine" on your +computer. You should keep them if you want to run this again. +If you don't, then remove them with the command: + +``` +docker rmi benchmarking_pg postgres:11-alpine graphile/postgraphile +``` + +### Postgres client + +The `psql` shell client is bundled with the PostgreSQL core distribution +available from the [Postgres download page][postgres-downloads]. +Using the `PG_USER` defined in the `.env` file (currently "benchmark"), +the command to connect to the container is: +```shell +psql -h localhost -p 5432 -U benchmark +``` +There is an example script in [examples/example.sql](examples/example.sql) that +runs some queries against the database. To run it in the psql client, type +the following in the psql command-line interface: + +``` +\i examples/example.sql +``` + +#### Bulk ingestion using CSV + +An example CSV file for bulk ingestion is in +[examples/benchmark_run_example.csv](examples/benchmark_run_example.csv). +The columns are listed in the same order as they are defined, to avoid having +to explicitly name every column in ingestion. The "id" column is left empty +and will be automatically assigned on insert. + +To ingest the example CSV file from the command line, +use the command below: + +```shell +CSV='examples/benchmark_run_example.csv' && \ +psql -U benchmark -h localhost -p 5432 \ + -c "\copy benchmark_run_view FROM '${CSV}' WITH (FORMAT csv, HEADER);" +``` + +#### Bulk ingestion using JSON + +To ingest the example JSON file using the psql client, use the command below. + +``` +\set content `cat examples/benchmark_example.json` +SELECT ingest_benchmark_view(:'content'::jsonb); +``` + +### HTTP client + +This section requires an actual HTTP client to be up, either +for the production database or via the testing setup. +(See the [local testing section](#local-testing) for how to set it up). + +The 'graphile/postgraphile' container provides an HTTP interface +to the database via two url routes: + +- A GraphiQL page ([localhost:5000/graphiql][graphiql]) + to aid visual exploration of the data model. + (The `--watch` flag on the command line. Not recommended for production.) +- An endpoint that receives POST requests only (localhost:5000/graphql). + +#### Ingestion + +The script [graphql_submit.sh](./graphql_submit.sh) simplifies submission +to the database via curl. Examples: + +```shell +./graphql_submit.sh benchmarks examples/benchmark_example.json +./graphql_submit.sh runs examples/benchmark_run_example.json +``` + +#### Querying + +The output of the query is a JSON object that is hard to read on the command line. +Here is an example query in the shell: +```shell +curl -X POST \ + -H "Content-Type: application/json" \ + --data '{"query": "{projectDetails{ projectName }}"}' \ + localhost:5000/graphql +``` + +which (if you have previously run the "examples.sql" command) yields + +``` +{"data":{"projectDetails":{"projectName":"Apache Arrow"}}} +``` + +Here is an example query using Python: +```python +import json +import requests + +uri = "http://localhost:5000/graphql" +query = json.load(open("examples/graphql_query_environment_view.json")) +response = requests.post(uri, json=query) +message = "{benchmarkLanguage}: {languageImplementationVersion}, {dependencies}" + +for row in response.json()['data']['allEnvironmentViews']['edges']: + print(message.format(**row['node'])) + +# result: +# +# Python: CPython 2.7, {"six":"","numpy":"1.14","other_lib":"1.0"} +# Python: CPython 2.7, {"six":"","numpy":"1.15","other_lib":"1.0"} +# Python: CPython 3.6, {"boost":"1.42","numpy":"1.15"} +``` + +## Deployment + +(work in progress). + +> NOTE: For those deploying this database, Postgres does not by default use +> UTF-8, however it is [required for the jsonb][pg-jsonb] format used in +> some columns to always work. This [stackoverflow post][so-utf8] describes +> how to do it for Amazon RDS. This [section of the docs][pg-charset] +> states how to do it in general, i.e.: `initdb -E UTF8`. + + +## Quick reference + +- String variables `'have single quotes'` +- Arrays `'{"have", "curly", "braces"}'::text[]` or `'{1, 2, 3}'::integer[]` +- JSONb `'{"has":"this", "format":42}'::jsonb` +- Elements inserted using JSON-formatted strings can use standard + JSON-formatted arrays (`[1, 2, 3]`) and do not have to use the above + string formats. +- When comparing nullable values use `x IS NOT DISTINCT FROM y` rather than `x = y` +- An auto-generated summary of the [Data model][./data_model.rst]. + +## Data model documentation + +To recreate the data model documentation, +(1) install the [psql client][postgres-downloads] +(sorry you need to download the whole thing), +(2) start the docker container using `docker-compose up`, +(3) and then run these scripts: + +``` +./make_dotfile.sh +./make_data_model_rst.sh +``` + +[pg-jsonb]: https://www.postgresql.org/docs/11/datatype-json.html#id-1.5.7.22.3 +[so-utf8]: https://stackoverflow.com/a/33557023 +[pg-charset]: https://www.postgresql.org/docs/9.3/multibyte.html#AEN34424 +[docker]: https://www.docker.com/get-started +[citext-limitations]: https://www.postgresql.org/docs/11/citext.html#id-1.11.7.17.7 +[postgres-downloads]: https://www.postgresql.org/download/ +[graphiql]: http://localhost:5000/graphiql +[postgraphile-lambda]: https://github.com/graphile/postgraphile-lambda-example +[postgraphile-cli]: https://www.graphile.org/postgraphile/usage-cli/ diff --git a/src/arrow/dev/benchmarking/data_model.dot b/src/arrow/dev/benchmarking/data_model.dot new file mode 100644 index 000000000..d311acd4e --- /dev/null +++ b/src/arrow/dev/benchmarking/data_model.dot @@ -0,0 +1,219 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements.See the NOTICE file + distributed with this work for additional information + regarding copyright ownership.The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License.You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied.See the License for the + specific language governing permissions and limitations + under the License. +*/ + +/* + WARNING + This is an auto-generated file. Please do not edit. + + To reproduce, please run :code:`./make_data_model_rst.sh`. + (This requires you have the + `psql client <https://www.postgresql.org/download/>`_ + and have started the docker containers using + :code:`docker-compose up`). +*/ +digraph database { + concentrate = true; + rankdir = LR; + ratio = ".75"; + node [shape = none, fontsize="11", fontname="Helvetica"]; + edge [fontsize="8", fontname="Helvetica"]; +legend +[fontsize = "14" +label = +<<table border="0" cellpadding="0"> + <tr><td align="left"><font point-size="16">Legend</font></td></tr> + <tr><td align="left">pk = primary key</td></tr> + <tr><td align="left">fk = foreign key</td></tr> + <tr><td align="left">u = unique*</td></tr> + <tr><td align="left">o = optional</td></tr> + <tr><td align="left">* multiple uniques in the same table are a unique group</td></tr> +</table>> +]; +benchmark +[label = + <<table border="0" cellborder="1" cellspacing="0" cellpadding="2"> + <tr><td border="0"><font point-size="14">benchmark</font></td></tr> + <tr><td port="benchmark_id"><b>benchmark_id (pk)</b></td></tr> + <tr><td port="benchmark_language_id"><b>benchmark_language_id (pk)</b></td></tr> + <tr><td>benchmark_name (u)</td></tr> + <tr><td>parameter_names (o)</td></tr> + <tr><td>benchmark_description</td></tr> + <tr><td>benchmark_version (u)</td></tr> + <tr><td port="unit_id">unit_id (fk) </td></tr> + </table>> +]; +benchmark_language +[label = + <<table border="0" cellborder="1" cellspacing="0" cellpadding="2"> + <tr><td border="0"><font point-size="14">benchmark_language</font></td></tr> + <tr><td port="benchmark_language_id"><b>benchmark_language_id (pk)</b></td></tr> + <tr><td>benchmark_language (u)</td></tr> + </table>> +]; +benchmark_run +[label = + <<table border="0" cellborder="1" cellspacing="0" cellpadding="2"> + <tr><td border="0"><font point-size="14">benchmark_run</font></td></tr> + <tr><td port="benchmark_run_id"><b>benchmark_run_id (pk)</b></td></tr> + <tr><td>parameter_values (u)</td></tr> + <tr><td>value</td></tr> + <tr><td>git_commit_timestamp (u)</td></tr> + <tr><td>git_hash</td></tr> + <tr><td>val_min (o)</td></tr> + <tr><td>val_q1 (o)</td></tr> + <tr><td>val_q3 (o)</td></tr> + <tr><td>val_max (o)</td></tr> + <tr><td>std_dev</td></tr> + <tr><td>n_obs</td></tr> + <tr><td>run_timestamp (u)</td></tr> + <tr><td>run_metadata (o)</td></tr> + <tr><td>run_notes (o)</td></tr> + <tr><td port="machine_id">machine_id (u) (fk) </td></tr> + <tr><td port="environment_id">environment_id (u) (fk) </td></tr> + <tr><td port="language_implementation_version_id">language_implementation_version_id (fk) </td></tr> + <tr><td port="benchmark_language_id">benchmark_language_id (fk) </td></tr> + <tr><td port="benchmark_id">benchmark_id (u) (fk) </td></tr> + </table>> +]; +benchmark_type +[label = + <<table border="0" cellborder="1" cellspacing="0" cellpadding="2"> + <tr><td border="0"><font point-size="14">benchmark_type</font></td></tr> + <tr><td port="benchmark_type_id"><b>benchmark_type_id (pk)</b></td></tr> + <tr><td>benchmark_type (u)</td></tr> + <tr><td>lessisbetter</td></tr> + </table>> +]; +cpu +[label = + <<table border="0" cellborder="1" cellspacing="0" cellpadding="2"> + <tr><td border="0"><font point-size="14">cpu</font></td></tr> + <tr><td port="cpu_id"><b>cpu_id (pk)</b></td></tr> + <tr><td>cpu_model_name (u)</td></tr> + <tr><td>cpu_core_count</td></tr> + <tr><td>cpu_thread_count</td></tr> + <tr><td>cpu_frequency_max_hz</td></tr> + <tr><td>cpu_frequency_min_hz</td></tr> + <tr><td>cpu_l1d_cache_bytes</td></tr> + <tr><td>cpu_l1i_cache_bytes</td></tr> + <tr><td>cpu_l2_cache_bytes</td></tr> + <tr><td>cpu_l3_cache_bytes</td></tr> + </table>> +]; +dependencies +[label = + <<table border="0" cellborder="1" cellspacing="0" cellpadding="2"> + <tr><td border="0"><font point-size="14">dependencies</font></td></tr> + <tr><td port="dependencies_id"><b>dependencies_id (pk)</b></td></tr> + <tr><td>dependencies (u)</td></tr> + </table>> +]; +gpu +[label = + <<table border="0" cellborder="1" cellspacing="0" cellpadding="2"> + <tr><td border="0"><font point-size="14">gpu</font></td></tr> + <tr><td port="gpu_id"><b>gpu_id (pk)</b></td></tr> + <tr><td>gpu_information (u)</td></tr> + <tr><td>gpu_part_number</td></tr> + <tr><td>gpu_product_name</td></tr> + </table>> +]; +language_implementation_version +[label = + <<table border="0" cellborder="1" cellspacing="0" cellpadding="2"> + <tr><td border="0"><font point-size="14">language_implementation_version</font></td></tr> + <tr><td port="language_implementation_version_id"><b>language_implementation_version_id (pk)</b></td></tr> + <tr><td port="benchmark_language_id"><b>benchmark_language_id (pk)</b></td></tr> + <tr><td>language_implementation_version (u)</td></tr> + </table>> +]; +machine +[label = + <<table border="0" cellborder="1" cellspacing="0" cellpadding="2"> + <tr><td border="0"><font point-size="14">machine</font></td></tr> + <tr><td port="machine_id"><b>machine_id (pk)</b></td></tr> + <tr><td>machine_name</td></tr> + <tr><td>mac_address (u)</td></tr> + <tr><td>memory_bytes</td></tr> + <tr><td>cpu_actual_frequency_hz</td></tr> + <tr><td>machine_other_attributes (o)</td></tr> + <tr><td port="cpu_id">cpu_id (fk) </td></tr> + <tr><td port="gpu_id">gpu_id (fk) </td></tr> + <tr><td port="os_id">os_id (fk) </td></tr> + </table>> +]; +os +[label = + <<table border="0" cellborder="1" cellspacing="0" cellpadding="2"> + <tr><td border="0"><font point-size="14">os</font></td></tr> + <tr><td port="os_id"><b>os_id (pk)</b></td></tr> + <tr><td>os_name (u)</td></tr> + <tr><td>architecture_name (u)</td></tr> + <tr><td>kernel_name (u)</td></tr> + </table>> +]; +project +[label = + <<table border="0" cellborder="1" cellspacing="0" cellpadding="2"> + <tr><td border="0"><font point-size="14">project</font></td></tr> + <tr><td port="project_id"><b>project_id (pk)</b></td></tr> + <tr><td>project_name (u)</td></tr> + <tr><td>project_url (u)</td></tr> + <tr><td>repo_url (u)</td></tr> + <tr><td>last_changed</td></tr> + </table>> +]; +unit +[label = + <<table border="0" cellborder="1" cellspacing="0" cellpadding="2"> + <tr><td border="0"><font point-size="14">unit</font></td></tr> + <tr><td port="unit_id"><b>unit_id (pk)</b></td></tr> + <tr><td>units (u)</td></tr> + <tr><td port="benchmark_type_id">benchmark_type_id (fk) </td></tr> + </table>> +]; +environment +[label = + <<table border="0" cellborder="1" cellspacing="0" cellpadding="2"> + <tr><td border="0"><font point-size="14">environment</font></td></tr> + <tr><td port="environment_id"><b>environment_id (pk)</b></td></tr> + <tr><td port="language_implementation_version_id"><b>language_implementation_version_id (pk)</b></td></tr> + <tr><td port="benchmark_language_id"><b>benchmark_language_id (pk)</b></td></tr> + <tr><td port="dependencies_id">dependencies_id (u) (fk) </td></tr> + </table>> +]; +machine:cpu_id -> cpu:cpu_id; +machine:gpu_id -> gpu:gpu_id; +machine:os_id -> os:os_id; +benchmark:benchmark_language_id -> benchmark_language:benchmark_language_id; +environment:benchmark_language_id -> benchmark_language:benchmark_language_id; +language_implementation_version:benchmark_language_id -> benchmark_language:benchmark_language_id; +environment:dependencies_id -> dependencies:dependencies_id; +environment:benchmark_language_id -> language_implementation_version:benchmark_language_id; +environment:language_implementation_version_id -> language_implementation_version:language_implementation_version_id; +unit:benchmark_type_id -> benchmark_type:benchmark_type_id; +benchmark_run:machine_id -> machine:machine_id; +benchmark:unit_id -> unit:unit_id; +benchmark_run:language_implementation_version_id -> environment:language_implementation_version_id; +benchmark_run:benchmark_language_id -> environment:benchmark_language_id; +benchmark_run:environment_id -> environment:environment_id; +benchmark_run:benchmark_language_id -> benchmark:benchmark_language_id; +benchmark_run:benchmark_id -> benchmark:benchmark_id; +} + diff --git a/src/arrow/dev/benchmarking/data_model.rst b/src/arrow/dev/benchmarking/data_model.rst new file mode 100644 index 000000000..d0f3dc7fc --- /dev/null +++ b/src/arrow/dev/benchmarking/data_model.rst @@ -0,0 +1,373 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + + +.. WARNING +.. This is an auto-generated file. Please do not edit. + +.. To reproduce, please run :code:`./make_data_model_rst.sh`. +.. (This requires you have the +.. `psql client <https://www.postgresql.org/download/>`_ +.. and have started the docker containers using +.. :code:`docker-compose up`). + + +.. _benchmark-data-model: + +Benchmark data model +==================== + + +.. graphviz:: data_model.dot + + +.. _benchmark-ingestion: + +Benchmark ingestion helper functions +==================================== + +ingest_benchmark_run_view +------------------------- + +:code:`ingest_benchmark_run_view(from_jsonb jsonb)` + +The argument is a JSON object. NOTE: key names must be entirely +lowercase, or the insert will fail. Extra key-value pairs are ignored. +Example:: + + [ + { + "benchmark_name": "Benchmark 2", + "benchmark_version": "version 0", + "parameter_values": {"arg0": 100, "arg1": 5}, + "value": 2.5, + "git_commit_timestamp": "2019-02-08 22:35:53 +0100", + "git_hash": "324d3cf198444a", + "val_min": 1, + "val_q1": 2, + "val_q3": 3, + "val_max": 4, + "std_dev": 1.41, + "n_obs": 8, + "run_timestamp": "2019-02-14 03:00:05 -0600", + "mac_address": "08:00:2b:01:02:03", + "benchmark_language": "Python", + "language_implementation_version": "CPython 2.7", + "dependencies": {"six": "", "numpy": "1.14", "other_lib": "1.0"} + }, + { + "benchmark_name": "Benchmark 2", + "benchmark_version": "version 0", + "parameter_values": {"arg0": 1000, "arg1": 5}, + "value": 5, + "git_commit_timestamp": "2019-02-08 22:35:53 +0100", + "git_hash": "324d3cf198444a", + "std_dev": 3.14, + "n_obs": 8, + "run_timestamp": "2019-02-14 03:00:10 -0600", + "mac_address": "08:00:2b:01:02:03", + "benchmark_language": "Python", + "language_implementation_version": "CPython 2.7", + "dependencies": {"six": "", "numpy": "1.14", "other_lib": "1.0"} + } + ] +To identify which columns in "benchmark_run_view" are required, +please see the view documentation in :ref:`benchmark-data-model`. + + + +back to `Benchmark data model <benchmark-data-model>`_ + + +ingest_benchmark_view +--------------------- + +:code:`ingest_benchmark_view(from_jsonb jsonb)` + +The argument is a JSON object. NOTE: key names must be entirely +lowercase, or the insert will fail. Extra key-value pairs are ignored. +Example:: + + [ + { + "benchmark_name": "Benchmark 1", + "parameter_names": ["arg0", "arg1", "arg2"], + "benchmark_description": "First benchmark", + "benchmark_type": "Time", + "units": "miliseconds", + "lessisbetter": true, + "benchmark_version": "second version", + "benchmark_language": "Python" + }, + { + "benchmark_name": "Benchmark 2", + "parameter_names": ["arg0", "arg1"], + "benchmark_description": "Description 2.", + "benchmark_type": "Time", + "units": "nanoseconds", + "lessisbetter": true, + "benchmark_version": "second version", + "benchmark_language": "Python" + } + ] + +To identify which columns in "benchmark_view" are required, +please see the view documentation in :ref:`benchmark-data-model`. + + + +back to `Benchmark data model <benchmark-data-model>`_ + + +ingest_benchmark_runs_with_context +---------------------------------- + +:code:`ingest_benchmark_runs_with_context(from_jsonb jsonb)` + +The argument is a JSON object. NOTE: key names must be entirely +lowercase, or the insert will fail. Extra key-value pairs are ignored. +The object contains three key-value pairs:: + + {"context": { + "mac_address": "08:00:2b:01:02:03", + "benchmark_language": "Python", + "language_implementation_version": "CPython 3.6", + "dependencies": {"six": "", "numpy": "1.14", "other_lib": "1.0"}, + "git_commit_timestamp": "2019-02-14 22:42:22 +0100", + "git_hash": "123456789abcde", + "run_timestamp": "2019-02-14 03:00:40 -0600", + "extra stuff": "does not hurt anything and will not be added." + }, + "benchmark_version": { + "Benchmark Name 1": "Any string can be a version.", + "Benchmark Name 2": "A git hash can be a version.", + "An Unused Benchmark Name": "Will be ignored." + }, + "benchmarks": [ + { + "benchmark_name": "Benchmark Name 1", + "parameter_values": {"argument1": 1, "argument2": "value2"}, + "value": 42, + "val_min": 41.2, + "val_q1": 41.5, + "val_q3": 42.5, + "val_max": 42.8, + "std_dev": 0.5, + "n_obs": 100, + "run_metadata": {"any": "key-value pairs"}, + "run_notes": "Any relevant notes." + }, + { + "benchmark_name": "Benchmark Name 2", + "parameter_values": {"not nullable": "Use {} if no params."}, + "value": 8, + "std_dev": 1, + "n_obs": 2, + } + ] + } + +- The entry for "context" contains the machine, environment, and timestamp + information common to all of the runs +- The entry for "benchmark_version" maps benchmark + names to their version strings. (Which can be a git hash, + the entire code string, a number, or any other string of your choice.) +- The entry for "benchmarks" is a list of benchmark run data + for the given context and benchmark versions. The first example + benchmark run entry contains all possible values, even + nullable ones, and the second entry omits all nullable values. + + + + +back to `Benchmark data model <benchmark-data-model>`_ + + +ingest_machine_view +------------------- + +:code:`ingest_machine_view(from_jsonb jsonb)` + +The argument is a JSON object. NOTE: key names must be entirely +lowercase, or the insert will fail. Extra key-value pairs are ignored. +Example:: + + { + "mac_address": "0a:00:2d:01:02:03", + "machine_name": "Yet-Another-Machine-Name", + "memory_bytes": 8589934592, + "cpu_actual_frequency_hz": 2300000000, + "os_name": "OSX", + "architecture_name": "x86_64", + "kernel_name": "18.2.0", + "cpu_model_name": "Intel(R) Core(TM) i5-7360U CPU @ 2.30GHz", + "cpu_core_count": 2, + "cpu_thread_count": 4, + "cpu_frequency_max_hz": 2300000000, + "cpu_frequency_min_hz": 2300000000, + "cpu_l1d_cache_bytes": 32768, + "cpu_l1i_cache_bytes": 32768, + "cpu_l2_cache_bytes": 262144, + "cpu_l3_cache_bytes": 4194304, + "machine_other_attributes": {"just": "an example"}, + "gpu_information": "", + "gpu_part_number": "", + "gpu_product_name": "" + } + +To identify which columns in "machine_view" are required, +please see the view documentation in :ref:`benchmark-data-model`. + + + +back to `Benchmark data model <benchmark-data-model>`_ + + + +.. _benchmark-views: + +Benchmark views +=============== + + +benchmark_run_view +------------------ + +Each benchmark run. + +- Each entry is unique on the machine, environment, benchmark, + and git commit timestamp. + +=============================== =========== ======== =========== =========== +Column Type Nullable Default Description +=============================== =========== ======== =========== =========== +benchmark_run_id int8 not null serial primary key +benchmark_name citext not null unique +benchmark_version citext not null unique +parameter_values jsonb not null '{}'::jsonb unique +value numeric not null +git_commit_timestamp timestamptz not null unique +git_hash text not null +val_min numeric +val_q1 numeric +val_q3 numeric +val_max numeric +std_dev numeric not null +n_obs int4 not null +run_timestamp timestamptz not null unique +run_metadata jsonb +run_notes text +mac_address macaddr not null unique +benchmark_language citext not null unique +language_implementation_version citext not null ''::citext unique +dependencies jsonb not null '{}'::jsonb unique +=============================== =========== ======== =========== =========== + +back to `Benchmark data model <benchmark-data-model>`_ + +benchmark_view +-------------- + +The details about a particular benchmark. + +- "benchmark_name" is unique for a given "benchmark_language" +- Each entry is unique on + ("benchmark_language", "benchmark_name", "benchmark_version") + +===================== ====== ======== ======= =========== +Column Type Nullable Default Description +===================== ====== ======== ======= =========== +benchmark_id int4 not null serial primary key +benchmark_name citext not null unique +parameter_names _text +benchmark_description text not null +benchmark_type citext not null unique +units citext not null unique +lessisbetter bool not null +benchmark_version citext not null unique +benchmark_language citext not null unique +===================== ====== ======== ======= =========== + +back to `Benchmark data model <benchmark-data-model>`_ + +environment_view +---------------- + +The build environment used for a reported benchmark run. +(Will be inferred from each "benchmark_run" if not explicitly added). + +- Each entry is unique on + ("benchmark_language", "language_implementation_version", "dependencies") +- "benchmark_language" is unique in the "benchmark_language" table +- "benchmark_language" plus "language_implementation_version" is unique in + the "language_implementation_version" table +- "dependencies" is unique in the "dependencies" table + +=============================== ====== ======== =========== =========== +Column Type Nullable Default Description +=============================== ====== ======== =========== =========== +environment_id int4 not null serial primary key +benchmark_language citext not null unique +language_implementation_version citext not null ''::citext unique +dependencies jsonb not null '{}'::jsonb unique +=============================== ====== ======== =========== =========== + +back to `Benchmark data model <benchmark-data-model>`_ + +machine_view +------------ + +The machine environment (CPU, GPU, OS) used for each benchmark run. + +- "mac_address" is unique in the "machine" table +- "gpu_part_number" is unique in the "gpu" (graphics processing unit) table + Empty string (''), not null, is used for machines that won't use the GPU +- "cpu_model_name" is unique in the "cpu" (central processing unit) table +- "os_name", "os_architecture_name", and "os_kernel_name" + are unique in the "os" (operating system) table +- "machine_other_attributes" is a key-value store for any other relevant + data, e.g. '{"hard_disk_type": "solid state"}' + +======================== ======= ======== ========== =========== +Column Type Nullable Default Description +======================== ======= ======== ========== =========== +machine_id int4 not null serial primary key +mac_address macaddr not null unique +machine_name citext not null +memory_bytes int8 not null +cpu_actual_frequency_hz int8 not null +os_name citext not null unique +architecture_name citext not null unique +kernel_name citext not null ''::citext unique +cpu_model_name citext not null unique +cpu_core_count int4 not null +cpu_thread_count int4 not null +cpu_frequency_max_hz int8 not null +cpu_frequency_min_hz int8 not null +cpu_l1d_cache_bytes int4 not null +cpu_l1i_cache_bytes int4 not null +cpu_l2_cache_bytes int4 not null +cpu_l3_cache_bytes int4 not null +gpu_information citext not null ''::citext unique +gpu_part_number citext not null ''::citext +gpu_product_name citext not null ''::citext +machine_other_attributes jsonb +======================== ======= ======== ========== =========== + +back to `Benchmark data model <benchmark-data-model>`_ + + diff --git a/src/arrow/dev/benchmarking/ddl/0_setup.sql b/src/arrow/dev/benchmarking/ddl/0_setup.sql new file mode 100644 index 000000000..ec1044641 --- /dev/null +++ b/src/arrow/dev/benchmarking/ddl/0_setup.sql @@ -0,0 +1,23 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +*/ + +CREATE EXTENSION IF NOT EXISTS "citext"; -- type for case-insensitive text + +-- For future fine-grained control over function execution by user group. +ALTER DEFAULT PRIVILEGES REVOKE EXECUTE ON functions FROM public; diff --git a/src/arrow/dev/benchmarking/ddl/1_00_table_public_project.sql b/src/arrow/dev/benchmarking/ddl/1_00_table_public_project.sql new file mode 100644 index 000000000..c52d66cfd --- /dev/null +++ b/src/arrow/dev/benchmarking/ddl/1_00_table_public_project.sql @@ -0,0 +1,45 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +*/ + + +-- PROJECT +CREATE TABLE IF NOT EXISTS public.project +( + project_id SERIAL PRIMARY KEY + , project_name citext NOT NULL + , project_url text NOT NULL + , repo_url text NOT NULL + , last_changed timestamp (0) without time zone NOT NULL DEFAULT now() +); +COMMENT ON TABLE public.project + IS 'Project name and relevant URLs.'; +COMMENT ON COLUMN public.project.project_url + IS 'Homepage URL.'; +COMMENT ON COLUMN public.project.repo_url + IS 'Git repo URL to link stored commit hashes to code in a webpage.'; +COMMENT ON COLUMN public.project.last_changed + IS 'New project details are added with a new timestamp. ' + 'The project details with the newest timestamp will be used.'; + +-- CONSTRAINTS +CREATE UNIQUE INDEX project_unique_index_on_project_name_urls + ON public.project(project_name, project_url, repo_url); +COMMENT ON INDEX + public.project_unique_index_on_project_name_urls + IS 'Enforce uniqueness of project name and urls.'; diff --git a/src/arrow/dev/benchmarking/ddl/1_01_table_public_cpu.sql b/src/arrow/dev/benchmarking/ddl/1_01_table_public_cpu.sql new file mode 100644 index 000000000..df1a9e757 --- /dev/null +++ b/src/arrow/dev/benchmarking/ddl/1_01_table_public_cpu.sql @@ -0,0 +1,63 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +*/ + + +-- CPU +CREATE TABLE IF NOT EXISTS public.cpu +( + cpu_id SERIAL PRIMARY KEY + , cpu_model_name citext NOT NULL UNIQUE + , cpu_core_count integer NOT NULL + , cpu_thread_count integer NOT NULL + , cpu_frequency_max_Hz bigint NOT NULL + , cpu_frequency_min_Hz bigint NOT NULL + , cpu_L1d_cache_bytes integer NOT NULL + , cpu_L1i_cache_bytes integer NOT NULL + , cpu_L2_cache_bytes integer NOT NULL + , cpu_L3_cache_bytes integer NOT NULL +); +COMMENT ON TABLE public.cpu + IS 'CPU model and its specifications.'; +COMMENT ON COLUMN public.cpu.cpu_id + IS 'The primary key for the CPU table. ' + 'NOTE: This is a synthetic primary key and not meant to represent a ' + 'processor instruction to read capabilities.'; +COMMENT ON COLUMN public.cpu.cpu_model_name + IS 'The output of `sysctl -n machdep.cpu.brand_stringp`.'; +COMMENT ON COLUMN public.cpu.cpu_core_count + IS 'The output of `sysctl -n hw.physicalcpu`.'; +COMMENT ON COLUMN public.cpu.cpu_thread_count + IS 'The output of `sysctl -n hw.logicalcpu`.'; +COMMENT ON COLUMN public.cpu.cpu_frequency_max_Hz + IS 'The output of `sysctl -n hw.cpufrequency_max`.'; +COMMENT ON COLUMN public.cpu.cpu_frequency_min_Hz + IS 'The output of `sysctl -n hw.cpufrequency_min`.'; +COMMENT ON COLUMN public.cpu.cpu_L1d_cache_bytes + IS 'The output of `sysctl -n hw.l1dcachesize`.'; +COMMENT ON COLUMN public.cpu.cpu_L1i_cache_bytes + IS 'The output of `sysctl -n hw.l1icachesize`.'; +COMMENT ON COLUMN public.cpu.cpu_L2_cache_bytes + IS 'The output of `sysctl -n hw.l2cachesize`.'; +COMMENT ON COLUMN public.cpu.cpu_L3_cache_bytes + IS 'The output of `sysctl -n hw.l3cachesize`.'; + +-- CONSTRAINTS +ALTER TABLE public.cpu + ADD CONSTRAINT cpu_check_cpu_model_name_length + CHECK (char_length(cpu_model_name) < 255); diff --git a/src/arrow/dev/benchmarking/ddl/1_02_table_public_gpu.sql b/src/arrow/dev/benchmarking/ddl/1_02_table_public_gpu.sql new file mode 100644 index 000000000..564af19de --- /dev/null +++ b/src/arrow/dev/benchmarking/ddl/1_02_table_public_gpu.sql @@ -0,0 +1,43 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +*/ + + +-- GPU +CREATE TABLE IF NOT EXISTS public.gpu +( + gpu_id SERIAL PRIMARY KEY + , gpu_information citext UNIQUE NOT NULL DEFAULT '' + , gpu_part_number citext NOT NULL DEFAULT '' + , gpu_product_name citext NOT NULL DEFAULT '' +); +COMMENT ON TABLE public.gpu IS 'GPU specifications.'; +COMMENT ON COLUMN public.gpu.gpu_information + IS 'The output of `nvidia-smi -q` (on Linux or Windows), or `cuda-smi` ' + 'or `kextstat | grep -i cuda` on OSX, or another command; anything ' + 'that gets a string to uniquely identify the GPU.'; + +-- CONSTRAINTS +CREATE INDEX gpu_index_on_part_number + ON public.gpu (gpu_part_number); + +CREATE INDEX gpu_index_on_product_name + ON public.gpu (gpu_product_name); + +CREATE INDEX gpu_index_on_product_name_and_part_number + ON public.gpu (gpu_product_name, gpu_part_number); diff --git a/src/arrow/dev/benchmarking/ddl/1_03_table_public_os.sql b/src/arrow/dev/benchmarking/ddl/1_03_table_public_os.sql new file mode 100644 index 000000000..7b03d82f4 --- /dev/null +++ b/src/arrow/dev/benchmarking/ddl/1_03_table_public_os.sql @@ -0,0 +1,57 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +*/ + + +-- OS +CREATE TABLE IF NOT EXISTS public.os +( + os_id SERIAL PRIMARY KEY + , os_name citext NOT NULL + , architecture_name citext NOT NULL + , kernel_name citext NOT NULL DEFAULT '' +); +-- @name os. forces retention of an 's' in the Graphile GraphQL api. +COMMENT ON TABLE public.os + IS E'@name os.\nOperating system name and kernel (version).'; +COMMENT ON COLUMN public.os.os_name + IS 'Operating system name. For example, OSX, Ubuntu, Windows`.'; +COMMENT ON COLUMN public.os.architecture_name + IS 'Operating system architecture; the output of `uname -m`.'; +COMMENT ON COLUMN public.os.kernel_name + IS 'Operating system kernel, or NULL. ' + 'On Linux/OSX, the output of `uname -r`. ' + 'On Windows, the output of `ver`.'; + +-- CONSTRAINTS +ALTER TABLE public.os + ADD CONSTRAINT os_check_os_name_length + CHECK (char_length(os_name) < 63); + +ALTER TABLE public.os + ADD CONSTRAINT os_check_architecture_name_length + CHECK (char_length(architecture_name) < 63); + +ALTER TABLE public.os + ADD CONSTRAINT os_check_kernel_name_length + CHECK (char_length(kernel_name) < 63); + +CREATE UNIQUE INDEX os_unique_index + ON public.os(os_name, architecture_name, kernel_name); +COMMENT ON INDEX public.os_unique_index + IS 'Enforce uniqueness of os, architecture, and kernel names.'; diff --git a/src/arrow/dev/benchmarking/ddl/1_04_table_public_benchmark_language.sql b/src/arrow/dev/benchmarking/ddl/1_04_table_public_benchmark_language.sql new file mode 100644 index 000000000..2e3553677 --- /dev/null +++ b/src/arrow/dev/benchmarking/ddl/1_04_table_public_benchmark_language.sql @@ -0,0 +1,35 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +*/ + + +-- BENCHMARK_LANGUAGE +CREATE TABLE IF NOT EXISTS public.benchmark_language +( + benchmark_language_id SERIAL PRIMARY KEY + , benchmark_language citext NOT NULL UNIQUE +); +COMMENT ON TABLE public.benchmark_language + IS 'The language the benchmark was written in (and presumably for).'; +COMMENT ON COLUMN public.benchmark_language.benchmark_language + IS 'The benchmark language. For example: Python'; + +-- CONSTRAINTS +ALTER TABLE public.benchmark_language + ADD CONSTRAINT benchmark_language_check_language_length + CHECK (char_length(benchmark_language) < 63); diff --git a/src/arrow/dev/benchmarking/ddl/1_05_table_public_dependencies.sql b/src/arrow/dev/benchmarking/ddl/1_05_table_public_dependencies.sql new file mode 100644 index 000000000..3744a0c35 --- /dev/null +++ b/src/arrow/dev/benchmarking/ddl/1_05_table_public_dependencies.sql @@ -0,0 +1,31 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +*/ + + +-- DEPENDENCIES +CREATE TABLE IF NOT EXISTS public.dependencies +( + dependencies_id SERIAL PRIMARY KEY + , dependencies jsonb UNIQUE NOT NULL DEFAULT '{}'::jsonb +); +COMMENT ON TABLE public.dependencies + IS E'@name dependencies.\n' + 'A JSON object mapping dependencies to their versions.'; +COMMENT ON COLUMN public.dependencies.dependencies + IS 'For example: ''{"boost": "1.69", "conda": "", "numpy": "1.15"}''.'; diff --git a/src/arrow/dev/benchmarking/ddl/1_06_table_public_language_implementation_version.sql b/src/arrow/dev/benchmarking/ddl/1_06_table_public_language_implementation_version.sql new file mode 100644 index 000000000..f7d26e4e2 --- /dev/null +++ b/src/arrow/dev/benchmarking/ddl/1_06_table_public_language_implementation_version.sql @@ -0,0 +1,46 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +*/ + + +-- LANGUAGE_IMPLEMENTATION_VERSION +CREATE TABLE IF NOT EXISTS public.language_implementation_version +( + language_implementation_version_id SERIAL + , language_implementation_version citext NOT NULL DEFAULT '' + , benchmark_language_id integer NOT NULL + , PRIMARY KEY (language_implementation_version_id, benchmark_language_id) + , FOREIGN KEY (benchmark_language_id) REFERENCES public.benchmark_language +); +COMMENT ON TABLE public.language_implementation_version + IS 'The benchmark language implementation or compiler version, e.g. ' + '''CPython 2.7'' or ''PyPy x.y'' or ''gcc 7.3.0'' or ' + '''gcc (Ubuntu 7.3.0-27ubuntu1~18.04) 7.3.0''.'; +COMMENT ON COLUMN public.language_implementation_version.language_implementation_version + IS 'The version number used in the benchmark environment (e.g. ''2.7'').'; + +-- CONSTRAINTS +ALTER TABLE public.language_implementation_version + ADD CONSTRAINT language_implementation_version_check_version_length + CHECK (char_length(language_implementation_version) < 255); + +CREATE UNIQUE INDEX language_implementation_version_unique_index + ON public.language_implementation_version + (benchmark_language_id, language_implementation_version); +COMMENT ON INDEX language_implementation_version_unique_index + IS 'Enforce unique implementation versions for the languages.'; diff --git a/src/arrow/dev/benchmarking/ddl/1_07_table_public_benchmark_type.sql b/src/arrow/dev/benchmarking/ddl/1_07_table_public_benchmark_type.sql new file mode 100644 index 000000000..1143cdb00 --- /dev/null +++ b/src/arrow/dev/benchmarking/ddl/1_07_table_public_benchmark_type.sql @@ -0,0 +1,39 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +*/ + + +-- BENCHMARK_TYPE +CREATE TABLE IF NOT EXISTS public.benchmark_type +( + benchmark_type_id SERIAL PRIMARY KEY + , benchmark_type citext NOT NULL UNIQUE + , lessisbetter boolean NOT NULL +); +COMMENT ON TABLE public.benchmark_type + IS 'The type of benchmark. For example "time", "mem", "peakmem", "track"'; +COMMENT ON COLUMN public.benchmark_type.benchmark_type + IS 'The type of units, so ''time'' for seconds, miliseconds, or ' + '''mem'' for kilobytes, megabytes.'; +COMMENT ON COLUMN public.benchmark_type.lessisbetter + IS 'True if a smaller benchmark value is better.'; + +-- CONSTRAINTS +ALTER TABLE public.benchmark_type + ADD CONSTRAINT benchmark_type_check_benchmark_type_char_length + CHECK (char_length(benchmark_type) < 63); diff --git a/src/arrow/dev/benchmarking/ddl/1_08_table_public_machine.sql b/src/arrow/dev/benchmarking/ddl/1_08_table_public_machine.sql new file mode 100644 index 000000000..8f219d3e0 --- /dev/null +++ b/src/arrow/dev/benchmarking/ddl/1_08_table_public_machine.sql @@ -0,0 +1,69 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +*/ + + +-- MACHINE +CREATE TABLE IF NOT EXISTS public.machine +( + machine_id SERIAL PRIMARY KEY + , machine_name citext NOT NULL + , mac_address macaddr NOT NULL + , memory_bytes bigint NOT NULL + , cpu_actual_frequency_Hz bigint NOT NULL + , machine_other_attributes jsonb + , cpu_id integer NOT NULL + , gpu_id integer NOT NULL + , os_id integer NOT NULL + , FOREIGN KEY (cpu_id) REFERENCES public.cpu + , FOREIGN KEY (gpu_id) REFERENCES public.gpu + , FOREIGN KEY (os_id) REFERENCES public.os +); +COMMENT ON TABLE public.machine + IS 'Unique identifiers for a machine.'; +COMMENT ON COLUMN public.machine.machine_name + IS 'A machine name of your choice.'; +COMMENT ON COLUMN public.machine.mac_address + IS 'The mac_address of a physical network interface to uniquely ' + 'identify a computer. Postgres accepts standard formats, including ' + '''08:00:2b:01:02:03'', ''08-00-2b-01-02-03'', ''08002b:010203'''; +COMMENT ON COLUMN public.machine.memory_bytes + IS 'The output of `sysctl -n hw.memsize`.'; +COMMENT ON COLUMN public.machine.cpu_actual_frequency_Hz + IS 'The output of `sysctl -n hw.cpufrequency`.'; +COMMENT ON COLUMN public.machine.machine_other_attributes + IS 'Additional attributes of interest, as a JSON object. ' + 'For example: ''{"hard_disk_type": "solid state"}''::jsonb.'; + +-- CONSTRAINTS +CREATE UNIQUE INDEX machine_index_on_mac_address + ON public.machine(mac_address); +COMMENT ON INDEX machine_index_on_mac_address + IS 'Enforce unique mac address'; + +CREATE INDEX machine_index_on_cpu_id + ON public.machine(cpu_id); + +CREATE INDEX machine_index_on_gpu_id + ON public.machine(gpu_id); + +CREATE INDEX machine_index_on_os_id + ON public.machine(os_id); + +CREATE INDEX machine_index_on_cpu_gpu_os_id + ON public.machine(cpu_id, gpu_id, os_id); diff --git a/src/arrow/dev/benchmarking/ddl/1_09_table_public_unit.sql b/src/arrow/dev/benchmarking/ddl/1_09_table_public_unit.sql new file mode 100644 index 000000000..a8cf57669 --- /dev/null +++ b/src/arrow/dev/benchmarking/ddl/1_09_table_public_unit.sql @@ -0,0 +1,37 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +*/ + + +-- UNIT +CREATE TABLE IF NOT EXISTS public.unit +( + unit_id SERIAL PRIMARY KEY + , units citext NOT NULL UNIQUE + , benchmark_type_id integer NOT NULL + , FOREIGN KEY (benchmark_type_id) + REFERENCES public.benchmark_type(benchmark_type_id) +); +COMMENT ON TABLE public.unit IS 'The actual units for a reported benchmark.'; +COMMENT ON COLUMN public.unit.units + IS 'For example: nanoseconds, microseconds, bytes, megabytes.'; + +-- CONSTRAINTS +ALTER TABLE public.unit + ADD CONSTRAINT unit_check_units_string_length + CHECK (char_length(units) < 63); diff --git a/src/arrow/dev/benchmarking/ddl/1_10_table_public_environment.sql b/src/arrow/dev/benchmarking/ddl/1_10_table_public_environment.sql new file mode 100644 index 000000000..e3a6d2395 --- /dev/null +++ b/src/arrow/dev/benchmarking/ddl/1_10_table_public_environment.sql @@ -0,0 +1,51 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +*/ + + +-- ENVIRONMENT +CREATE TABLE IF NOT EXISTS public.environment +( + environment_id SERIAL + , language_implementation_version_id integer NOT NULL + , benchmark_language_id integer NOT NULL + , dependencies_id integer NOT NULL + , PRIMARY KEY + (environment_id, benchmark_language_id, language_implementation_version_id) + , FOREIGN KEY + (benchmark_language_id) + REFERENCES public.benchmark_language + , FOREIGN KEY + (language_implementation_version_id, benchmark_language_id) + REFERENCES public.language_implementation_version( + language_implementation_version_id + , benchmark_language_id + ) + , FOREIGN KEY + (dependencies_id) + REFERENCES public.dependencies +); +COMMENT ON TABLE public.environment + IS 'Identifies a build environment for a specific suite of benchmarks.'; + +-- CONSTRAINTS +CREATE UNIQUE INDEX environment_unique_index + ON public.environment + (benchmark_language_id, language_implementation_version_id, dependencies_id); +COMMENT ON INDEX environment_unique_index + IS 'Enforce unique combinations of language version and dependencies.'; diff --git a/src/arrow/dev/benchmarking/ddl/1_11_table_public_benchmark.sql b/src/arrow/dev/benchmarking/ddl/1_11_table_public_benchmark.sql new file mode 100644 index 000000000..18895823d --- /dev/null +++ b/src/arrow/dev/benchmarking/ddl/1_11_table_public_benchmark.sql @@ -0,0 +1,54 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +*/ + + +-- BENCHMARK +CREATE TABLE IF NOT EXISTS public.benchmark +( + benchmark_id SERIAL + , benchmark_name citext NOT NULL + , parameter_names text[] + , benchmark_description text NOT NULL + , benchmark_version citext NOT NULL + , unit_id integer NOT NULL + , benchmark_language_id integer NOT NULL + , PRIMARY KEY (benchmark_id, benchmark_language_id) + , FOREIGN KEY (benchmark_language_id) REFERENCES public.benchmark_language + , FOREIGN KEY (unit_id) REFERENCES public.unit +); +COMMENT ON TABLE public.benchmark + IS 'Identifies an individual benchmark.'; +COMMENT ON COLUMN public.benchmark.parameter_names + IS 'A list of strings identifying the parameter names in the benchmark.'; +COMMENT ON COLUMN public.benchmark.benchmark_version + IS 'Can be any string. In Airspeed Velocity, the version is ' + 'by default the hash of the entire code string for the benchmark.'; + +-- CONSTRAINTS +CREATE INDEX benchmark_index_on_benchmark_language_id + ON public.benchmark(benchmark_language_id); + +CREATE INDEX benchmark_index_on_unit_id + ON public.benchmark(unit_id); + +CREATE UNIQUE INDEX benchmark_unique_index_on_language_benchmark_version + ON public.benchmark + (benchmark_language_id, benchmark_name, benchmark_version); +COMMENT ON INDEX public.benchmark_unique_index_on_language_benchmark_version + IS 'Enforce uniqueness of benchmark name and version for a given language.'; diff --git a/src/arrow/dev/benchmarking/ddl/1_12_table_public_benchmark_run.sql b/src/arrow/dev/benchmarking/ddl/1_12_table_public_benchmark_run.sql new file mode 100644 index 000000000..20b9ef0bb --- /dev/null +++ b/src/arrow/dev/benchmarking/ddl/1_12_table_public_benchmark_run.sql @@ -0,0 +1,112 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +*/ + + +-- BENCHMARK_RUN +CREATE TABLE IF NOT EXISTS public.benchmark_run +( + benchmark_run_id BIGSERIAL PRIMARY KEY + , parameter_values jsonb NOT NULL DEFAULT '{}'::jsonb + , value numeric NOT NULL + , git_commit_timestamp timestamp (0) with time zone NOT NULL + , git_hash text NOT NULL + , val_min numeric + , val_q1 numeric + , val_q3 numeric + , val_max numeric + , std_dev numeric NOT NULL + , n_obs integer NOT NULL + , run_timestamp timestamp (0) with time zone NOT NULL + , run_metadata jsonb + , run_notes text + , machine_id integer NOT NULL + , environment_id integer NOT NULL + , language_implementation_version_id integer NOT NULL + , benchmark_language_id integer NOT NULL + , benchmark_id integer NOT NULL + , FOREIGN KEY (machine_id) REFERENCES public.machine + , FOREIGN KEY + (environment_id, benchmark_language_id, language_implementation_version_id) + REFERENCES public.environment + , FOREIGN KEY (benchmark_id, benchmark_language_id) + REFERENCES public.benchmark(benchmark_id, benchmark_language_id) +); +COMMENT ON TABLE public.benchmark_run + IS 'One run per benchmark run.'; +COMMENT ON COLUMN public.benchmark_run.parameter_values + IS 'A JSON object mapping the parameter names from ' + '"benchmark.parameter_names" to values.'; +COMMENT ON COLUMN public.benchmark_run.value + IS 'The average value from the benchmark run.'; +COMMENT ON COLUMN public.benchmark_run.git_commit_timestamp + IS 'Get this using `git show -s --date=local --format="%ci" <hash>`. ' + 'ISO 8601 is recommended, e.g. ''2019-01-30 03:12 -0600''.'; +COMMENT ON COLUMN public.benchmark_run.git_hash + IS 'The commit has of the codebase currently being benchmarked.'; +COMMENT ON COLUMN public.benchmark_run.val_min + IS 'The smallest benchmark run value for this run.'; +COMMENT ON COLUMN public.benchmark_run.val_q1 + IS 'The first quartile of the benchmark run values for this run.'; +COMMENT ON COLUMN public.benchmark_run.val_q3 + IS 'The third quartile of the benchmark run values for this run.'; +COMMENT ON COLUMN public.benchmark_run.val_max + IS 'The largest benchmark run value for this run.'; +COMMENT ON COLUMN public.benchmark_run.std_dev + IS 'The standard deviation of the run values for this benchmark run.'; +COMMENT ON COLUMN public.benchmark_run.n_obs + IS 'The number of observations for this benchmark run.'; +COMMENT ON COLUMN public.benchmark_run.run_metadata + IS 'Additional metadata of interest, as a JSON object. ' + 'For example: ''{"ci_99": [2.7e-06, 3.1e-06]}''::jsonb.'; +COMMENT ON COLUMN public.benchmark_run.run_notes + IS 'Additional notes of interest, as a text string. '; + +-- CONSTRAINTS +ALTER TABLE public.benchmark_run + ADD CONSTRAINT benchmark_run_check_std_dev_nonnegative + CHECK (std_dev >= 0); + +ALTER TABLE public.benchmark_run + ADD CONSTRAINT benchmark_run_check_n_obs_positive + CHECK (n_obs > 0); + +CREATE INDEX benchmark_run_index_on_environment_id + ON public.benchmark_run(environment_id); + +CREATE INDEX benchmark_run_index_on_machine_id + ON public.benchmark_run(machine_id); + +CREATE INDEX benchmark_run_index_on_benchmark_id + ON public.benchmark_run(benchmark_id, benchmark_language_id); + +CREATE INDEX benchmark_run_index_on_benchmark_environment_time + ON public.benchmark_run + (benchmark_id, environment_id, git_commit_timestamp); +COMMENT ON INDEX + public.benchmark_run_index_on_benchmark_environment_time + IS 'Index to improve sorting by benchmark, environment, and timestamp.'; + +CREATE UNIQUE INDEX + benchmark_run_unique_index_on_env_benchmark_timestamp_params + ON public.benchmark_run + (machine_id, environment_id, benchmark_id, git_commit_timestamp, parameter_values, run_timestamp); +COMMENT ON INDEX + public.benchmark_run_unique_index_on_env_benchmark_timestamp_params + IS 'Enforce uniqueness of benchmark run for a given machine, ' + 'environment, benchmark, git commit timestamp, and parameter values.'; diff --git a/src/arrow/dev/benchmarking/ddl/2_00_views.sql b/src/arrow/dev/benchmarking/ddl/2_00_views.sql new file mode 100644 index 000000000..cbd295e50 --- /dev/null +++ b/src/arrow/dev/benchmarking/ddl/2_00_views.sql @@ -0,0 +1,324 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +*/ + +-- NOTE: +-- The function for documentation depends on view columns +-- being named exactly the same as in the table view. + +-- MACHINE_VIEW +CREATE OR REPLACE VIEW public.machine_view AS + SELECT + machine.machine_id + , mac_address + , machine_name + , memory_bytes + , cpu_actual_frequency_Hz + , os_name + , architecture_name + , kernel_name + , cpu_model_name + , cpu_core_count + , cpu_thread_count + , cpu_frequency_max_Hz + , cpu_frequency_min_Hz + , cpu_L1d_cache_bytes + , cpu_L1i_cache_bytes + , cpu_L2_cache_bytes + , cpu_L3_cache_bytes + , gpu_information + , gpu_part_number + , gpu_product_name + , machine_other_attributes + FROM public.machine AS machine + JOIN public.cpu AS cpu ON machine.cpu_id = cpu.cpu_id + JOIN public.gpu AS gpu ON machine.gpu_id = gpu.gpu_id + JOIN public.os AS os ON machine.os_id = os.os_id; +COMMENT ON VIEW public.machine_view IS +E'The machine environment (CPU, GPU, OS) used for each benchmark run.\n\n' + '- "mac_address" is unique in the "machine" table\n' + '- "gpu_part_number" is unique in the "gpu" (graphics processing unit) table\n' + ' Empty string (''''), not null, is used for machines that won''t use the GPU\n' + '- "cpu_model_name" is unique in the "cpu" (central processing unit) table\n' + '- "os_name", "os_architecture_name", and "os_kernel_name"\n' + ' are unique in the "os" (operating system) table\n' + '- "machine_other_attributes" is a key-value store for any other relevant\n' + ' data, e.g. ''{"hard_disk_type": "solid state"}'''; + + +-- LANGUAGE_IMPLEMENTATION_VERSION_VIEW +CREATE OR REPLACE VIEW public.language_implementation_version_view AS + SELECT + lv.language_implementation_version_id + , bl.benchmark_language + , lv.language_implementation_version + FROM public.language_implementation_version AS lv + JOIN public.benchmark_language AS bl + ON lv.benchmark_language_id = bl.benchmark_language_id; + +-- ENVIRONMENT_VIEW +CREATE OR REPLACE VIEW public.environment_view AS + SELECT + env.environment_id + , benchmark_language + , language_implementation_version + , dependencies + FROM public.environment AS env + JOIN public.benchmark_language AS language + ON env.benchmark_language_id = language.benchmark_language_id + JOIN public.language_implementation_version AS version + ON env.language_implementation_version_id = version.language_implementation_version_id + JOIN public.dependencies AS deps + ON env.dependencies_id = deps.dependencies_id; +COMMENT ON VIEW public.environment_view IS +E'The build environment used for a reported benchmark run.\n' + '(Will be inferred from each "benchmark_run" if not explicitly added).\n\n' + '- Each entry is unique on\n' + ' ("benchmark_language", "language_implementation_version", "dependencies")\n' + '- "benchmark_language" is unique in the "benchmark_language" table\n' + '- "benchmark_language" plus "language_implementation_version" is unique in\n' + ' the "language_implementation_version" table\n' + '- "dependencies" is unique in the "dependencies" table'; + +-- UNIT_VIEW +CREATE OR REPLACE VIEW public.unit_view AS + SELECT + unit.unit_id + , units + , benchmark_type + , lessisbetter + FROM public.unit AS unit + JOIN public.benchmark_type AS bt + ON unit.benchmark_type_id = bt.benchmark_type_id; + +-- BENCHMARK_VIEW +CREATE OR REPLACE VIEW public.benchmark_view AS + SELECT + b.benchmark_id + , benchmark_name + , parameter_names + , benchmark_description + , benchmark_type + , units + , lessisbetter + , benchmark_version + , benchmark_language + FROM public.benchmark AS b + JOIN public.benchmark_language AS benchmark_language + ON b.benchmark_language_id = benchmark_language.benchmark_language_id + JOIN public.unit AS unit + ON b.unit_id = unit.unit_id + JOIN public.benchmark_type AS benchmark_type + ON unit.benchmark_type_id = benchmark_type.benchmark_type_id; +COMMENT ON VIEW public.benchmark_view IS +E'The details about a particular benchmark.\n\n' + '- "benchmark_name" is unique for a given "benchmark_language"\n' + '- Each entry is unique on\n' + ' ("benchmark_language", "benchmark_name", "benchmark_version")'; + +-- BENCHMARK_RUN_VIEW +CREATE OR REPLACE VIEW public.benchmark_run_view AS + SELECT + run.benchmark_run_id + -- benchmark_view (name, version, language only) + , benchmark_name + , benchmark_version + -- datum + , parameter_values + , value + , git_commit_timestamp + , git_hash + , val_min + , val_q1 + , val_q3 + , val_max + , std_dev + , n_obs + , run_timestamp + , run_metadata + , run_notes + -- machine_view (mac address only) + , mac_address + -- environment_view + , env.benchmark_language + , language_implementation_version + , dependencies + FROM public.benchmark_run AS run + JOIN public.benchmark_view AS benchmark + ON run.benchmark_id = benchmark.benchmark_id + JOIN public.machine_view AS machine + ON run.machine_id = machine.machine_id + JOIN public.environment_view AS env + ON run.environment_id = env.environment_id; +COMMENT ON VIEW public.benchmark_run_view IS +E'Each benchmark run.\n\n' + '- Each entry is unique on the machine, environment, benchmark,\n' + ' and git commit timestamp.'; + +-- FULL_BENCHMARK_RUN_VIEW +CREATE OR REPLACE VIEW public.full_benchmark_run_view AS + SELECT + run.benchmark_run_id + -- benchmark_view + , benchmark_name + , parameter_names + , benchmark_description + , benchmark_type + , units + , lessisbetter + , benchmark_version + -- datum + , parameter_values + , value + , git_commit_timestamp + , git_hash + , val_min + , val_q1 + , val_q3 + , val_max + , std_dev + , n_obs + , run_timestamp + , run_metadata + , run_notes + -- machine_view + , machine_name + , mac_address + , memory_bytes + , cpu_actual_frequency_Hz + , os_name + , architecture_name + , kernel_name + , cpu_model_name + , cpu_core_count + , cpu_thread_count + , cpu_frequency_max_Hz + , cpu_frequency_min_Hz + , cpu_L1d_cache_bytes + , cpu_L1i_cache_bytes + , cpu_L2_cache_bytes + , cpu_L3_cache_bytes + , gpu_information + , gpu_part_number + , gpu_product_name + , machine_other_attributes + -- environment_view + , env.benchmark_language + , env.language_implementation_version + , dependencies + FROM public.benchmark_run AS run + JOIN public.benchmark_view AS benchmark + ON run.benchmark_id = benchmark.benchmark_id + JOIN public.machine_view AS machine + ON run.machine_id = machine.machine_id + JOIN public.environment_view AS env + ON run.environment_id = env.environment_id; + +-- SUMMARIZED_TABLES_VIEW +CREATE VIEW public.summarized_tables_view AS + WITH chosen AS ( + SELECT + cls.oid AS id + , cls.relname as tbl_name + FROM pg_catalog.pg_class AS cls + JOIN pg_catalog.pg_namespace AS ns ON cls.relnamespace = ns.oid + WHERE + cls.relkind = 'r' + AND ns.nspname = 'public' + ), all_constraints AS ( + SELECT + chosen.id AS tbl_id + , chosen.tbl_name + , unnest(conkey) AS col_id + , 'foreign key' AS col_constraint + FROM pg_catalog.pg_constraint + JOIN chosen ON chosen.id = conrelid + WHERE contype = 'f' + + UNION + + SELECT + chosen.id + , chosen.tbl_name + , unnest(indkey) + , 'unique' + FROM pg_catalog.pg_index i + JOIN chosen ON chosen.id = i.indrelid + WHERE i.indisunique AND NOT i.indisprimary + + UNION + + SELECT + chosen.id + , chosen.tbl_name + , unnest(indkey) + , 'primary key' + FROM pg_catalog.pg_index i + JOIN chosen on chosen.id = i.indrelid + WHERE i.indisprimary + ), gathered_constraints AS ( + SELECT + tbl_id + , tbl_name + , col_id + , string_agg(col_constraint, ', ' ORDER BY col_constraint) + AS col_constraint + FROM all_constraints + GROUP BY tbl_id, tbl_name, col_id + ) + SELECT + chosen.tbl_name AS table_name + , columns.attnum AS column_number + , columns.attname AS column_name + , typ.typname AS type_name + , CASE + WHEN columns.attnotnull + THEN 'not null' + ELSE '' + END AS nullable + , CASE + WHEN defaults.adsrc like 'nextval%' + THEN 'serial' + ELSE defaults.adsrc + END AS default_value + , CASE + WHEN gc.col_constraint = '' OR gc.col_constraint IS NULL + THEN cnstrnt.consrc + WHEN cnstrnt.consrc IS NULL + THEN gc.col_constraint + ELSE gc.col_constraint || ', ' || cnstrnt.consrc + END AS description + FROM pg_catalog.pg_attribute AS columns + JOIN chosen ON columns.attrelid = chosen.id + JOIN pg_catalog.pg_type AS typ + ON typ.oid = columns.atttypid + LEFT JOIN gathered_constraints AS gc + ON gc.col_id = columns.attnum + AND gc.tbl_id = columns.attrelid + LEFT JOIN pg_attrdef AS defaults + ON defaults.adrelid = chosen.id + AND defaults.adnum = columns.attnum + LEFT JOIN pg_catalog.pg_constraint AS cnstrnt + ON cnstrnt.conrelid = columns.attrelid + AND columns.attrelid = ANY(cnstrnt.conkey) + WHERE + columns.attnum > 0 + ORDER BY table_name, column_number; +COMMENT ON VIEW public.summarized_tables_view + IS 'A summary of all columns from all tables in the public schema, ' + ' identifying nullability, primary/foreign keys, and data type.'; diff --git a/src/arrow/dev/benchmarking/ddl/3_00_functions_helpers.sql b/src/arrow/dev/benchmarking/ddl/3_00_functions_helpers.sql new file mode 100644 index 000000000..b10b69a4e --- /dev/null +++ b/src/arrow/dev/benchmarking/ddl/3_00_functions_helpers.sql @@ -0,0 +1,643 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +*/ + + +-- PROJECT_DETAILS +CREATE TYPE public.type_project_details AS ( + project_name text + , project_url text + , repo_url text +); + +CREATE OR REPLACE FUNCTION public.project_details() +RETURNS public.type_project_details AS +$$ + SELECT project_name, project_url, repo_url + FROM public.project + ORDER BY last_changed DESC + LIMIT 1 +$$ +LANGUAGE sql STABLE; +COMMENT ON FUNCTION public.project_details() +IS 'Get the current project name, url, and repo url.'; + + +-------------------------- GET-OR-SET FUNCTIONS -------------------------- +-- The following functions have the naming convention "get_<tablename>_id". +-- All of them attempt to SELECT the desired row given the column +-- values, and if it does not exist will INSERT it. +-- +-- When functions are overloaded with fewer columns, it is to allow +-- selection only, given columns that comprise a unique index. + +-- GET_CPU_ID +CREATE OR REPLACE FUNCTION public.get_cpu_id( + cpu_model_name citext + , cpu_core_count integer + , cpu_thread_count integer + , cpu_frequency_max_Hz bigint + , cpu_frequency_min_Hz bigint + , cpu_L1d_cache_bytes integer + , cpu_L1i_cache_bytes integer + , cpu_L2_cache_bytes integer + , cpu_L3_cache_bytes integer +) +RETURNS integer AS +$$ + DECLARE + result integer; + BEGIN + SELECT cpu_id INTO result FROM public.cpu AS cpu + WHERE cpu.cpu_model_name = $1 + AND cpu.cpu_core_count = $2 + AND cpu.cpu_thread_count = $3 + AND cpu.cpu_frequency_max_Hz = $4 + AND cpu.cpu_frequency_min_Hz = $5 + AND cpu.cpu_L1d_cache_bytes = $6 + AND cpu.cpu_L1i_cache_bytes = $7 + AND cpu.cpu_L2_cache_bytes = $8 + AND cpu.cpu_L3_cache_bytes = $9; + + IF result IS NULL THEN + INSERT INTO public.cpu( + cpu_model_name + , cpu_core_count + , cpu_thread_count + , cpu_frequency_max_Hz + , cpu_frequency_min_Hz + , cpu_L1d_cache_bytes + , cpu_L1i_cache_bytes + , cpu_L2_cache_bytes + , cpu_L3_cache_bytes + ) + VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9) + RETURNING cpu_id INTO result; + END IF; + + RETURN result; + END +$$ +LANGUAGE plpgsql; +COMMENT ON FUNCTION public.get_cpu_id( + citext + , integer + , integer + , bigint -- cpu_frequency_max_Hz + , bigint -- cpu_frequency_min_Hz + , integer + , integer + , integer + , integer +) +IS 'Insert or select CPU data, returning "cpu.cpu_id".'; + +-- GET_GPU_ID +CREATE OR REPLACE FUNCTION public.get_gpu_id( + gpu_information citext DEFAULT NULL + , gpu_part_number citext DEFAULT NULL + , gpu_product_name citext DEFAULT NULL +) +RETURNS integer AS +$$ + DECLARE + result integer; + BEGIN + SELECT gpu_id INTO result FROM public.gpu AS gpu + WHERE + gpu.gpu_information = COALESCE($1, '') + AND gpu.gpu_part_number = COALESCE($2, '') + AND gpu.gpu_product_name = COALESCE($3, ''); + + IF result IS NULL THEN + INSERT INTO public.gpu( + gpu_information + , gpu_part_number + , gpu_product_name + ) + VALUES (COALESCE($1, ''), COALESCE($2, ''), COALESCE($3, '')) + RETURNING gpu_id INTO result; + END IF; + + RETURN result; + END +$$ +LANGUAGE plpgsql; +COMMENT ON FUNCTION public.get_gpu_id(citext, citext, citext) +IS 'Insert or select GPU data, returning "gpu.gpu_id".'; + +-- GET_OS_ID +CREATE OR REPLACE FUNCTION public.get_os_id( + os_name citext + , architecture_name citext + , kernel_name citext DEFAULT '' +) +RETURNS integer AS +$$ + DECLARE + result integer; + BEGIN + SELECT os_id INTO result FROM public.os AS os + WHERE os.os_name = $1 + AND os.architecture_name = $2 + AND os.kernel_name = COALESCE($3, ''); + + IF result is NULL THEN + INSERT INTO public.os(os_name, architecture_name, kernel_name) + VALUES ($1, $2, COALESCE($3, '')) + RETURNING os_id INTO result; + END IF; + + RETURN result; + END +$$ +LANGUAGE plpgsql; +COMMENT ON FUNCTION public.get_os_id(citext, citext, citext) +IS 'Insert or select OS data, returning "os.os_id".'; + +-- GET_MACHINE_ID (full signature) +CREATE OR REPLACE FUNCTION public.get_machine_id( + mac_address macaddr + , machine_name citext + , memory_bytes bigint + , cpu_actual_frequency_Hz bigint + -- os + , os_name citext + , architecture_name citext + , kernel_name citext + -- cpu + , cpu_model_name citext + , cpu_core_count integer + , cpu_thread_count integer + , cpu_frequency_max_Hz bigint + , cpu_frequency_min_Hz bigint + , L1d_cache_bytes integer + , L1i_cache_bytes integer + , L2_cache_bytes integer + , L3_cache_bytes integer + -- gpu + , gpu_information citext DEFAULT '' + , gpu_part_number citext DEFAULT NULL + , gpu_product_name citext DEFAULT NULL + -- nullable machine attributes + , machine_other_attributes jsonb DEFAULT NULL +) +RETURNS integer AS +$$ + DECLARE + found_cpu_id integer; + found_gpu_id integer; + found_os_id integer; + result integer; + BEGIN + -- Can't bypass looking up all the values because of unique constraint. + SELECT public.get_cpu_id( + cpu_model_name + , cpu_core_count + , cpu_thread_count + , cpu_frequency_max_Hz + , cpu_frequency_min_Hz + , L1d_cache_bytes + , L1i_cache_bytes + , L2_cache_bytes + , L3_cache_bytes + ) INTO found_cpu_id; + + SELECT public.get_gpu_id( + gpu_information + , gpu_part_number + , gpu_product_name + ) INTO found_gpu_id; + + SELECT public.get_os_id( + os_name + , architecture_name + , kernel_name + ) INTO found_os_id; + + SELECT machine_id INTO result FROM public.machine AS m + WHERE m.os_id = found_os_id + AND m.cpu_id = found_cpu_id + AND m.gpu_id = found_gpu_id + AND m.mac_address = $1 + AND m.machine_name = $2 + AND m.memory_bytes = $3 + AND m.cpu_actual_frequency_Hz = $4; + + IF result IS NULL THEN + INSERT INTO public.machine( + os_id + , cpu_id + , gpu_id + , mac_address + , machine_name + , memory_bytes + , cpu_actual_frequency_Hz + , machine_other_attributes + ) + VALUES (found_os_id, found_cpu_id, found_gpu_id, $1, $2, $3, $4, $20) + RETURNING machine_id INTO result; + END IF; + + RETURN result; + END +$$ +LANGUAGE plpgsql; +COMMENT ON FUNCTION public.get_machine_id( + macaddr + , citext + , bigint -- memory_bytes + , bigint -- cpu_frequency_actual_Hz + -- os + , citext + , citext + , citext + -- cpu + , citext + , integer + , integer + , bigint -- cpu_frequency_max_Hz + , bigint -- cpu_frequency_min_Hz + , integer + , integer + , integer + , integer + -- gpu + , citext + , citext + , citext + -- nullable machine attributes + , jsonb +) +IS 'Insert or select machine data, returning "machine.machine_id".'; + +-- GET_MACHINE_ID (given unique mac_address) +CREATE OR REPLACE FUNCTION public.get_machine_id(mac_address macaddr) +RETURNS integer AS +$$ + SELECT machine_id FROM public.machine AS m + WHERE m.mac_address = $1; +$$ +LANGUAGE sql STABLE; +COMMENT ON FUNCTION public.get_machine_id(macaddr) +IS 'Select machine_id given its mac address, returning "machine.machine_id".'; + +-- GET_BENCHMARK_LANGUAGE_ID +CREATE OR REPLACE FUNCTION public.get_benchmark_language_id(language citext) +RETURNS integer AS +$$ + DECLARE + result integer; + BEGIN + SELECT benchmark_language_id INTO result + FROM public.benchmark_language AS bl + WHERE bl.benchmark_language = language; + + IF result IS NULL THEN + INSERT INTO public.benchmark_language(benchmark_language) + VALUES (language) + RETURNING benchmark_language_id INTO result; + END IF; + + RETURN result; + END +$$ +LANGUAGE plpgsql; +COMMENT ON FUNCTION public.get_benchmark_language_id(citext) +IS 'Insert or select benchmark_language returning ' + '"benchmark_language.benchmark_language_id".'; + +-- GET_LANGUAGE_IMPLEMENTATION_VERSION_ID +CREATE OR REPLACE FUNCTION public.get_language_implementation_version_id( + language citext + , language_implementation_version citext DEFAULT '' +) +RETURNS integer AS +$$ + DECLARE + language_id integer; + result integer; + BEGIN + SELECT public.get_benchmark_language_id($1) INTO language_id; + + SELECT language_implementation_version_id INTO result FROM public.language_implementation_version AS lv + WHERE lv.benchmark_language_id = language_id + AND lv.language_implementation_version = COALESCE($2, ''); + + IF result IS NULL THEN + INSERT INTO + public.language_implementation_version(benchmark_language_id, language_implementation_version) + VALUES (language_id, COALESCE($2, '')) + RETURNING language_implementation_version_id INTO result; + END IF; + + RETURN result; + END +$$ +LANGUAGE plpgsql; +COMMENT ON FUNCTION public.get_language_implementation_version_id(citext, citext) +IS 'Insert or select language and version data, ' + 'returning "language_implementation_version.language_implementation_version_id".'; + +CREATE OR REPLACE FUNCTION public.get_language_implementation_version_id( + -- overload for when language_id is known + language_id integer + , language_implementation_version citext DEFAULT '' +) +RETURNS integer AS +$$ + DECLARE + result integer; + BEGIN + SELECT language_implementation_version_id INTO result FROM public.language_implementation_version AS lv + WHERE lv.benchmark_language_id = language_id + AND lv.language_implementation_version = COALESCE($2, ''); + + IF result IS NULL THEN + INSERT INTO + public.language_implementation_version(benchmark_language_id, language_implementation_version) + VALUES (language_id, COALESCE($2, '')) + RETURNING language_implementation_version_id INTO result; + END IF; + + RETURN result; + END +$$ +LANGUAGE plpgsql; + +-- GET_LANGUAGE_DEPENDENCY_LOOKUP_ID +CREATE OR REPLACE FUNCTION public.get_dependencies_id( + dependencies jsonb DEFAULT '{}'::jsonb +) +RETURNS integer AS +$$ + DECLARE + result integer; + BEGIN + SELECT dependencies_id INTO result + FROM public.dependencies AS ldl + WHERE ldl.dependencies = COALESCE($1, '{}'::jsonb); + + IF result IS NULL THEN + INSERT INTO + public.dependencies(dependencies) + VALUES (COALESCE($1, '{}'::jsonb)) + RETURNING dependencies_id INTO result; + END IF; + + RETURN result; + END +$$ +LANGUAGE plpgsql; +COMMENT ON FUNCTION public.get_dependencies_id(jsonb) +IS 'Insert or select dependencies, returning "dependencies.dependencies_id".'; + +-- GET_ENVIRONMENT_ID +CREATE OR REPLACE FUNCTION public.get_environment_id( + language citext, + language_implementation_version citext DEFAULT '', + dependencies jsonb DEFAULT '{}'::jsonb +) +RETURNS integer AS +$$ + DECLARE + found_language_id integer; + found_version_id integer; + found_dependencies_id integer; + result integer; + BEGIN + SELECT public.get_benchmark_language_id($1) INTO found_language_id; + SELECT + public.get_language_implementation_version_id(found_language_id, $2) + INTO found_version_id; + SELECT + public.get_dependencies_id ($3) + INTO found_dependencies_id; + + SELECT environment_id INTO result FROM public.environment AS e + WHERE e.benchmark_language_id = found_language_id + AND e.language_implementation_version_id = found_version_id + AND e.dependencies_id = found_dependencies_id; + + IF result IS NULL THEN + INSERT INTO + public.environment( + benchmark_language_id + , language_implementation_version_id + , dependencies_id + ) + VALUES (found_language_id, found_version_id, found_dependencies_id) + RETURNING environment_id INTO result; + END IF; + + RETURN result; + END +$$ +LANGUAGE plpgsql; +COMMENT ON FUNCTION public.get_environment_id(citext, citext, jsonb) +IS 'Insert or select language, language version, and dependencies, ' + 'returning "environment.environment_id".'; + +-- GET_BENCHMARK_TYPE_ID (full signature) +CREATE OR REPLACE FUNCTION public.get_benchmark_type_id( + benchmark_type citext + , lessisbetter boolean +) +RETURNS integer AS +$$ + DECLARE + result integer; + BEGIN + SELECT benchmark_type_id INTO result FROM public.benchmark_type AS bt + WHERE bt.benchmark_type = $1 + AND bt.lessisbetter = $2; + + IF result IS NULL THEN + INSERT INTO public.benchmark_type(benchmark_type, lessisbetter) + VALUES($1, $2) + RETURNING benchmark_type_id INTO result; + END IF; + + RETURN result; + END +$$ +LANGUAGE plpgsql; +COMMENT ON FUNCTION public.get_benchmark_type_id(citext, boolean) +IS 'Insert or select benchmark type and lessisbetter, ' + 'returning "benchmark_type.benchmark_type_id".'; + +-- GET_BENCHMARK_TYPE_ID (given unique benchmark_type string only) +CREATE OR REPLACE FUNCTION public.get_benchmark_type_id( + benchmark_type citext +) +RETURNS integer AS +$$ + DECLARE + result integer; + BEGIN + SELECT benchmark_type_id INTO result FROM public.benchmark_type AS bt + WHERE bt.benchmark_type = $1; + + RETURN result; + END +$$ +LANGUAGE plpgsql; +COMMENT ON FUNCTION public.get_benchmark_type_id(citext) +IS 'Select benchmark_type_id given benchmark type (e.g. ''time''), ' + 'returning "benchmark_type.benchmark_type_id".'; + +-- GET_UNIT_ID (full signature) +CREATE OR REPLACE FUNCTION public.get_unit_id( + benchmark_type citext + , units citext + , lessisbetter boolean DEFAULT NULL +) +RETURNS integer AS +$$ + DECLARE + found_benchmark_type_id integer; + result integer; + BEGIN + + IF ($3 IS NOT NULL) -- if lessisbetter is not null + THEN + SELECT public.get_benchmark_type_id($1, $3) + INTO found_benchmark_type_id; + ELSE + SELECT public.get_benchmark_type_id($1) + INTO found_benchmark_type_id; + END IF; + + SELECT unit_id INTO result FROM public.unit AS u + WHERE u.benchmark_type_id = found_benchmark_type_id + AND u.units = $2; + + IF result IS NULL THEN + INSERT INTO public.unit(benchmark_type_id, units) + VALUES(found_benchmark_type_id, $2) + RETURNING unit_id INTO result; + END IF; + + RETURN result; + END +$$ +LANGUAGE plpgsql; +COMMENT ON FUNCTION public.get_unit_id(citext, citext, boolean) +IS 'Insert or select benchmark type (e.g. ''time''), ' + 'units string (e.g. ''miliseconds''), ' + 'and "lessisbetter" (true if smaller benchmark values are better), ' + 'returning "unit.unit_id".'; + +-- GET_UNIT_ID (given unique units string only) +CREATE OR REPLACE FUNCTION public.get_unit_id(units citext) +RETURNS integer AS +$$ + SELECT unit_id FROM public.unit AS u + WHERE u.units = units; +$$ +LANGUAGE sql STABLE; +COMMENT ON FUNCTION public.get_unit_id(citext) +IS 'Select unit_id given unit name, returning "unit.unit_id".'; + +-- GET_BENCHMARK_ID (full signature) +CREATE OR REPLACE FUNCTION public.get_benchmark_id( + benchmark_language citext + , benchmark_name citext + , parameter_names text[] + , benchmark_description text + , benchmark_version citext + , benchmark_type citext + , units citext + , lessisbetter boolean +) +RETURNS integer AS +$$ + DECLARE + found_benchmark_language_id integer; + found_unit_id integer; + result integer; + BEGIN + SELECT public.get_benchmark_language_id( + benchmark_language + ) INTO found_benchmark_language_id; + + SELECT public.get_unit_id( + benchmark_type + , units + , lessisbetter + ) INTO found_unit_id; + + SELECT benchmark_id INTO result FROM public.benchmark AS b + WHERE b.benchmark_language_id = found_benchmark_language_id + AND b.benchmark_name = $2 + -- handle nullable "parameter_names" + AND b.parameter_names IS NOT DISTINCT FROM $3 + AND b.benchmark_description = $4 + AND b.benchmark_version = $5 + AND b.unit_id = found_unit_id; + + IF result IS NULL THEN + INSERT INTO public.benchmark( + benchmark_language_id + , benchmark_name + , parameter_names + , benchmark_description + , benchmark_version + , unit_id + ) + VALUES (found_benchmark_language_id, $2, $3, $4, $5, found_unit_id) + RETURNING benchmark_id INTO result; + END IF; + + RETURN result; + END +$$ +LANGUAGE plpgsql; +COMMENT ON FUNCTION public.get_benchmark_id( + citext + , citext + , text[] + , text + , citext + , citext + , citext + , boolean +) +IS 'Insert/select benchmark given data, returning "benchmark.benchmark_id".'; + +-- GET_BENCHMARK_ID (by unique columns) +CREATE OR REPLACE FUNCTION public.get_benchmark_id( + benchmark_language citext + , benchmark_name citext + , benchmark_version citext +) +RETURNS integer AS +$$ + WITH language AS ( + SELECT public.get_benchmark_language_id(benchmark_language) AS id + ) + SELECT b.benchmark_id + FROM public.benchmark AS b + JOIN language ON b.benchmark_language_id = language.id + WHERE b.benchmark_name = benchmark_name + AND benchmark_version = benchmark_version +$$ +LANGUAGE sql STABLE; +COMMENT ON FUNCTION public.get_benchmark_id(citext, citext, citext) +IS 'Select existing benchmark given unique columns, ' + 'returning "benchmark.benchmark_id".'; diff --git a/src/arrow/dev/benchmarking/ddl/3_01_functions_triggers.sql b/src/arrow/dev/benchmarking/ddl/3_01_functions_triggers.sql new file mode 100644 index 000000000..b6ce4741a --- /dev/null +++ b/src/arrow/dev/benchmarking/ddl/3_01_functions_triggers.sql @@ -0,0 +1,574 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +*/ + + +-------------------------- TRIGGER FUNCTIONS -------------------------- +-- Views that do not select from a single table or view are not +-- automatically updatable. These trigger functions are intended +-- to be run instead of INSERT into the complicated views. + + +-- LANGUAGE_IMPLEMENTATION_VERSION_VIEW_INSERT_ROW +CREATE OR REPLACE FUNCTION public.language_implementation_version_view_insert_row() +RETURNS trigger AS +$$ + DECLARE + language_id integer; + result integer; + BEGIN + IF NEW.benchmark_language IS NULL THEN + RAISE EXCEPTION 'Column "benchmark_language" cannot be NULL.'; + END IF; + IF NEW.language_implementation_version IS NULL THEN + RAISE EXCEPTION + 'Column "language_implementation_version" cannot be NULL (use '''' instead).'; + END IF; + + SELECT public.get_benchmark_language_id(NEW.benchmark_language) + INTO language_id; + + SELECT language_implementation_version_id INTO result FROM public.language_implementation_version AS lv + WHERE lv.benchmark_language_id = language_id + AND lv.language_implementation_version = NEW.language_implementation_version; + + IF result IS NOT NULL THEN + -- row already exists + RETURN NULL; + ELSE + INSERT INTO + public.language_implementation_version( + benchmark_language_id + , language_implementation_version + ) + VALUES (language_id, NEW.language_implementation_version) + RETURNING language_implementation_version_id INTO NEW.language_implementation_version_id; + END IF; + + RETURN NEW; + END +$$ +LANGUAGE plpgsql; + +-- ENVIRONMENT_VIEW_INSERT_ROW +CREATE OR REPLACE FUNCTION public.environment_view_insert_row() +RETURNS trigger AS +$$ + DECLARE + found_language_id integer; + found_version_id integer; + found_dependencies_id integer; + result integer; + BEGIN + IF NEW.benchmark_language IS NULL + THEN + RAISE EXCEPTION 'Column "benchmark_language" cannot be NULL.'; + END IF; + IF NEW.language_implementation_version IS NULL THEN + RAISE EXCEPTION + 'Column "language_implementation_version" cannot be NULL (use '''' instead).'; + END IF; + + SELECT public.get_benchmark_language_id(NEW.benchmark_language) + INTO found_language_id; + + SELECT public.get_language_implementation_version_id( + found_language_id + , NEW.language_implementation_version + ) + INTO found_version_id; + + SELECT public.get_dependencies_id(NEW.dependencies) + INTO found_dependencies_id; + + SELECT environment_id INTO result FROM public.environment AS e + WHERE e.benchmark_language_id = found_language_id + AND e.language_implementation_version_id = found_version_id + AND e.dependencies_id = found_dependencies_id; + + IF result IS NOT NULL THEN + -- row already exists + RETURN NULL; + ELSE + INSERT INTO + public.environment( + benchmark_language_id + , language_implementation_version_id + , dependencies_id + ) + VALUES (found_language_id, found_version_id, found_dependencies_id) + RETURNING environment_id INTO NEW.environment_id; + END IF; + + RETURN NEW; + END +$$ +LANGUAGE plpgsql; + +-- MACHINE_VIEW_INSERT_ROW +CREATE OR REPLACE FUNCTION public.machine_view_insert_row() +RETURNS trigger AS +$$ + DECLARE + found_cpu_id integer; + found_gpu_id integer; + found_os_id integer; + result integer; + BEGIN + IF ( + NEW.machine_name IS NULL + OR NEW.memory_bytes IS NULL + OR NEW.cpu_model_name IS NULL + OR NEW.cpu_core_count IS NULL + OR NEW.cpu_thread_count IS NULL + OR NEW.cpu_frequency_max_Hz IS NULL + OR NEW.cpu_frequency_min_Hz IS NULL + OR NEW.cpu_L1d_cache_bytes IS NULL + OR NEW.cpu_L1i_cache_bytes IS NULL + OR NEW.cpu_L2_cache_bytes IS NULL + OR NEW.cpu_L3_cache_bytes IS NULL + OR NEW.os_name IS NULL + OR NEW.architecture_name IS NULL + ) + THEN + RAISE EXCEPTION 'None of the columns in "machine_view" can be NULL. ' + 'all columns in table "gpu" will default to the empty string '''', ' + 'as will blank "os.kernel_name". This is to allow uniqueness ' + 'constraints to work. Thank you!.'; + END IF; + + SELECT public.get_cpu_id( + NEW.cpu_model_name + , NEW.cpu_core_count + , NEW.cpu_thread_count + , NEW.cpu_frequency_max_Hz + , NEW.cpu_frequency_min_Hz + , NEW.cpu_L1d_cache_bytes + , NEW.cpu_L1i_cache_bytes + , NEW.cpu_L2_cache_bytes + , NEW.cpu_L3_cache_bytes + ) INTO found_cpu_id; + + SELECT public.get_gpu_id( + NEW.gpu_information + , NEW.gpu_part_number + , NEW.gpu_product_name + ) INTO found_gpu_id; + + SELECT public.get_os_id( + NEW.os_name + , NEW.architecture_name + , NEW.kernel_name + ) INTO found_os_id; + + SELECT machine_id INTO result FROM public.machine AS m + WHERE m.os_id = found_os_id + AND m.cpu_id = found_cpu_id + AND m.gpu_id = found_gpu_id + AND m.machine_name = NEW.machine_name + AND m.memory_bytes = NEW.memory_bytes + AND m.cpu_actual_frequency_Hz = NEW.cpu_actual_frequency_Hz; + + IF result IS NOT NULL THEN + -- row already exists + RETURN NULL; + ELSE + INSERT INTO public.machine( + os_id + , cpu_id + , gpu_id + , machine_name + , mac_address + , memory_bytes + , cpu_actual_frequency_Hz + , machine_other_attributes + ) + VALUES ( + found_os_id + , found_cpu_id + , found_gpu_id + , NEW.machine_name + , NEW.mac_address + , NEW.memory_bytes + , NEW.cpu_actual_frequency_Hz + , NEW.machine_other_attributes + ) + RETURNING machine_id INTO NEW.machine_id; + END IF; + + RETURN NEW; + END +$$ +LANGUAGE plpgsql; + +-- UNIT_VIEW_INSERT_ROW +CREATE OR REPLACE FUNCTION public.unit_view_insert_row() +RETURNS trigger AS +$$ + DECLARE + found_benchmark_type_id integer; + result integer; + BEGIN + IF (NEW.benchmark_type IS NULL OR NEW.units IS NULL) + THEN + RAISE EXCEPTION E'"benchmark_type" and "units" cannot be NULL.\n' + 'Further, if the "benchmark_type" has never been defined, ' + '"lessisbetter" must be defined or there will be an error.'; + END IF; + + -- It's OK for "lessisbetter" = NULL if "benchmark_type" already exists. + SELECT public.get_benchmark_type_id(NEW.benchmark_type, NEW.lessisbetter) + INTO found_benchmark_type_id; + + SELECT unit_id INTO result FROM public.unit AS u + WHERE u.benchmark_type_id = found_benchmark_type_id + AND u.units = NEW.units; + + IF result IS NOT NULL THEN + -- row already exists + RETURN NULL; + ELSE + INSERT INTO public.unit ( + benchmark_type_id + , units + ) + VALUES ( + found_benchmark_type_id + , NEW.units + ) + RETURNING unit_id INTO NEW.unit_id; + END IF; + + RETURN NEW; + END +$$ +LANGUAGE plpgsql; + +-- BENCHMARK_VIEW_INSERT_ROW +CREATE OR REPLACE FUNCTION public.benchmark_view_insert_row() +RETURNS trigger AS +$$ + DECLARE + found_benchmark_language_id integer; + found_units_id integer; + result integer; + BEGIN + IF ( + NEW.benchmark_name IS NULL + OR NEW.benchmark_version IS NULL + OR NEW.benchmark_language IS NULL + OR NEW.benchmark_type IS NULL + OR NEW.benchmark_description IS NULL + OR NEW.units IS NULL + ) + THEN + RAISE EXCEPTION 'The only nullable column in this view is ' + '"benchmark.parameter_names".'; + END IF; + + SELECT public.get_benchmark_language_id( + NEW.benchmark_language + ) INTO found_benchmark_language_id; + + SELECT public.get_unit_id(NEW.units) INTO found_units_id; + + SELECT benchmark_id INTO result FROM public.benchmark AS b + WHERE b.benchmark_language_id = found_benchmark_language_id + AND b.benchmark_name = NEW.benchmark_name + -- handle nullable "parameter_names" + AND b.parameter_names IS NOT DISTINCT FROM NEW.parameter_names + AND b.benchmark_description = NEW.benchmark_description + AND b.benchmark_version = NEW.benchmark_version + AND b.unit_id = found_units_id; + + IF result IS NOT NULL THEN + -- row already exists + RETURN NULL; + ELSE + INSERT INTO public.benchmark( + benchmark_language_id + , benchmark_name + , parameter_names + , benchmark_description + , benchmark_version + , unit_id + ) + VALUES ( + found_benchmark_language_id + , NEW.benchmark_name + , NEW.parameter_names + , NEW.benchmark_description + , NEW.benchmark_version + , found_units_id + ) + RETURNING benchmark_id INTO NEW.benchmark_id; + END IF; + + RETURN NEW; + END +$$ +LANGUAGE plpgsql; + +-- BENCHMARK_RUN_VIEW_INSERT_ROW +CREATE OR REPLACE FUNCTION public.benchmark_run_view_insert_row() +RETURNS trigger AS +$$ + DECLARE + found_benchmark_id integer; + found_benchmark_language_id integer; + found_machine_id integer; + found_environment_id integer; + found_language_implementation_version_id integer; + BEGIN + IF ( + NEW.benchmark_name IS NULL + OR NEW.benchmark_version IS NULL + OR NEW.benchmark_language IS NULL + OR NEW.value IS NULL + OR NEW.run_timestamp IS NULL + OR NEW.git_commit_timestamp IS NULL + OR NEW.git_hash IS NULL + OR NEW.language_implementation_version IS NULL + OR NEW.mac_address IS NULL + ) + THEN + RAISE EXCEPTION 'Only the following columns can be NULL: ' + '"parameter_names", "val_min", "val_q1", "val_q3", "val_max".'; + END IF; + + SELECT public.get_benchmark_id( + NEW.benchmark_language + , NEW.benchmark_name + , NEW.benchmark_version + ) INTO found_benchmark_id; + + SELECT public.get_benchmark_language_id( + NEW.benchmark_language + ) INTO found_benchmark_language_id; + + SELECT public.get_machine_id( + NEW.mac_address + ) INTO found_machine_id; + + SELECT public.get_environment_id( + NEW.benchmark_language + , NEW.language_implementation_version + , NEW.dependencies + ) INTO found_environment_id; + + SELECT public.get_language_implementation_version_id( + found_benchmark_language_id, + NEW.language_implementation_version + ) INTO found_language_implementation_version_id; + + INSERT INTO public.benchmark_run ( + parameter_values + , value + , git_commit_timestamp + , git_hash + , val_min + , val_q1 + , val_q3 + , val_max + , std_dev + , n_obs + , run_timestamp + , run_metadata + , run_notes + , machine_id + , benchmark_language_id + , language_implementation_version_id + , environment_id + , benchmark_id + ) + VALUES ( + COALESCE(NEW.parameter_values, '{}'::jsonb) + , NEW.value + , NEW.git_commit_timestamp + , NEW.git_hash + , NEW.val_min + , NEW.val_q1 + , NEW.val_q3 + , NEW.val_max + , NEW.std_dev + , NEW.n_obs + , NEW.run_timestamp + , NEW.run_metadata + , NEW.run_notes + , found_machine_id + , found_benchmark_language_id + , found_language_implementation_version_id + , found_environment_id + , found_benchmark_id + ) returning benchmark_run_id INTO NEW.benchmark_run_id; + + RETURN NEW; + END +$$ +LANGUAGE plpgsql; + +-- FULL_BENCHMARK_RUN_VIEW_INSERT_ROW +CREATE OR REPLACE FUNCTION public.full_benchmark_run_view_insert_row() +RETURNS trigger AS +$$ + DECLARE + found_benchmark_id integer; + found_benchmark_language_id integer; + found_machine_id integer; + found_environment_id integer; + found_language_implementation_version_id integer; + BEGIN + IF ( + NEW.value IS NULL + OR NEW.git_hash IS NULL + OR NEW.git_commit_timestamp IS NULL + OR NEW.run_timestamp IS NULL + -- benchmark + OR NEW.benchmark_name IS NULL + OR NEW.benchmark_description IS NULL + OR NEW.benchmark_version IS NULL + OR NEW.benchmark_language IS NULL + -- unit + OR NEW.benchmark_type IS NULL + OR NEW.units IS NULL + OR NEW.lessisbetter IS NULL + -- machine + OR NEW.machine_name IS NULL + OR NEW.memory_bytes IS NULL + OR NEW.cpu_model_name IS NULL + OR NEW.cpu_core_count IS NULL + OR NEW.os_name IS NULL + OR NEW.architecture_name IS NULL + OR NEW.kernel_name IS NULL + OR NEW.cpu_model_name IS NULL + OR NEW.cpu_core_count IS NULL + OR NEW.cpu_thread_count IS NULL + OR NEW.cpu_frequency_max_Hz IS NULL + OR NEW.cpu_frequency_min_Hz IS NULL + OR NEW.cpu_L1d_cache_bytes IS NULL + OR NEW.cpu_L1i_cache_bytes IS NULL + OR NEW.cpu_L2_cache_bytes IS NULL + OR NEW.cpu_L3_cache_bytes IS NULL + ) + THEN + RAISE EXCEPTION 'Only the following columns can be NULL: ' + '"machine_other_attributes", "parameter_names", "val_min", ' + '"val_q1", "val_q3", "val_max", "run_metadata", "run_notes". ' + 'If "gpu_information", "gpu_part_number", "gpu_product_name", or ' + '"kernel_name" are null, they will be silently turned into an ' + 'empty string ('''').'; + END IF; + + SELECT public.get_benchmark_id( + NEW.benchmark_language + , NEW.benchmark_name + , NEW.parameter_names + , NEW.benchmark_description + , NEW.benchmark_version + , NEW.benchmark_type + , NEW.units + , NEW.lessisbetter + ) INTO found_benchmark_id; + + SELECT public.get_benchmark_language_id( + NEW.benchmark_language + ) INTO found_benchmark_language_id; + + SELECT public.get_machine_id( + NEW.mac_address + , NEW.machine_name + , NEW.memory_bytes + , NEW.cpu_actual_frequency_Hz + -- os + , NEW.os_name + , NEW.architecture_name + , NEW.kernel_name + -- cpu + , NEW.cpu_model_name + , NEW.cpu_core_count + , NEW.cpu_thread_count + , NEW.cpu_frequency_max_Hz + , NEW.cpu_frequency_min_Hz + , NEW.cpu_L1d_cache_bytes + , NEW.cpu_L1i_cache_bytes + , NEW.cpu_L2_cache_bytes + , NEW.cpu_L3_cache_bytes + -- gpu + , NEW.gpu_information + , NEW.gpu_part_number + , NEW.gpu_product_name + -- nullable machine attributes + , NEW.machine_other_attributes + ) INTO found_machine_id; + + SELECT public.get_environment_id( + NEW.benchmark_language + , NEW.language_implementation_version + , NEW.dependencies + ) INTO found_environment_id; + + SELECT public.get_language_implementation_version_id( + found_benchmark_language_id, + NEW.language_implementation_version + ) INTO found_language_implementation_version_id; + + INSERT INTO public.benchmark_run ( + parameter_values + , value + , git_commit_timestamp + , git_hash + , val_min + , val_q1 + , val_q3 + , val_max + , std_dev + , n_obs + , run_timestamp + , run_metadata + , run_notes + , machine_id + , benchmark_language_id + , language_implementation_version_id + , environment_id + , benchmark_id + ) + VALUES ( + NEW.parameter_values + , NEW.value + , NEW.git_commit_timestamp + , NEW.git_hash + , NEW.val_min + , NEW.val_q1 + , NEW.val_q3 + , NEW.val_max + , NEW.std_dev + , NEW.n_obs + , NEW.run_timestamp + , NEW.run_metadata + , NEW.run_notes + , found_machine_id + , found_benchmark_language_id + , found_language_implementation_version_id + , found_environment_id + , found_benchmark_id + ) returning benchmark_run_id INTO NEW.benchmark_run_id; + + RETURN NEW; + END +$$ +LANGUAGE plpgsql; diff --git a/src/arrow/dev/benchmarking/ddl/3_02_functions_ingestion.sql b/src/arrow/dev/benchmarking/ddl/3_02_functions_ingestion.sql new file mode 100644 index 000000000..000c61d00 --- /dev/null +++ b/src/arrow/dev/benchmarking/ddl/3_02_functions_ingestion.sql @@ -0,0 +1,323 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +*/ + + +-------------------------- IMPORT HELPERS -------------------------- +-- Load from JSON (from https://stackoverflow.com/a/48396608) +-- How to use it in the psql client: +-- \set content `cat /examples/machine.json` +-- select ingest_machine(:'content'::jsonb); +-- INGEST_MACHINE_VIEW +CREATE OR REPLACE FUNCTION public.ingest_machine_view(from_jsonb jsonb) +RETURNS integer AS +$$ + DECLARE + result integer; + BEGIN + INSERT INTO public.machine_view + SELECT * FROM jsonb_populate_record(null::public.machine_view, from_jsonb) + RETURNING machine_id INTO result; + RETURN result; + END +$$ +LANGUAGE plpgsql; +COMMENT ON FUNCTION public.ingest_machine_view(jsonb) IS + E'The argument is a JSON object. NOTE: key names must be entirely\n' + 'lowercase, or the insert will fail. Extra key-value pairs are ignored.\n' + 'Example::\n\n' + ' {\n' + ' "mac_address": "0a:00:2d:01:02:03",\n' + ' "machine_name": "Yet-Another-Machine-Name",\n' + ' "memory_bytes": 8589934592,\n' + ' "cpu_actual_frequency_hz": 2300000000,\n' + ' "os_name": "OSX",\n' + ' "architecture_name": "x86_64",\n' + ' "kernel_name": "18.2.0",\n' + ' "cpu_model_name": "Intel(R) Core(TM) i5-7360U CPU @ 2.30GHz",\n' + ' "cpu_core_count": 2,\n' + ' "cpu_thread_count": 4,\n' + ' "cpu_frequency_max_hz": 2300000000,\n' + ' "cpu_frequency_min_hz": 2300000000,\n' + ' "cpu_l1d_cache_bytes": 32768,\n' + ' "cpu_l1i_cache_bytes": 32768,\n' + ' "cpu_l2_cache_bytes": 262144,\n' + ' "cpu_l3_cache_bytes": 4194304,\n' + ' "machine_other_attributes": {"just": "an example"},\n' + ' "gpu_information": "",\n' + ' "gpu_part_number": "",\n' + ' "gpu_product_name": ""\n' + ' }\n\n' + 'To identify which columns in "machine_view" are required,\n' + 'please see the view documentation in :ref:`benchmark-data-model`.\n'; + +-- INGEST_BENCHMARK_VIEW +CREATE OR REPLACE FUNCTION public.ingest_benchmark_view(from_jsonb jsonb) +RETURNS setof integer AS +$$ + BEGIN + RETURN QUERY + INSERT INTO public.benchmark_view + SELECT * FROM jsonb_populate_recordset( + null::public.benchmark_view + , from_jsonb + ) + RETURNING benchmark_id; + END +$$ +LANGUAGE plpgsql; +COMMENT ON FUNCTION public.ingest_benchmark_view(jsonb) IS + E'The argument is a JSON object. NOTE: key names must be entirely\n' + 'lowercase, or the insert will fail. Extra key-value pairs are ignored.\n' + 'Example::\n\n' + ' [\n' + ' {\n' + ' "benchmark_name": "Benchmark 1",\n' + ' "parameter_names": ["arg0", "arg1", "arg2"],\n' + ' "benchmark_description": "First benchmark",\n' + ' "benchmark_type": "Time",\n' + ' "units": "miliseconds",\n' + ' "lessisbetter": true,\n' + ' "benchmark_version": "second version",\n' + ' "benchmark_language": "Python"\n' + ' },\n' + ' {\n' + ' "benchmark_name": "Benchmark 2",\n' + ' "parameter_names": ["arg0", "arg1"],\n' + ' "benchmark_description": "Description 2.",\n' + ' "benchmark_type": "Time",\n' + ' "units": "nanoseconds",\n' + ' "lessisbetter": true,\n' + ' "benchmark_version": "second version",\n' + ' "benchmark_language": "Python"\n' + ' }\n' + ' ]\n\n' + 'To identify which columns in "benchmark_view" are required,\n' + 'please see the view documentation in :ref:`benchmark-data-model`.\n'; + +-- INGEST_BENCHMARK_RUN_VIEW +CREATE OR REPLACE FUNCTION public.ingest_benchmark_run_view(from_jsonb jsonb) +RETURNS setof bigint AS +$$ + BEGIN + RETURN QUERY + INSERT INTO public.benchmark_run_view + SELECT * FROM + jsonb_populate_recordset(null::public.benchmark_run_view, from_jsonb) + RETURNING benchmark_run_id; + END +$$ +LANGUAGE plpgsql; +COMMENT ON FUNCTION public.ingest_benchmark_run_view(jsonb) IS + E'The argument is a JSON object. NOTE: key names must be entirely\n' + 'lowercase, or the insert will fail. Extra key-value pairs are ignored.\n' + 'Example::\n\n' + ' [\n' + ' {\n' + ' "benchmark_name": "Benchmark 2",\n' + ' "benchmark_version": "version 0",\n' + ' "parameter_values": {"arg0": 100, "arg1": 5},\n' + ' "value": 2.5,\n' + ' "git_commit_timestamp": "2019-02-08 22:35:53 +0100",\n' + ' "git_hash": "324d3cf198444a",\n' + ' "val_min": 1,\n' + ' "val_q1": 2,\n' + ' "val_q3": 3,\n' + ' "val_max": 4,\n' + ' "std_dev": 1.41,\n' + ' "n_obs": 8,\n' + ' "run_timestamp": "2019-02-14 03:00:05 -0600",\n' + ' "mac_address": "08:00:2b:01:02:03",\n' + ' "benchmark_language": "Python",\n' + ' "language_implementation_version": "CPython 2.7",\n' + ' "dependencies": {"six": "", "numpy": "1.14", "other_lib": "1.0"}\n' + ' },\n' + ' {\n' + ' "benchmark_name": "Benchmark 2",\n' + ' "benchmark_version": "version 0",\n' + ' "parameter_values": {"arg0": 1000, "arg1": 5},\n' + ' "value": 5,\n' + ' "git_commit_timestamp": "2019-02-08 22:35:53 +0100",\n' + ' "git_hash": "324d3cf198444a",\n' + ' "std_dev": 3.14,\n' + ' "n_obs": 8,\n' + ' "run_timestamp": "2019-02-14 03:00:10 -0600",\n' + ' "mac_address": "08:00:2b:01:02:03",\n' + ' "benchmark_language": "Python",\n' + ' "language_implementation_version": "CPython 2.7",\n' + ' "dependencies": {"six": "", "numpy": "1.14", "other_lib": "1.0"}\n' + ' }\n' + ' ]\n' + 'To identify which columns in "benchmark_run_view" are required,\n' + 'please see the view documentation in :ref:`benchmark-data-model`.\n'; + +-- INGEST_BENCHMARK_RUNS_WITH_CONTEXT +CREATE OR REPLACE FUNCTION public.ingest_benchmark_runs_with_context(from_jsonb jsonb) +RETURNS setof bigint AS +$$ + DECLARE + context_jsonb jsonb; + found_environment_id integer; + found_machine_id integer; + BEGIN + SELECT from_jsonb -> 'context' INTO context_jsonb; + + SELECT public.get_machine_id((context_jsonb ->> 'mac_address')::macaddr) + INTO found_machine_id; + + SELECT get_environment_id( + (context_jsonb ->> 'benchmark_language')::citext + , (context_jsonb ->> 'language_implementation_version')::citext + , context_jsonb -> 'dependencies' + ) INTO found_environment_id; + + RETURN QUERY + WITH run_datum AS ( + SELECT * + FROM jsonb_to_recordset(from_jsonb -> 'benchmarks') + AS x( + benchmark_name citext + , parameter_values jsonb + , value numeric + , val_min numeric + , val_q1 numeric + , val_q3 numeric + , val_max numeric + , std_dev numeric + , n_obs integer + , run_timestamp timestamp (0) with time zone + , run_metadata jsonb + , run_notes text + ) + ), benchmark_name_and_id AS ( + SELECT + key AS benchmark_name + , public.get_benchmark_id( + (context_jsonb ->> 'benchmark_language')::citext + , key::citext -- benchmark_name + , value::citext -- benchmark_version + ) AS benchmark_id + FROM jsonb_each_text(from_jsonb -> 'benchmark_version') + ) + INSERT INTO public.benchmark_run ( + benchmark_id + -- run_datum + , parameter_values + , value + , val_min + , val_q1 + , val_q3 + , val_max + , std_dev + , n_obs + , run_metadata + , run_notes + -- additional context information + , git_commit_timestamp + , git_hash + , run_timestamp + -- machine + , machine_id + -- environment + , environment_id + , language_implementation_version_id + , benchmark_language_id + ) + SELECT + b.benchmark_id + -- run_datum + , run_datum.parameter_values + , run_datum.value + , run_datum.val_min + , run_datum.val_q1 + , run_datum.val_q3 + , run_datum.val_max + , run_datum.std_dev + , run_datum.n_obs + , run_datum.run_metadata + , run_datum.run_notes + -- additional context information + , (context_jsonb ->> 'git_commit_timestamp')::timestamp (0) with time zone + , context_jsonb ->> 'git_hash' + , (context_jsonb ->> 'run_timestamp')::timestamp (0) with time zone + -- machine + , found_machine_id + -- environment + , e.environment_id + , e.language_implementation_version_id + , e.benchmark_language_id + FROM run_datum + JOIN public.environment AS e + ON e.environment_id = found_environment_id + JOIN benchmark_name_and_id AS b + ON b.benchmark_name = run_datum.benchmark_name + RETURNING benchmark_run_id; + END +$$ +LANGUAGE plpgsql; +COMMENT ON FUNCTION public.ingest_benchmark_runs_with_context(jsonb) IS + E'The argument is a JSON object. NOTE: key names must be entirely\n' + 'lowercase, or the insert will fail. Extra key-value pairs are ignored.\n' + 'The object contains three key-value pairs::\n\n' + ' {"context": {\n' + ' "mac_address": "08:00:2b:01:02:03",\n' + ' "benchmark_language": "Python",\n' + ' "language_implementation_version": "CPython 3.6",\n' + ' "dependencies": {"six": "", "numpy": "1.14", "other_lib": "1.0"},\n' + ' "git_commit_timestamp": "2019-02-14 22:42:22 +0100",\n' + ' "git_hash": "123456789abcde",\n' + ' "run_timestamp": "2019-02-14 03:00:40 -0600",\n' + ' "extra stuff": "does not hurt anything and will not be added."\n' + ' },\n' + ' "benchmark_version": {\n' + ' "Benchmark Name 1": "Any string can be a version.",\n' + ' "Benchmark Name 2": "A git hash can be a version.",\n' + ' "An Unused Benchmark Name": "Will be ignored."\n' + ' },\n' + ' "benchmarks": [\n' + ' {\n' + ' "benchmark_name": "Benchmark Name 1",\n' + ' "parameter_values": {"argument1": 1, "argument2": "value2"},\n' + ' "value": 42,\n' + ' "val_min": 41.2,\n' + ' "val_q1": 41.5,\n' + ' "val_q3": 42.5,\n' + ' "val_max": 42.8,\n' + ' "std_dev": 0.5,\n' + ' "n_obs": 100,\n' + ' "run_metadata": {"any": "key-value pairs"},\n' + ' "run_notes": "Any relevant notes."\n' + ' },\n' + ' {\n' + ' "benchmark_name": "Benchmark Name 2",\n' + ' "parameter_values": {"not nullable": "Use {} if no params."},\n' + ' "value": 8,\n' + ' "std_dev": 1,\n' + ' "n_obs": 2,\n' + ' }\n' + ' ]\n' + ' }\n\n' + '- The entry for "context" contains the machine, environment, and timestamp\n' + ' information common to all of the runs\n' + '- The entry for "benchmark_version" maps benchmark\n' + ' names to their version strings. (Which can be a git hash,\n' + ' the entire code string, a number, or any other string of your choice.)\n' + '- The entry for "benchmarks" is a list of benchmark run data\n' + ' for the given context and benchmark versions. The first example\n' + ' benchmark run entry contains all possible values, even\n' + ' nullable ones, and the second entry omits all nullable values.\n\n'; diff --git a/src/arrow/dev/benchmarking/ddl/3_10_functions_documentation.sql b/src/arrow/dev/benchmarking/ddl/3_10_functions_documentation.sql new file mode 100644 index 000000000..6b2a05790 --- /dev/null +++ b/src/arrow/dev/benchmarking/ddl/3_10_functions_documentation.sql @@ -0,0 +1,395 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +*/ + + +-- _DOCUMENTATION_INGESTION +CREATE OR REPLACE FUNCTION public._documentation_ingestion() +RETURNS text AS +$$ + WITH ingestion_docs AS ( + SELECT + proname || E'\n' + || rpad('', character_length(proname), '-') + || E'\n\n:code:`' + || proname || '(' + || string_agg(a.argname || ' ' || typname , ', ') + || E')`\n\n' + || description + || E'\n\n\nback to `Benchmark data model <benchmark-data-model>`_\n' + AS docs + FROM pg_catalog.pg_proc + JOIN pg_catalog.pg_namespace + ON nspname='public' + AND pg_namespace.oid = pronamespace + AND proname LIKE '%ingest%' + JOIN pg_catalog.pg_description + ON pg_description.objoid=pg_proc.oid, + LATERAL unnest(proargnames, proargtypes) AS a(argname, argtype) + JOIN pg_catalog.pg_type + ON pg_type.oid = a.argtype + GROUP BY proname, description + ) + SELECT + string_agg(docs, E'\n\n') AS docs + FROM ingestion_docs; +$$ +LANGUAGE sql STABLE; + +-- _DOCUMENTATION_VIEW_DETAILS +CREATE OR REPLACE FUNCTION public._documentation_view_details(view_name citext) +RETURNS TABLE( + column_name name + , type_name name + , nullable text + , default_value text + , description text +) AS +$$ + WITH view_columns AS ( + SELECT + attname AS column_name + , attnum AS column_order + FROM pg_catalog.pg_attribute + WHERE attrelid=view_name::regclass + ) + SELECT + t.column_name + , type_name + , coalesce(nullable, '') + , coalesce(default_value, '') + , coalesce(description, '') + FROM public.summarized_tables_view AS t + JOIN view_columns AS v ON v.column_name = t.column_name + WHERE t.table_name || '_view' = view_name OR t.column_name NOT LIKE '%_id' + ORDER BY column_order; +$$ +LANGUAGE sql STABLE; + + +-- _DOCUMENTATION_VIEW_PIECES +CREATE OR REPLACE FUNCTION public._documentation_view_pieces(view_name citext) +RETURNS TABLE (rst_formatted text) +AS +$$ +DECLARE + column_length integer; + type_length integer; + nullable_length integer; + default_length integer; + description_length integer; + sep text; + border text; +BEGIN + + -- All of the hard-coded constants here are the string length of the table + -- column headers: 'Column', 'Type', 'Nullable', 'Default', 'Description' + SELECT greatest(6, max(character_length(column_name))) + FROM public._documentation_view_details(view_name) INTO column_length; + + SELECT greatest(4, max(character_length(type_name))) + FROM public._documentation_view_details(view_name) INTO type_length; + + SELECT greatest(8, max(character_length(nullable))) + FROM public._documentation_view_details(view_name) INTO nullable_length; + + SELECT greatest(7, max(character_length(default_value))) + FROM public._documentation_view_details(view_name) INTO default_length; + + SELECT greatest(11, max(character_length(description))) + FROM public._documentation_view_details(view_name) INTO description_length; + + SELECT ' ' INTO sep; + + SELECT + concat_ws(sep + , rpad('', column_length, '=') + , rpad('', type_length, '=') + , rpad('', nullable_length, '=') + , rpad('', default_length, '=') + , rpad('', description_length, '=') + ) + INTO border; + + RETURN QUERY + SELECT + border + UNION ALL + SELECT + concat_ws(sep + , rpad('Column', column_length, ' ') + , rpad('Type', type_length, ' ') + , rpad('Nullable', nullable_length, ' ') + , rpad('Default', default_length, ' ') + , rpad('Description', description_length, ' ') + ) + UNION ALL + SELECT border + UNION ALL + SELECT + concat_ws(sep + , rpad(v.column_name, column_length, ' ') + , rpad(v.type_name, type_length, ' ') + , rpad(v.nullable, nullable_length, ' ') + , rpad(v.default_value, default_length, ' ') + , rpad(v.description, description_length, ' ') + ) + FROM public._documentation_view_details(view_name) AS v + UNION ALL + SELECT border; + +END +$$ +LANGUAGE plpgsql STABLE; + + +-- DOCUMENTATION_FOR +CREATE OR REPLACE FUNCTION public.documentation_for(view_name citext) +RETURNS text AS +$$ + DECLARE + view_description text; + view_table_markup text; + BEGIN + SELECT description FROM pg_catalog.pg_description + WHERE pg_description.objoid = view_name::regclass + INTO view_description; + + SELECT + view_name || E'\n' || rpad('', length(view_name), '-') || E'\n\n' || + view_description || E'\n\n' || + string_agg(rst_formatted, E'\n') + INTO view_table_markup + FROM public._documentation_view_pieces(view_name); + + RETURN view_table_markup; + END +$$ +LANGUAGE plpgsql STABLE; +COMMENT ON FUNCTION public.documentation_for(citext) +IS E'Create an ".rst"-formatted table describing a specific view.\n' + 'Example: SELECT public.documentation_for(''endpoint'');'; + + +-- DOCUMENTATION +CREATE OR REPLACE FUNCTION public.documentation(dotfile_name text) +RETURNS TABLE (full_text text) AS +$$ + WITH v AS ( + SELECT + public.documentation_for(relname::citext) + || E'\n\nback to `Benchmark data model <benchmark-data-model>`_\n' + AS view_documentation + FROM pg_catalog.pg_trigger + JOIN pg_catalog.pg_class ON pg_trigger.tgrelid = pg_class.oid + WHERE NOT tgisinternal + ) + SELECT + E'\n.. _benchmark-data-model:\n\n' + 'Benchmark data model\n' + '====================\n\n\n' + '.. graphviz:: ' + || dotfile_name + || E'\n\n\n.. _benchmark-ingestion:\n\n' + 'Benchmark ingestion helper functions\n' + '====================================\n\n' + || public._documentation_ingestion() + || E'\n\n\n.. _benchmark-views:\n\n' + 'Benchmark views\n' + '===============\n\n\n' + || string_agg(v.view_documentation, E'\n') + FROM v + GROUP BY True; +$$ +LANGUAGE sql STABLE; +COMMENT ON FUNCTION public.documentation(text) +IS E'Create an ".rst"-formatted file that shows the columns in ' + 'every insertable view in the "public" schema.\n' + 'The text argument is the name of the generated dotfile to be included.\n' + 'Example: SELECT public.documentation(''data_model.dot'');'; + + +-- _DOCUMENTATION_DOTFILE_NODE_FOR +CREATE OR REPLACE FUNCTION public._documentation_dotfile_node_for(tablename name) +RETURNS text AS +$$ +DECLARE + result text; +BEGIN + WITH node AS ( + SELECT + tablename::text AS lines + UNION ALL + SELECT + E'[label = \n' + ' <<table border="0" cellborder="1" cellspacing="0" cellpadding="2">' + UNION ALL + -- table name + SELECT + ' <tr><td border="0"><font point-size="14">' + || tablename + || '</font></td></tr>' + UNION ALL + -- primary keys + SELECT + ' <tr><td port="' || column_name || '"><b>' + || column_name + || ' (pk)</b></td></tr>' + FROM public.summarized_tables_view + WHERE table_name = tablename + AND description LIKE '%primary key%' + UNION ALL + -- columns + SELECT + ' <tr><td>' + || column_name + || CASE WHEN description LIKE '%unique' THEN ' (u)' ELSE '' END + || CASE WHEN nullable <> 'not null' THEN ' (o)' ELSE '' END + || '</td></tr>' + FROM public.summarized_tables_view + WHERE table_name = tablename + AND (description IS NULL OR description not like '%key%') + UNION ALL + -- foreign keys + SELECT + ' <tr><td port="' || column_name || '">' + || column_name + || CASE WHEN description LIKE '%unique' THEN ' (u)' ELSE '' END + || ' (fk) </td></tr>' + FROM public.summarized_tables_view + WHERE table_name = tablename + AND description LIKE '%foreign key%' + AND description NOT LIKE '%primary key%' + UNION ALL + SELECT + E' </table>>\n];' + ) + SELECT + string_agg(lines, E'\n') + INTO result + FROM node; + + RETURN result; +END +$$ +LANGUAGE plpgsql STABLE; + + +-- _DOCUMENTATION_DOTFILE_EDGES +CREATE OR REPLACE FUNCTION public._documentation_dotfile_edges() +RETURNS text AS +$$ +DECLARE + result text; +BEGIN + WITH relationship AS ( + SELECT + conrelid AS fk_table_id + , confrelid AS pk_table_id + , unnest(conkey) AS fk_colnum + , unnest(confkey) AS pk_colnum + FROM pg_catalog.pg_constraint + WHERE confkey IS NOT NULL + AND connamespace='public'::regnamespace + ), all_edges AS ( + SELECT + fk_tbl.relname || ':' || fk_col.attname + || ' -> ' + || pk_tbl.relname || ':' || pk_col.attname + || ';' AS lines + FROM relationship + -- foreign key table + column + JOIN pg_catalog.pg_attribute AS fk_col + ON fk_col.attrelid = relationship.fk_table_id + AND fk_col.attnum = relationship.fk_colnum + JOIN pg_catalog.pg_class AS fk_tbl + ON fk_tbl.oid = relationship.fk_table_id + -- primary key table + column + JOIN pg_catalog.pg_attribute AS pk_col + ON pk_col.attrelid = relationship.pk_table_id + AND pk_col.attnum = relationship.pk_colnum + JOIN pg_catalog.pg_class AS pk_tbl + ON pk_tbl.oid = relationship.pk_table_id + ) + SELECT + string_agg(lines, E'\n') + INTO result + FROM all_edges; + + RETURN result; +END +$$ +LANGUAGE plpgsql STABLE; + + +-- DOCUMENTATION_DOTFILE +CREATE OR REPLACE FUNCTION public.documentation_dotfile() +RETURNS text AS +$$ +DECLARE + schemaname name := 'public'; + result text; +BEGIN + WITH file_contents AS ( + SELECT + E'digraph database {\n concentrate = true;\n' + ' rankdir = LR;\n' + ' ratio = ".75";\n' + ' node [shape = none, fontsize="11", fontname="Helvetica"];\n' + ' edge [fontsize="8", fontname="Helvetica"];' + AS lines + UNION ALL + SELECT + E'legend\n[fontsize = "14"\nlabel =\n' + '<<table border="0" cellpadding="0">\n' + ' <tr><td align="left"><font point-size="16">Legend</font></td></tr>\n' + ' <tr><td align="left">pk = primary key</td></tr>\n' + ' <tr><td align="left">fk = foreign key</td></tr>\n' + ' <tr><td align="left">u = unique*</td></tr>\n' + ' <tr><td align="left">o = optional</td></tr>\n' + ' <tr><td align="left">' + '* multiple uniques in the same table are a unique group</td></tr>\n' + '</table>>\n];' + UNION ALL + SELECT + string_agg( + public._documentation_dotfile_node_for(relname), + E'\n' -- Forcing the 'env' table to the end makes a better image + ORDER BY (CASE WHEN relname LIKE 'env%' THEN 'z' ELSE relname END) + ) + FROM pg_catalog.pg_class + WHERE relkind='r' AND relnamespace = schemaname::regnamespace + UNION ALL + SELECT + public._documentation_dotfile_edges() + UNION ALL + SELECT + '}' + ) + SELECT + string_agg(lines, E'\n') AS dotfile + INTO result + FROM file_contents; + RETURN result; +END +$$ +LANGUAGE plpgsql STABLE; +COMMENT ON FUNCTION public.documentation_dotfile() +IS E'Create a Graphviz dotfile of the data model: ' + 'every table in the "public" schema.\n' + 'Example: SELECT public.documentation_dotfile();'; diff --git a/src/arrow/dev/benchmarking/ddl/4_00_triggers.sql b/src/arrow/dev/benchmarking/ddl/4_00_triggers.sql new file mode 100644 index 000000000..5fb0e5018 --- /dev/null +++ b/src/arrow/dev/benchmarking/ddl/4_00_triggers.sql @@ -0,0 +1,61 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +*/ + + +-- LANGUAGE_IMPLEMENTATION_VERSION_VIEW_TRIGGER_INSERT +CREATE TRIGGER language_implementation_version_view_trigger_insert + INSTEAD OF INSERT ON public.language_implementation_version_view + FOR EACH ROW + EXECUTE FUNCTION public.language_implementation_version_view_insert_row(); + +-- ENVIRONMENT_VIEW_TRIGGER_INSERT +CREATE TRIGGER environment_view_trigger_insert + INSTEAD OF INSERT ON public.environment_view + FOR EACH ROW + EXECUTE FUNCTION public.environment_view_insert_row(); + +-- MACHINE_VIEW_TRIGGER_INSERT +CREATE TRIGGER machine_view_trigger_insert + INSTEAD OF INSERT ON public.machine_view + FOR EACH ROW + EXECUTE FUNCTION public.machine_view_insert_row(); + +-- UNIT_VIEW_TRIGGER_INSERT +CREATE TRIGGER unit_view_trigger_insert + INSTEAD OF INSERT ON public.unit_view + FOR EACH ROW + EXECUTE FUNCTION public.unit_view_insert_row(); + +-- BENCHMARK_VIEW_TRIGGER_INSERT +CREATE TRIGGER benchmark_view_trigger_insert + INSTEAD OF INSERT ON public.benchmark_view + FOR EACH ROW + EXECUTE FUNCTION public.benchmark_view_insert_row(); + +-- BENCHMARK_RUN_VIEW_TRIGGER_INSERT +CREATE TRIGGER benchmark_run_view_trigger_insert + INSTEAD OF INSERT ON public.benchmark_run_view + FOR EACH ROW + EXECUTE FUNCTION public.benchmark_run_view_insert_row(); + +-- FULL_BENCHMARK_RUN_VIEW_TRIGGER_INSERT +CREATE TRIGGER full_benchmark_run_view_trigger_insert + INSTEAD OF INSERT ON public.full_benchmark_run_view + FOR EACH ROW + EXECUTE FUNCTION public.full_benchmark_run_view_insert_row(); diff --git a/src/arrow/dev/benchmarking/ddl/5_00_permissions.sql b/src/arrow/dev/benchmarking/ddl/5_00_permissions.sql new file mode 100644 index 000000000..dd72c40db --- /dev/null +++ b/src/arrow/dev/benchmarking/ddl/5_00_permissions.sql @@ -0,0 +1,73 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +*/ +---------------------------- ROLES ---------------------------- +-- ARROW_WEB +CREATE ROLE arrow_web login password 'arrow'; +COMMENT ON ROLE arrow_web IS 'Anonymous login user.'; + +-- ARROW_ADMIN +CREATE ROLE arrow_admin; +COMMENT ON ROLE arrow_admin + IS 'Can select, insert, update, and delete on all public tables.'; + +-- ARROW_ANONYMOUS +CREATE ROLE arrow_anonymous; +COMMENT ON ROLE arrow_anonymous + IS 'Can insert and select on all public tables.'; + +GRANT arrow_anonymous TO arrow_web; + + +---------------------------- PRIVILEGES ---------------------------- +GRANT USAGE ON SCHEMA public TO arrow_anonymous, arrow_admin; + +-- ARROW_ADMIN +GRANT EXECUTE ON ALL FUNCTIONS IN SCHEMA public TO arrow_admin; +GRANT USAGE, SELECT ON ALL SEQUENCES IN SCHEMA public to arrow_admin; +GRANT SELECT, UPDATE, INSERT, DELETE ON ALL TABLES IN SCHEMA public + TO arrow_admin; + +-- ARROW_ANONYMOUS +GRANT EXECUTE ON ALL FUNCTIONS IN SCHEMA public TO arrow_anonymous; +GRANT SELECT ON ALL TABLES IN SCHEMA public TO arrow_anonymous; +GRANT USAGE, SELECT ON ALL SEQUENCES IN SCHEMA public to arrow_anonymous; +GRANT INSERT ON + public.benchmark + , public.benchmark_language + , public.dependencies + , public.language_implementation_version + , public.benchmark_run + , public.benchmark_type + , public.cpu + , public.environment + , public.environment_view + , public.gpu + , public.machine + , public.machine_view + , public.os + , public.unit + --, public.project -- The only disallowed table is `project`. + , public.benchmark_run_view + , public.benchmark_view + , public.environment_view + , public.full_benchmark_run_view + , public.language_implementation_version_view + , public.machine_view + , public.unit_view +TO arrow_anonymous; diff --git a/src/arrow/dev/benchmarking/docker-compose.yml b/src/arrow/dev/benchmarking/docker-compose.yml new file mode 100644 index 000000000..ca60206bf --- /dev/null +++ b/src/arrow/dev/benchmarking/docker-compose.yml @@ -0,0 +1,43 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +version: '3' +services: + + pg: + build: + context: . + dockerfile: Dockerfile + restart: always + ports: + - '5432:5432' + environment: + - POSTGRES_PASSWORD=${PG_PASS} + - POSTGRES_USER=${PG_USER} + + graphile: + image: graphile/postgraphile + restart: always + ports: + - 5000:5000 + depends_on: + - pg + command: + - --connection + - postgres://${PG_USER}:${PG_PASS}@pg:5432/${PG_USER} + - --schema + - public + - --watch diff --git a/src/arrow/dev/benchmarking/examples/benchmark_example.json b/src/arrow/dev/benchmarking/examples/benchmark_example.json new file mode 100644 index 000000000..d6f58c286 --- /dev/null +++ b/src/arrow/dev/benchmarking/examples/benchmark_example.json @@ -0,0 +1,32 @@ +[ + { + "benchmark_name": "Benchmark 1", + "parameter_names": ["arg0", "arg1", "arg2"], + "benchmark_description": "First benchmark", + "benchmark_type": "Time", + "units": "miliseconds", + "lessisbetter": true, + "benchmark_version": "second version", + "benchmark_language": "Python" + }, + { + "benchmark_name": "Benchmark 2", + "parameter_names": ["arg0", "arg1"], + "benchmark_description": "Description 2.", + "benchmark_type": "Time", + "units": "nanoseconds", + "lessisbetter": true, + "benchmark_version": "second version", + "benchmark_language": "Python" + }, + { + "benchmark_name": "Benchmark 3", + "parameter_names": ["arg0"], + "benchmark_description": "Third benchmark", + "benchmark_type": "Memory", + "units": "kilobytes", + "lessisbetter": true, + "benchmark_version": "1", + "benchmark_language": "Python" + } +] diff --git a/src/arrow/dev/benchmarking/examples/benchmark_run_example.csv b/src/arrow/dev/benchmarking/examples/benchmark_run_example.csv new file mode 100644 index 000000000..eab208a1c --- /dev/null +++ b/src/arrow/dev/benchmarking/examples/benchmark_run_example.csv @@ -0,0 +1,6 @@ +benchmark_run_id,benchmark_name,benchmark_version,parameter_values,value,git_commit_timestamp,git_hash,val_min,val_q1,val_q3,val_max,std_dev,n_obs,run_timestamp,run_metadata,run_notes,mac_address,benchmark_language,language_implementation_version,dependencies
+,Benchmark 2,version 0,"{""arg0"": 100, ""arg1"": 5}",2.5,2019-01-31 14:31:10 -0600,8136c46d5c60fb,1,2,3,4,1.41,8,2019-02-14 02:00:00 -0600,,,08:00:2b:01:02:03,Python,CPython 2.7,"{""six"": """", ""numpy"": ""1.14"", ""other_lib"": ""1.0""}"
+,Benchmark 2,version 0,"{""arg0"": 1000, ""arg1"": 5}",5,2019-01-31 14:31:10 -0600,8136c46d5c60fb,2,4,6,8,3.14,8,2019-02-14 02:01:00 -0600,,,08:00:2b:01:02:03,Python,CPython 2.7,"{""six"": """", ""numpy"": ""1.14"", ""other_lib"": ""1.0""}"
+,Benchmark 2,version 0,"{""arg0"": 100, ""arg1"": 5}",2.5,2019-01-31 14:31:10 -0600,8136c46d5c60fb,0.5,1,3,5,3,8,2019-02-14 02:02:00 -0600,,,08:00:2b:01:02:03,Python,CPython 3.6,"{""boost"": ""1.42"", ""numpy"": ""1.15""}"
+,Benchmark 2,version 0,"{""arg0"": 1000, ""arg1"": 5}",3,2019-01-31 14:31:10 -0600,8136c46d5c60fb,2,2.5,4,4.5,1.5,8,2019-02-14 02:03:00 -0600,,,08:00:2b:01:02:03,Python,CPython 3.6,"{""boost"": ""1.42"", ""numpy"": ""1.15""}"
+,Benchmark 2,version 0,"{""arg0"": 1000, ""arg1"": 10}",3,2019-01-31 14:31:10 -0600,8136c46d5c60fb,1,2,4,5,2,8,2019-02-14 02:03:30 -0600,,,08:00:2b:01:02:03,Python,CPython 2.7,"{""six"": """", ""numpy"": ""1.15"", ""other_lib"": ""1.0""}"
diff --git a/src/arrow/dev/benchmarking/examples/benchmark_run_example.json b/src/arrow/dev/benchmarking/examples/benchmark_run_example.json new file mode 100644 index 000000000..2ded776c9 --- /dev/null +++ b/src/arrow/dev/benchmarking/examples/benchmark_run_example.json @@ -0,0 +1,97 @@ +[ + { + "benchmark_name": "Benchmark 2", + "benchmark_version": "version 0", + "parameter_values": {"arg0": 100, "arg1": 5}, + "value": 2.5, + "git_commit_timestamp": "2019-02-08 22:35:53 +0100", + "git_hash": "324d3cf198444a", + "val_min": 1, + "val_q1": 2, + "val_q3": 3, + "val_max": 4, + "std_dev": 1.41, + "n_obs": 8, + "run_timestamp": "2019-02-14 03:00:05 -0600", + "mac_address": "08:00:2b:01:02:03", + "benchmark_language": "Python", + "language_implementation_version": "CPython 2.7", + "dependencies": {"six": "", "numpy": "1.14", "other_lib": "1.0"} + }, + { + "benchmark_name": "Benchmark 2", + "benchmark_version": "version 0", + "parameter_values": {"arg0": 1000, "arg1": 5}, + "value": 5, + "git_commit_timestamp": "2019-02-08 22:35:53 +0100", + "git_hash": "324d3cf198444a", + "val_min": 2, + "val_q1": 4, + "val_q3": 6, + "val_max": 8, + "std_dev": 3.14, + "n_obs": 8, + "run_timestamp": "2019-02-14 03:00:10 -0600", + "mac_address": "08:00:2b:01:02:03", + "benchmark_language": "Python", + "language_implementation_version": "CPython 2.7", + "dependencies": {"six": "", "numpy": "1.14", "other_lib": "1.0"} + }, + { + "benchmark_name": "Benchmark 2", + "benchmark_version": "version 0", + "parameter_values": {"arg0": 100, "arg1": 5}, + "value": 2.5, + "git_commit_timestamp": "2019-02-08 22:35:53 +0100", + "git_hash": "324d3cf198444a", + "val_min": 0.5, + "val_q1": 1, + "val_q3": 3, + "val_max": 5, + "std_dev": 3, + "n_obs": 8, + "run_timestamp": "2019-02-14 03:00:20 -0600", + "mac_address": "08:00:2b:01:02:03", + "benchmark_language": "Python", + "language_implementation_version": "CPython 2.7", + "dependencies": {"boost": "1.42", "numpy": "1.15"} + }, + { + "benchmark_name": "Benchmark 2", + "benchmark_version": "version 0", + "parameter_values": {"arg0": 1000, "arg1": 5}, + "value": 3, + "git_commit_timestamp": "2019-02-08 22:35:53 +0100", + "git_hash": "324d3cf198444a", + "val_min": 2, + "val_q1": 2.5, + "val_q3": 4, + "val_max": 4.5, + "std_dev": 1.5, + "n_obs": 8, + "run_timestamp": "2019-02-14 03:00:30 -0600", + "mac_address": "08:00:2b:01:02:03", + "benchmark_language": "Python", + "language_implementation_version": "CPython 2.7", + "dependencies": {"boost": "1.42", "numpy": "1.15"} + }, + { + "benchmark_name": "Benchmark 2", + "benchmark_version": "version 0", + "parameter_values": {"arg0": 1000, "arg1": 10}, + "value": 3, + "git_commit_timestamp": "2019-02-08 22:35:53 +0100", + "git_hash": "324d3cf198444a", + "val_min": 1, + "val_q1": 2, + "val_q3": 4, + "val_max": 5, + "std_dev": 2, + "n_obs": 8, + "run_timestamp": "2019-02-14 03:00:40 -0600", + "mac_address": "08:00:2b:01:02:03", + "benchmark_language": "Python", + "language_implementation_version": "CPython 2.7", + "dependencies": {"six": "", "numpy": "1.15", "other_lib": "1.0"} + } +] diff --git a/src/arrow/dev/benchmarking/examples/benchmark_with_context_example.json b/src/arrow/dev/benchmarking/examples/benchmark_with_context_example.json new file mode 100644 index 000000000..f9e6e3130 --- /dev/null +++ b/src/arrow/dev/benchmarking/examples/benchmark_with_context_example.json @@ -0,0 +1,73 @@ +{ + "context": { + "mac_address": "08:00:2b:01:02:03", + "benchmark_language": "Python", + "language_implementation_version": "CPython 2.7", + "dependencies": {"six": "", "numpy": "1.14", "other_lib": "1.0"}, + "git_commit_timestamp": "2019-02-14 22:42:22 +0100", + "git_hash": "123456789abcde", + "run_timestamp": "2019-02-25 03:00:40 -0600", + "Extra stuff": "does not hurt anything and won't be added.", + "However": "all of the entries above 'Extra stuff' are required." + }, + "benchmark_version": { + "Benchmark 2": "version 0", + "Benchmark 3": "any string is a version. (Benchmark 3 not actually used)" + }, + "benchmarks": [ + { + "benchmark_name": "Benchmark 2", + "parameter_values": {"arg0": 1, "arg1": 5}, + "value": 2.5, + "val_min": 1, + "val_q1": 2, + "val_q3": 3, + "val_max": 4, + "std_dev": 1.41, + "n_obs": 8, + "run_metadata": {"any": "json object is admissible"}, + "run_notes": "This value is an arbitrary-length string." + }, + { + "benchmark_name": "Benchmark 2", + "parameter_values": {"arg0": 2, "arg1": 5}, + "value": 5, + "std_dev": 3.14, + "n_obs": 8 + }, + { + "benchmark_name": "Benchmark 2", + "parameter_values": {"arg0": 3, "arg1": 5}, + "value": 2.5, + "val_min": 0.5, + "val_q1": 1, + "val_q3": 3, + "val_max": 5, + "std_dev": 3, + "n_obs": 8, + "run_notes": "The previous run in this list has the minimal set of keys." + }, + { + "benchmark_name": "Benchmark 2", + "parameter_values": {"arg0": 4, "arg1": 5}, + "value": 3, + "val_min": 2, + "val_q1": 2.5, + "val_q3": 4, + "val_max": 4.5, + "std_dev": 1.5, + "n_obs": 8 + }, + { + "benchmark_name": "Benchmark 2", + "parameter_values": {"arg0": 5, "arg1": 5}, + "value": 3, + "val_min": 1, + "val_q1": 2, + "val_q3": 4, + "val_max": 5, + "std_dev": 2, + "n_obs": 8 + } + ] +} diff --git a/src/arrow/dev/benchmarking/examples/example.sql b/src/arrow/dev/benchmarking/examples/example.sql new file mode 100644 index 000000000..e93269af7 --- /dev/null +++ b/src/arrow/dev/benchmarking/examples/example.sql @@ -0,0 +1,232 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +*/ + + +-- Example insert into each of the views: +INSERT INTO public.project(project_name, project_url, repo_url) +VALUES ( + 'Apache Arrow' + , 'https://arrow.apache.org/' + , 'https://github.com/apache/arrow'); + +INSERT INTO public.environment_view + (benchmark_language, language_implementation_version, dependencies) +VALUES + ('Python', 'CPython 2.7', '{"six": "", "numpy": "1.14", "other_lib": "1.0"}'), + ('Python', 'CPython 3.6', '{"boost": "1.42", "numpy": "1.15"}'); + +INSERT INTO public.dependencies(dependencies) +VALUES + ('{"boost": "1.68", "numpy": "1.14"}'), + ('{"boost": "1.42", "numpy": "1.16"}'); + +INSERT INTO public.language_implementation_version_view + (benchmark_language, language_implementation_version) +VALUES + ('Python', 'CPython 2.7'), + ('Python', 'CPython 3.6'); + +INSERT INTO public.unit_view + (benchmark_type, units, lessisbetter) +VALUES + ('Memory', 'gigabytes', True), + ('Memory', 'kilobytes', True); + + +\echo 'use \\dv to list the views views'; +\dv + + +SELECT * FROM environment_view; +SELECT * FROM unit_view; + + +INSERT INTO public.machine_view ( + mac_address + , machine_name + , memory_bytes + , cpu_actual_frequency_hz + , os_name + , architecture_name + , kernel_name + , cpu_model_name + , cpu_core_count + , cpu_thread_count + , cpu_frequency_max_hz + , cpu_frequency_min_hz + , cpu_l1d_cache_bytes + , cpu_l1i_cache_bytes + , cpu_l2_cache_bytes + , cpu_l3_cache_bytes + , machine_other_attributes +) VALUES ( + '08:00:2b:01:02:03' -- mac_address + , 'My-Machine-Name' -- machine_name + , 8589934592 -- memory_bytes + -- All (?) standard mac address formats are allowable: + -- https://www.postgresql.org/docs/11/datatype-net-types.html + , 2300000000 -- cpu_actual_frequency_Hz + , 'OSX' -- os_name + , 'x86_64' -- architecture_name + , '18.2.0' -- kernel + , 'Intel(R) Core(TM) i5-7360U CPU @ 2.30GHz' -- cpu_model_name + , 2 -- cpu_core_count + , 4 -- cpu_thread_count + , 2300000000 -- cpu_frequency_max_Hz + , 2300000000 -- cpu_frequency_min_Hz + , 32768 -- cpu_l1d_cache_bytes + , 32768 -- cpu_l1i_cache_bytes + , 262144 -- cpu_l2_cache_bytes + , 4194304 -- cpu_l3_cache_bytes + , '{"example": "for machine_other_attributes"}'::jsonb +); + + +INSERT INTO public.full_benchmark_run_view ( + benchmark_name + , parameter_names + , benchmark_description + , benchmark_type + , units + , lessisbetter + , benchmark_version + -- datum + , parameter_values + , value + , git_commit_timestamp + , git_hash + , val_min + , val_q1 + , val_q3 + , val_max + , std_dev + , n_obs + , run_timestamp + , run_metadata + , run_notes + -- machine_view + , machine_name + , mac_address + , memory_bytes + , cpu_actual_frequency_hz + , os_name + , architecture_name + , kernel_name + , cpu_model_name + , cpu_core_count + , cpu_thread_count + , cpu_frequency_max_hz + , cpu_frequency_min_hz + , cpu_l1d_cache_bytes + , cpu_l1i_cache_bytes + , cpu_l2_cache_bytes + , cpu_l3_cache_bytes + , machine_other_attributes + -- environment_view + , benchmark_language + , language_implementation_version + , dependencies +) VALUES ( + 'Benchmark 3' + , '{"arg0"}'::text[] + , 'Third benchmark' + , 'Memory' + , 'kilobytes' + , TRUE + , '0' + -- datum + , '{"arg0": 10}'::jsonb + , 0.5 + , '2019-01-31 14:31:10 -0600' + , '8136c46d5c60fb' + , 0.5 + , 0.5 + , 0.5 + , 0.5 + , 0 + , 2 + , '2019-02-14 14:00:00 -0600' + , '{"ci_99": [2.7e-06, 3.1e-06]}'::jsonb + , 'Additional run_notes.' + -- machine_view + , 'My-Machine-Name' + , '09-00-2c-01-02-03' + , 8589934592 + , 2300000000 + , 'OSX' + , 'x86_64' + , '18.2.0' + , 'Intel(R) Core(TM) i5-7360U CPU @ 2.30GHz' + , 2 + , 4 + , 2300000000 + , 2300000000 + , 32768 + , 32768 + , 262144 + , 4194304 + , '{"example": "for machine_other_attributes"}'::jsonb + -- environment_view + , 'Python' + , 'CPython 2.7' + , '{"six": "", "numpy": "1.15", "other_lib": "1.0"}'::jsonb +); + + +-- Bulk load from CSV. First column is empty; serial "benchmark_run_id" will be assigned. +--\copy benchmark_run_view FROM 'examples/benchmark_run_example.csv' WITH (FORMAT csv, HEADER); + +-- Load from JSON +--\set content `cat examples/benchmark_example.json` +--SELECT ingest_benchmark_view(:'content'::jsonb); + +INSERT INTO public.benchmark_view ( + benchmark_name + , parameter_names + , benchmark_description + , benchmark_type + , units + , lessisbetter + , benchmark_version + , benchmark_language + ) VALUES ( + 'Benchmark 1' + , '{"arg0", "arg1", "arg2"}'::text[] + , E'Description.\nNewlines are OK in a string escaped with leading "E".' + , 'Time' + , 'miliseconds' + , TRUE + , 'Hash of code or other way to identify distinct benchmark versions.' + , 'Python' + ), ( + 'Benchmark 2' + , '{"arg0", "arg1"}'::text[] + , 'Description 2.' + , 'Time' + , 'nanoseconds' + , TRUE + , 'version 0' + , 'Python' + ); + + +\x +SELECT * from benchmark_run_view; + +\x diff --git a/src/arrow/dev/benchmarking/examples/example_graphql_mutation.json b/src/arrow/dev/benchmarking/examples/example_graphql_mutation.json new file mode 100644 index 000000000..fec5eed0a --- /dev/null +++ b/src/arrow/dev/benchmarking/examples/example_graphql_mutation.json @@ -0,0 +1,12 @@ +{ + "query": "mutation ($p: CreateProjectInput!){createProject(input:$p){project{id}}}", + "variables": { + "p": { + "project": { + "projectName": "Apache Arrow", + "projectUrl": "https://www.arrow.apache.org", + "repoUrl": "https://www.github.com/apache/arrow" + } + } + } +} diff --git a/src/arrow/dev/benchmarking/examples/graphql_query_environment_view.json b/src/arrow/dev/benchmarking/examples/graphql_query_environment_view.json new file mode 100644 index 000000000..78804fa91 --- /dev/null +++ b/src/arrow/dev/benchmarking/examples/graphql_query_environment_view.json @@ -0,0 +1,3 @@ +{ + "query": "{allEnvironmentViews(orderBy: [BENCHMARK_LANGUAGE_ASC, LANGUAGE_IMPLEMENTATION_VERSION_ASC, DEPENDENCIES_ASC]) {edges {node {environmentId, benchmarkLanguage, languageImplementationVersion, dependencies}}}}" +} diff --git a/src/arrow/dev/benchmarking/examples/machine.json b/src/arrow/dev/benchmarking/examples/machine.json new file mode 100644 index 000000000..2485e2bc1 --- /dev/null +++ b/src/arrow/dev/benchmarking/examples/machine.json @@ -0,0 +1,22 @@ +{ + "mac_address": "0a:00:2d:01:02:03", + "machine_name": "Yet-Another-Machine-Name", + "memory_bytes": 8589934592, + "cpu_actual_frequency_hz": 2300000000, + "os_name": "OSX", + "architecture_name": "x86_64", + "kernel_name": "18.2.0", + "cpu_model_name": "Intel(R) Core(TM) i5-7360U CPU @ 2.30GHz", + "cpu_core_count": 2, + "cpu_thread_count": 4, + "cpu_frequency_max_hz": 2300000000, + "cpu_frequency_min_hz": 2300000000, + "cpu_l1d_cache_bytes": 32768, + "cpu_l1i_cache_bytes": 32768, + "cpu_l2_cache_bytes": 262144, + "cpu_l3_cache_bytes": 4194304, + "machine_other_attributes": {"just": "an example"}, + "gpu_information": "", + "gpu_part_number": "", + "gpu_product_name": "" +} diff --git a/src/arrow/dev/benchmarking/graphql_submit.sh b/src/arrow/dev/benchmarking/graphql_submit.sh new file mode 100755 index 000000000..2eaab9cdf --- /dev/null +++ b/src/arrow/dev/benchmarking/graphql_submit.sh @@ -0,0 +1,75 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +OPTIONS=("machine" "benchmarks" "runs") + +option=${1-help} +datafile=${2-machine.json} +uri=${3-localhost:5000/graphql} + +help() { + cat <<HELP + Submit data via GraphQL + + Usage: + ${0} [option] [JSON_file] [URI] + + Arguments: + option - $(echo ${OPTIONS[@]} | sed 's/ /|/g') + JSON_file - path to the submission file (default 'machine.json') + URI - URI to submit to (default 'localhost:5000/graphql') +HELP +} + +escape_quote() { sed 's/"/\\"/g'; } + +template() { + cat <<TEMPLATE + { + "query": "mutation (\$jsonb: JSON!){${1}(input:{fromJsonb:\$jsonb}){${2}}}", + "variables": { + "jsonb": "$(echo $(cat ${datafile}) | escape_quote )" + } + } +TEMPLATE +} + +submit () { + curl -X POST -H "Content-Type: application/json" --data @<(template $1 $2) ${uri} +} + + +case "$1" in + machine) + submit ingestMachineView integer;; + + benchmarks) + submit ingestBenchmarkView integers;; + + runs) + if grep -q context <(head -n2 ${2}) + then + submit ingestBenchmarkRunsWithContext bigInts + else + submit ingestBenchmarkRunView bigInts + fi;; + + *) + help + exit 1 +esac diff --git a/src/arrow/dev/benchmarking/make_data_model_rst.sh b/src/arrow/dev/benchmarking/make_data_model_rst.sh new file mode 100755 index 000000000..6a4f5f5b6 --- /dev/null +++ b/src/arrow/dev/benchmarking/make_data_model_rst.sh @@ -0,0 +1,69 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +set -e +DOTFILE=data_model.dot +OUTFILE=data_model.rst + +license() { + cat <<'LICENSE' > ${1} +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + + +LICENSE +} + +warning() { + cat <<'WARNING' >> ${1} +.. WARNING +.. This is an auto-generated file. Please do not edit. + +.. To reproduce, please run :code:`./make_data_model_rst.sh`. +.. (This requires you have the +.. `psql client <https://www.postgresql.org/download/>`_ +.. and have started the docker containers using +.. :code:`docker-compose up`). + +WARNING +} + +echo "Making ${OUTFILE}" + +license ${OUTFILE} +warning ${OUTFILE} + +PGPASSWORD=arrow \ + psql --tuples-only --username=arrow_web \ + --dbname=benchmark --port=5432 --host=localhost \ + --command="select public.documentation('${DOTFILE}');" \ + | sed "s/ *+$//" | sed "s/^ //" >> ${OUTFILE} diff --git a/src/arrow/dev/benchmarking/make_dotfile.sh b/src/arrow/dev/benchmarking/make_dotfile.sh new file mode 100755 index 000000000..b86dc3eb3 --- /dev/null +++ b/src/arrow/dev/benchmarking/make_dotfile.sh @@ -0,0 +1,70 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +set -e +OUTFILE=data_model.dot + +license() { + cat <<'LICENSE' > ${1} +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements.See the NOTICE file + distributed with this work for additional information + regarding copyright ownership.The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License.You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied.See the License for the + specific language governing permissions and limitations + under the License. +*/ + +LICENSE +} + +warning() { + cat <<'WARNING' >> ${1} +/* + WARNING + This is an auto-generated file. Please do not edit. + + To reproduce, please run :code:`./make_data_model_rst.sh`. + (This requires you have the + `psql client <https://www.postgresql.org/download/>`_ + and have started the docker containers using + :code:`docker-compose up`). +*/ +WARNING +} + +echo "Making ${OUTFILE}" + +license ${OUTFILE} +warning ${OUTFILE} + +PGPASSWORD=arrow \ + psql --tuples-only --username=arrow_web \ + --dbname=benchmark --port=5432 --host=localhost \ + --command="select public.documentation_dotfile();" \ + | sed "s/ *+$//" | sed "s/^ //" >> ${OUTFILE} diff --git a/src/arrow/dev/benchmarking/make_machine_json.sh b/src/arrow/dev/benchmarking/make_machine_json.sh new file mode 100755 index 000000000..09bf0ea2d --- /dev/null +++ b/src/arrow/dev/benchmarking/make_machine_json.sh @@ -0,0 +1,55 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +set -e +OUTFILE=machine.json + +echo "Making ${OUTFILE}" +echo "** NOTE: This command fails on everything but OSX right now. **" +echo "* also, the intent is to make this script not suck, just not now. *" +echo "Please type GPU details here (or manually modify ${OUTFILE} later)." +read -p "GPU information string (or <enter>): " gpu_information +read -p "GPU part number (or <enter>): " gpu_part_number +read -p "GPU product name (or <enter>): " gpu_product_name + + +cat <<MACHINE_JSON > ${OUTFILE} +{ + "mac_address": "$(ifconfig en1 | awk '/ether/{print $2}')", + "machine_name": "$(uname -n)", + "memory_bytes": $(sysctl -n hw.memsize), + "cpu_actual_frequency_hz": $(sysctl -n hw.cpufrequency), + "os_name": "$(uname -s)", + "architecture_name": "$(uname -m)", + "kernel_name": "$(uname -r)", + "cpu_model_name": "$(sysctl -n machdep.cpu.brand_string)", + "cpu_core_count": $(sysctl -n hw.physicalcpu), + "cpu_thread_count": $(sysctl -n hw.logicalcpu), + "cpu_frequency_max_hz": $(sysctl -n hw.cpufrequency_max), + "cpu_frequency_min_hz": $(sysctl -n hw.cpufrequency_min), + "cpu_l1d_cache_bytes": $(sysctl -n hw.l1dcachesize), + "cpu_l1i_cache_bytes": $(sysctl -n hw.l1icachesize), + "cpu_l2_cache_bytes": $(sysctl -n hw.l2cachesize), + "cpu_l3_cache_bytes": $(sysctl -n hw.l3cachesize), + "gpu_information": "${gpu_information}", + "gpu_part_number": "${gpu_part_number}", + "gpu_product_name": "${gpu_product_name}" +} +MACHINE_JSON + +echo "Machine details saved in ${OUTFILE}" |