diff options
Diffstat (limited to 'src/arrow/dev/benchmarking/ddl')
21 files changed, 3098 insertions, 0 deletions
diff --git a/src/arrow/dev/benchmarking/ddl/0_setup.sql b/src/arrow/dev/benchmarking/ddl/0_setup.sql new file mode 100644 index 000000000..ec1044641 --- /dev/null +++ b/src/arrow/dev/benchmarking/ddl/0_setup.sql @@ -0,0 +1,23 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +*/ + +CREATE EXTENSION IF NOT EXISTS "citext"; -- type for case-insensitive text + +-- For future fine-grained control over function execution by user group. +ALTER DEFAULT PRIVILEGES REVOKE EXECUTE ON functions FROM public; diff --git a/src/arrow/dev/benchmarking/ddl/1_00_table_public_project.sql b/src/arrow/dev/benchmarking/ddl/1_00_table_public_project.sql new file mode 100644 index 000000000..c52d66cfd --- /dev/null +++ b/src/arrow/dev/benchmarking/ddl/1_00_table_public_project.sql @@ -0,0 +1,45 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +*/ + + +-- PROJECT +CREATE TABLE IF NOT EXISTS public.project +( + project_id SERIAL PRIMARY KEY + , project_name citext NOT NULL + , project_url text NOT NULL + , repo_url text NOT NULL + , last_changed timestamp (0) without time zone NOT NULL DEFAULT now() +); +COMMENT ON TABLE public.project + IS 'Project name and relevant URLs.'; +COMMENT ON COLUMN public.project.project_url + IS 'Homepage URL.'; +COMMENT ON COLUMN public.project.repo_url + IS 'Git repo URL to link stored commit hashes to code in a webpage.'; +COMMENT ON COLUMN public.project.last_changed + IS 'New project details are added with a new timestamp. ' + 'The project details with the newest timestamp will be used.'; + +-- CONSTRAINTS +CREATE UNIQUE INDEX project_unique_index_on_project_name_urls + ON public.project(project_name, project_url, repo_url); +COMMENT ON INDEX + public.project_unique_index_on_project_name_urls + IS 'Enforce uniqueness of project name and urls.'; diff --git a/src/arrow/dev/benchmarking/ddl/1_01_table_public_cpu.sql b/src/arrow/dev/benchmarking/ddl/1_01_table_public_cpu.sql new file mode 100644 index 000000000..df1a9e757 --- /dev/null +++ b/src/arrow/dev/benchmarking/ddl/1_01_table_public_cpu.sql @@ -0,0 +1,63 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +*/ + + +-- CPU +CREATE TABLE IF NOT EXISTS public.cpu +( + cpu_id SERIAL PRIMARY KEY + , cpu_model_name citext NOT NULL UNIQUE + , cpu_core_count integer NOT NULL + , cpu_thread_count integer NOT NULL + , cpu_frequency_max_Hz bigint NOT NULL + , cpu_frequency_min_Hz bigint NOT NULL + , cpu_L1d_cache_bytes integer NOT NULL + , cpu_L1i_cache_bytes integer NOT NULL + , cpu_L2_cache_bytes integer NOT NULL + , cpu_L3_cache_bytes integer NOT NULL +); +COMMENT ON TABLE public.cpu + IS 'CPU model and its specifications.'; +COMMENT ON COLUMN public.cpu.cpu_id + IS 'The primary key for the CPU table. ' + 'NOTE: This is a synthetic primary key and not meant to represent a ' + 'processor instruction to read capabilities.'; +COMMENT ON COLUMN public.cpu.cpu_model_name + IS 'The output of `sysctl -n machdep.cpu.brand_stringp`.'; +COMMENT ON COLUMN public.cpu.cpu_core_count + IS 'The output of `sysctl -n hw.physicalcpu`.'; +COMMENT ON COLUMN public.cpu.cpu_thread_count + IS 'The output of `sysctl -n hw.logicalcpu`.'; +COMMENT ON COLUMN public.cpu.cpu_frequency_max_Hz + IS 'The output of `sysctl -n hw.cpufrequency_max`.'; +COMMENT ON COLUMN public.cpu.cpu_frequency_min_Hz + IS 'The output of `sysctl -n hw.cpufrequency_min`.'; +COMMENT ON COLUMN public.cpu.cpu_L1d_cache_bytes + IS 'The output of `sysctl -n hw.l1dcachesize`.'; +COMMENT ON COLUMN public.cpu.cpu_L1i_cache_bytes + IS 'The output of `sysctl -n hw.l1icachesize`.'; +COMMENT ON COLUMN public.cpu.cpu_L2_cache_bytes + IS 'The output of `sysctl -n hw.l2cachesize`.'; +COMMENT ON COLUMN public.cpu.cpu_L3_cache_bytes + IS 'The output of `sysctl -n hw.l3cachesize`.'; + +-- CONSTRAINTS +ALTER TABLE public.cpu + ADD CONSTRAINT cpu_check_cpu_model_name_length + CHECK (char_length(cpu_model_name) < 255); diff --git a/src/arrow/dev/benchmarking/ddl/1_02_table_public_gpu.sql b/src/arrow/dev/benchmarking/ddl/1_02_table_public_gpu.sql new file mode 100644 index 000000000..564af19de --- /dev/null +++ b/src/arrow/dev/benchmarking/ddl/1_02_table_public_gpu.sql @@ -0,0 +1,43 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +*/ + + +-- GPU +CREATE TABLE IF NOT EXISTS public.gpu +( + gpu_id SERIAL PRIMARY KEY + , gpu_information citext UNIQUE NOT NULL DEFAULT '' + , gpu_part_number citext NOT NULL DEFAULT '' + , gpu_product_name citext NOT NULL DEFAULT '' +); +COMMENT ON TABLE public.gpu IS 'GPU specifications.'; +COMMENT ON COLUMN public.gpu.gpu_information + IS 'The output of `nvidia-smi -q` (on Linux or Windows), or `cuda-smi` ' + 'or `kextstat | grep -i cuda` on OSX, or another command; anything ' + 'that gets a string to uniquely identify the GPU.'; + +-- CONSTRAINTS +CREATE INDEX gpu_index_on_part_number + ON public.gpu (gpu_part_number); + +CREATE INDEX gpu_index_on_product_name + ON public.gpu (gpu_product_name); + +CREATE INDEX gpu_index_on_product_name_and_part_number + ON public.gpu (gpu_product_name, gpu_part_number); diff --git a/src/arrow/dev/benchmarking/ddl/1_03_table_public_os.sql b/src/arrow/dev/benchmarking/ddl/1_03_table_public_os.sql new file mode 100644 index 000000000..7b03d82f4 --- /dev/null +++ b/src/arrow/dev/benchmarking/ddl/1_03_table_public_os.sql @@ -0,0 +1,57 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +*/ + + +-- OS +CREATE TABLE IF NOT EXISTS public.os +( + os_id SERIAL PRIMARY KEY + , os_name citext NOT NULL + , architecture_name citext NOT NULL + , kernel_name citext NOT NULL DEFAULT '' +); +-- @name os. forces retention of an 's' in the Graphile GraphQL api. +COMMENT ON TABLE public.os + IS E'@name os.\nOperating system name and kernel (version).'; +COMMENT ON COLUMN public.os.os_name + IS 'Operating system name. For example, OSX, Ubuntu, Windows`.'; +COMMENT ON COLUMN public.os.architecture_name + IS 'Operating system architecture; the output of `uname -m`.'; +COMMENT ON COLUMN public.os.kernel_name + IS 'Operating system kernel, or NULL. ' + 'On Linux/OSX, the output of `uname -r`. ' + 'On Windows, the output of `ver`.'; + +-- CONSTRAINTS +ALTER TABLE public.os + ADD CONSTRAINT os_check_os_name_length + CHECK (char_length(os_name) < 63); + +ALTER TABLE public.os + ADD CONSTRAINT os_check_architecture_name_length + CHECK (char_length(architecture_name) < 63); + +ALTER TABLE public.os + ADD CONSTRAINT os_check_kernel_name_length + CHECK (char_length(kernel_name) < 63); + +CREATE UNIQUE INDEX os_unique_index + ON public.os(os_name, architecture_name, kernel_name); +COMMENT ON INDEX public.os_unique_index + IS 'Enforce uniqueness of os, architecture, and kernel names.'; diff --git a/src/arrow/dev/benchmarking/ddl/1_04_table_public_benchmark_language.sql b/src/arrow/dev/benchmarking/ddl/1_04_table_public_benchmark_language.sql new file mode 100644 index 000000000..2e3553677 --- /dev/null +++ b/src/arrow/dev/benchmarking/ddl/1_04_table_public_benchmark_language.sql @@ -0,0 +1,35 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +*/ + + +-- BENCHMARK_LANGUAGE +CREATE TABLE IF NOT EXISTS public.benchmark_language +( + benchmark_language_id SERIAL PRIMARY KEY + , benchmark_language citext NOT NULL UNIQUE +); +COMMENT ON TABLE public.benchmark_language + IS 'The language the benchmark was written in (and presumably for).'; +COMMENT ON COLUMN public.benchmark_language.benchmark_language + IS 'The benchmark language. For example: Python'; + +-- CONSTRAINTS +ALTER TABLE public.benchmark_language + ADD CONSTRAINT benchmark_language_check_language_length + CHECK (char_length(benchmark_language) < 63); diff --git a/src/arrow/dev/benchmarking/ddl/1_05_table_public_dependencies.sql b/src/arrow/dev/benchmarking/ddl/1_05_table_public_dependencies.sql new file mode 100644 index 000000000..3744a0c35 --- /dev/null +++ b/src/arrow/dev/benchmarking/ddl/1_05_table_public_dependencies.sql @@ -0,0 +1,31 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +*/ + + +-- DEPENDENCIES +CREATE TABLE IF NOT EXISTS public.dependencies +( + dependencies_id SERIAL PRIMARY KEY + , dependencies jsonb UNIQUE NOT NULL DEFAULT '{}'::jsonb +); +COMMENT ON TABLE public.dependencies + IS E'@name dependencies.\n' + 'A JSON object mapping dependencies to their versions.'; +COMMENT ON COLUMN public.dependencies.dependencies + IS 'For example: ''{"boost": "1.69", "conda": "", "numpy": "1.15"}''.'; diff --git a/src/arrow/dev/benchmarking/ddl/1_06_table_public_language_implementation_version.sql b/src/arrow/dev/benchmarking/ddl/1_06_table_public_language_implementation_version.sql new file mode 100644 index 000000000..f7d26e4e2 --- /dev/null +++ b/src/arrow/dev/benchmarking/ddl/1_06_table_public_language_implementation_version.sql @@ -0,0 +1,46 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +*/ + + +-- LANGUAGE_IMPLEMENTATION_VERSION +CREATE TABLE IF NOT EXISTS public.language_implementation_version +( + language_implementation_version_id SERIAL + , language_implementation_version citext NOT NULL DEFAULT '' + , benchmark_language_id integer NOT NULL + , PRIMARY KEY (language_implementation_version_id, benchmark_language_id) + , FOREIGN KEY (benchmark_language_id) REFERENCES public.benchmark_language +); +COMMENT ON TABLE public.language_implementation_version + IS 'The benchmark language implementation or compiler version, e.g. ' + '''CPython 2.7'' or ''PyPy x.y'' or ''gcc 7.3.0'' or ' + '''gcc (Ubuntu 7.3.0-27ubuntu1~18.04) 7.3.0''.'; +COMMENT ON COLUMN public.language_implementation_version.language_implementation_version + IS 'The version number used in the benchmark environment (e.g. ''2.7'').'; + +-- CONSTRAINTS +ALTER TABLE public.language_implementation_version + ADD CONSTRAINT language_implementation_version_check_version_length + CHECK (char_length(language_implementation_version) < 255); + +CREATE UNIQUE INDEX language_implementation_version_unique_index + ON public.language_implementation_version + (benchmark_language_id, language_implementation_version); +COMMENT ON INDEX language_implementation_version_unique_index + IS 'Enforce unique implementation versions for the languages.'; diff --git a/src/arrow/dev/benchmarking/ddl/1_07_table_public_benchmark_type.sql b/src/arrow/dev/benchmarking/ddl/1_07_table_public_benchmark_type.sql new file mode 100644 index 000000000..1143cdb00 --- /dev/null +++ b/src/arrow/dev/benchmarking/ddl/1_07_table_public_benchmark_type.sql @@ -0,0 +1,39 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +*/ + + +-- BENCHMARK_TYPE +CREATE TABLE IF NOT EXISTS public.benchmark_type +( + benchmark_type_id SERIAL PRIMARY KEY + , benchmark_type citext NOT NULL UNIQUE + , lessisbetter boolean NOT NULL +); +COMMENT ON TABLE public.benchmark_type + IS 'The type of benchmark. For example "time", "mem", "peakmem", "track"'; +COMMENT ON COLUMN public.benchmark_type.benchmark_type + IS 'The type of units, so ''time'' for seconds, miliseconds, or ' + '''mem'' for kilobytes, megabytes.'; +COMMENT ON COLUMN public.benchmark_type.lessisbetter + IS 'True if a smaller benchmark value is better.'; + +-- CONSTRAINTS +ALTER TABLE public.benchmark_type + ADD CONSTRAINT benchmark_type_check_benchmark_type_char_length + CHECK (char_length(benchmark_type) < 63); diff --git a/src/arrow/dev/benchmarking/ddl/1_08_table_public_machine.sql b/src/arrow/dev/benchmarking/ddl/1_08_table_public_machine.sql new file mode 100644 index 000000000..8f219d3e0 --- /dev/null +++ b/src/arrow/dev/benchmarking/ddl/1_08_table_public_machine.sql @@ -0,0 +1,69 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +*/ + + +-- MACHINE +CREATE TABLE IF NOT EXISTS public.machine +( + machine_id SERIAL PRIMARY KEY + , machine_name citext NOT NULL + , mac_address macaddr NOT NULL + , memory_bytes bigint NOT NULL + , cpu_actual_frequency_Hz bigint NOT NULL + , machine_other_attributes jsonb + , cpu_id integer NOT NULL + , gpu_id integer NOT NULL + , os_id integer NOT NULL + , FOREIGN KEY (cpu_id) REFERENCES public.cpu + , FOREIGN KEY (gpu_id) REFERENCES public.gpu + , FOREIGN KEY (os_id) REFERENCES public.os +); +COMMENT ON TABLE public.machine + IS 'Unique identifiers for a machine.'; +COMMENT ON COLUMN public.machine.machine_name + IS 'A machine name of your choice.'; +COMMENT ON COLUMN public.machine.mac_address + IS 'The mac_address of a physical network interface to uniquely ' + 'identify a computer. Postgres accepts standard formats, including ' + '''08:00:2b:01:02:03'', ''08-00-2b-01-02-03'', ''08002b:010203'''; +COMMENT ON COLUMN public.machine.memory_bytes + IS 'The output of `sysctl -n hw.memsize`.'; +COMMENT ON COLUMN public.machine.cpu_actual_frequency_Hz + IS 'The output of `sysctl -n hw.cpufrequency`.'; +COMMENT ON COLUMN public.machine.machine_other_attributes + IS 'Additional attributes of interest, as a JSON object. ' + 'For example: ''{"hard_disk_type": "solid state"}''::jsonb.'; + +-- CONSTRAINTS +CREATE UNIQUE INDEX machine_index_on_mac_address + ON public.machine(mac_address); +COMMENT ON INDEX machine_index_on_mac_address + IS 'Enforce unique mac address'; + +CREATE INDEX machine_index_on_cpu_id + ON public.machine(cpu_id); + +CREATE INDEX machine_index_on_gpu_id + ON public.machine(gpu_id); + +CREATE INDEX machine_index_on_os_id + ON public.machine(os_id); + +CREATE INDEX machine_index_on_cpu_gpu_os_id + ON public.machine(cpu_id, gpu_id, os_id); diff --git a/src/arrow/dev/benchmarking/ddl/1_09_table_public_unit.sql b/src/arrow/dev/benchmarking/ddl/1_09_table_public_unit.sql new file mode 100644 index 000000000..a8cf57669 --- /dev/null +++ b/src/arrow/dev/benchmarking/ddl/1_09_table_public_unit.sql @@ -0,0 +1,37 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +*/ + + +-- UNIT +CREATE TABLE IF NOT EXISTS public.unit +( + unit_id SERIAL PRIMARY KEY + , units citext NOT NULL UNIQUE + , benchmark_type_id integer NOT NULL + , FOREIGN KEY (benchmark_type_id) + REFERENCES public.benchmark_type(benchmark_type_id) +); +COMMENT ON TABLE public.unit IS 'The actual units for a reported benchmark.'; +COMMENT ON COLUMN public.unit.units + IS 'For example: nanoseconds, microseconds, bytes, megabytes.'; + +-- CONSTRAINTS +ALTER TABLE public.unit + ADD CONSTRAINT unit_check_units_string_length + CHECK (char_length(units) < 63); diff --git a/src/arrow/dev/benchmarking/ddl/1_10_table_public_environment.sql b/src/arrow/dev/benchmarking/ddl/1_10_table_public_environment.sql new file mode 100644 index 000000000..e3a6d2395 --- /dev/null +++ b/src/arrow/dev/benchmarking/ddl/1_10_table_public_environment.sql @@ -0,0 +1,51 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +*/ + + +-- ENVIRONMENT +CREATE TABLE IF NOT EXISTS public.environment +( + environment_id SERIAL + , language_implementation_version_id integer NOT NULL + , benchmark_language_id integer NOT NULL + , dependencies_id integer NOT NULL + , PRIMARY KEY + (environment_id, benchmark_language_id, language_implementation_version_id) + , FOREIGN KEY + (benchmark_language_id) + REFERENCES public.benchmark_language + , FOREIGN KEY + (language_implementation_version_id, benchmark_language_id) + REFERENCES public.language_implementation_version( + language_implementation_version_id + , benchmark_language_id + ) + , FOREIGN KEY + (dependencies_id) + REFERENCES public.dependencies +); +COMMENT ON TABLE public.environment + IS 'Identifies a build environment for a specific suite of benchmarks.'; + +-- CONSTRAINTS +CREATE UNIQUE INDEX environment_unique_index + ON public.environment + (benchmark_language_id, language_implementation_version_id, dependencies_id); +COMMENT ON INDEX environment_unique_index + IS 'Enforce unique combinations of language version and dependencies.'; diff --git a/src/arrow/dev/benchmarking/ddl/1_11_table_public_benchmark.sql b/src/arrow/dev/benchmarking/ddl/1_11_table_public_benchmark.sql new file mode 100644 index 000000000..18895823d --- /dev/null +++ b/src/arrow/dev/benchmarking/ddl/1_11_table_public_benchmark.sql @@ -0,0 +1,54 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +*/ + + +-- BENCHMARK +CREATE TABLE IF NOT EXISTS public.benchmark +( + benchmark_id SERIAL + , benchmark_name citext NOT NULL + , parameter_names text[] + , benchmark_description text NOT NULL + , benchmark_version citext NOT NULL + , unit_id integer NOT NULL + , benchmark_language_id integer NOT NULL + , PRIMARY KEY (benchmark_id, benchmark_language_id) + , FOREIGN KEY (benchmark_language_id) REFERENCES public.benchmark_language + , FOREIGN KEY (unit_id) REFERENCES public.unit +); +COMMENT ON TABLE public.benchmark + IS 'Identifies an individual benchmark.'; +COMMENT ON COLUMN public.benchmark.parameter_names + IS 'A list of strings identifying the parameter names in the benchmark.'; +COMMENT ON COLUMN public.benchmark.benchmark_version + IS 'Can be any string. In Airspeed Velocity, the version is ' + 'by default the hash of the entire code string for the benchmark.'; + +-- CONSTRAINTS +CREATE INDEX benchmark_index_on_benchmark_language_id + ON public.benchmark(benchmark_language_id); + +CREATE INDEX benchmark_index_on_unit_id + ON public.benchmark(unit_id); + +CREATE UNIQUE INDEX benchmark_unique_index_on_language_benchmark_version + ON public.benchmark + (benchmark_language_id, benchmark_name, benchmark_version); +COMMENT ON INDEX public.benchmark_unique_index_on_language_benchmark_version + IS 'Enforce uniqueness of benchmark name and version for a given language.'; diff --git a/src/arrow/dev/benchmarking/ddl/1_12_table_public_benchmark_run.sql b/src/arrow/dev/benchmarking/ddl/1_12_table_public_benchmark_run.sql new file mode 100644 index 000000000..20b9ef0bb --- /dev/null +++ b/src/arrow/dev/benchmarking/ddl/1_12_table_public_benchmark_run.sql @@ -0,0 +1,112 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +*/ + + +-- BENCHMARK_RUN +CREATE TABLE IF NOT EXISTS public.benchmark_run +( + benchmark_run_id BIGSERIAL PRIMARY KEY + , parameter_values jsonb NOT NULL DEFAULT '{}'::jsonb + , value numeric NOT NULL + , git_commit_timestamp timestamp (0) with time zone NOT NULL + , git_hash text NOT NULL + , val_min numeric + , val_q1 numeric + , val_q3 numeric + , val_max numeric + , std_dev numeric NOT NULL + , n_obs integer NOT NULL + , run_timestamp timestamp (0) with time zone NOT NULL + , run_metadata jsonb + , run_notes text + , machine_id integer NOT NULL + , environment_id integer NOT NULL + , language_implementation_version_id integer NOT NULL + , benchmark_language_id integer NOT NULL + , benchmark_id integer NOT NULL + , FOREIGN KEY (machine_id) REFERENCES public.machine + , FOREIGN KEY + (environment_id, benchmark_language_id, language_implementation_version_id) + REFERENCES public.environment + , FOREIGN KEY (benchmark_id, benchmark_language_id) + REFERENCES public.benchmark(benchmark_id, benchmark_language_id) +); +COMMENT ON TABLE public.benchmark_run + IS 'One run per benchmark run.'; +COMMENT ON COLUMN public.benchmark_run.parameter_values + IS 'A JSON object mapping the parameter names from ' + '"benchmark.parameter_names" to values.'; +COMMENT ON COLUMN public.benchmark_run.value + IS 'The average value from the benchmark run.'; +COMMENT ON COLUMN public.benchmark_run.git_commit_timestamp + IS 'Get this using `git show -s --date=local --format="%ci" <hash>`. ' + 'ISO 8601 is recommended, e.g. ''2019-01-30 03:12 -0600''.'; +COMMENT ON COLUMN public.benchmark_run.git_hash + IS 'The commit has of the codebase currently being benchmarked.'; +COMMENT ON COLUMN public.benchmark_run.val_min + IS 'The smallest benchmark run value for this run.'; +COMMENT ON COLUMN public.benchmark_run.val_q1 + IS 'The first quartile of the benchmark run values for this run.'; +COMMENT ON COLUMN public.benchmark_run.val_q3 + IS 'The third quartile of the benchmark run values for this run.'; +COMMENT ON COLUMN public.benchmark_run.val_max + IS 'The largest benchmark run value for this run.'; +COMMENT ON COLUMN public.benchmark_run.std_dev + IS 'The standard deviation of the run values for this benchmark run.'; +COMMENT ON COLUMN public.benchmark_run.n_obs + IS 'The number of observations for this benchmark run.'; +COMMENT ON COLUMN public.benchmark_run.run_metadata + IS 'Additional metadata of interest, as a JSON object. ' + 'For example: ''{"ci_99": [2.7e-06, 3.1e-06]}''::jsonb.'; +COMMENT ON COLUMN public.benchmark_run.run_notes + IS 'Additional notes of interest, as a text string. '; + +-- CONSTRAINTS +ALTER TABLE public.benchmark_run + ADD CONSTRAINT benchmark_run_check_std_dev_nonnegative + CHECK (std_dev >= 0); + +ALTER TABLE public.benchmark_run + ADD CONSTRAINT benchmark_run_check_n_obs_positive + CHECK (n_obs > 0); + +CREATE INDEX benchmark_run_index_on_environment_id + ON public.benchmark_run(environment_id); + +CREATE INDEX benchmark_run_index_on_machine_id + ON public.benchmark_run(machine_id); + +CREATE INDEX benchmark_run_index_on_benchmark_id + ON public.benchmark_run(benchmark_id, benchmark_language_id); + +CREATE INDEX benchmark_run_index_on_benchmark_environment_time + ON public.benchmark_run + (benchmark_id, environment_id, git_commit_timestamp); +COMMENT ON INDEX + public.benchmark_run_index_on_benchmark_environment_time + IS 'Index to improve sorting by benchmark, environment, and timestamp.'; + +CREATE UNIQUE INDEX + benchmark_run_unique_index_on_env_benchmark_timestamp_params + ON public.benchmark_run + (machine_id, environment_id, benchmark_id, git_commit_timestamp, parameter_values, run_timestamp); +COMMENT ON INDEX + public.benchmark_run_unique_index_on_env_benchmark_timestamp_params + IS 'Enforce uniqueness of benchmark run for a given machine, ' + 'environment, benchmark, git commit timestamp, and parameter values.'; diff --git a/src/arrow/dev/benchmarking/ddl/2_00_views.sql b/src/arrow/dev/benchmarking/ddl/2_00_views.sql new file mode 100644 index 000000000..cbd295e50 --- /dev/null +++ b/src/arrow/dev/benchmarking/ddl/2_00_views.sql @@ -0,0 +1,324 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +*/ + +-- NOTE: +-- The function for documentation depends on view columns +-- being named exactly the same as in the table view. + +-- MACHINE_VIEW +CREATE OR REPLACE VIEW public.machine_view AS + SELECT + machine.machine_id + , mac_address + , machine_name + , memory_bytes + , cpu_actual_frequency_Hz + , os_name + , architecture_name + , kernel_name + , cpu_model_name + , cpu_core_count + , cpu_thread_count + , cpu_frequency_max_Hz + , cpu_frequency_min_Hz + , cpu_L1d_cache_bytes + , cpu_L1i_cache_bytes + , cpu_L2_cache_bytes + , cpu_L3_cache_bytes + , gpu_information + , gpu_part_number + , gpu_product_name + , machine_other_attributes + FROM public.machine AS machine + JOIN public.cpu AS cpu ON machine.cpu_id = cpu.cpu_id + JOIN public.gpu AS gpu ON machine.gpu_id = gpu.gpu_id + JOIN public.os AS os ON machine.os_id = os.os_id; +COMMENT ON VIEW public.machine_view IS +E'The machine environment (CPU, GPU, OS) used for each benchmark run.\n\n' + '- "mac_address" is unique in the "machine" table\n' + '- "gpu_part_number" is unique in the "gpu" (graphics processing unit) table\n' + ' Empty string (''''), not null, is used for machines that won''t use the GPU\n' + '- "cpu_model_name" is unique in the "cpu" (central processing unit) table\n' + '- "os_name", "os_architecture_name", and "os_kernel_name"\n' + ' are unique in the "os" (operating system) table\n' + '- "machine_other_attributes" is a key-value store for any other relevant\n' + ' data, e.g. ''{"hard_disk_type": "solid state"}'''; + + +-- LANGUAGE_IMPLEMENTATION_VERSION_VIEW +CREATE OR REPLACE VIEW public.language_implementation_version_view AS + SELECT + lv.language_implementation_version_id + , bl.benchmark_language + , lv.language_implementation_version + FROM public.language_implementation_version AS lv + JOIN public.benchmark_language AS bl + ON lv.benchmark_language_id = bl.benchmark_language_id; + +-- ENVIRONMENT_VIEW +CREATE OR REPLACE VIEW public.environment_view AS + SELECT + env.environment_id + , benchmark_language + , language_implementation_version + , dependencies + FROM public.environment AS env + JOIN public.benchmark_language AS language + ON env.benchmark_language_id = language.benchmark_language_id + JOIN public.language_implementation_version AS version + ON env.language_implementation_version_id = version.language_implementation_version_id + JOIN public.dependencies AS deps + ON env.dependencies_id = deps.dependencies_id; +COMMENT ON VIEW public.environment_view IS +E'The build environment used for a reported benchmark run.\n' + '(Will be inferred from each "benchmark_run" if not explicitly added).\n\n' + '- Each entry is unique on\n' + ' ("benchmark_language", "language_implementation_version", "dependencies")\n' + '- "benchmark_language" is unique in the "benchmark_language" table\n' + '- "benchmark_language" plus "language_implementation_version" is unique in\n' + ' the "language_implementation_version" table\n' + '- "dependencies" is unique in the "dependencies" table'; + +-- UNIT_VIEW +CREATE OR REPLACE VIEW public.unit_view AS + SELECT + unit.unit_id + , units + , benchmark_type + , lessisbetter + FROM public.unit AS unit + JOIN public.benchmark_type AS bt + ON unit.benchmark_type_id = bt.benchmark_type_id; + +-- BENCHMARK_VIEW +CREATE OR REPLACE VIEW public.benchmark_view AS + SELECT + b.benchmark_id + , benchmark_name + , parameter_names + , benchmark_description + , benchmark_type + , units + , lessisbetter + , benchmark_version + , benchmark_language + FROM public.benchmark AS b + JOIN public.benchmark_language AS benchmark_language + ON b.benchmark_language_id = benchmark_language.benchmark_language_id + JOIN public.unit AS unit + ON b.unit_id = unit.unit_id + JOIN public.benchmark_type AS benchmark_type + ON unit.benchmark_type_id = benchmark_type.benchmark_type_id; +COMMENT ON VIEW public.benchmark_view IS +E'The details about a particular benchmark.\n\n' + '- "benchmark_name" is unique for a given "benchmark_language"\n' + '- Each entry is unique on\n' + ' ("benchmark_language", "benchmark_name", "benchmark_version")'; + +-- BENCHMARK_RUN_VIEW +CREATE OR REPLACE VIEW public.benchmark_run_view AS + SELECT + run.benchmark_run_id + -- benchmark_view (name, version, language only) + , benchmark_name + , benchmark_version + -- datum + , parameter_values + , value + , git_commit_timestamp + , git_hash + , val_min + , val_q1 + , val_q3 + , val_max + , std_dev + , n_obs + , run_timestamp + , run_metadata + , run_notes + -- machine_view (mac address only) + , mac_address + -- environment_view + , env.benchmark_language + , language_implementation_version + , dependencies + FROM public.benchmark_run AS run + JOIN public.benchmark_view AS benchmark + ON run.benchmark_id = benchmark.benchmark_id + JOIN public.machine_view AS machine + ON run.machine_id = machine.machine_id + JOIN public.environment_view AS env + ON run.environment_id = env.environment_id; +COMMENT ON VIEW public.benchmark_run_view IS +E'Each benchmark run.\n\n' + '- Each entry is unique on the machine, environment, benchmark,\n' + ' and git commit timestamp.'; + +-- FULL_BENCHMARK_RUN_VIEW +CREATE OR REPLACE VIEW public.full_benchmark_run_view AS + SELECT + run.benchmark_run_id + -- benchmark_view + , benchmark_name + , parameter_names + , benchmark_description + , benchmark_type + , units + , lessisbetter + , benchmark_version + -- datum + , parameter_values + , value + , git_commit_timestamp + , git_hash + , val_min + , val_q1 + , val_q3 + , val_max + , std_dev + , n_obs + , run_timestamp + , run_metadata + , run_notes + -- machine_view + , machine_name + , mac_address + , memory_bytes + , cpu_actual_frequency_Hz + , os_name + , architecture_name + , kernel_name + , cpu_model_name + , cpu_core_count + , cpu_thread_count + , cpu_frequency_max_Hz + , cpu_frequency_min_Hz + , cpu_L1d_cache_bytes + , cpu_L1i_cache_bytes + , cpu_L2_cache_bytes + , cpu_L3_cache_bytes + , gpu_information + , gpu_part_number + , gpu_product_name + , machine_other_attributes + -- environment_view + , env.benchmark_language + , env.language_implementation_version + , dependencies + FROM public.benchmark_run AS run + JOIN public.benchmark_view AS benchmark + ON run.benchmark_id = benchmark.benchmark_id + JOIN public.machine_view AS machine + ON run.machine_id = machine.machine_id + JOIN public.environment_view AS env + ON run.environment_id = env.environment_id; + +-- SUMMARIZED_TABLES_VIEW +CREATE VIEW public.summarized_tables_view AS + WITH chosen AS ( + SELECT + cls.oid AS id + , cls.relname as tbl_name + FROM pg_catalog.pg_class AS cls + JOIN pg_catalog.pg_namespace AS ns ON cls.relnamespace = ns.oid + WHERE + cls.relkind = 'r' + AND ns.nspname = 'public' + ), all_constraints AS ( + SELECT + chosen.id AS tbl_id + , chosen.tbl_name + , unnest(conkey) AS col_id + , 'foreign key' AS col_constraint + FROM pg_catalog.pg_constraint + JOIN chosen ON chosen.id = conrelid + WHERE contype = 'f' + + UNION + + SELECT + chosen.id + , chosen.tbl_name + , unnest(indkey) + , 'unique' + FROM pg_catalog.pg_index i + JOIN chosen ON chosen.id = i.indrelid + WHERE i.indisunique AND NOT i.indisprimary + + UNION + + SELECT + chosen.id + , chosen.tbl_name + , unnest(indkey) + , 'primary key' + FROM pg_catalog.pg_index i + JOIN chosen on chosen.id = i.indrelid + WHERE i.indisprimary + ), gathered_constraints AS ( + SELECT + tbl_id + , tbl_name + , col_id + , string_agg(col_constraint, ', ' ORDER BY col_constraint) + AS col_constraint + FROM all_constraints + GROUP BY tbl_id, tbl_name, col_id + ) + SELECT + chosen.tbl_name AS table_name + , columns.attnum AS column_number + , columns.attname AS column_name + , typ.typname AS type_name + , CASE + WHEN columns.attnotnull + THEN 'not null' + ELSE '' + END AS nullable + , CASE + WHEN defaults.adsrc like 'nextval%' + THEN 'serial' + ELSE defaults.adsrc + END AS default_value + , CASE + WHEN gc.col_constraint = '' OR gc.col_constraint IS NULL + THEN cnstrnt.consrc + WHEN cnstrnt.consrc IS NULL + THEN gc.col_constraint + ELSE gc.col_constraint || ', ' || cnstrnt.consrc + END AS description + FROM pg_catalog.pg_attribute AS columns + JOIN chosen ON columns.attrelid = chosen.id + JOIN pg_catalog.pg_type AS typ + ON typ.oid = columns.atttypid + LEFT JOIN gathered_constraints AS gc + ON gc.col_id = columns.attnum + AND gc.tbl_id = columns.attrelid + LEFT JOIN pg_attrdef AS defaults + ON defaults.adrelid = chosen.id + AND defaults.adnum = columns.attnum + LEFT JOIN pg_catalog.pg_constraint AS cnstrnt + ON cnstrnt.conrelid = columns.attrelid + AND columns.attrelid = ANY(cnstrnt.conkey) + WHERE + columns.attnum > 0 + ORDER BY table_name, column_number; +COMMENT ON VIEW public.summarized_tables_view + IS 'A summary of all columns from all tables in the public schema, ' + ' identifying nullability, primary/foreign keys, and data type.'; diff --git a/src/arrow/dev/benchmarking/ddl/3_00_functions_helpers.sql b/src/arrow/dev/benchmarking/ddl/3_00_functions_helpers.sql new file mode 100644 index 000000000..b10b69a4e --- /dev/null +++ b/src/arrow/dev/benchmarking/ddl/3_00_functions_helpers.sql @@ -0,0 +1,643 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +*/ + + +-- PROJECT_DETAILS +CREATE TYPE public.type_project_details AS ( + project_name text + , project_url text + , repo_url text +); + +CREATE OR REPLACE FUNCTION public.project_details() +RETURNS public.type_project_details AS +$$ + SELECT project_name, project_url, repo_url + FROM public.project + ORDER BY last_changed DESC + LIMIT 1 +$$ +LANGUAGE sql STABLE; +COMMENT ON FUNCTION public.project_details() +IS 'Get the current project name, url, and repo url.'; + + +-------------------------- GET-OR-SET FUNCTIONS -------------------------- +-- The following functions have the naming convention "get_<tablename>_id". +-- All of them attempt to SELECT the desired row given the column +-- values, and if it does not exist will INSERT it. +-- +-- When functions are overloaded with fewer columns, it is to allow +-- selection only, given columns that comprise a unique index. + +-- GET_CPU_ID +CREATE OR REPLACE FUNCTION public.get_cpu_id( + cpu_model_name citext + , cpu_core_count integer + , cpu_thread_count integer + , cpu_frequency_max_Hz bigint + , cpu_frequency_min_Hz bigint + , cpu_L1d_cache_bytes integer + , cpu_L1i_cache_bytes integer + , cpu_L2_cache_bytes integer + , cpu_L3_cache_bytes integer +) +RETURNS integer AS +$$ + DECLARE + result integer; + BEGIN + SELECT cpu_id INTO result FROM public.cpu AS cpu + WHERE cpu.cpu_model_name = $1 + AND cpu.cpu_core_count = $2 + AND cpu.cpu_thread_count = $3 + AND cpu.cpu_frequency_max_Hz = $4 + AND cpu.cpu_frequency_min_Hz = $5 + AND cpu.cpu_L1d_cache_bytes = $6 + AND cpu.cpu_L1i_cache_bytes = $7 + AND cpu.cpu_L2_cache_bytes = $8 + AND cpu.cpu_L3_cache_bytes = $9; + + IF result IS NULL THEN + INSERT INTO public.cpu( + cpu_model_name + , cpu_core_count + , cpu_thread_count + , cpu_frequency_max_Hz + , cpu_frequency_min_Hz + , cpu_L1d_cache_bytes + , cpu_L1i_cache_bytes + , cpu_L2_cache_bytes + , cpu_L3_cache_bytes + ) + VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9) + RETURNING cpu_id INTO result; + END IF; + + RETURN result; + END +$$ +LANGUAGE plpgsql; +COMMENT ON FUNCTION public.get_cpu_id( + citext + , integer + , integer + , bigint -- cpu_frequency_max_Hz + , bigint -- cpu_frequency_min_Hz + , integer + , integer + , integer + , integer +) +IS 'Insert or select CPU data, returning "cpu.cpu_id".'; + +-- GET_GPU_ID +CREATE OR REPLACE FUNCTION public.get_gpu_id( + gpu_information citext DEFAULT NULL + , gpu_part_number citext DEFAULT NULL + , gpu_product_name citext DEFAULT NULL +) +RETURNS integer AS +$$ + DECLARE + result integer; + BEGIN + SELECT gpu_id INTO result FROM public.gpu AS gpu + WHERE + gpu.gpu_information = COALESCE($1, '') + AND gpu.gpu_part_number = COALESCE($2, '') + AND gpu.gpu_product_name = COALESCE($3, ''); + + IF result IS NULL THEN + INSERT INTO public.gpu( + gpu_information + , gpu_part_number + , gpu_product_name + ) + VALUES (COALESCE($1, ''), COALESCE($2, ''), COALESCE($3, '')) + RETURNING gpu_id INTO result; + END IF; + + RETURN result; + END +$$ +LANGUAGE plpgsql; +COMMENT ON FUNCTION public.get_gpu_id(citext, citext, citext) +IS 'Insert or select GPU data, returning "gpu.gpu_id".'; + +-- GET_OS_ID +CREATE OR REPLACE FUNCTION public.get_os_id( + os_name citext + , architecture_name citext + , kernel_name citext DEFAULT '' +) +RETURNS integer AS +$$ + DECLARE + result integer; + BEGIN + SELECT os_id INTO result FROM public.os AS os + WHERE os.os_name = $1 + AND os.architecture_name = $2 + AND os.kernel_name = COALESCE($3, ''); + + IF result is NULL THEN + INSERT INTO public.os(os_name, architecture_name, kernel_name) + VALUES ($1, $2, COALESCE($3, '')) + RETURNING os_id INTO result; + END IF; + + RETURN result; + END +$$ +LANGUAGE plpgsql; +COMMENT ON FUNCTION public.get_os_id(citext, citext, citext) +IS 'Insert or select OS data, returning "os.os_id".'; + +-- GET_MACHINE_ID (full signature) +CREATE OR REPLACE FUNCTION public.get_machine_id( + mac_address macaddr + , machine_name citext + , memory_bytes bigint + , cpu_actual_frequency_Hz bigint + -- os + , os_name citext + , architecture_name citext + , kernel_name citext + -- cpu + , cpu_model_name citext + , cpu_core_count integer + , cpu_thread_count integer + , cpu_frequency_max_Hz bigint + , cpu_frequency_min_Hz bigint + , L1d_cache_bytes integer + , L1i_cache_bytes integer + , L2_cache_bytes integer + , L3_cache_bytes integer + -- gpu + , gpu_information citext DEFAULT '' + , gpu_part_number citext DEFAULT NULL + , gpu_product_name citext DEFAULT NULL + -- nullable machine attributes + , machine_other_attributes jsonb DEFAULT NULL +) +RETURNS integer AS +$$ + DECLARE + found_cpu_id integer; + found_gpu_id integer; + found_os_id integer; + result integer; + BEGIN + -- Can't bypass looking up all the values because of unique constraint. + SELECT public.get_cpu_id( + cpu_model_name + , cpu_core_count + , cpu_thread_count + , cpu_frequency_max_Hz + , cpu_frequency_min_Hz + , L1d_cache_bytes + , L1i_cache_bytes + , L2_cache_bytes + , L3_cache_bytes + ) INTO found_cpu_id; + + SELECT public.get_gpu_id( + gpu_information + , gpu_part_number + , gpu_product_name + ) INTO found_gpu_id; + + SELECT public.get_os_id( + os_name + , architecture_name + , kernel_name + ) INTO found_os_id; + + SELECT machine_id INTO result FROM public.machine AS m + WHERE m.os_id = found_os_id + AND m.cpu_id = found_cpu_id + AND m.gpu_id = found_gpu_id + AND m.mac_address = $1 + AND m.machine_name = $2 + AND m.memory_bytes = $3 + AND m.cpu_actual_frequency_Hz = $4; + + IF result IS NULL THEN + INSERT INTO public.machine( + os_id + , cpu_id + , gpu_id + , mac_address + , machine_name + , memory_bytes + , cpu_actual_frequency_Hz + , machine_other_attributes + ) + VALUES (found_os_id, found_cpu_id, found_gpu_id, $1, $2, $3, $4, $20) + RETURNING machine_id INTO result; + END IF; + + RETURN result; + END +$$ +LANGUAGE plpgsql; +COMMENT ON FUNCTION public.get_machine_id( + macaddr + , citext + , bigint -- memory_bytes + , bigint -- cpu_frequency_actual_Hz + -- os + , citext + , citext + , citext + -- cpu + , citext + , integer + , integer + , bigint -- cpu_frequency_max_Hz + , bigint -- cpu_frequency_min_Hz + , integer + , integer + , integer + , integer + -- gpu + , citext + , citext + , citext + -- nullable machine attributes + , jsonb +) +IS 'Insert or select machine data, returning "machine.machine_id".'; + +-- GET_MACHINE_ID (given unique mac_address) +CREATE OR REPLACE FUNCTION public.get_machine_id(mac_address macaddr) +RETURNS integer AS +$$ + SELECT machine_id FROM public.machine AS m + WHERE m.mac_address = $1; +$$ +LANGUAGE sql STABLE; +COMMENT ON FUNCTION public.get_machine_id(macaddr) +IS 'Select machine_id given its mac address, returning "machine.machine_id".'; + +-- GET_BENCHMARK_LANGUAGE_ID +CREATE OR REPLACE FUNCTION public.get_benchmark_language_id(language citext) +RETURNS integer AS +$$ + DECLARE + result integer; + BEGIN + SELECT benchmark_language_id INTO result + FROM public.benchmark_language AS bl + WHERE bl.benchmark_language = language; + + IF result IS NULL THEN + INSERT INTO public.benchmark_language(benchmark_language) + VALUES (language) + RETURNING benchmark_language_id INTO result; + END IF; + + RETURN result; + END +$$ +LANGUAGE plpgsql; +COMMENT ON FUNCTION public.get_benchmark_language_id(citext) +IS 'Insert or select benchmark_language returning ' + '"benchmark_language.benchmark_language_id".'; + +-- GET_LANGUAGE_IMPLEMENTATION_VERSION_ID +CREATE OR REPLACE FUNCTION public.get_language_implementation_version_id( + language citext + , language_implementation_version citext DEFAULT '' +) +RETURNS integer AS +$$ + DECLARE + language_id integer; + result integer; + BEGIN + SELECT public.get_benchmark_language_id($1) INTO language_id; + + SELECT language_implementation_version_id INTO result FROM public.language_implementation_version AS lv + WHERE lv.benchmark_language_id = language_id + AND lv.language_implementation_version = COALESCE($2, ''); + + IF result IS NULL THEN + INSERT INTO + public.language_implementation_version(benchmark_language_id, language_implementation_version) + VALUES (language_id, COALESCE($2, '')) + RETURNING language_implementation_version_id INTO result; + END IF; + + RETURN result; + END +$$ +LANGUAGE plpgsql; +COMMENT ON FUNCTION public.get_language_implementation_version_id(citext, citext) +IS 'Insert or select language and version data, ' + 'returning "language_implementation_version.language_implementation_version_id".'; + +CREATE OR REPLACE FUNCTION public.get_language_implementation_version_id( + -- overload for when language_id is known + language_id integer + , language_implementation_version citext DEFAULT '' +) +RETURNS integer AS +$$ + DECLARE + result integer; + BEGIN + SELECT language_implementation_version_id INTO result FROM public.language_implementation_version AS lv + WHERE lv.benchmark_language_id = language_id + AND lv.language_implementation_version = COALESCE($2, ''); + + IF result IS NULL THEN + INSERT INTO + public.language_implementation_version(benchmark_language_id, language_implementation_version) + VALUES (language_id, COALESCE($2, '')) + RETURNING language_implementation_version_id INTO result; + END IF; + + RETURN result; + END +$$ +LANGUAGE plpgsql; + +-- GET_LANGUAGE_DEPENDENCY_LOOKUP_ID +CREATE OR REPLACE FUNCTION public.get_dependencies_id( + dependencies jsonb DEFAULT '{}'::jsonb +) +RETURNS integer AS +$$ + DECLARE + result integer; + BEGIN + SELECT dependencies_id INTO result + FROM public.dependencies AS ldl + WHERE ldl.dependencies = COALESCE($1, '{}'::jsonb); + + IF result IS NULL THEN + INSERT INTO + public.dependencies(dependencies) + VALUES (COALESCE($1, '{}'::jsonb)) + RETURNING dependencies_id INTO result; + END IF; + + RETURN result; + END +$$ +LANGUAGE plpgsql; +COMMENT ON FUNCTION public.get_dependencies_id(jsonb) +IS 'Insert or select dependencies, returning "dependencies.dependencies_id".'; + +-- GET_ENVIRONMENT_ID +CREATE OR REPLACE FUNCTION public.get_environment_id( + language citext, + language_implementation_version citext DEFAULT '', + dependencies jsonb DEFAULT '{}'::jsonb +) +RETURNS integer AS +$$ + DECLARE + found_language_id integer; + found_version_id integer; + found_dependencies_id integer; + result integer; + BEGIN + SELECT public.get_benchmark_language_id($1) INTO found_language_id; + SELECT + public.get_language_implementation_version_id(found_language_id, $2) + INTO found_version_id; + SELECT + public.get_dependencies_id ($3) + INTO found_dependencies_id; + + SELECT environment_id INTO result FROM public.environment AS e + WHERE e.benchmark_language_id = found_language_id + AND e.language_implementation_version_id = found_version_id + AND e.dependencies_id = found_dependencies_id; + + IF result IS NULL THEN + INSERT INTO + public.environment( + benchmark_language_id + , language_implementation_version_id + , dependencies_id + ) + VALUES (found_language_id, found_version_id, found_dependencies_id) + RETURNING environment_id INTO result; + END IF; + + RETURN result; + END +$$ +LANGUAGE plpgsql; +COMMENT ON FUNCTION public.get_environment_id(citext, citext, jsonb) +IS 'Insert or select language, language version, and dependencies, ' + 'returning "environment.environment_id".'; + +-- GET_BENCHMARK_TYPE_ID (full signature) +CREATE OR REPLACE FUNCTION public.get_benchmark_type_id( + benchmark_type citext + , lessisbetter boolean +) +RETURNS integer AS +$$ + DECLARE + result integer; + BEGIN + SELECT benchmark_type_id INTO result FROM public.benchmark_type AS bt + WHERE bt.benchmark_type = $1 + AND bt.lessisbetter = $2; + + IF result IS NULL THEN + INSERT INTO public.benchmark_type(benchmark_type, lessisbetter) + VALUES($1, $2) + RETURNING benchmark_type_id INTO result; + END IF; + + RETURN result; + END +$$ +LANGUAGE plpgsql; +COMMENT ON FUNCTION public.get_benchmark_type_id(citext, boolean) +IS 'Insert or select benchmark type and lessisbetter, ' + 'returning "benchmark_type.benchmark_type_id".'; + +-- GET_BENCHMARK_TYPE_ID (given unique benchmark_type string only) +CREATE OR REPLACE FUNCTION public.get_benchmark_type_id( + benchmark_type citext +) +RETURNS integer AS +$$ + DECLARE + result integer; + BEGIN + SELECT benchmark_type_id INTO result FROM public.benchmark_type AS bt + WHERE bt.benchmark_type = $1; + + RETURN result; + END +$$ +LANGUAGE plpgsql; +COMMENT ON FUNCTION public.get_benchmark_type_id(citext) +IS 'Select benchmark_type_id given benchmark type (e.g. ''time''), ' + 'returning "benchmark_type.benchmark_type_id".'; + +-- GET_UNIT_ID (full signature) +CREATE OR REPLACE FUNCTION public.get_unit_id( + benchmark_type citext + , units citext + , lessisbetter boolean DEFAULT NULL +) +RETURNS integer AS +$$ + DECLARE + found_benchmark_type_id integer; + result integer; + BEGIN + + IF ($3 IS NOT NULL) -- if lessisbetter is not null + THEN + SELECT public.get_benchmark_type_id($1, $3) + INTO found_benchmark_type_id; + ELSE + SELECT public.get_benchmark_type_id($1) + INTO found_benchmark_type_id; + END IF; + + SELECT unit_id INTO result FROM public.unit AS u + WHERE u.benchmark_type_id = found_benchmark_type_id + AND u.units = $2; + + IF result IS NULL THEN + INSERT INTO public.unit(benchmark_type_id, units) + VALUES(found_benchmark_type_id, $2) + RETURNING unit_id INTO result; + END IF; + + RETURN result; + END +$$ +LANGUAGE plpgsql; +COMMENT ON FUNCTION public.get_unit_id(citext, citext, boolean) +IS 'Insert or select benchmark type (e.g. ''time''), ' + 'units string (e.g. ''miliseconds''), ' + 'and "lessisbetter" (true if smaller benchmark values are better), ' + 'returning "unit.unit_id".'; + +-- GET_UNIT_ID (given unique units string only) +CREATE OR REPLACE FUNCTION public.get_unit_id(units citext) +RETURNS integer AS +$$ + SELECT unit_id FROM public.unit AS u + WHERE u.units = units; +$$ +LANGUAGE sql STABLE; +COMMENT ON FUNCTION public.get_unit_id(citext) +IS 'Select unit_id given unit name, returning "unit.unit_id".'; + +-- GET_BENCHMARK_ID (full signature) +CREATE OR REPLACE FUNCTION public.get_benchmark_id( + benchmark_language citext + , benchmark_name citext + , parameter_names text[] + , benchmark_description text + , benchmark_version citext + , benchmark_type citext + , units citext + , lessisbetter boolean +) +RETURNS integer AS +$$ + DECLARE + found_benchmark_language_id integer; + found_unit_id integer; + result integer; + BEGIN + SELECT public.get_benchmark_language_id( + benchmark_language + ) INTO found_benchmark_language_id; + + SELECT public.get_unit_id( + benchmark_type + , units + , lessisbetter + ) INTO found_unit_id; + + SELECT benchmark_id INTO result FROM public.benchmark AS b + WHERE b.benchmark_language_id = found_benchmark_language_id + AND b.benchmark_name = $2 + -- handle nullable "parameter_names" + AND b.parameter_names IS NOT DISTINCT FROM $3 + AND b.benchmark_description = $4 + AND b.benchmark_version = $5 + AND b.unit_id = found_unit_id; + + IF result IS NULL THEN + INSERT INTO public.benchmark( + benchmark_language_id + , benchmark_name + , parameter_names + , benchmark_description + , benchmark_version + , unit_id + ) + VALUES (found_benchmark_language_id, $2, $3, $4, $5, found_unit_id) + RETURNING benchmark_id INTO result; + END IF; + + RETURN result; + END +$$ +LANGUAGE plpgsql; +COMMENT ON FUNCTION public.get_benchmark_id( + citext + , citext + , text[] + , text + , citext + , citext + , citext + , boolean +) +IS 'Insert/select benchmark given data, returning "benchmark.benchmark_id".'; + +-- GET_BENCHMARK_ID (by unique columns) +CREATE OR REPLACE FUNCTION public.get_benchmark_id( + benchmark_language citext + , benchmark_name citext + , benchmark_version citext +) +RETURNS integer AS +$$ + WITH language AS ( + SELECT public.get_benchmark_language_id(benchmark_language) AS id + ) + SELECT b.benchmark_id + FROM public.benchmark AS b + JOIN language ON b.benchmark_language_id = language.id + WHERE b.benchmark_name = benchmark_name + AND benchmark_version = benchmark_version +$$ +LANGUAGE sql STABLE; +COMMENT ON FUNCTION public.get_benchmark_id(citext, citext, citext) +IS 'Select existing benchmark given unique columns, ' + 'returning "benchmark.benchmark_id".'; diff --git a/src/arrow/dev/benchmarking/ddl/3_01_functions_triggers.sql b/src/arrow/dev/benchmarking/ddl/3_01_functions_triggers.sql new file mode 100644 index 000000000..b6ce4741a --- /dev/null +++ b/src/arrow/dev/benchmarking/ddl/3_01_functions_triggers.sql @@ -0,0 +1,574 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +*/ + + +-------------------------- TRIGGER FUNCTIONS -------------------------- +-- Views that do not select from a single table or view are not +-- automatically updatable. These trigger functions are intended +-- to be run instead of INSERT into the complicated views. + + +-- LANGUAGE_IMPLEMENTATION_VERSION_VIEW_INSERT_ROW +CREATE OR REPLACE FUNCTION public.language_implementation_version_view_insert_row() +RETURNS trigger AS +$$ + DECLARE + language_id integer; + result integer; + BEGIN + IF NEW.benchmark_language IS NULL THEN + RAISE EXCEPTION 'Column "benchmark_language" cannot be NULL.'; + END IF; + IF NEW.language_implementation_version IS NULL THEN + RAISE EXCEPTION + 'Column "language_implementation_version" cannot be NULL (use '''' instead).'; + END IF; + + SELECT public.get_benchmark_language_id(NEW.benchmark_language) + INTO language_id; + + SELECT language_implementation_version_id INTO result FROM public.language_implementation_version AS lv + WHERE lv.benchmark_language_id = language_id + AND lv.language_implementation_version = NEW.language_implementation_version; + + IF result IS NOT NULL THEN + -- row already exists + RETURN NULL; + ELSE + INSERT INTO + public.language_implementation_version( + benchmark_language_id + , language_implementation_version + ) + VALUES (language_id, NEW.language_implementation_version) + RETURNING language_implementation_version_id INTO NEW.language_implementation_version_id; + END IF; + + RETURN NEW; + END +$$ +LANGUAGE plpgsql; + +-- ENVIRONMENT_VIEW_INSERT_ROW +CREATE OR REPLACE FUNCTION public.environment_view_insert_row() +RETURNS trigger AS +$$ + DECLARE + found_language_id integer; + found_version_id integer; + found_dependencies_id integer; + result integer; + BEGIN + IF NEW.benchmark_language IS NULL + THEN + RAISE EXCEPTION 'Column "benchmark_language" cannot be NULL.'; + END IF; + IF NEW.language_implementation_version IS NULL THEN + RAISE EXCEPTION + 'Column "language_implementation_version" cannot be NULL (use '''' instead).'; + END IF; + + SELECT public.get_benchmark_language_id(NEW.benchmark_language) + INTO found_language_id; + + SELECT public.get_language_implementation_version_id( + found_language_id + , NEW.language_implementation_version + ) + INTO found_version_id; + + SELECT public.get_dependencies_id(NEW.dependencies) + INTO found_dependencies_id; + + SELECT environment_id INTO result FROM public.environment AS e + WHERE e.benchmark_language_id = found_language_id + AND e.language_implementation_version_id = found_version_id + AND e.dependencies_id = found_dependencies_id; + + IF result IS NOT NULL THEN + -- row already exists + RETURN NULL; + ELSE + INSERT INTO + public.environment( + benchmark_language_id + , language_implementation_version_id + , dependencies_id + ) + VALUES (found_language_id, found_version_id, found_dependencies_id) + RETURNING environment_id INTO NEW.environment_id; + END IF; + + RETURN NEW; + END +$$ +LANGUAGE plpgsql; + +-- MACHINE_VIEW_INSERT_ROW +CREATE OR REPLACE FUNCTION public.machine_view_insert_row() +RETURNS trigger AS +$$ + DECLARE + found_cpu_id integer; + found_gpu_id integer; + found_os_id integer; + result integer; + BEGIN + IF ( + NEW.machine_name IS NULL + OR NEW.memory_bytes IS NULL + OR NEW.cpu_model_name IS NULL + OR NEW.cpu_core_count IS NULL + OR NEW.cpu_thread_count IS NULL + OR NEW.cpu_frequency_max_Hz IS NULL + OR NEW.cpu_frequency_min_Hz IS NULL + OR NEW.cpu_L1d_cache_bytes IS NULL + OR NEW.cpu_L1i_cache_bytes IS NULL + OR NEW.cpu_L2_cache_bytes IS NULL + OR NEW.cpu_L3_cache_bytes IS NULL + OR NEW.os_name IS NULL + OR NEW.architecture_name IS NULL + ) + THEN + RAISE EXCEPTION 'None of the columns in "machine_view" can be NULL. ' + 'all columns in table "gpu" will default to the empty string '''', ' + 'as will blank "os.kernel_name". This is to allow uniqueness ' + 'constraints to work. Thank you!.'; + END IF; + + SELECT public.get_cpu_id( + NEW.cpu_model_name + , NEW.cpu_core_count + , NEW.cpu_thread_count + , NEW.cpu_frequency_max_Hz + , NEW.cpu_frequency_min_Hz + , NEW.cpu_L1d_cache_bytes + , NEW.cpu_L1i_cache_bytes + , NEW.cpu_L2_cache_bytes + , NEW.cpu_L3_cache_bytes + ) INTO found_cpu_id; + + SELECT public.get_gpu_id( + NEW.gpu_information + , NEW.gpu_part_number + , NEW.gpu_product_name + ) INTO found_gpu_id; + + SELECT public.get_os_id( + NEW.os_name + , NEW.architecture_name + , NEW.kernel_name + ) INTO found_os_id; + + SELECT machine_id INTO result FROM public.machine AS m + WHERE m.os_id = found_os_id + AND m.cpu_id = found_cpu_id + AND m.gpu_id = found_gpu_id + AND m.machine_name = NEW.machine_name + AND m.memory_bytes = NEW.memory_bytes + AND m.cpu_actual_frequency_Hz = NEW.cpu_actual_frequency_Hz; + + IF result IS NOT NULL THEN + -- row already exists + RETURN NULL; + ELSE + INSERT INTO public.machine( + os_id + , cpu_id + , gpu_id + , machine_name + , mac_address + , memory_bytes + , cpu_actual_frequency_Hz + , machine_other_attributes + ) + VALUES ( + found_os_id + , found_cpu_id + , found_gpu_id + , NEW.machine_name + , NEW.mac_address + , NEW.memory_bytes + , NEW.cpu_actual_frequency_Hz + , NEW.machine_other_attributes + ) + RETURNING machine_id INTO NEW.machine_id; + END IF; + + RETURN NEW; + END +$$ +LANGUAGE plpgsql; + +-- UNIT_VIEW_INSERT_ROW +CREATE OR REPLACE FUNCTION public.unit_view_insert_row() +RETURNS trigger AS +$$ + DECLARE + found_benchmark_type_id integer; + result integer; + BEGIN + IF (NEW.benchmark_type IS NULL OR NEW.units IS NULL) + THEN + RAISE EXCEPTION E'"benchmark_type" and "units" cannot be NULL.\n' + 'Further, if the "benchmark_type" has never been defined, ' + '"lessisbetter" must be defined or there will be an error.'; + END IF; + + -- It's OK for "lessisbetter" = NULL if "benchmark_type" already exists. + SELECT public.get_benchmark_type_id(NEW.benchmark_type, NEW.lessisbetter) + INTO found_benchmark_type_id; + + SELECT unit_id INTO result FROM public.unit AS u + WHERE u.benchmark_type_id = found_benchmark_type_id + AND u.units = NEW.units; + + IF result IS NOT NULL THEN + -- row already exists + RETURN NULL; + ELSE + INSERT INTO public.unit ( + benchmark_type_id + , units + ) + VALUES ( + found_benchmark_type_id + , NEW.units + ) + RETURNING unit_id INTO NEW.unit_id; + END IF; + + RETURN NEW; + END +$$ +LANGUAGE plpgsql; + +-- BENCHMARK_VIEW_INSERT_ROW +CREATE OR REPLACE FUNCTION public.benchmark_view_insert_row() +RETURNS trigger AS +$$ + DECLARE + found_benchmark_language_id integer; + found_units_id integer; + result integer; + BEGIN + IF ( + NEW.benchmark_name IS NULL + OR NEW.benchmark_version IS NULL + OR NEW.benchmark_language IS NULL + OR NEW.benchmark_type IS NULL + OR NEW.benchmark_description IS NULL + OR NEW.units IS NULL + ) + THEN + RAISE EXCEPTION 'The only nullable column in this view is ' + '"benchmark.parameter_names".'; + END IF; + + SELECT public.get_benchmark_language_id( + NEW.benchmark_language + ) INTO found_benchmark_language_id; + + SELECT public.get_unit_id(NEW.units) INTO found_units_id; + + SELECT benchmark_id INTO result FROM public.benchmark AS b + WHERE b.benchmark_language_id = found_benchmark_language_id + AND b.benchmark_name = NEW.benchmark_name + -- handle nullable "parameter_names" + AND b.parameter_names IS NOT DISTINCT FROM NEW.parameter_names + AND b.benchmark_description = NEW.benchmark_description + AND b.benchmark_version = NEW.benchmark_version + AND b.unit_id = found_units_id; + + IF result IS NOT NULL THEN + -- row already exists + RETURN NULL; + ELSE + INSERT INTO public.benchmark( + benchmark_language_id + , benchmark_name + , parameter_names + , benchmark_description + , benchmark_version + , unit_id + ) + VALUES ( + found_benchmark_language_id + , NEW.benchmark_name + , NEW.parameter_names + , NEW.benchmark_description + , NEW.benchmark_version + , found_units_id + ) + RETURNING benchmark_id INTO NEW.benchmark_id; + END IF; + + RETURN NEW; + END +$$ +LANGUAGE plpgsql; + +-- BENCHMARK_RUN_VIEW_INSERT_ROW +CREATE OR REPLACE FUNCTION public.benchmark_run_view_insert_row() +RETURNS trigger AS +$$ + DECLARE + found_benchmark_id integer; + found_benchmark_language_id integer; + found_machine_id integer; + found_environment_id integer; + found_language_implementation_version_id integer; + BEGIN + IF ( + NEW.benchmark_name IS NULL + OR NEW.benchmark_version IS NULL + OR NEW.benchmark_language IS NULL + OR NEW.value IS NULL + OR NEW.run_timestamp IS NULL + OR NEW.git_commit_timestamp IS NULL + OR NEW.git_hash IS NULL + OR NEW.language_implementation_version IS NULL + OR NEW.mac_address IS NULL + ) + THEN + RAISE EXCEPTION 'Only the following columns can be NULL: ' + '"parameter_names", "val_min", "val_q1", "val_q3", "val_max".'; + END IF; + + SELECT public.get_benchmark_id( + NEW.benchmark_language + , NEW.benchmark_name + , NEW.benchmark_version + ) INTO found_benchmark_id; + + SELECT public.get_benchmark_language_id( + NEW.benchmark_language + ) INTO found_benchmark_language_id; + + SELECT public.get_machine_id( + NEW.mac_address + ) INTO found_machine_id; + + SELECT public.get_environment_id( + NEW.benchmark_language + , NEW.language_implementation_version + , NEW.dependencies + ) INTO found_environment_id; + + SELECT public.get_language_implementation_version_id( + found_benchmark_language_id, + NEW.language_implementation_version + ) INTO found_language_implementation_version_id; + + INSERT INTO public.benchmark_run ( + parameter_values + , value + , git_commit_timestamp + , git_hash + , val_min + , val_q1 + , val_q3 + , val_max + , std_dev + , n_obs + , run_timestamp + , run_metadata + , run_notes + , machine_id + , benchmark_language_id + , language_implementation_version_id + , environment_id + , benchmark_id + ) + VALUES ( + COALESCE(NEW.parameter_values, '{}'::jsonb) + , NEW.value + , NEW.git_commit_timestamp + , NEW.git_hash + , NEW.val_min + , NEW.val_q1 + , NEW.val_q3 + , NEW.val_max + , NEW.std_dev + , NEW.n_obs + , NEW.run_timestamp + , NEW.run_metadata + , NEW.run_notes + , found_machine_id + , found_benchmark_language_id + , found_language_implementation_version_id + , found_environment_id + , found_benchmark_id + ) returning benchmark_run_id INTO NEW.benchmark_run_id; + + RETURN NEW; + END +$$ +LANGUAGE plpgsql; + +-- FULL_BENCHMARK_RUN_VIEW_INSERT_ROW +CREATE OR REPLACE FUNCTION public.full_benchmark_run_view_insert_row() +RETURNS trigger AS +$$ + DECLARE + found_benchmark_id integer; + found_benchmark_language_id integer; + found_machine_id integer; + found_environment_id integer; + found_language_implementation_version_id integer; + BEGIN + IF ( + NEW.value IS NULL + OR NEW.git_hash IS NULL + OR NEW.git_commit_timestamp IS NULL + OR NEW.run_timestamp IS NULL + -- benchmark + OR NEW.benchmark_name IS NULL + OR NEW.benchmark_description IS NULL + OR NEW.benchmark_version IS NULL + OR NEW.benchmark_language IS NULL + -- unit + OR NEW.benchmark_type IS NULL + OR NEW.units IS NULL + OR NEW.lessisbetter IS NULL + -- machine + OR NEW.machine_name IS NULL + OR NEW.memory_bytes IS NULL + OR NEW.cpu_model_name IS NULL + OR NEW.cpu_core_count IS NULL + OR NEW.os_name IS NULL + OR NEW.architecture_name IS NULL + OR NEW.kernel_name IS NULL + OR NEW.cpu_model_name IS NULL + OR NEW.cpu_core_count IS NULL + OR NEW.cpu_thread_count IS NULL + OR NEW.cpu_frequency_max_Hz IS NULL + OR NEW.cpu_frequency_min_Hz IS NULL + OR NEW.cpu_L1d_cache_bytes IS NULL + OR NEW.cpu_L1i_cache_bytes IS NULL + OR NEW.cpu_L2_cache_bytes IS NULL + OR NEW.cpu_L3_cache_bytes IS NULL + ) + THEN + RAISE EXCEPTION 'Only the following columns can be NULL: ' + '"machine_other_attributes", "parameter_names", "val_min", ' + '"val_q1", "val_q3", "val_max", "run_metadata", "run_notes". ' + 'If "gpu_information", "gpu_part_number", "gpu_product_name", or ' + '"kernel_name" are null, they will be silently turned into an ' + 'empty string ('''').'; + END IF; + + SELECT public.get_benchmark_id( + NEW.benchmark_language + , NEW.benchmark_name + , NEW.parameter_names + , NEW.benchmark_description + , NEW.benchmark_version + , NEW.benchmark_type + , NEW.units + , NEW.lessisbetter + ) INTO found_benchmark_id; + + SELECT public.get_benchmark_language_id( + NEW.benchmark_language + ) INTO found_benchmark_language_id; + + SELECT public.get_machine_id( + NEW.mac_address + , NEW.machine_name + , NEW.memory_bytes + , NEW.cpu_actual_frequency_Hz + -- os + , NEW.os_name + , NEW.architecture_name + , NEW.kernel_name + -- cpu + , NEW.cpu_model_name + , NEW.cpu_core_count + , NEW.cpu_thread_count + , NEW.cpu_frequency_max_Hz + , NEW.cpu_frequency_min_Hz + , NEW.cpu_L1d_cache_bytes + , NEW.cpu_L1i_cache_bytes + , NEW.cpu_L2_cache_bytes + , NEW.cpu_L3_cache_bytes + -- gpu + , NEW.gpu_information + , NEW.gpu_part_number + , NEW.gpu_product_name + -- nullable machine attributes + , NEW.machine_other_attributes + ) INTO found_machine_id; + + SELECT public.get_environment_id( + NEW.benchmark_language + , NEW.language_implementation_version + , NEW.dependencies + ) INTO found_environment_id; + + SELECT public.get_language_implementation_version_id( + found_benchmark_language_id, + NEW.language_implementation_version + ) INTO found_language_implementation_version_id; + + INSERT INTO public.benchmark_run ( + parameter_values + , value + , git_commit_timestamp + , git_hash + , val_min + , val_q1 + , val_q3 + , val_max + , std_dev + , n_obs + , run_timestamp + , run_metadata + , run_notes + , machine_id + , benchmark_language_id + , language_implementation_version_id + , environment_id + , benchmark_id + ) + VALUES ( + NEW.parameter_values + , NEW.value + , NEW.git_commit_timestamp + , NEW.git_hash + , NEW.val_min + , NEW.val_q1 + , NEW.val_q3 + , NEW.val_max + , NEW.std_dev + , NEW.n_obs + , NEW.run_timestamp + , NEW.run_metadata + , NEW.run_notes + , found_machine_id + , found_benchmark_language_id + , found_language_implementation_version_id + , found_environment_id + , found_benchmark_id + ) returning benchmark_run_id INTO NEW.benchmark_run_id; + + RETURN NEW; + END +$$ +LANGUAGE plpgsql; diff --git a/src/arrow/dev/benchmarking/ddl/3_02_functions_ingestion.sql b/src/arrow/dev/benchmarking/ddl/3_02_functions_ingestion.sql new file mode 100644 index 000000000..000c61d00 --- /dev/null +++ b/src/arrow/dev/benchmarking/ddl/3_02_functions_ingestion.sql @@ -0,0 +1,323 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +*/ + + +-------------------------- IMPORT HELPERS -------------------------- +-- Load from JSON (from https://stackoverflow.com/a/48396608) +-- How to use it in the psql client: +-- \set content `cat /examples/machine.json` +-- select ingest_machine(:'content'::jsonb); +-- INGEST_MACHINE_VIEW +CREATE OR REPLACE FUNCTION public.ingest_machine_view(from_jsonb jsonb) +RETURNS integer AS +$$ + DECLARE + result integer; + BEGIN + INSERT INTO public.machine_view + SELECT * FROM jsonb_populate_record(null::public.machine_view, from_jsonb) + RETURNING machine_id INTO result; + RETURN result; + END +$$ +LANGUAGE plpgsql; +COMMENT ON FUNCTION public.ingest_machine_view(jsonb) IS + E'The argument is a JSON object. NOTE: key names must be entirely\n' + 'lowercase, or the insert will fail. Extra key-value pairs are ignored.\n' + 'Example::\n\n' + ' {\n' + ' "mac_address": "0a:00:2d:01:02:03",\n' + ' "machine_name": "Yet-Another-Machine-Name",\n' + ' "memory_bytes": 8589934592,\n' + ' "cpu_actual_frequency_hz": 2300000000,\n' + ' "os_name": "OSX",\n' + ' "architecture_name": "x86_64",\n' + ' "kernel_name": "18.2.0",\n' + ' "cpu_model_name": "Intel(R) Core(TM) i5-7360U CPU @ 2.30GHz",\n' + ' "cpu_core_count": 2,\n' + ' "cpu_thread_count": 4,\n' + ' "cpu_frequency_max_hz": 2300000000,\n' + ' "cpu_frequency_min_hz": 2300000000,\n' + ' "cpu_l1d_cache_bytes": 32768,\n' + ' "cpu_l1i_cache_bytes": 32768,\n' + ' "cpu_l2_cache_bytes": 262144,\n' + ' "cpu_l3_cache_bytes": 4194304,\n' + ' "machine_other_attributes": {"just": "an example"},\n' + ' "gpu_information": "",\n' + ' "gpu_part_number": "",\n' + ' "gpu_product_name": ""\n' + ' }\n\n' + 'To identify which columns in "machine_view" are required,\n' + 'please see the view documentation in :ref:`benchmark-data-model`.\n'; + +-- INGEST_BENCHMARK_VIEW +CREATE OR REPLACE FUNCTION public.ingest_benchmark_view(from_jsonb jsonb) +RETURNS setof integer AS +$$ + BEGIN + RETURN QUERY + INSERT INTO public.benchmark_view + SELECT * FROM jsonb_populate_recordset( + null::public.benchmark_view + , from_jsonb + ) + RETURNING benchmark_id; + END +$$ +LANGUAGE plpgsql; +COMMENT ON FUNCTION public.ingest_benchmark_view(jsonb) IS + E'The argument is a JSON object. NOTE: key names must be entirely\n' + 'lowercase, or the insert will fail. Extra key-value pairs are ignored.\n' + 'Example::\n\n' + ' [\n' + ' {\n' + ' "benchmark_name": "Benchmark 1",\n' + ' "parameter_names": ["arg0", "arg1", "arg2"],\n' + ' "benchmark_description": "First benchmark",\n' + ' "benchmark_type": "Time",\n' + ' "units": "miliseconds",\n' + ' "lessisbetter": true,\n' + ' "benchmark_version": "second version",\n' + ' "benchmark_language": "Python"\n' + ' },\n' + ' {\n' + ' "benchmark_name": "Benchmark 2",\n' + ' "parameter_names": ["arg0", "arg1"],\n' + ' "benchmark_description": "Description 2.",\n' + ' "benchmark_type": "Time",\n' + ' "units": "nanoseconds",\n' + ' "lessisbetter": true,\n' + ' "benchmark_version": "second version",\n' + ' "benchmark_language": "Python"\n' + ' }\n' + ' ]\n\n' + 'To identify which columns in "benchmark_view" are required,\n' + 'please see the view documentation in :ref:`benchmark-data-model`.\n'; + +-- INGEST_BENCHMARK_RUN_VIEW +CREATE OR REPLACE FUNCTION public.ingest_benchmark_run_view(from_jsonb jsonb) +RETURNS setof bigint AS +$$ + BEGIN + RETURN QUERY + INSERT INTO public.benchmark_run_view + SELECT * FROM + jsonb_populate_recordset(null::public.benchmark_run_view, from_jsonb) + RETURNING benchmark_run_id; + END +$$ +LANGUAGE plpgsql; +COMMENT ON FUNCTION public.ingest_benchmark_run_view(jsonb) IS + E'The argument is a JSON object. NOTE: key names must be entirely\n' + 'lowercase, or the insert will fail. Extra key-value pairs are ignored.\n' + 'Example::\n\n' + ' [\n' + ' {\n' + ' "benchmark_name": "Benchmark 2",\n' + ' "benchmark_version": "version 0",\n' + ' "parameter_values": {"arg0": 100, "arg1": 5},\n' + ' "value": 2.5,\n' + ' "git_commit_timestamp": "2019-02-08 22:35:53 +0100",\n' + ' "git_hash": "324d3cf198444a",\n' + ' "val_min": 1,\n' + ' "val_q1": 2,\n' + ' "val_q3": 3,\n' + ' "val_max": 4,\n' + ' "std_dev": 1.41,\n' + ' "n_obs": 8,\n' + ' "run_timestamp": "2019-02-14 03:00:05 -0600",\n' + ' "mac_address": "08:00:2b:01:02:03",\n' + ' "benchmark_language": "Python",\n' + ' "language_implementation_version": "CPython 2.7",\n' + ' "dependencies": {"six": "", "numpy": "1.14", "other_lib": "1.0"}\n' + ' },\n' + ' {\n' + ' "benchmark_name": "Benchmark 2",\n' + ' "benchmark_version": "version 0",\n' + ' "parameter_values": {"arg0": 1000, "arg1": 5},\n' + ' "value": 5,\n' + ' "git_commit_timestamp": "2019-02-08 22:35:53 +0100",\n' + ' "git_hash": "324d3cf198444a",\n' + ' "std_dev": 3.14,\n' + ' "n_obs": 8,\n' + ' "run_timestamp": "2019-02-14 03:00:10 -0600",\n' + ' "mac_address": "08:00:2b:01:02:03",\n' + ' "benchmark_language": "Python",\n' + ' "language_implementation_version": "CPython 2.7",\n' + ' "dependencies": {"six": "", "numpy": "1.14", "other_lib": "1.0"}\n' + ' }\n' + ' ]\n' + 'To identify which columns in "benchmark_run_view" are required,\n' + 'please see the view documentation in :ref:`benchmark-data-model`.\n'; + +-- INGEST_BENCHMARK_RUNS_WITH_CONTEXT +CREATE OR REPLACE FUNCTION public.ingest_benchmark_runs_with_context(from_jsonb jsonb) +RETURNS setof bigint AS +$$ + DECLARE + context_jsonb jsonb; + found_environment_id integer; + found_machine_id integer; + BEGIN + SELECT from_jsonb -> 'context' INTO context_jsonb; + + SELECT public.get_machine_id((context_jsonb ->> 'mac_address')::macaddr) + INTO found_machine_id; + + SELECT get_environment_id( + (context_jsonb ->> 'benchmark_language')::citext + , (context_jsonb ->> 'language_implementation_version')::citext + , context_jsonb -> 'dependencies' + ) INTO found_environment_id; + + RETURN QUERY + WITH run_datum AS ( + SELECT * + FROM jsonb_to_recordset(from_jsonb -> 'benchmarks') + AS x( + benchmark_name citext + , parameter_values jsonb + , value numeric + , val_min numeric + , val_q1 numeric + , val_q3 numeric + , val_max numeric + , std_dev numeric + , n_obs integer + , run_timestamp timestamp (0) with time zone + , run_metadata jsonb + , run_notes text + ) + ), benchmark_name_and_id AS ( + SELECT + key AS benchmark_name + , public.get_benchmark_id( + (context_jsonb ->> 'benchmark_language')::citext + , key::citext -- benchmark_name + , value::citext -- benchmark_version + ) AS benchmark_id + FROM jsonb_each_text(from_jsonb -> 'benchmark_version') + ) + INSERT INTO public.benchmark_run ( + benchmark_id + -- run_datum + , parameter_values + , value + , val_min + , val_q1 + , val_q3 + , val_max + , std_dev + , n_obs + , run_metadata + , run_notes + -- additional context information + , git_commit_timestamp + , git_hash + , run_timestamp + -- machine + , machine_id + -- environment + , environment_id + , language_implementation_version_id + , benchmark_language_id + ) + SELECT + b.benchmark_id + -- run_datum + , run_datum.parameter_values + , run_datum.value + , run_datum.val_min + , run_datum.val_q1 + , run_datum.val_q3 + , run_datum.val_max + , run_datum.std_dev + , run_datum.n_obs + , run_datum.run_metadata + , run_datum.run_notes + -- additional context information + , (context_jsonb ->> 'git_commit_timestamp')::timestamp (0) with time zone + , context_jsonb ->> 'git_hash' + , (context_jsonb ->> 'run_timestamp')::timestamp (0) with time zone + -- machine + , found_machine_id + -- environment + , e.environment_id + , e.language_implementation_version_id + , e.benchmark_language_id + FROM run_datum + JOIN public.environment AS e + ON e.environment_id = found_environment_id + JOIN benchmark_name_and_id AS b + ON b.benchmark_name = run_datum.benchmark_name + RETURNING benchmark_run_id; + END +$$ +LANGUAGE plpgsql; +COMMENT ON FUNCTION public.ingest_benchmark_runs_with_context(jsonb) IS + E'The argument is a JSON object. NOTE: key names must be entirely\n' + 'lowercase, or the insert will fail. Extra key-value pairs are ignored.\n' + 'The object contains three key-value pairs::\n\n' + ' {"context": {\n' + ' "mac_address": "08:00:2b:01:02:03",\n' + ' "benchmark_language": "Python",\n' + ' "language_implementation_version": "CPython 3.6",\n' + ' "dependencies": {"six": "", "numpy": "1.14", "other_lib": "1.0"},\n' + ' "git_commit_timestamp": "2019-02-14 22:42:22 +0100",\n' + ' "git_hash": "123456789abcde",\n' + ' "run_timestamp": "2019-02-14 03:00:40 -0600",\n' + ' "extra stuff": "does not hurt anything and will not be added."\n' + ' },\n' + ' "benchmark_version": {\n' + ' "Benchmark Name 1": "Any string can be a version.",\n' + ' "Benchmark Name 2": "A git hash can be a version.",\n' + ' "An Unused Benchmark Name": "Will be ignored."\n' + ' },\n' + ' "benchmarks": [\n' + ' {\n' + ' "benchmark_name": "Benchmark Name 1",\n' + ' "parameter_values": {"argument1": 1, "argument2": "value2"},\n' + ' "value": 42,\n' + ' "val_min": 41.2,\n' + ' "val_q1": 41.5,\n' + ' "val_q3": 42.5,\n' + ' "val_max": 42.8,\n' + ' "std_dev": 0.5,\n' + ' "n_obs": 100,\n' + ' "run_metadata": {"any": "key-value pairs"},\n' + ' "run_notes": "Any relevant notes."\n' + ' },\n' + ' {\n' + ' "benchmark_name": "Benchmark Name 2",\n' + ' "parameter_values": {"not nullable": "Use {} if no params."},\n' + ' "value": 8,\n' + ' "std_dev": 1,\n' + ' "n_obs": 2,\n' + ' }\n' + ' ]\n' + ' }\n\n' + '- The entry for "context" contains the machine, environment, and timestamp\n' + ' information common to all of the runs\n' + '- The entry for "benchmark_version" maps benchmark\n' + ' names to their version strings. (Which can be a git hash,\n' + ' the entire code string, a number, or any other string of your choice.)\n' + '- The entry for "benchmarks" is a list of benchmark run data\n' + ' for the given context and benchmark versions. The first example\n' + ' benchmark run entry contains all possible values, even\n' + ' nullable ones, and the second entry omits all nullable values.\n\n'; diff --git a/src/arrow/dev/benchmarking/ddl/3_10_functions_documentation.sql b/src/arrow/dev/benchmarking/ddl/3_10_functions_documentation.sql new file mode 100644 index 000000000..6b2a05790 --- /dev/null +++ b/src/arrow/dev/benchmarking/ddl/3_10_functions_documentation.sql @@ -0,0 +1,395 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +*/ + + +-- _DOCUMENTATION_INGESTION +CREATE OR REPLACE FUNCTION public._documentation_ingestion() +RETURNS text AS +$$ + WITH ingestion_docs AS ( + SELECT + proname || E'\n' + || rpad('', character_length(proname), '-') + || E'\n\n:code:`' + || proname || '(' + || string_agg(a.argname || ' ' || typname , ', ') + || E')`\n\n' + || description + || E'\n\n\nback to `Benchmark data model <benchmark-data-model>`_\n' + AS docs + FROM pg_catalog.pg_proc + JOIN pg_catalog.pg_namespace + ON nspname='public' + AND pg_namespace.oid = pronamespace + AND proname LIKE '%ingest%' + JOIN pg_catalog.pg_description + ON pg_description.objoid=pg_proc.oid, + LATERAL unnest(proargnames, proargtypes) AS a(argname, argtype) + JOIN pg_catalog.pg_type + ON pg_type.oid = a.argtype + GROUP BY proname, description + ) + SELECT + string_agg(docs, E'\n\n') AS docs + FROM ingestion_docs; +$$ +LANGUAGE sql STABLE; + +-- _DOCUMENTATION_VIEW_DETAILS +CREATE OR REPLACE FUNCTION public._documentation_view_details(view_name citext) +RETURNS TABLE( + column_name name + , type_name name + , nullable text + , default_value text + , description text +) AS +$$ + WITH view_columns AS ( + SELECT + attname AS column_name + , attnum AS column_order + FROM pg_catalog.pg_attribute + WHERE attrelid=view_name::regclass + ) + SELECT + t.column_name + , type_name + , coalesce(nullable, '') + , coalesce(default_value, '') + , coalesce(description, '') + FROM public.summarized_tables_view AS t + JOIN view_columns AS v ON v.column_name = t.column_name + WHERE t.table_name || '_view' = view_name OR t.column_name NOT LIKE '%_id' + ORDER BY column_order; +$$ +LANGUAGE sql STABLE; + + +-- _DOCUMENTATION_VIEW_PIECES +CREATE OR REPLACE FUNCTION public._documentation_view_pieces(view_name citext) +RETURNS TABLE (rst_formatted text) +AS +$$ +DECLARE + column_length integer; + type_length integer; + nullable_length integer; + default_length integer; + description_length integer; + sep text; + border text; +BEGIN + + -- All of the hard-coded constants here are the string length of the table + -- column headers: 'Column', 'Type', 'Nullable', 'Default', 'Description' + SELECT greatest(6, max(character_length(column_name))) + FROM public._documentation_view_details(view_name) INTO column_length; + + SELECT greatest(4, max(character_length(type_name))) + FROM public._documentation_view_details(view_name) INTO type_length; + + SELECT greatest(8, max(character_length(nullable))) + FROM public._documentation_view_details(view_name) INTO nullable_length; + + SELECT greatest(7, max(character_length(default_value))) + FROM public._documentation_view_details(view_name) INTO default_length; + + SELECT greatest(11, max(character_length(description))) + FROM public._documentation_view_details(view_name) INTO description_length; + + SELECT ' ' INTO sep; + + SELECT + concat_ws(sep + , rpad('', column_length, '=') + , rpad('', type_length, '=') + , rpad('', nullable_length, '=') + , rpad('', default_length, '=') + , rpad('', description_length, '=') + ) + INTO border; + + RETURN QUERY + SELECT + border + UNION ALL + SELECT + concat_ws(sep + , rpad('Column', column_length, ' ') + , rpad('Type', type_length, ' ') + , rpad('Nullable', nullable_length, ' ') + , rpad('Default', default_length, ' ') + , rpad('Description', description_length, ' ') + ) + UNION ALL + SELECT border + UNION ALL + SELECT + concat_ws(sep + , rpad(v.column_name, column_length, ' ') + , rpad(v.type_name, type_length, ' ') + , rpad(v.nullable, nullable_length, ' ') + , rpad(v.default_value, default_length, ' ') + , rpad(v.description, description_length, ' ') + ) + FROM public._documentation_view_details(view_name) AS v + UNION ALL + SELECT border; + +END +$$ +LANGUAGE plpgsql STABLE; + + +-- DOCUMENTATION_FOR +CREATE OR REPLACE FUNCTION public.documentation_for(view_name citext) +RETURNS text AS +$$ + DECLARE + view_description text; + view_table_markup text; + BEGIN + SELECT description FROM pg_catalog.pg_description + WHERE pg_description.objoid = view_name::regclass + INTO view_description; + + SELECT + view_name || E'\n' || rpad('', length(view_name), '-') || E'\n\n' || + view_description || E'\n\n' || + string_agg(rst_formatted, E'\n') + INTO view_table_markup + FROM public._documentation_view_pieces(view_name); + + RETURN view_table_markup; + END +$$ +LANGUAGE plpgsql STABLE; +COMMENT ON FUNCTION public.documentation_for(citext) +IS E'Create an ".rst"-formatted table describing a specific view.\n' + 'Example: SELECT public.documentation_for(''endpoint'');'; + + +-- DOCUMENTATION +CREATE OR REPLACE FUNCTION public.documentation(dotfile_name text) +RETURNS TABLE (full_text text) AS +$$ + WITH v AS ( + SELECT + public.documentation_for(relname::citext) + || E'\n\nback to `Benchmark data model <benchmark-data-model>`_\n' + AS view_documentation + FROM pg_catalog.pg_trigger + JOIN pg_catalog.pg_class ON pg_trigger.tgrelid = pg_class.oid + WHERE NOT tgisinternal + ) + SELECT + E'\n.. _benchmark-data-model:\n\n' + 'Benchmark data model\n' + '====================\n\n\n' + '.. graphviz:: ' + || dotfile_name + || E'\n\n\n.. _benchmark-ingestion:\n\n' + 'Benchmark ingestion helper functions\n' + '====================================\n\n' + || public._documentation_ingestion() + || E'\n\n\n.. _benchmark-views:\n\n' + 'Benchmark views\n' + '===============\n\n\n' + || string_agg(v.view_documentation, E'\n') + FROM v + GROUP BY True; +$$ +LANGUAGE sql STABLE; +COMMENT ON FUNCTION public.documentation(text) +IS E'Create an ".rst"-formatted file that shows the columns in ' + 'every insertable view in the "public" schema.\n' + 'The text argument is the name of the generated dotfile to be included.\n' + 'Example: SELECT public.documentation(''data_model.dot'');'; + + +-- _DOCUMENTATION_DOTFILE_NODE_FOR +CREATE OR REPLACE FUNCTION public._documentation_dotfile_node_for(tablename name) +RETURNS text AS +$$ +DECLARE + result text; +BEGIN + WITH node AS ( + SELECT + tablename::text AS lines + UNION ALL + SELECT + E'[label = \n' + ' <<table border="0" cellborder="1" cellspacing="0" cellpadding="2">' + UNION ALL + -- table name + SELECT + ' <tr><td border="0"><font point-size="14">' + || tablename + || '</font></td></tr>' + UNION ALL + -- primary keys + SELECT + ' <tr><td port="' || column_name || '"><b>' + || column_name + || ' (pk)</b></td></tr>' + FROM public.summarized_tables_view + WHERE table_name = tablename + AND description LIKE '%primary key%' + UNION ALL + -- columns + SELECT + ' <tr><td>' + || column_name + || CASE WHEN description LIKE '%unique' THEN ' (u)' ELSE '' END + || CASE WHEN nullable <> 'not null' THEN ' (o)' ELSE '' END + || '</td></tr>' + FROM public.summarized_tables_view + WHERE table_name = tablename + AND (description IS NULL OR description not like '%key%') + UNION ALL + -- foreign keys + SELECT + ' <tr><td port="' || column_name || '">' + || column_name + || CASE WHEN description LIKE '%unique' THEN ' (u)' ELSE '' END + || ' (fk) </td></tr>' + FROM public.summarized_tables_view + WHERE table_name = tablename + AND description LIKE '%foreign key%' + AND description NOT LIKE '%primary key%' + UNION ALL + SELECT + E' </table>>\n];' + ) + SELECT + string_agg(lines, E'\n') + INTO result + FROM node; + + RETURN result; +END +$$ +LANGUAGE plpgsql STABLE; + + +-- _DOCUMENTATION_DOTFILE_EDGES +CREATE OR REPLACE FUNCTION public._documentation_dotfile_edges() +RETURNS text AS +$$ +DECLARE + result text; +BEGIN + WITH relationship AS ( + SELECT + conrelid AS fk_table_id + , confrelid AS pk_table_id + , unnest(conkey) AS fk_colnum + , unnest(confkey) AS pk_colnum + FROM pg_catalog.pg_constraint + WHERE confkey IS NOT NULL + AND connamespace='public'::regnamespace + ), all_edges AS ( + SELECT + fk_tbl.relname || ':' || fk_col.attname + || ' -> ' + || pk_tbl.relname || ':' || pk_col.attname + || ';' AS lines + FROM relationship + -- foreign key table + column + JOIN pg_catalog.pg_attribute AS fk_col + ON fk_col.attrelid = relationship.fk_table_id + AND fk_col.attnum = relationship.fk_colnum + JOIN pg_catalog.pg_class AS fk_tbl + ON fk_tbl.oid = relationship.fk_table_id + -- primary key table + column + JOIN pg_catalog.pg_attribute AS pk_col + ON pk_col.attrelid = relationship.pk_table_id + AND pk_col.attnum = relationship.pk_colnum + JOIN pg_catalog.pg_class AS pk_tbl + ON pk_tbl.oid = relationship.pk_table_id + ) + SELECT + string_agg(lines, E'\n') + INTO result + FROM all_edges; + + RETURN result; +END +$$ +LANGUAGE plpgsql STABLE; + + +-- DOCUMENTATION_DOTFILE +CREATE OR REPLACE FUNCTION public.documentation_dotfile() +RETURNS text AS +$$ +DECLARE + schemaname name := 'public'; + result text; +BEGIN + WITH file_contents AS ( + SELECT + E'digraph database {\n concentrate = true;\n' + ' rankdir = LR;\n' + ' ratio = ".75";\n' + ' node [shape = none, fontsize="11", fontname="Helvetica"];\n' + ' edge [fontsize="8", fontname="Helvetica"];' + AS lines + UNION ALL + SELECT + E'legend\n[fontsize = "14"\nlabel =\n' + '<<table border="0" cellpadding="0">\n' + ' <tr><td align="left"><font point-size="16">Legend</font></td></tr>\n' + ' <tr><td align="left">pk = primary key</td></tr>\n' + ' <tr><td align="left">fk = foreign key</td></tr>\n' + ' <tr><td align="left">u = unique*</td></tr>\n' + ' <tr><td align="left">o = optional</td></tr>\n' + ' <tr><td align="left">' + '* multiple uniques in the same table are a unique group</td></tr>\n' + '</table>>\n];' + UNION ALL + SELECT + string_agg( + public._documentation_dotfile_node_for(relname), + E'\n' -- Forcing the 'env' table to the end makes a better image + ORDER BY (CASE WHEN relname LIKE 'env%' THEN 'z' ELSE relname END) + ) + FROM pg_catalog.pg_class + WHERE relkind='r' AND relnamespace = schemaname::regnamespace + UNION ALL + SELECT + public._documentation_dotfile_edges() + UNION ALL + SELECT + '}' + ) + SELECT + string_agg(lines, E'\n') AS dotfile + INTO result + FROM file_contents; + RETURN result; +END +$$ +LANGUAGE plpgsql STABLE; +COMMENT ON FUNCTION public.documentation_dotfile() +IS E'Create a Graphviz dotfile of the data model: ' + 'every table in the "public" schema.\n' + 'Example: SELECT public.documentation_dotfile();'; diff --git a/src/arrow/dev/benchmarking/ddl/4_00_triggers.sql b/src/arrow/dev/benchmarking/ddl/4_00_triggers.sql new file mode 100644 index 000000000..5fb0e5018 --- /dev/null +++ b/src/arrow/dev/benchmarking/ddl/4_00_triggers.sql @@ -0,0 +1,61 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +*/ + + +-- LANGUAGE_IMPLEMENTATION_VERSION_VIEW_TRIGGER_INSERT +CREATE TRIGGER language_implementation_version_view_trigger_insert + INSTEAD OF INSERT ON public.language_implementation_version_view + FOR EACH ROW + EXECUTE FUNCTION public.language_implementation_version_view_insert_row(); + +-- ENVIRONMENT_VIEW_TRIGGER_INSERT +CREATE TRIGGER environment_view_trigger_insert + INSTEAD OF INSERT ON public.environment_view + FOR EACH ROW + EXECUTE FUNCTION public.environment_view_insert_row(); + +-- MACHINE_VIEW_TRIGGER_INSERT +CREATE TRIGGER machine_view_trigger_insert + INSTEAD OF INSERT ON public.machine_view + FOR EACH ROW + EXECUTE FUNCTION public.machine_view_insert_row(); + +-- UNIT_VIEW_TRIGGER_INSERT +CREATE TRIGGER unit_view_trigger_insert + INSTEAD OF INSERT ON public.unit_view + FOR EACH ROW + EXECUTE FUNCTION public.unit_view_insert_row(); + +-- BENCHMARK_VIEW_TRIGGER_INSERT +CREATE TRIGGER benchmark_view_trigger_insert + INSTEAD OF INSERT ON public.benchmark_view + FOR EACH ROW + EXECUTE FUNCTION public.benchmark_view_insert_row(); + +-- BENCHMARK_RUN_VIEW_TRIGGER_INSERT +CREATE TRIGGER benchmark_run_view_trigger_insert + INSTEAD OF INSERT ON public.benchmark_run_view + FOR EACH ROW + EXECUTE FUNCTION public.benchmark_run_view_insert_row(); + +-- FULL_BENCHMARK_RUN_VIEW_TRIGGER_INSERT +CREATE TRIGGER full_benchmark_run_view_trigger_insert + INSTEAD OF INSERT ON public.full_benchmark_run_view + FOR EACH ROW + EXECUTE FUNCTION public.full_benchmark_run_view_insert_row(); diff --git a/src/arrow/dev/benchmarking/ddl/5_00_permissions.sql b/src/arrow/dev/benchmarking/ddl/5_00_permissions.sql new file mode 100644 index 000000000..dd72c40db --- /dev/null +++ b/src/arrow/dev/benchmarking/ddl/5_00_permissions.sql @@ -0,0 +1,73 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +*/ +---------------------------- ROLES ---------------------------- +-- ARROW_WEB +CREATE ROLE arrow_web login password 'arrow'; +COMMENT ON ROLE arrow_web IS 'Anonymous login user.'; + +-- ARROW_ADMIN +CREATE ROLE arrow_admin; +COMMENT ON ROLE arrow_admin + IS 'Can select, insert, update, and delete on all public tables.'; + +-- ARROW_ANONYMOUS +CREATE ROLE arrow_anonymous; +COMMENT ON ROLE arrow_anonymous + IS 'Can insert and select on all public tables.'; + +GRANT arrow_anonymous TO arrow_web; + + +---------------------------- PRIVILEGES ---------------------------- +GRANT USAGE ON SCHEMA public TO arrow_anonymous, arrow_admin; + +-- ARROW_ADMIN +GRANT EXECUTE ON ALL FUNCTIONS IN SCHEMA public TO arrow_admin; +GRANT USAGE, SELECT ON ALL SEQUENCES IN SCHEMA public to arrow_admin; +GRANT SELECT, UPDATE, INSERT, DELETE ON ALL TABLES IN SCHEMA public + TO arrow_admin; + +-- ARROW_ANONYMOUS +GRANT EXECUTE ON ALL FUNCTIONS IN SCHEMA public TO arrow_anonymous; +GRANT SELECT ON ALL TABLES IN SCHEMA public TO arrow_anonymous; +GRANT USAGE, SELECT ON ALL SEQUENCES IN SCHEMA public to arrow_anonymous; +GRANT INSERT ON + public.benchmark + , public.benchmark_language + , public.dependencies + , public.language_implementation_version + , public.benchmark_run + , public.benchmark_type + , public.cpu + , public.environment + , public.environment_view + , public.gpu + , public.machine + , public.machine_view + , public.os + , public.unit + --, public.project -- The only disallowed table is `project`. + , public.benchmark_run_view + , public.benchmark_view + , public.environment_view + , public.full_benchmark_run_view + , public.language_implementation_version_view + , public.machine_view + , public.unit_view +TO arrow_anonymous; |