summary refs log tree commit diff stats
path: root/src/arrow/dev/conbench_envs
diff options
context:
space:
mode:
Diffstat (limited to 'src/arrow/dev/conbench_envs')
-rw-r--r-- src/arrow/dev/conbench_envs/README.md 214
-rw-r--r-- src/arrow/dev/conbench_envs/benchmarks.env 50
-rwxr-xr-x src/arrow/dev/conbench_envs/hooks.sh 91
3 files changed, 355 insertions, 0 deletions
diff --git a/src/arrow/dev/conbench_envs/README.md b/src/arrow/dev/conbench_envs/README.md
new file mode 100644
index 000000000..5a4eb58b2
--- /dev/null
+++ b/src/arrow/dev/conbench_envs/README.md
@@ -0,0 +1,214 @@
+<!---
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+-->
+# Benchmark Builds Env and Hooks
+This directory contains:
+- [benchmarks.env](benchmarks.env) - list of env vars used for building Arrow C++/Python/R/Java/JavaScript and running benchmarks using [conbench](https://ursalabs.org/blog/announcing-conbench/).
+- [hooks.sh](hooks.sh) - hooks used by <b>@ursabot</b> benchmark builds that are triggered by `@ursabot please benchmark` PR comments.
+
+## How to add or update Arrow build and run env vars used by `@ursabot` benchmark builds
+1. Create `apache/arrow` PR
+2. Update or add env var value in [benchmarks.env](../../dev/conbench_envs/benchmarks.env)
+3. Add `@ursabot please benchmark` comment to PR
+4. Once benchmark builds are done, benchmark results can be viewed via compare/runs links in the PR comment where
+- baseline = PR base HEAD commit with unaltered `/dev/conbench_envs/benchmarks.env`
+- contender = PR branch HEAD commit with overridden `/dev/conbench_envs/benchmarks.env`
+
+## Why do `@ursabot` benchmark builds need `hooks.sh`?
+`@ursabot` benchmark builds are maintained in Ursa's private repo.
+Benchmark builds use `hooks.sh` functions as hooks to create conda env with Arrow dependencies and build Arrow C++/Python/R/Java/JavaScript from source for a specific Arrow repo's commit.
+
+Defining hooks in the Arrow repo allows benchmark builds for a specific commit to be
+compatible with the files/scripts *in that commit* which are used for installing Arrow
+dependencies and building Arrow. This allows Arrow contributors to assess the performance
+implications of different build options, dependency versions, etc. by updating
+`hooks.sh`.
+
+## Can other repos and services use `benchmarks.env` and `hooks.sh`?
+
+Yes, other repos and services are welcome to use `benchmarks.env` and `hooks.sh` as long as
+- existing hooks are not removed or renamed.
+- function definitions for existing hooks are only updated in the Arrow commit where Arrow build scripts or files with dependencies have been renamed, moved or added.
+- benchmark builds are run using `@ursabot please benchmark` PR comment to confirm that function definition updates do not break benchmark builds.
+
+## How can other repos and services use `benchmarks.env` and `hooks.sh` to set up a benchmark env?
+Here are the steps that `@ursabot` benchmark builds follow to set up a benchmarking env on Ubuntu using `benchmarks.env` and `hooks.sh`:
+
+### 1. Install Arrow dependencies
+ sudo su
+ apt-get update -y -q && \
+ apt-get install -y -q --no-install-recommends \
+ autoconf \
+ ca-certificates \
+ ccache \
+ cmake \
+ g++ \
+ gcc \
+ gdb \
+ git \
+ libbenchmark-dev \
+ libboost-filesystem-dev \
+ libboost-regex-dev \
+ libboost-system-dev \
+ libbrotli-dev \
+ libbz2-dev \
+ libgflags-dev \
+ libcurl4-openssl-dev \
+ libgoogle-glog-dev \
+ liblz4-dev \
+ libprotobuf-dev \
+ libprotoc-dev \
+ libre2-dev \
+ libsnappy-dev \
+ libssl-dev \
+ libthrift-dev \
+ libutf8proc-dev \
+ libzstd-dev \
+ make \
+ ninja-build \
+ pkg-config \
+ protobuf-compiler \
+ rapidjson-dev \
+ tzdata \
+ wget && \
+ apt-get clean && \
+ rm -rf /var/lib/apt/lists/*
+
+ apt-get update -y -q && \
+ apt-get install -y -q \
+ python3 \
+ python3-pip \
+ python3-dev && \
+ apt-get clean && \
+ rm -rf /var/lib/apt/lists/*
+
+### 2. Install Arrow dependencies for Java
+ sudo su
+ apt-get install openjdk-8-jdk
+ apt-get install maven
+
+Verify that you have at least these versions of `java`, `javac` and `maven`:
+
+ # java -version
+ openjdk version "1.8.0_292"
+ ...
+ # javac -version
+ javac 1.8.0_292
+ ...
+ # mvn -version
+ Apache Maven 3.6.3
+ ...
+
+### 3. Install Arrow dependencies for JavaScript
+ sudo apt update
+ sudo apt -y upgrade
+ sudo apt update
+ sudo apt -y install curl dirmngr apt-transport-https lsb-release ca-certificates
+ curl -fsSL https://deb.nodesource.com/setup_14.x | sudo -E bash -
+ sudo apt-get install -y nodejs
+ sudo apt -y install yarn
+ sudo apt -y install gcc g++ make
+
+Verify that you have at least these versions of `node` and `yarn`:
+
+ # node --version
+ v14.17.2
+ ...
+ # yarn --version
+ 1.22.5
+ ...
+
+### 4. Install Conda
+ sudo apt install curl
+ curl -LO https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh
+ sudo bash Miniconda3-latest-Linux-x86_64.sh
+
+### 5. Set env vars:
+ export ARROW_REPO=https://github.com/apache/arrow.git
+ export BENCHMARKABLE=e6e9e6ea52b7a8f2682ffc4160168c936ca1d3e6
+ export BENCHMARKABLE_TYPE=arrow-commit
+ export PYTHON_VERSION=3.8
+ export CONBENCH_EMAIL=...
+ export CONBENCH_URL="https://conbench.ursa.dev"
+ export CONBENCH_PASSWORD=...
+ export MACHINE=...
+
+### 6. Use `create_conda_env_with_arrow_python` hook to create conda env and build Arrow C++ and Arrow Python
+ git clone "${ARROW_REPO}"
+ pushd arrow
+ git fetch -v --prune -- origin "${BENCHMARKABLE}"
+ git checkout -f "${BENCHMARKABLE}"
+ source dev/conbench_envs/hooks.sh create_conda_env_with_arrow_python
+ popd
+
+### 7. Install conbench
+ git clone https://github.com/ursacomputing/conbench.git
+ pushd conbench
+ pip install -r requirements-cli.txt
+ pip install -U PyYAML
+ python setup.py install
+ popd
+
+### 8. Setup benchmarks repo
+ git clone https://github.com/ursacomputing/benchmarks.git
+ pushd benchmarks
+ python setup.py develop
+ popd
+
+### 9. Setup conbench credentials
+ pushd benchmarks
+ touch .conbench
+ echo "url: $CONBENCH_URL" >> .conbench
+ echo "email: $CONBENCH_EMAIL" >> .conbench
+ echo "password: $CONBENCH_PASSWORD" >> .conbench
+ echo "host_name: $MACHINE" >> .conbench
+ popd
+
+### 10. Run Python benchmarks
+ cd benchmarks
+ conbench file-read ALL --iterations=3 --all=true --drop-caches=true
+
+### 11. Use `install_archery` hook to setup archery and run C++ benchmarks
+ pushd arrow
+ source dev/conbench_envs/hooks.sh install_archery
+ popd
+ cd benchmarks
+ conbench cpp-micro --iterations=1
+
+### 12. Use `build_arrow_r` hook to build Arrow R and run R benchmarks
+ pushd arrow
+ source dev/conbench_envs/hooks.sh build_arrow_r
+ popd
+ R -e "remotes::install_github('ursacomputing/arrowbench')"
+ cd benchmarks
+ conbench dataframe-to-table ALL --iterations=3 --drop-caches=true --language=R
+
+### 13. Use `build_arrow_java` and `install_archery` hooks to build Arrow Java and run Java benchmarks
+ pushd arrow
+ source dev/conbench_envs/hooks.sh build_arrow_java
+ source dev/conbench_envs/hooks.sh install_archery
+ popd
+ cd benchmarks
+ conbench java-micro --iterations=1
+
+### 14. Use `install_java_script_project_dependencies` hook to install JavaScript dependencies and run JavaScript benchmarks
+ pushd arrow
+ source dev/conbench_envs/hooks.sh install_java_script_project_dependencies
+ popd
+ cd benchmarks
+ conbench js-micro
diff --git a/src/arrow/dev/conbench_envs/benchmarks.env b/src/arrow/dev/conbench_envs/benchmarks.env
new file mode 100644
index 000000000..6c151aa7c
--- /dev/null
+++ b/src/arrow/dev/conbench_envs/benchmarks.env
@@ -0,0 +1,50 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Build and run settings for benchmark builds.
+# dev/conbench_envs/hooks.sh loads this file with `set -a; source ...; set +a`,
+# so every assignment below is exported into the environment of that shell.
+ARROW_BUILD_TESTS=OFF
+ARROW_BUILD_TYPE=release
+ARROW_DEPENDENCY_SOURCE=AUTO
+ARROW_DATASET=ON
+ARROW_DEFAULT_MEMORY_POOL=mimalloc
+ARROW_ENABLE_UNSAFE_MEMORY_ACCESS=true
+ARROW_ENABLE_NULL_CHECK_FOR_GET=false
+ARROW_FLIGHT=OFF
+ARROW_GANDIVA=OFF
+ARROW_HDFS=ON
+# $CONDA_PREFIX is expanded by the shell at the moment this file is sourced.
+ARROW_HOME=$CONDA_PREFIX
+ARROW_INSTALL_NAME_RPATH=OFF
+ARROW_MIMALLOC=ON
+ARROW_NO_DEPRECATED_API=ON
+ARROW_ORC=ON
+ARROW_PARQUET=ON
+ARROW_PLASMA=ON
+ARROW_PYTHON=ON
+ARROW_S3=ON
+ARROW_USE_ASAN=OFF
+ARROW_USE_CCACHE=ON
+ARROW_USE_UBSAN=OFF
+ARROW_WITH_BROTLI=ON
+ARROW_WITH_BZ2=ON
+ARROW_WITH_LZ4=ON
+ARROW_WITH_SNAPPY=ON
+ARROW_WITH_ZLIB=ON
+ARROW_WITH_ZSTD=ON
+GTest_SOURCE=BUNDLED
+ORC_SOURCE=BUNDLED
+PARQUET_BUILD_EXAMPLES=ON
+PARQUET_BUILD_EXECUTABLES=ON
+PYTHON=python
+LD_LIBRARY_PATH=$CONDA_PREFIX/lib:$LD_LIBRARY_PATH
\ No newline at end of file
diff --git a/src/arrow/dev/conbench_envs/hooks.sh b/src/arrow/dev/conbench_envs/hooks.sh
new file mode 100755
index 000000000..6bcfbe446
--- /dev/null
+++ b/src/arrow/dev/conbench_envs/hooks.sh
@@ -0,0 +1,91 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+## These hooks are used by benchmark builds
+# to create a conda env with Arrow dependencies and build Arrow C++, Python, etc
+# Create the conda env used by a benchmark build.
+# Globals:
+#   BENCHMARKABLE_TYPE (read) - name given to the new conda env
+#   PYTHON_VERSION     (read) - python version requested for the env
+# Packages come from the conda-forge channel: everything listed in the
+# ci/conda_env_*.txt files below (paths are relative to the current
+# directory) plus a handful of explicitly named packages.
+create_conda_env_for_benchmark_build() {
+  local -a requirement_files=(
+    ci/conda_env_unix.txt
+    ci/conda_env_cpp.txt
+    ci/conda_env_python.txt
+    ci/conda_env_gandiva.txt
+  )
+  local -a extra_packages=(
+    compilers
+    "python=${PYTHON_VERSION}"
+    pandas
+    aws-sdk-cpp
+    r
+  )
+  local -a conda_args=(-y -n "${BENCHMARKABLE_TYPE}" -c conda-forge)
+  local requirement_file
+  for requirement_file in "${requirement_files[@]}"; do
+    conda_args+=(--file "${requirement_file}")
+  done
+  conda create "${conda_args[@]}" "${extra_packages[@]}"
+}
+
+# Run `conda init bash`, then activate the benchmark conda env.
+# Globals:
+#   BENCHMARKABLE_TYPE (read) - name of the conda env to activate
+activate_conda_env_for_benchmark_build() {
+  local benchmark_env_name="${BENCHMARKABLE_TYPE}"
+  conda init bash
+  conda activate "${benchmark_env_name}"
+}
+
+# Install the pip requirements for building and testing Arrow Python.
+# Both requirement files are handed to a single `pip install` invocation;
+# paths are relative to the current directory.
+install_arrow_python_dependencies() {
+  local -a pip_args=()
+  local requirements_file
+  for requirements_file in \
+    python/requirements-build.txt \
+    python/requirements-test.txt; do
+    pip_args+=(-r "${requirements_file}")
+  done
+  pip install "${pip_args[@]}"
+}
+
+# Load dev/conbench_envs/benchmarks.env into the current shell and export
+# every variable it assigns.
+# allexport is switched on only while the file is read and is switched off
+# afterwards regardless of its previous state.
+set_arrow_build_and_run_env_vars() {
+  set -o allexport
+  . dev/conbench_envs/benchmarks.env
+  set +o allexport
+}
+
+# Build Arrow C++ by running ci/scripts/cpp_build.sh with the current
+# directory passed as both of its arguments.
+# The script's stderr is captured in ./error.log. When the script fails, the
+# log is printed to stdout unless it contains the message
+# "Can't create temporary cache file", which is deliberately ignored.
+# NOTE(review): the exit status of the build script itself is not propagated
+# to the caller (unchanged from before) - confirm whether callers should see
+# build failures.
+build_arrow_cpp() {
+  # Ignore the error when a cache can't be created
+  if ! ci/scripts/cpp_build.sh "$(pwd)" "$(pwd)" 2> error.log; then
+    # No backslash before the apostrophe: inside double quotes a backslash
+    # in front of ' is kept literally, so the fixed-string search would look
+    # for "Can\'t" and could never match the real message.
+    if ! grep -q -F "Can't create temporary cache file" error.log; then
+      cat error.log
+    fi
+  fi
+}
+
+# Build Arrow Python by running ci/scripts/python_build.sh with the current
+# directory passed as both of its arguments.
+# The path is quoted so that a checkout whose path contains whitespace or
+# glob characters is still handed over as exactly one argument each time.
+build_arrow_python() {
+  ci/scripts/python_build.sh "$(pwd)" "$(pwd)"
+}
+
+# Build and install the Arrow R package from the current checkout.
+# Steps:
+#   1. append ci/etc/rprofile to the Rprofile.site of the R installation
+#      reported by `R RHOME`
+#   2. run ci/scripts/r_deps.sh with the current directory as both arguments
+#   3. run `R CMD INSTALL .` inside the r/ subdirectory
+build_arrow_r() {
+  # Quoted so an R home containing whitespace does not produce an
+  # "ambiguous redirect" error.
+  cat ci/etc/rprofile >> "$(R RHOME)/etc/Rprofile.site"
+  ci/scripts/r_deps.sh "$(pwd)" "$(pwd)"
+  # The subshell keeps the caller's working directory untouched; `&&` makes
+  # sure the install is not attempted from the wrong directory if `cd r`
+  # fails.
+  (cd r && R CMD INSTALL .)
+}
+
+# Build Arrow Java by running ci/scripts/java_build.sh with the current
+# directory passed as both of its arguments.
+# The path is quoted so that a checkout whose path contains whitespace or
+# glob characters is still handed over as exactly one argument each time.
+build_arrow_java() {
+  ci/scripts/java_build.sh "$(pwd)" "$(pwd)"
+}
+
+# Install archery from dev/archery (relative to the current directory) with
+# pip in editable mode.
+install_archery() {
+  local archery_dir=dev/archery
+  pip install -e "${archery_dir}"
+}
+
+# Install the JavaScript project dependencies by running `yarn` inside the
+# js/ subdirectory of the current directory.
+install_java_script_project_dependencies() {
+  # The subshell keeps the caller's working directory untouched; `&&` makes
+  # sure yarn is not run from the wrong directory if `cd js` fails.
+  (cd js && yarn)
+}
+
+# Create and activate the benchmark conda env, then build Arrow C++ and
+# Arrow Python in it.
+# The hooks run one after another in the order listed; a failing step does
+# not stop the following ones, and the status of the last step is returned.
+create_conda_env_with_arrow_python() {
+  local hook_step
+  for hook_step in \
+    create_conda_env_for_benchmark_build \
+    activate_conda_env_for_benchmark_build \
+    install_arrow_python_dependencies \
+    set_arrow_build_and_run_env_vars \
+    build_arrow_cpp \
+    build_arrow_python; do
+    "${hook_step}"
+  done
+}
+
+# Dispatch: run the hook named by the first argument, passing any remaining
+# arguments to it, e.g. `source dev/conbench_envs/hooks.sh install_archery`.
+# With no arguments this expands to nothing and no hook is run.
+"$@"