summaryrefslogtreecommitdiffstats
path: root/src/arrow/ci
diff options
context:
space:
mode:
author Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-21 11:54:28 +0000
committer Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-21 11:54:28 +0000
commit e6918187568dbd01842d8d1d2c808ce16a894239 (patch)
tree 64f88b554b444a49f656b6c656111a145cbbaa28 /src/arrow/ci
parent Initial commit. (diff)
download ceph-e6918187568dbd01842d8d1d2c808ce16a894239.tar.xz
ceph-e6918187568dbd01842d8d1d2c808ce16a894239.zip
Adding upstream version 18.2.2. upstream/18.2.2
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
-rw-r--r--src/arrow/ci/appveyor-cpp-build.bat163
-rw-r--r--src/arrow/ci/appveyor-cpp-setup.bat108
-rw-r--r--src/arrow/ci/conda_env_archery.txt42
-rw-r--r--src/arrow/ci/conda_env_cpp.txt42
-rw-r--r--src/arrow/ci/conda_env_crossbow.txt25
-rw-r--r--src/arrow/ci/conda_env_gandiva.txt19
-rw-r--r--src/arrow/ci/conda_env_gandiva_win.txt20
-rw-r--r--src/arrow/ci/conda_env_python.txt32
-rw-r--r--src/arrow/ci/conda_env_r.txt38
-rw-r--r--src/arrow/ci/conda_env_sphinx.txt23
-rw-r--r--src/arrow/ci/conda_env_unix.txt23
-rw-r--r--src/arrow/ci/detect-changes.py362
-rw-r--r--src/arrow/ci/docker/conda-cpp.dockerfile53
-rw-r--r--src/arrow/ci/docker/conda-integration.dockerfile73
-rw-r--r--src/arrow/ci/docker/conda-python-dask.dockerfile25
-rw-r--r--src/arrow/ci/docker/conda-python-hdfs.dockerfile52
-rw-r--r--src/arrow/ci/docker/conda-python-jpype.dockerfile29
-rw-r--r--src/arrow/ci/docker/conda-python-kartothek.dockerfile46
-rw-r--r--src/arrow/ci/docker/conda-python-pandas.dockerfile27
-rw-r--r--src/arrow/ci/docker/conda-python-spark.dockerfile43
-rw-r--r--src/arrow/ci/docker/conda-python-turbodbc.dockerfile50
-rw-r--r--src/arrow/ci/docker/conda-python.dockerfile37
-rw-r--r--src/arrow/ci/docker/conda.dockerfile55
-rw-r--r--src/arrow/ci/docker/debian-10-cpp.dockerfile106
-rw-r--r--src/arrow/ci/docker/debian-10-go-cgo-python.dockerfile36
-rw-r--r--src/arrow/ci/docker/debian-10-go.dockerfile26
-rw-r--r--src/arrow/ci/docker/debian-10-js.dockerfile27
-rw-r--r--src/arrow/ci/docker/debian-11-cpp.dockerfile100
-rw-r--r--src/arrow/ci/docker/debian-11-go-cgo-python.dockerfile36
-rw-r--r--src/arrow/ci/docker/debian-11-go.dockerfile26
-rw-r--r--src/arrow/ci/docker/debian-11-js.dockerfile27
-rw-r--r--src/arrow/ci/docker/debian-9-java.dockerfile28
-rw-r--r--src/arrow/ci/docker/debian-go-cgo.dockerfile32
-rw-r--r--src/arrow/ci/docker/fedora-33-cpp.dockerfile94
-rw-r--r--src/arrow/ci/docker/java-jni-manylinux-201x.dockerfile37
-rw-r--r--src/arrow/ci/docker/linux-apt-c-glib.dockerfile65
-rw-r--r--src/arrow/ci/docker/linux-apt-docs.dockerfile110
-rw-r--r--src/arrow/ci/docker/linux-apt-jni.dockerfile87
-rw-r--r--src/arrow/ci/docker/linux-apt-lint.dockerfile95
-rw-r--r--src/arrow/ci/docker/linux-apt-python-3.dockerfile46
-rw-r--r--src/arrow/ci/docker/linux-apt-r.dockerfile114
-rw-r--r--src/arrow/ci/docker/linux-apt-ruby.dockerfile27
-rw-r--r--src/arrow/ci/docker/linux-dnf-python-3.dockerfile41
-rw-r--r--src/arrow/ci/docker/linux-r.dockerfile48
-rw-r--r--src/arrow/ci/docker/python-sdist.dockerfile36
-rw-r--r--src/arrow/ci/docker/python-wheel-manylinux-201x.dockerfile112
-rw-r--r--src/arrow/ci/docker/python-wheel-manylinux-test.dockerfile27
-rw-r--r--src/arrow/ci/docker/python-wheel-windows-vs2017.dockerfile105
-rw-r--r--src/arrow/ci/docker/ubuntu-18.04-cpp.dockerfile130
-rw-r--r--src/arrow/ci/docker/ubuntu-18.04-csharp.dockerfile22
-rw-r--r--src/arrow/ci/docker/ubuntu-20.04-cpp-minimal.dockerfile69
-rw-r--r--src/arrow/ci/docker/ubuntu-20.04-cpp.dockerfile143
-rw-r--r--src/arrow/ci/docker/ubuntu-20.10-cpp.dockerfile140
-rw-r--r--src/arrow/ci/docker/ubuntu-21.04-cpp.dockerfile162
-rw-r--r--src/arrow/ci/etc/hdfs-site.xml52
-rw-r--r--src/arrow/ci/etc/rprofile62
-rw-r--r--src/arrow/ci/etc/valgrind-cran.supp34
-rw-r--r--src/arrow/ci/scripts/PKGBUILD138
-rwxr-xr-xsrc/arrow/ci/scripts/c_glib_build.sh45
-rwxr-xr-xsrc/arrow/ci/scripts/c_glib_test.sh48
-rwxr-xr-xsrc/arrow/ci/scripts/ccache_setup.sh26
-rwxr-xr-xsrc/arrow/ci/scripts/cpp_build.sh164
-rwxr-xr-xsrc/arrow/ci/scripts/cpp_test.sh118
-rwxr-xr-xsrc/arrow/ci/scripts/csharp_build.sh26
-rwxr-xr-xsrc/arrow/ci/scripts/csharp_pack.sh26
-rwxr-xr-xsrc/arrow/ci/scripts/csharp_test.sh29
-rwxr-xr-xsrc/arrow/ci/scripts/docs_build.sh48
-rwxr-xr-xsrc/arrow/ci/scripts/go_build.sh40
-rwxr-xr-xsrc/arrow/ci/scripts/go_cgo_python_test.sh45
-rwxr-xr-xsrc/arrow/ci/scripts/go_test.sh60
-rwxr-xr-xsrc/arrow/ci/scripts/install_conda.sh66
-rwxr-xr-xsrc/arrow/ci/scripts/install_dask.sh36
-rwxr-xr-xsrc/arrow/ci/scripts/install_gcs_testbench.sh38
-rwxr-xr-xsrc/arrow/ci/scripts/install_iwyu.sh48
-rwxr-xr-xsrc/arrow/ci/scripts/install_kartothek.sh41
-rwxr-xr-xsrc/arrow/ci/scripts/install_minio.sh62
-rwxr-xr-xsrc/arrow/ci/scripts/install_osx_sdk.sh41
-rwxr-xr-xsrc/arrow/ci/scripts/install_pandas.sh46
-rwxr-xr-xsrc/arrow/ci/scripts/install_python.sh68
-rwxr-xr-xsrc/arrow/ci/scripts/install_spark.sh31
-rwxr-xr-xsrc/arrow/ci/scripts/install_turbodbc.sh43
-rwxr-xr-xsrc/arrow/ci/scripts/install_vcpkg.sh39
-rwxr-xr-xsrc/arrow/ci/scripts/integration_arrow.sh40
-rwxr-xr-xsrc/arrow/ci/scripts/integration_dask.sh40
-rwxr-xr-xsrc/arrow/ci/scripts/integration_hdfs.sh69
-rwxr-xr-xsrc/arrow/ci/scripts/integration_hiveserver2.sh32
-rwxr-xr-xsrc/arrow/ci/scripts/integration_kartothek.sh31
-rwxr-xr-xsrc/arrow/ci/scripts/integration_spark.sh87
-rwxr-xr-xsrc/arrow/ci/scripts/integration_turbodbc.sh47
-rwxr-xr-xsrc/arrow/ci/scripts/java_build.sh107
-rwxr-xr-xsrc/arrow/ci/scripts/java_cdata_build.sh45
-rwxr-xr-xsrc/arrow/ci/scripts/java_full_build.sh42
-rwxr-xr-xsrc/arrow/ci/scripts/java_jni_macos_build.sh115
-rwxr-xr-xsrc/arrow/ci/scripts/java_jni_manylinux_build.sh137
-rwxr-xr-xsrc/arrow/ci/scripts/java_test.sh54
-rwxr-xr-xsrc/arrow/ci/scripts/js_build.sh36
-rwxr-xr-xsrc/arrow/ci/scripts/js_test.sh29
-rwxr-xr-xsrc/arrow/ci/scripts/matlab_build.sh29
-rwxr-xr-xsrc/arrow/ci/scripts/msys2_setup.sh79
-rwxr-xr-xsrc/arrow/ci/scripts/msys2_system_clean.sh33
-rwxr-xr-xsrc/arrow/ci/scripts/msys2_system_upgrade.sh28
-rwxr-xr-xsrc/arrow/ci/scripts/python_benchmark.sh40
-rwxr-xr-xsrc/arrow/ci/scripts/python_build.sh54
-rwxr-xr-xsrc/arrow/ci/scripts/python_sdist_build.sh27
-rwxr-xr-xsrc/arrow/ci/scripts/python_sdist_test.sh58
-rwxr-xr-xsrc/arrow/ci/scripts/python_test.sh32
-rwxr-xr-xsrc/arrow/ci/scripts/python_wheel_macos_build.sh166
-rwxr-xr-xsrc/arrow/ci/scripts/python_wheel_manylinux_build.sh149
-rwxr-xr-xsrc/arrow/ci/scripts/python_wheel_unix_test.sh84
-rw-r--r--src/arrow/ci/scripts/python_wheel_windows_build.bat109
-rwxr-xr-xsrc/arrow/ci/scripts/python_wheel_windows_test.bat55
-rwxr-xr-xsrc/arrow/ci/scripts/r_build.sh33
-rwxr-xr-xsrc/arrow/ci/scripts/r_deps.sh45
-rwxr-xr-xsrc/arrow/ci/scripts/r_docker_configure.sh81
-rwxr-xr-xsrc/arrow/ci/scripts/r_pkgdown_check.sh41
-rwxr-xr-xsrc/arrow/ci/scripts/r_revdepcheck.sh88
-rwxr-xr-xsrc/arrow/ci/scripts/r_sanitize.sh46
-rwxr-xr-xsrc/arrow/ci/scripts/r_test.sh106
-rwxr-xr-xsrc/arrow/ci/scripts/r_valgrind.sh46
-rwxr-xr-xsrc/arrow/ci/scripts/r_windows_build.sh116
-rwxr-xr-xsrc/arrow/ci/scripts/release_test.sh28
-rwxr-xr-xsrc/arrow/ci/scripts/ruby_test.sh29
-rwxr-xr-xsrc/arrow/ci/scripts/rust_build.sh59
-rwxr-xr-xsrc/arrow/ci/scripts/util_checkout.sh28
-rwxr-xr-xsrc/arrow/ci/scripts/util_cleanup.sh54
-rwxr-xr-xsrc/arrow/ci/scripts/util_download_apache.sh55
-rwxr-xr-xsrc/arrow/ci/scripts/util_wait_for_it.sh199
-rw-r--r--src/arrow/ci/vcpkg/arm64-linux-static-debug.cmake28
-rw-r--r--src/arrow/ci/vcpkg/arm64-linux-static-release.cmake28
-rw-r--r--src/arrow/ci/vcpkg/arm64-osx-static-debug.cmake26
-rw-r--r--src/arrow/ci/vcpkg/arm64-osx-static-release.cmake26
-rw-r--r--src/arrow/ci/vcpkg/ports.patch63
-rw-r--r--src/arrow/ci/vcpkg/universal2-osx-static-debug.cmake26
-rw-r--r--src/arrow/ci/vcpkg/universal2-osx-static-release.cmake26
-rw-r--r--src/arrow/ci/vcpkg/x64-linux-static-debug.cmake24
-rw-r--r--src/arrow/ci/vcpkg/x64-linux-static-release.cmake24
-rw-r--r--src/arrow/ci/vcpkg/x64-osx-static-debug.cmake25
-rw-r--r--src/arrow/ci/vcpkg/x64-osx-static-release.cmake25
-rw-r--r--src/arrow/ci/vcpkg/x64-windows-static-md-debug.cmake22
-rw-r--r--src/arrow/ci/vcpkg/x64-windows-static-md-release.cmake22
140 files changed, 8334 insertions, 0 deletions
diff --git a/src/arrow/ci/appveyor-cpp-build.bat b/src/arrow/ci/appveyor-cpp-build.bat
new file mode 100644
index 000000000..534f73c2d
--- /dev/null
+++ b/src/arrow/ci/appveyor-cpp-build.bat
@@ -0,0 +1,163 @@
+@rem Licensed to the Apache Software Foundation (ASF) under one
+@rem or more contributor license agreements. See the NOTICE file
+@rem distributed with this work for additional information
+@rem regarding copyright ownership. The ASF licenses this file
+@rem to you under the Apache License, Version 2.0 (the
+@rem "License"); you may not use this file except in compliance
+@rem with the License. You may obtain a copy of the License at
+@rem
+@rem http://www.apache.org/licenses/LICENSE-2.0
+@rem
+@rem Unless required by applicable law or agreed to in writing,
+@rem software distributed under the License is distributed on an
+@rem "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+@rem KIND, either express or implied. See the License for the
+@rem specific language governing permissions and limitations
+@rem under the License.
+
+@rem Echo every command so the CI log shows exactly what was executed.
+@echo on
+
+@rem Turn on symlink support for this checkout, then hard-reset the work tree
+@rem (presumably so that files stored as symlinks get re-created -- confirm).
+git config core.symlinks true
+git reset --hard
+
+@rem Retrieve git submodules, configure env var for Parquet unit tests
+git submodule update --init || exit /B
+
+@rem Both test-data paths are built from the current directory, so the script
+@rem is expected to be started from the root of the repository checkout.
+set ARROW_TEST_DATA=%CD%\testing\data
+set PARQUET_TEST_DATA=%CD%\cpp\submodules\parquet-testing\data
+
+@rem
+@rem In the configurations below we disable building the Arrow static library
+@rem to save some time. Unfortunately this will still build the Parquet static
+@rem library because of PARQUET-1420 (Thrift-generated symbols not exported in DLL).
+@rem
+@rem The Build_Debug job is handled entirely by the block below: it configures,
+@rem builds and tests in cpp\build-debug and then leaves the script, so nothing
+@rem after this block runs for that job.
+if "%JOB%" == "Build_Debug" (
+ mkdir cpp\build-debug
+ pushd cpp\build-debug
+
+ cmake -G "%GENERATOR%" ^
+ -DARROW_BOOST_USE_SHARED=OFF ^
+ -DARROW_BUILD_EXAMPLES=ON ^
+ -DARROW_BUILD_STATIC=OFF ^
+ -DARROW_BUILD_TESTS=ON ^
+ -DARROW_CXXFLAGS="/MP" ^
+ -DARROW_ENABLE_TIMING_TESTS=OFF ^
+ -DARROW_USE_PRECOMPILED_HEADERS=OFF ^
+ -DARROW_VERBOSE_THIRDPARTY_BUILD=OFF ^
+ -DCMAKE_BUILD_TYPE="Debug" ^
+ -DCMAKE_UNITY_BUILD=ON ^
+ .. || exit /B
+
+ @rem Build, then run the tests two at a time; a failing step aborts the script
+ cmake --build . --config Debug || exit /B
+ ctest --output-on-failure -j2 || exit /B
+ popd
+
+ @rem Finish Debug build successfully
+ exit /B 0
+)
+
+@rem Everything from here on runs inside the "arrow" conda environment.
+call activate arrow
+
+@rem Point the Boost lookup at the copy shipped with the conda environment.
+set "BOOST_ROOT=%CONDA_PREFIX%\Library"
+set "BOOST_LIBRARYDIR=%CONDA_PREFIX%\Library\lib"
+
+@rem Main C++ build script for Windows CI, i.e. the one used by the usual
+@rem configurations.
+
+@rem Default: we are in a conda environment but do not want to use it for the
+@rem dependencies. Only the Toolchain job takes them from conda, and that job
+@rem additionally switches BZ2 support on.
+set "CMAKE_ARGS=-DARROW_DEPENDENCY_SOURCE=AUTO"
+if "%JOB%" == "Toolchain" set "CMAKE_ARGS=-DARROW_DEPENDENCY_SOURCE=CONDA -DARROW_WITH_BZ2=ON"
+
+@rem /WX turns compiler warnings into errors; /MP is passed along with it.
+set "ARROW_CXXFLAGS=/WX /MP"
+
+@rem
+@rem Build and test Arrow C++ libraries (including Parquet)
+@rem
+
+mkdir cpp\build
+pushd cpp\build
+
+@rem XXX Without forcing CMAKE_CXX_COMPILER, CMake can re-run itself and
+@rem unfortunately switch from Release to Debug mode...
+@rem
+@rem In release mode, disable optimizations (/Od) for faster compiling
+@rem and enable runtime assertions.
+@rem
+@rem /UNDEBUG in CMAKE_CXX_FLAGS_RELEASE removes the NDEBUG definition, which
+@rem is what keeps the assertions active in this Release configuration.
+@rem
+@rem The FLIGHT, GANDIVA and S3 switches are read from the ARROW_BUILD_FLIGHT,
+@rem ARROW_BUILD_GANDIVA and ARROW_S3 environment variables. This script never
+@rem sets them, so they are expected to be provided by the caller.
+@rem
+@rem NOTE review: the configure step pins CMAKE_BUILD_TYPE to Release while the
+@rem build step selects its configuration from the CONFIGURATION variable --
+@rem confirm that the two always agree.
+
+cmake -G "%GENERATOR%" %CMAKE_ARGS% ^
+ -DARROW_BOOST_USE_SHARED=ON ^
+ -DARROW_BUILD_EXAMPLES=ON ^
+ -DARROW_BUILD_STATIC=OFF ^
+ -DARROW_BUILD_TESTS=ON ^
+ -DARROW_CSV=ON ^
+ -DARROW_CXXFLAGS="%ARROW_CXXFLAGS%" ^
+ -DARROW_DATASET=ON ^
+ -DARROW_ENABLE_TIMING_TESTS=OFF ^
+ -DARROW_ENGINE=ON ^
+ -DARROW_FLIGHT=%ARROW_BUILD_FLIGHT% ^
+ -DARROW_GANDIVA=%ARROW_BUILD_GANDIVA% ^
+ -DARROW_MIMALLOC=ON ^
+ -DARROW_PARQUET=ON ^
+ -DARROW_PYTHON=ON ^
+ -DARROW_S3=%ARROW_S3% ^
+ -DARROW_VERBOSE_THIRDPARTY_BUILD=OFF ^
+ -DARROW_WITH_BROTLI=ON ^
+ -DARROW_WITH_LZ4=ON ^
+ -DARROW_WITH_SNAPPY=ON ^
+ -DARROW_WITH_ZLIB=ON ^
+ -DARROW_WITH_ZSTD=ON ^
+ -DCMAKE_BUILD_TYPE="Release" ^
+ -DCMAKE_CXX_COMPILER=clcache ^
+ -DCMAKE_CXX_FLAGS_RELEASE="/MD /Od /UNDEBUG" ^
+ -DCMAKE_INSTALL_PREFIX=%CONDA_PREFIX%\Library ^
+ -DCMAKE_UNITY_BUILD=ON ^
+ -DCMAKE_VERBOSE_MAKEFILE=OFF ^
+ -DPARQUET_BUILD_EXECUTABLES=ON ^
+ -DPARQUET_REQUIRE_ENCRYPTION=ON ^
+ .. || exit /B
+@rem Building the install target also installs the result under the
+@rem CMAKE_INSTALL_PREFIX given above, i.e. the conda environment's Library dir.
+cmake --build . --target install --config %CONFIGURATION% || exit /B
+
+@rem Needed so arrow-python-test.exe works
+@rem PYTHONHOME is overridden only for the ctest run and put back afterwards.
+set OLD_PYTHONHOME=%PYTHONHOME%
+set PYTHONHOME=%CONDA_PREFIX%
+
+ctest --output-on-failure -j2 || exit /B
+
+set PYTHONHOME=%OLD_PYTHONHOME%
+popd
+
+@rem
+@rem Build and install pyarrow
+@rem
+
+@rem There is no matching popd before the end of this script.
+pushd python
+
+@rem PYARROW_* variables are presumably read by setup.py -- confirm.
+@rem Flight, Gandiva and S3 reuse the same environment switches that were
+@rem passed to the C++ configure step.
+@rem NOTE review: PYARROW_WITH_STATIC_BOOST is ON although the C++ build above
+@rem was configured with ARROW_BOOST_USE_SHARED=ON -- confirm this is intended.
+set PYARROW_BUNDLE_BOOST=OFF
+set PYARROW_CMAKE_GENERATOR=%GENERATOR%
+set PYARROW_CXXFLAGS=%ARROW_CXXFLAGS%
+set PYARROW_PARALLEL=2
+set PYARROW_WITH_DATASET=ON
+set PYARROW_WITH_FLIGHT=%ARROW_BUILD_FLIGHT%
+set PYARROW_WITH_GANDIVA=%ARROW_BUILD_GANDIVA%
+set PYARROW_WITH_PARQUET=ON
+set PYARROW_WITH_S3=%ARROW_S3%
+set PYARROW_WITH_STATIC_BOOST=ON
+
+@rem Same directory the C++ libraries were installed into by the build above.
+set ARROW_HOME=%CONDA_PREFIX%\Library
+@rem ARROW-3075; pkgconfig is broken for Parquet for now
+set PARQUET_HOME=%CONDA_PREFIX%\Library
+
+python setup.py develop -q || exit /B
+
+@rem Set only after the build, so it affects the test run but not setup.py.
+set PYTHONDEVMODE=1
+
+@rem -r sxX adds skipped, xfailed and xpassed tests to the summary,
+@rem --durations=15 lists the 15 slowest tests, and --pyargs resolves
+@rem pyarrow.tests as an importable package rather than a file path.
+py.test -r sxX --durations=15 --pyargs pyarrow.tests || exit /B
+
+@rem
+@rem Wheels are built and tested separately (see ARROW-5142).
+@rem
+
diff --git a/src/arrow/ci/appveyor-cpp-setup.bat b/src/arrow/ci/appveyor-cpp-setup.bat
new file mode 100644
index 000000000..cee9bc28e
--- /dev/null
+++ b/src/arrow/ci/appveyor-cpp-setup.bat
@@ -0,0 +1,108 @@
+@rem Licensed to the Apache Software Foundation (ASF) under one
+@rem or more contributor license agreements. See the NOTICE file
+@rem distributed with this work for additional information
+@rem regarding copyright ownership. The ASF licenses this file
+@rem to you under the Apache License, Version 2.0 (the
+@rem "License"); you may not use this file except in compliance
+@rem with the License. You may obtain a copy of the License at
+@rem
+@rem http://www.apache.org/licenses/LICENSE-2.0
+@rem
+@rem Unless required by applicable law or agreed to in writing,
+@rem software distributed under the License is distributed on an
+@rem "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+@rem KIND, either express or implied. See the License for the
+@rem specific language governing permissions and limitations
+@rem under the License.
+
+@echo on
+
+@rem Put the Miniconda 3.7 x64 directories ahead of everything else on PATH.
+set "PATH=C:\Miniconda37-x64;C:\Miniconda37-x64\Scripts;C:\Miniconda37-x64\Library\bin;%PATH%"
+
+@rem Default Boost location: the 1.67.0 copy under C:\Libraries.
+set "BOOST_ROOT=C:\Libraries\boost_1_67_0"
+set "BOOST_LIBRARYDIR=%BOOST_ROOT%\lib64-msvc-14.0"
+
+@rem
+@rem Delete every AppVeyor-installed OpenSSL tree so that the build cannot pick
+@rem one up by accident (doing so caused linker errors with gRPC).
+@rem XXX Perhaps there is a smarter way of solving this issue?
+@rem
+for %%D in (
+    OpenSSL-Win32 OpenSSL-Win64
+    OpenSSL-v11-Win32 OpenSSL-v11-Win64
+    OpenSSL-v111-Win32 OpenSSL-v111-Win64
+) do rd /s /q C:\%%D
+
+@rem
+@rem Configure miniconda
+@rem
+@rem Keep conda from updating itself, and show channel URLs in its output.
+conda config --set auto_update_conda false
+conda config --set show_channel_urls True
+@rem Help with SSL timeouts to S3
+conda config --set remote_connect_timeout_secs 12
+@rem Workaround for ARROW-13636
+conda config --append disallowed_packages pypy3
+@rem Dump the complete conda configuration into the CI log.
+conda info -a
+
+@rem
+@rem Create conda environment for Build and Toolchain jobs
+@rem
+@rem Avoid Boost 1.70 because of https://github.com/boostorg/process/issues/85
+@rem NOTE review: no Boost version constraint is passed on the command line
+@rem below; if one is still needed it has to come from the --file lists -- confirm.
+
+@rem Start with no extra arguments; the blocks below append --file options.
+@rem Each if-block is parsed on its own, so a CONDA_PACKAGES reference in a
+@rem later block already sees what an earlier block appended.
+set CONDA_PACKAGES=
+
+if "%ARROW_BUILD_GANDIVA%" == "ON" (
+ @rem Install llvmdev in the toolchain if building gandiva.dll
+ set CONDA_PACKAGES=%CONDA_PACKAGES% --file=ci\conda_env_gandiva_win.txt
+)
+if "%JOB%" == "Toolchain" (
+ @rem Install pre-built "toolchain" packages for faster builds
+ set CONDA_PACKAGES=%CONDA_PACKAGES% --file=ci\conda_env_cpp.txt
+)
+if "%JOB%" NEQ "Build_Debug" (
+ @rem Arrow conda environment is only required for the Build and Toolchain jobs
+ conda create -n arrow -q -y -c conda-forge ^
+ --file=ci\conda_env_python.txt ^
+ %CONDA_PACKAGES% ^
+ "cmake=3.17" ^
+ "ninja" ^
+ "nomkl" ^
+ "pandas" ^
+ "fsspec" ^
+ "python=%PYTHON%" ^
+ || exit /B
+
+ @rem On Windows, GTest is always bundled from source instead of using
+ @rem conda binaries, avoid any interference between the two versions.
+ @rem A failure of this uninstall does not abort the script.
+ if "%JOB%" == "Toolchain" (
+ conda uninstall -n arrow -q -y -c conda-forge gtest
+ )
+)
+
+@rem
+@rem Configure compiler
+@rem
+@rem vcvarsall.bat is only loaded for the Ninja generator, or when
+@rem need_vcvarsall has already been defined by the caller.
+if "%GENERATOR%"=="Ninja" set need_vcvarsall=1
+if defined need_vcvarsall (
+    if "%APPVEYOR_BUILD_WORKER_IMAGE%" NEQ "Visual Studio 2017" (
+        @rem ARROW-14070 Visual Studio 2015 no longer supported.
+        @rem Fail explicitly: a bare exit /B hands back the current ERRORLEVEL,
+        @rem which is normally 0 here, so the setup step would report success
+        @rem without a compiler environment or clcache being configured.
+        echo ERROR: a Visual Studio 2017 build worker image is required, got "%APPVEYOR_BUILD_WORKER_IMAGE%" 1>&2
+        exit /B 1
+    )
+    call "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Auxiliary\Build\vcvarsall.bat" amd64
+)
+
+@rem
+@rem Use clcache for faster builds
+@rem
+pip install -q clcache-alt || exit /B
+@rem Limit cache size to 500 MB
+clcache -M 500000000
+@rem Presumably -c cleans the cache and -s prints its statistics -- confirm
+@rem against the clcache command-line help.
+clcache -c
+clcache -s
+@rem Start-Process launches clcache-server as a separate process, so this
+@rem script carries on without waiting for the server to exit.
+powershell.exe -Command "Start-Process clcache-server" || exit /B
+
+@rem
+@rem Download Minio somewhere on PATH, for unit tests
+@rem
+@rem Skipped unless ARROW_S3 is ON; a failed download aborts the script.
+if "%ARROW_S3%" == "ON" (
+ appveyor DownloadFile https://dl.min.io/server/minio/release/windows-amd64/minio.exe -FileName C:\Windows\Minio.exe || exit /B
+)
diff --git a/src/arrow/ci/conda_env_archery.txt b/src/arrow/ci/conda_env_archery.txt
new file mode 100644
index 000000000..ace7a42ac
--- /dev/null
+++ b/src/arrow/ci/conda_env_archery.txt
@@ -0,0 +1,42 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# cli
+click
+
+# bot, crossbow
+github3.py
+jinja2
+jira
+pygit2
+pygithub
+ruamel.yaml
+setuptools_scm
+toolz
+
+# benchmark
+pandas
+
+# docker
+python-dotenv
+#ruamel.yaml
+
+# release
+gitpython
+#jinja2
+#jira
+semver
diff --git a/src/arrow/ci/conda_env_cpp.txt b/src/arrow/ci/conda_env_cpp.txt
new file mode 100644
index 000000000..d2ccb66a2
--- /dev/null
+++ b/src/arrow/ci/conda_env_cpp.txt
@@ -0,0 +1,42 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+aws-sdk-cpp
+benchmark>=1.5.4
+boost-cpp>=1.68.0
+brotli
+bzip2
+c-ares
+cmake
+gflags
+glog
+gmock>=1.10.0
+grpc-cpp>=1.27.3
+gtest=1.10.0
+libprotobuf
+libutf8proc
+lz4-c
+make
+ninja
+pkg-config
+python
+rapidjson
+re2
+snappy
+thrift-cpp>=0.11.0
+zlib
+zstd
diff --git a/src/arrow/ci/conda_env_crossbow.txt b/src/arrow/ci/conda_env_crossbow.txt
new file mode 100644
index 000000000..347294650
--- /dev/null
+++ b/src/arrow/ci/conda_env_crossbow.txt
@@ -0,0 +1,25 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+click
+github3.py
+jinja2
+jira
+pygit2
+ruamel.yaml
+setuptools_scm
+toolz
diff --git a/src/arrow/ci/conda_env_gandiva.txt b/src/arrow/ci/conda_env_gandiva.txt
new file mode 100644
index 000000000..024b9fe74
--- /dev/null
+++ b/src/arrow/ci/conda_env_gandiva.txt
@@ -0,0 +1,19 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+clang=11
+llvmdev=11
diff --git a/src/arrow/ci/conda_env_gandiva_win.txt b/src/arrow/ci/conda_env_gandiva_win.txt
new file mode 100644
index 000000000..9098b53d1
--- /dev/null
+++ b/src/arrow/ci/conda_env_gandiva_win.txt
@@ -0,0 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# llvmdev=9 or later require Visual Studio 2017
+clangdev=8
+llvmdev=8
diff --git a/src/arrow/ci/conda_env_python.txt b/src/arrow/ci/conda_env_python.txt
new file mode 100644
index 000000000..5f4d4c815
--- /dev/null
+++ b/src/arrow/ci/conda_env_python.txt
@@ -0,0 +1,32 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# don't add pandas here, because it is not a mandatory test dependency
+boto3 # not a direct dependency of s3fs, but needed for our s3fs fixture
+cffi
+cython
+cloudpickle
+fsspec
+hypothesis
+numpy>=1.16.6
+pytest
+pytest-faulthandler
+pytest-lazy-fixture
+pytz
+s3fs>=2021.8.0
+setuptools
+setuptools_scm
diff --git a/src/arrow/ci/conda_env_r.txt b/src/arrow/ci/conda_env_r.txt
new file mode 100644
index 000000000..151aefb60
--- /dev/null
+++ b/src/arrow/ci/conda_env_r.txt
@@ -0,0 +1,38 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+r-assertthat
+r-base
+r-bit64
+r-dplyr
+r-purrr
+r-r6
+# We are currently vendoring cpp11; restore in ARROW-13610
+# r-cpp11
+r-rlang
+r-tidyselect
+r-vctrs
+# Test/"Suggests" dependencies
+pandoc
+r-covr
+r-hms
+r-lubridate
+r-rcmdcheck
+r-reticulate
+r-rmarkdown
+r-testthat
+r-tibble
diff --git a/src/arrow/ci/conda_env_sphinx.txt b/src/arrow/ci/conda_env_sphinx.txt
new file mode 100644
index 000000000..64e1c16a5
--- /dev/null
+++ b/src/arrow/ci/conda_env_sphinx.txt
@@ -0,0 +1,23 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Requirements for building the documentation
+breathe
+doxygen
+ipython
+sphinx=4.2
+pydata-sphinx-theme
diff --git a/src/arrow/ci/conda_env_unix.txt b/src/arrow/ci/conda_env_unix.txt
new file mode 100644
index 000000000..1973238ad
--- /dev/null
+++ b/src/arrow/ci/conda_env_unix.txt
@@ -0,0 +1,23 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# conda package dependencies specific to Unix-like environments (Linux and macOS)
+
+autoconf
+ccache
+orc
+pkg-config
diff --git a/src/arrow/ci/detect-changes.py b/src/arrow/ci/detect-changes.py
new file mode 100644
index 000000000..14e71ed48
--- /dev/null
+++ b/src/arrow/ci/detect-changes.py
@@ -0,0 +1,362 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from __future__ import print_function
+
+import functools
+import os
+import pprint
+import re
+import sys
+import subprocess
+
+
+perr = functools.partial(print, file=sys.stderr)
+
+
+def dump_env_vars(prefix, pattern=None):
+ if pattern is not None:
+ match = lambda s: re.search(pattern, s)
+ else:
+ match = lambda s: True
+ for name in sorted(os.environ):
+ if name.startswith(prefix) and match(name):
+ perr("- {0}: {1!r}".format(name, os.environ[name]))
+
+
+def run_cmd(cmdline):
+ proc = subprocess.Popen(cmdline,
+ stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+ out, err = proc.communicate()
+ if proc.returncode != 0:
+ raise RuntimeError("Command {cmdline} failed with code {returncode}, "
+ "stderr was:\n{stderr}\n"
+ .format(cmdline=cmdline, returncode=proc.returncode,
+ stderr=err.decode()))
+ return out
+
+
+def get_commit_description(commit):
+ """
+ Return the textual description (title + body) of the given git commit.
+ """
+ out = run_cmd(["git", "show", "--no-patch", "--pretty=format:%B",
+ commit])
+ return out.decode('utf-8', 'ignore')
+
+
+def list_affected_files(commit_range):
+ """
+ Return a list of files changed by the given git commit range.
+ """
+ perr("Getting affected files from", repr(commit_range))
+ out = run_cmd(["git", "diff", "--name-only", commit_range])
+ return list(filter(None, (s.strip() for s in out.decode().splitlines())))
+
+
+def get_travis_head_commit():
+ return os.environ['TRAVIS_COMMIT']
+
+
+def get_travis_commit_range():
+ if os.environ['TRAVIS_EVENT_TYPE'] == 'pull_request':
+ # TRAVIS_COMMIT_RANGE is too pessimistic for PRs, as it may contain
+ # unrelated changes. Instead, use the same strategy as on AppVeyor
+ # below.
+ run_cmd(["git", "fetch", "-q", "origin",
+ "+refs/heads/{0}".format(os.environ['TRAVIS_BRANCH'])])
+ merge_base = run_cmd(["git", "merge-base",
+ "HEAD", "FETCH_HEAD"]).decode().strip()
+ return "{0}..HEAD".format(merge_base)
+ else:
+ cr = os.environ['TRAVIS_COMMIT_RANGE']
+ # See
+ # https://github.com/travis-ci/travis-ci/issues/4596#issuecomment-139811122
+ return cr.replace('...', '..')
+
+
+def get_travis_commit_description():
+ # Prefer this to get_commit_description(get_travis_head_commit()),
+ # as rebasing or other repository events may make TRAVIS_COMMIT invalid
+ # at the time we inspect it
+ return os.environ['TRAVIS_COMMIT_MESSAGE']
+
+
+def list_travis_affected_files():
+ """
+ Return a list of files affected in the current Travis build.
+ """
+ commit_range = get_travis_commit_range()
+ try:
+ return list_affected_files(commit_range)
+ except RuntimeError:
+ # TRAVIS_COMMIT_RANGE can contain invalid revisions when
+ # building a branch (not a PR) after rebasing:
+ # https://github.com/travis-ci/travis-ci/issues/2668
+ if os.environ['TRAVIS_EVENT_TYPE'] == 'pull_request':
+ raise
+ # If it's a rebase, it's probably enough to use the last commit only
+ commit_range = '{0}^..'.format(get_travis_head_commit())
+ return list_affected_files(commit_range)
+
+
+def list_appveyor_affected_files():
+ """
+ Return a list of files affected in the current AppVeyor build.
+ This only works for PR builds.
+ """
+ # Re-fetch PR base branch (e.g. origin/master), pointing FETCH_HEAD to it
+ run_cmd(["git", "fetch", "-q", "origin",
+ "+refs/heads/{0}".format(os.environ['APPVEYOR_REPO_BRANCH'])])
+ # Compute base changeset between FETCH_HEAD (PR base) and HEAD (PR head)
+ merge_base = run_cmd(["git", "merge-base",
+ "HEAD", "FETCH_HEAD"]).decode().strip()
+ # Compute changed files between base changeset and HEAD
+ return list_affected_files("{0}..HEAD".format(merge_base))
+
+
+def list_github_actions_affected_files():
+ """
+ Return a list of files affected in the current GitHub Actions build.
+ """
+ # GitHub Actions checks out `refs/remotes/pull/$PR/merge`, where `HEAD` is
+ # the merge commit and `HEAD^` is the commit before it. Hence, `HEAD^..`
+ # (i.e. `HEAD^..HEAD`) covers all the changes between master and the PR.
+ return list_affected_files("HEAD^..")
+
+
+LANGUAGE_TOPICS = ['c_glib', 'cpp', 'docs', 'go', 'java', 'js', 'python',
+ 'r', 'ruby', 'csharp']
+
+ALL_TOPICS = LANGUAGE_TOPICS + ['integration', 'dev']
+
+
+AFFECTED_DEPENDENCIES = {
+ 'java': ['integration', 'python'],
+ 'js': ['integration'],
+ 'ci': ALL_TOPICS,
+ 'cpp': ['python', 'c_glib', 'r', 'ruby', 'integration'],
+ 'format': LANGUAGE_TOPICS,
+ 'go': ['integration'],
+ '.travis.yml': ALL_TOPICS,
+ 'appveyor.yml': ALL_TOPICS,
+ # In theory, it should ignore CONTRIBUTING.md and ISSUE_TEMPLATE.md, but in
+ # practice it's going to be CI
+ '.github': ALL_TOPICS,
+ 'c_glib': ['ruby']
+}
+
+COMPONENTS = {'cpp', 'java', 'c_glib', 'r', 'ruby', 'integration', 'js',
+ 'csharp', 'go', 'docs', 'python', 'dev'}
+
+
+def get_affected_topics(affected_files):
+ """
+ Return a dict of topics affected by the given files.
+ Each dict value is True if affected, False otherwise.
+ """
+ affected = dict.fromkeys(ALL_TOPICS, False)
+
+ for path in affected_files:
+ parts = []
+ head = path
+ while head:
+ head, tail = os.path.split(head)
+ parts.append(tail)
+ parts.reverse()
+ assert parts
+ p = parts[0]
+ fn = parts[-1]
+ if fn.startswith('README'):
+ continue
+
+ if p in COMPONENTS:
+ affected[p] = True
+
+ _path_already_affected = {}
+
+ def _affect_dependencies(component):
+ if component in _path_already_affected:
+ # For circular dependencies, terminate
+ return
+ for topic in AFFECTED_DEPENDENCIES.get(component, ()):
+ affected[topic] = True
+ _affect_dependencies(topic)
+ _path_already_affected[topic] = True
+
+ _affect_dependencies(p)
+
+ return affected
+
+
+def make_env_for_topics(affected):
+ return {'ARROW_CI_{0}_AFFECTED'.format(k.upper()): '1' if v else '0'
+ for k, v in affected.items()}
+
+
+def get_unix_shell_eval(env):
+ """
+ Return a shell-evalable string to setup some environment variables.
+ """
+ return "; ".join(("export {0}='{1}'".format(k, v)
+ for k, v in env.items()))
+
+
+def get_windows_shell_eval(env):
+ """
+ Return a shell-evalable string to setup some environment variables.
+ """
+ return "\n".join(('set "{0}={1}"'.format(k, v)
+ for k, v in env.items()))
+
+
+def run_from_travis():
+ perr("Environment variables (excerpt):")
+ dump_env_vars('TRAVIS_', '(BRANCH|COMMIT|PULL)')
+ if (os.environ['TRAVIS_REPO_SLUG'] == 'apache/arrow' and
+ os.environ['TRAVIS_BRANCH'] == 'master' and
+ os.environ['TRAVIS_EVENT_TYPE'] != 'pull_request'):
+ # Never skip anything on master builds in the official repository
+ affected = dict.fromkeys(ALL_TOPICS, True)
+ else:
+ desc = get_travis_commit_description()
+ if '[skip travis]' in desc:
+ # Skip everything
+ affected = dict.fromkeys(ALL_TOPICS, False)
+ elif '[force ci]' in desc or '[force travis]' in desc:
+ # Test everything
+ affected = dict.fromkeys(ALL_TOPICS, True)
+ else:
+ # Test affected topics
+ affected_files = list_travis_affected_files()
+ perr("Affected files:", affected_files)
+ affected = get_affected_topics(affected_files)
+ assert set(affected) <= set(ALL_TOPICS), affected
+
+ perr("Affected topics:")
+ perr(pprint.pformat(affected))
+ return get_unix_shell_eval(make_env_for_topics(affected))
+
+
+def run_from_appveyor():
+ perr("Environment variables (excerpt):")
+ dump_env_vars('APPVEYOR_', '(PULL|REPO)')
+ if not os.environ.get('APPVEYOR_PULL_REQUEST_HEAD_COMMIT'):
+ # Not a PR build, test everything
+ affected = dict.fromkeys(ALL_TOPICS, True)
+ else:
+ affected_files = list_appveyor_affected_files()
+ perr("Affected files:", affected_files)
+ affected = get_affected_topics(affected_files)
+ assert set(affected) <= set(ALL_TOPICS), affected
+
+ perr("Affected topics:")
+ perr(pprint.pformat(affected))
+ return get_windows_shell_eval(make_env_for_topics(affected))
+
+
+def run_from_github():
+ perr("Environment variables (excerpt):")
+ dump_env_vars('GITHUB_', '(REPOSITORY|ACTOR|SHA|REF|HEAD_REF|BASE_REF|EVENT_NAME)')
+ if os.environ['GITHUB_EVENT_NAME'] != 'pull_request':
+ # Not a PR build, test everything
+ affected = dict.fromkeys(ALL_TOPICS, True)
+ else:
+ affected_files = list_github_actions_affected_files()
+ perr("Affected files:", affected_files)
+ affected = get_affected_topics(affected_files)
+ assert set(affected) <= set(ALL_TOPICS), affected
+
+ perr("Affected topics:")
+ perr(pprint.pformat(affected))
+ return get_unix_shell_eval(make_env_for_topics(affected))
+
+
+def test_get_affected_topics():
+ affected_topics = get_affected_topics(['cpp/CMakeLists.txt'])
+ assert affected_topics == {
+ 'c_glib': True,
+ 'cpp': True,
+ 'docs': False,
+ 'go': False,
+ 'java': False,
+ 'js': False,
+ 'python': True,
+ 'r': True,
+ 'ruby': True,
+ 'csharp': False,
+ 'integration': True,
+ 'dev': False
+ }
+
+ affected_topics = get_affected_topics(['format/Schema.fbs'])
+ assert affected_topics == {
+ 'c_glib': True,
+ 'cpp': True,
+ 'docs': True,
+ 'go': True,
+ 'java': True,
+ 'js': True,
+ 'python': True,
+ 'r': True,
+ 'ruby': True,
+ 'csharp': True,
+ 'integration': True,
+ 'dev': False
+ }
+
+ affected_topics = get_affected_topics(['.github/workflows'])
+ assert affected_topics == {
+ 'c_glib': True,
+ 'cpp': True,
+ 'docs': True,
+ 'go': True,
+ 'java': True,
+ 'js': True,
+ 'python': True,
+ 'r': True,
+ 'ruby': True,
+ 'csharp': True,
+ 'integration': True,
+ 'dev': True,
+ }
+
+
+if __name__ == "__main__":
+ # This script should have its output evaluated by a shell,
+ # e.g. "eval `python ci/detect-changes.py`"
+ if os.environ.get('TRAVIS'):
+ try:
+ print(run_from_travis())
+ except Exception:
+ # Make sure the enclosing eval will return an error
+ print("exit 1")
+ raise
+ elif os.environ.get('APPVEYOR'):
+ try:
+ print(run_from_appveyor())
+ except Exception:
+ print("exit 1")
+ raise
+ elif os.environ.get('GITHUB_WORKFLOW'):
+ try:
+ print(run_from_github())
+ except Exception:
+ print("exit 1")
+ raise
+ else:
+ sys.exit("Script must be run under Travis-CI, AppVeyor or GitHub Actions")
diff --git a/src/arrow/ci/docker/conda-cpp.dockerfile b/src/arrow/ci/docker/conda-cpp.dockerfile
new file mode 100644
index 000000000..ff31930c0
--- /dev/null
+++ b/src/arrow/ci/docker/conda-cpp.dockerfile
@@ -0,0 +1,53 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+ARG repo
+ARG arch
+FROM ${repo}:${arch}-conda
+
+# install the required conda packages into the test environment
+COPY ci/conda_env_cpp.txt \
+ ci/conda_env_gandiva.txt \
+ /arrow/ci/
+RUN conda install \
+ --file arrow/ci/conda_env_cpp.txt \
+ --file arrow/ci/conda_env_gandiva.txt \
+ compilers \
+ doxygen \
+ valgrind && \
+ conda clean --all
+
+ENV ARROW_BUILD_TESTS=ON \
+ ARROW_DATASET=ON \
+ ARROW_DEPENDENCY_SOURCE=CONDA \
+ ARROW_FLIGHT=ON \
+ ARROW_GANDIVA=ON \
+ ARROW_HOME=$CONDA_PREFIX \
+ ARROW_ORC=ON \
+ ARROW_PARQUET=ON \
+ ARROW_PLASMA=ON \
+ ARROW_S3=ON \
+ ARROW_USE_CCACHE=ON \
+ ARROW_WITH_BROTLI=ON \
+ ARROW_WITH_BZ2=ON \
+ ARROW_WITH_LZ4=ON \
+ ARROW_WITH_SNAPPY=ON \
+ ARROW_WITH_ZLIB=ON \
+ ARROW_WITH_ZSTD=ON \
+ PARQUET_BUILD_EXAMPLES=ON \
+ PARQUET_BUILD_EXECUTABLES=ON \
+ PARQUET_HOME=$CONDA_PREFIX
diff --git a/src/arrow/ci/docker/conda-integration.dockerfile b/src/arrow/ci/docker/conda-integration.dockerfile
new file mode 100644
index 000000000..43d8d943b
--- /dev/null
+++ b/src/arrow/ci/docker/conda-integration.dockerfile
@@ -0,0 +1,73 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+ARG repo
+ARG arch=amd64
+FROM ${repo}:${arch}-conda-cpp
+
+ARG arch=amd64
+ARG maven=3.5
+ARG node=14
+ARG jdk=8
+ARG go=1.15
+
+# Install Archery and integration dependencies
+COPY ci/conda_env_archery.txt /arrow/ci/
+RUN conda install -q \
+ --file arrow/ci/conda_env_archery.txt \
+ "python>=3.7" \
+ numpy \
+ compilers \
+ maven=${maven} \
+ nodejs=${node} \
+ yarn \
+ openjdk=${jdk} && \
+ conda clean --all --force-pkgs-dirs
+
+# Install Rust with only the needed components
+# (rustfmt is needed for tonic-build to compile the protobuf definitions)
+RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- --profile=minimal -y && \
+ $HOME/.cargo/bin/rustup component add rustfmt
+
+ENV GOROOT=/opt/go \
+ GOBIN=/opt/go/bin \
+ GOPATH=/go \
+ PATH=/opt/go/bin:$PATH
+RUN wget -nv -O - https://dl.google.com/go/go${go}.linux-${arch}.tar.gz | tar -xzf - -C /opt
+
+ENV DOTNET_ROOT=/opt/dotnet \
+ PATH=/opt/dotnet:$PATH
+RUN curl -sSL https://dot.net/v1/dotnet-install.sh | bash /dev/stdin -Channel 3.1 -InstallDir /opt/dotnet
+
+ENV ARROW_BUILD_INTEGRATION=ON \
+ ARROW_BUILD_STATIC=OFF \
+ ARROW_BUILD_TESTS=OFF \
+ ARROW_COMPUTE=OFF \
+ ARROW_CSV=OFF \
+ ARROW_DATASET=OFF \
+ ARROW_FILESYSTEM=OFF \
+ ARROW_FLIGHT=ON \
+ ARROW_GANDIVA=OFF \
+ ARROW_HDFS=OFF \
+ ARROW_JEMALLOC=OFF \
+ ARROW_JSON=OFF \
+ ARROW_ORC=OFF \
+ ARROW_PARQUET=OFF \
+ ARROW_PLASMA=OFF \
+ ARROW_S3=OFF \
+ ARROW_USE_GLOG=OFF \
+ CMAKE_UNITY_BUILD=ON
diff --git a/src/arrow/ci/docker/conda-python-dask.dockerfile b/src/arrow/ci/docker/conda-python-dask.dockerfile
new file mode 100644
index 000000000..cd59a5538
--- /dev/null
+++ b/src/arrow/ci/docker/conda-python-dask.dockerfile
@@ -0,0 +1,25 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+ARG repo
+ARG arch=amd64
+ARG python=3.6
+FROM ${repo}:${arch}-conda-python-${python}
+
+ARG dask=latest
+COPY ci/scripts/install_dask.sh /arrow/ci/scripts/
+RUN /arrow/ci/scripts/install_dask.sh ${dask}
\ No newline at end of file
diff --git a/src/arrow/ci/docker/conda-python-hdfs.dockerfile b/src/arrow/ci/docker/conda-python-hdfs.dockerfile
new file mode 100644
index 000000000..f6ffc71ce
--- /dev/null
+++ b/src/arrow/ci/docker/conda-python-hdfs.dockerfile
@@ -0,0 +1,52 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+ARG repo
+ARG arch=amd64
+ARG python=3.6
+FROM ${repo}:${arch}-conda-python-${python}
+
+ARG jdk=8
+ARG maven=3.5
+RUN conda install -q \
+ maven=${maven} \
+ openjdk=${jdk} \
+ pandas && \
+ conda clean --all
+
+# installing libhdfs (JNI)
+ARG hdfs=3.2.1
+ENV HADOOP_HOME=/opt/hadoop-${hdfs} \
+ HADOOP_OPTS=-Djava.library.path=/opt/hadoop-${hdfs}/lib/native \
+ PATH=$PATH:/opt/hadoop-${hdfs}/bin:/opt/hadoop-${hdfs}/sbin
+COPY ci/scripts/util_download_apache.sh /arrow/ci/scripts/
+RUN /arrow/ci/scripts/util_download_apache.sh \
+ "hadoop/common/hadoop-${hdfs}/hadoop-${hdfs}.tar.gz" /opt
+
+COPY ci/etc/hdfs-site.xml $HADOOP_HOME/etc/hadoop/
+
+# build cpp with tests
+ENV CC=gcc \
+ CXX=g++ \
+ ARROW_FLIGHT=OFF \
+ ARROW_GANDIVA=OFF \
+ ARROW_PLASMA=OFF \
+ ARROW_PARQUET=ON \
+ ARROW_ORC=OFF \
+ ARROW_HDFS=ON \
+ ARROW_PYTHON=ON \
+ ARROW_BUILD_TESTS=ON
diff --git a/src/arrow/ci/docker/conda-python-jpype.dockerfile b/src/arrow/ci/docker/conda-python-jpype.dockerfile
new file mode 100644
index 000000000..f77ef9bf6
--- /dev/null
+++ b/src/arrow/ci/docker/conda-python-jpype.dockerfile
@@ -0,0 +1,29 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+ARG repo
+ARG arch=amd64
+ARG python=3.6
+FROM ${repo}:${arch}-conda-python-${python}
+
+ARG jdk=11
+ARG maven=3.6
+RUN conda install -q \
+ maven=${maven} \
+ openjdk=${jdk} \
+ jpype1 && \
+ conda clean --all
diff --git a/src/arrow/ci/docker/conda-python-kartothek.dockerfile b/src/arrow/ci/docker/conda-python-kartothek.dockerfile
new file mode 100644
index 000000000..d52316182
--- /dev/null
+++ b/src/arrow/ci/docker/conda-python-kartothek.dockerfile
@@ -0,0 +1,46 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+ARG repo
+ARG arch=amd64
+ARG python=3.6
+FROM ${repo}:${arch}-conda-python-${python}
+
+# install kartothek dependencies from conda-forge
+RUN conda install -c conda-forge -q \
+ attrs \
+ click \
+ cloudpickle \
+ dask \
+ decorator \
+ freezegun \
+ msgpack-python \
+ prompt-toolkit \
+ pytest-mock \
+ pytest-xdist \
+ pyyaml \
+ simplejson \
+ simplekv \
+ storefact \
+ toolz \
+ urlquote \
+ zstandard && \
+ conda clean --all
+
+ARG kartothek=latest
+COPY ci/scripts/install_kartothek.sh /arrow/ci/scripts/
+RUN /arrow/ci/scripts/install_kartothek.sh ${kartothek} /kartothek
diff --git a/src/arrow/ci/docker/conda-python-pandas.dockerfile b/src/arrow/ci/docker/conda-python-pandas.dockerfile
new file mode 100644
index 000000000..303cc80e4
--- /dev/null
+++ b/src/arrow/ci/docker/conda-python-pandas.dockerfile
@@ -0,0 +1,27 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+ARG repo
+ARG arch=amd64
+ARG python=3.6
+FROM ${repo}:${arch}-conda-python-${python}
+
+ARG pandas=latest
+ARG numpy=latest
+COPY ci/scripts/install_pandas.sh /arrow/ci/scripts/
+RUN conda uninstall -q -y numpy && \
+ /arrow/ci/scripts/install_pandas.sh ${pandas} ${numpy}
diff --git a/src/arrow/ci/docker/conda-python-spark.dockerfile b/src/arrow/ci/docker/conda-python-spark.dockerfile
new file mode 100644
index 000000000..a2af2ac13
--- /dev/null
+++ b/src/arrow/ci/docker/conda-python-spark.dockerfile
@@ -0,0 +1,43 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+ARG repo
+ARG arch=amd64
+ARG python=3.6
+FROM ${repo}:${arch}-conda-python-${python}
+
+ARG jdk=8
+ARG maven=3.5
+
+RUN conda install -q \
+ openjdk=${jdk} \
+ maven=${maven} \
+ pandas && \
+ conda clean --all
+
+# installing specific version of spark
+ARG spark=master
+COPY ci/scripts/install_spark.sh /arrow/ci/scripts/
+RUN /arrow/ci/scripts/install_spark.sh ${spark} /spark
+
+# build cpp without tests
+ENV CC=gcc \
+ CXX=g++ \
+ ARROW_PYTHON=ON \
+ ARROW_HDFS=ON \
+ ARROW_BUILD_TESTS=OFF \
+ SPARK_VERSION=${spark}
diff --git a/src/arrow/ci/docker/conda-python-turbodbc.dockerfile b/src/arrow/ci/docker/conda-python-turbodbc.dockerfile
new file mode 100644
index 000000000..e748604de
--- /dev/null
+++ b/src/arrow/ci/docker/conda-python-turbodbc.dockerfile
@@ -0,0 +1,50 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+ARG repo
+ARG arch=amd64
+ARG python=3.6
+FROM ${repo}:${arch}-conda-python-${python}
+
+RUN export DEBIAN_FRONTEND=noninteractive && \
+ apt-get update -y -q && \
+ apt-get install -y -q --no-install-recommends \
+ odbc-postgresql \
+ postgresql \
+ sudo && \
+ apt-get clean && \
+ rm -rf /var/lib/apt/lists/*
+
+# install turbodbc dependencies from conda-forge
+RUN conda install -c conda-forge -q \
+ pybind11 \
+ pytest-cov \
+ mock \
+ unixodbc && \
+ conda clean --all
+
+RUN service postgresql start && \
+ sudo -u postgres psql -U postgres -c \
+ "CREATE DATABASE test_db;" && \
+ sudo -u postgres psql -U postgres -c \
+ "ALTER USER postgres WITH PASSWORD 'password';"
+
+ARG turbodbc=latest
+COPY ci/scripts/install_turbodbc.sh /arrow/ci/scripts/
+RUN /arrow/ci/scripts/install_turbodbc.sh ${turbodbc} /turbodbc
+
+ENV TURBODBC_TEST_CONFIGURATION_FILES "query_fixtures_postgresql.json"
diff --git a/src/arrow/ci/docker/conda-python.dockerfile b/src/arrow/ci/docker/conda-python.dockerfile
new file mode 100644
index 000000000..ab3f77be1
--- /dev/null
+++ b/src/arrow/ci/docker/conda-python.dockerfile
@@ -0,0 +1,37 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+ARG repo
+ARG arch
+FROM ${repo}:${arch}-conda-cpp
+
+# install python specific packages
+ARG python=3.6
+COPY ci/conda_env_python.txt /arrow/ci/
+RUN conda install -q \
+ --file arrow/ci/conda_env_python.txt \
+ $([ "$python" == "3.6" -o "$python" == "3.7" ] && echo "pickle5") \
+ python=${python} \
+ nomkl && \
+ conda clean --all
+
+ENV ARROW_PYTHON=ON \
+ ARROW_BUILD_STATIC=OFF \
+ ARROW_BUILD_TESTS=OFF \
+ ARROW_BUILD_UTILITIES=OFF \
+ ARROW_TENSORFLOW=ON \
+ ARROW_USE_GLOG=OFF
diff --git a/src/arrow/ci/docker/conda.dockerfile b/src/arrow/ci/docker/conda.dockerfile
new file mode 100644
index 000000000..adb64f9fa
--- /dev/null
+++ b/src/arrow/ci/docker/conda.dockerfile
@@ -0,0 +1,55 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+ARG arch=amd64
+FROM ${arch}/ubuntu:18.04
+
+# arch is unset after the FROM statement, so need to define it again
+ARG arch=amd64
+ARG prefix=/opt/conda
+
+# install build essentials
+RUN export DEBIAN_FRONTEND=noninteractive && \
+    apt-get update -y -q && \
+    apt-get install -y -q wget tzdata libc6-dbg gdb \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
+
+ENV PATH=${prefix}/bin:$PATH
+# install conda, minio and the GCS testbench
+COPY ci/scripts/install_conda.sh \
+     ci/scripts/install_minio.sh \
+     /arrow/ci/scripts/
+RUN /arrow/ci/scripts/install_conda.sh ${arch} linux latest ${prefix}
+RUN /arrow/ci/scripts/install_minio.sh ${arch} linux latest ${prefix}
+COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts/
+RUN /arrow/ci/scripts/install_gcs_testbench.sh ${arch} default
+
+# create a conda environment
+COPY ci/conda_env_unix.txt /arrow/ci/
+RUN conda create -n arrow --file arrow/ci/conda_env_unix.txt git && \
+    conda clean --all
+
+# activate the created environment by default
+RUN echo "conda activate arrow" >> ~/.profile
+ENV CONDA_PREFIX=${prefix}/envs/arrow
+
+# use login shell to activate arrow environment in the RUN commands
+SHELL [ "/bin/bash", "-c", "-l" ]
+
+# use login shell when running the container
+ENTRYPOINT [ "/bin/bash", "-c", "-l" ]
diff --git a/src/arrow/ci/docker/debian-10-cpp.dockerfile b/src/arrow/ci/docker/debian-10-cpp.dockerfile
new file mode 100644
index 000000000..16e867fc3
--- /dev/null
+++ b/src/arrow/ci/docker/debian-10-cpp.dockerfile
@@ -0,0 +1,106 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+ARG arch=amd64
+FROM ${arch}/debian:10
+ARG arch
+
+ENV DEBIAN_FRONTEND noninteractive
+
+RUN \
+ echo "deb http://deb.debian.org/debian buster-backports main" > \
+ /etc/apt/sources.list.d/backports.list
+
+ARG llvm
+RUN apt-get update -y -q && \
+ apt-get install -y -q --no-install-recommends \
+ apt-transport-https \
+ ca-certificates \
+ gnupg \
+ wget && \
+ wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add - && \
+ echo "deb https://apt.llvm.org/buster/ llvm-toolchain-buster-${llvm} main" > \
+ /etc/apt/sources.list.d/llvm.list && \
+ apt-get update -y -q && \
+ apt-get install -y -q --no-install-recommends \
+ autoconf \
+ ccache \
+ clang-${llvm} \
+ cmake \
+ g++ \
+ gcc \
+ gdb \
+ git \
+ libbenchmark-dev \
+ libboost-all-dev \
+ libbrotli-dev \
+ libbz2-dev \
+ libc-ares-dev \
+ libcurl4-openssl-dev \
+ libgflags-dev \
+ libgmock-dev \
+ libgoogle-glog-dev \
+ liblz4-dev \
+ libre2-dev \
+ libsnappy-dev \
+ libssl-dev \
+ libthrift-dev \
+ libutf8proc-dev \
+ llvm-${llvm}-dev \
+ make \
+ ninja-build \
+ pkg-config \
+ protobuf-compiler \
+ python3-pip \
+ rapidjson-dev \
+ tzdata \
+ zlib1g-dev && \
+ apt-get clean && \
+ rm -rf /var/lib/apt/lists/*
+
+COPY ci/scripts/install_minio.sh /arrow/ci/scripts/
+RUN /arrow/ci/scripts/install_minio.sh ${arch} linux latest /usr/local
+COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts/
+RUN /arrow/ci/scripts/install_gcs_testbench.sh ${arch} default
+
+ENV ARROW_BUILD_TESTS=ON \
+ ARROW_DATASET=ON \
+ ARROW_DEPENDENCY_SOURCE=SYSTEM \
+ ARROW_FLIGHT=ON \
+ ARROW_GANDIVA=ON \
+ ARROW_HOME=/usr/local \
+ ARROW_ORC=ON \
+ ARROW_PARQUET=ON \
+ ARROW_PLASMA=ON \
+ ARROW_S3=ON \
+ ARROW_USE_CCACHE=ON \
+ ARROW_WITH_BROTLI=ON \
+ ARROW_WITH_BZ2=ON \
+ ARROW_WITH_LZ4=ON \
+ ARROW_WITH_SNAPPY=ON \
+ ARROW_WITH_ZLIB=ON \
+ ARROW_WITH_ZSTD=ON \
+ AWSSDK_SOURCE=BUNDLED \
+ cares_SOURCE=BUNDLED \
+ CC=gcc \
+ CXX=g++ \
+ gRPC_SOURCE=BUNDLED \
+ GTest_SOURCE=BUNDLED \
+ ORC_SOURCE=BUNDLED \
+ PATH=/usr/lib/ccache/:$PATH \
+ Protobuf_SOURCE=BUNDLED \
+ zstd_SOURCE=BUNDLED
diff --git a/src/arrow/ci/docker/debian-10-go-cgo-python.dockerfile b/src/arrow/ci/docker/debian-10-go-cgo-python.dockerfile
new file mode 100644
index 000000000..46455a42b
--- /dev/null
+++ b/src/arrow/ci/docker/debian-10-go-cgo-python.dockerfile
@@ -0,0 +1,36 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+ARG base
+FROM ${base}
+
+ENV DEBIAN_FRONTEND=noninteractive
+
+# Install python3 and pip so that pyarrow can be installed to test the C data interface.
+RUN apt-get update -y -q && \
+ apt-get install -y -q --no-install-recommends \
+ python3 \
+ python3-pip && \
+ apt-get clean && rm -rf /var/lib/apt/lists/*
+
+RUN ln -s /usr/bin/python3 /usr/local/bin/python && \
+ ln -s /usr/bin/pip3 /usr/local/bin/pip
+
+# Debian's packaged pip is too old to install manylinux201x wheels; upgrade it first.
+RUN pip install -U pip
+
+RUN pip install pyarrow cffi --only-binary pyarrow
diff --git a/src/arrow/ci/docker/debian-10-go.dockerfile b/src/arrow/ci/docker/debian-10-go.dockerfile
new file mode 100644
index 000000000..3a24b8afe
--- /dev/null
+++ b/src/arrow/ci/docker/debian-10-go.dockerfile
@@ -0,0 +1,26 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+ARG arch=amd64
+ARG go=1.15
+FROM ${arch}/golang:${go}-buster
+
+
+# TODO(kszucs):
+# 1. add the files required to install the dependencies to .dockerignore
+# 2. copy these files to their appropriate path
+# 3. download and compile the dependencies
diff --git a/src/arrow/ci/docker/debian-10-js.dockerfile b/src/arrow/ci/docker/debian-10-js.dockerfile
new file mode 100644
index 000000000..5bb31f2e3
--- /dev/null
+++ b/src/arrow/ci/docker/debian-10-js.dockerfile
@@ -0,0 +1,27 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+ARG arch=amd64
+ARG node=14
+FROM ${arch}/node:${node}
+
+ENV NODE_NO_WARNINGS=1
+
+# TODO(kszucs):
+# 1. add the files required to install the dependencies to .dockerignore
+# 2. copy these files to their appropriate path
+# 3. download and compile the dependencies
diff --git a/src/arrow/ci/docker/debian-11-cpp.dockerfile b/src/arrow/ci/docker/debian-11-cpp.dockerfile
new file mode 100644
index 000000000..659881b0c
--- /dev/null
+++ b/src/arrow/ci/docker/debian-11-cpp.dockerfile
@@ -0,0 +1,100 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+ARG arch=amd64
+FROM ${arch}/debian:11
+ARG arch
+
+ENV DEBIAN_FRONTEND=noninteractive
+
+ARG llvm
+RUN apt-get update -y -q && \
+ apt-get install -y -q --no-install-recommends \
+ apt-transport-https \
+ ca-certificates \
+ gnupg \
+ wget && \
+ wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add - && \
+ echo "deb https://apt.llvm.org/bullseye/ llvm-toolchain-bullseye-${llvm} main" > \
+ /etc/apt/sources.list.d/llvm.list && \
+ apt-get update -y -q && \
+ apt-get install -y -q --no-install-recommends \
+ autoconf \
+ ccache \
+ clang-${llvm} \
+ cmake \
+ g++ \
+ gcc \
+ gdb \
+ git \
+ libbenchmark-dev \
+ libboost-all-dev \
+ libbrotli-dev \
+ libbz2-dev \
+ libc-ares-dev \
+ libcurl4-openssl-dev \
+ libgflags-dev \
+ libgmock-dev \
+ libgoogle-glog-dev \
+ libgrpc++-dev \
+ liblz4-dev \
+ libre2-dev \
+ libsnappy-dev \
+ libssl-dev \
+ libthrift-dev \
+ libutf8proc-dev \
+ libzstd-dev \
+ llvm-${llvm}-dev \
+ make \
+ ninja-build \
+ pkg-config \
+ protobuf-compiler-grpc \
+ python3-pip \
+ rapidjson-dev \
+ tzdata \
+ zlib1g-dev && \
+ apt-get clean && \
+ rm -rf /var/lib/apt/lists/*
+
+COPY ci/scripts/install_minio.sh /arrow/ci/scripts/
+RUN /arrow/ci/scripts/install_minio.sh ${arch} linux latest /usr/local
+COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts/
+RUN /arrow/ci/scripts/install_gcs_testbench.sh ${arch} default
+
+ENV ARROW_BUILD_TESTS=ON \
+ ARROW_DATASET=ON \
+ ARROW_DEPENDENCY_SOURCE=SYSTEM \
+ ARROW_FLIGHT=ON \
+ ARROW_GANDIVA=ON \
+ ARROW_HOME=/usr/local \
+ ARROW_ORC=ON \
+ ARROW_PARQUET=ON \
+ ARROW_PLASMA=ON \
+ ARROW_S3=ON \
+ ARROW_USE_CCACHE=ON \
+ ARROW_WITH_BROTLI=ON \
+ ARROW_WITH_BZ2=ON \
+ ARROW_WITH_LZ4=ON \
+ ARROW_WITH_SNAPPY=ON \
+ ARROW_WITH_ZLIB=ON \
+ ARROW_WITH_ZSTD=ON \
+ AWSSDK_SOURCE=BUNDLED \
+ CC=gcc \
+ CXX=g++ \
+ ORC_SOURCE=BUNDLED \
+ PATH=/usr/lib/ccache/:$PATH \
+ Protobuf_SOURCE=BUNDLED
diff --git a/src/arrow/ci/docker/debian-11-go-cgo-python.dockerfile b/src/arrow/ci/docker/debian-11-go-cgo-python.dockerfile
new file mode 100644
index 000000000..46455a42b
--- /dev/null
+++ b/src/arrow/ci/docker/debian-11-go-cgo-python.dockerfile
@@ -0,0 +1,36 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+ARG base
+FROM ${base}
+
+ENV DEBIAN_FRONTEND=noninteractive
+
+# Install python3 and pip so that pyarrow can be installed to test the C data interface.
+RUN apt-get update -y -q && \
+ apt-get install -y -q --no-install-recommends \
+ python3 \
+ python3-pip && \
+ apt-get clean && rm -rf /var/lib/apt/lists/*
+
+RUN ln -s /usr/bin/python3 /usr/local/bin/python && \
+ ln -s /usr/bin/pip3 /usr/local/bin/pip
+
+# Debian's packaged pip is too old to install manylinux201x wheels; upgrade it first.
+RUN pip install -U pip
+
+RUN pip install pyarrow cffi --only-binary pyarrow
diff --git a/src/arrow/ci/docker/debian-11-go.dockerfile b/src/arrow/ci/docker/debian-11-go.dockerfile
new file mode 100644
index 000000000..3a24b8afe
--- /dev/null
+++ b/src/arrow/ci/docker/debian-11-go.dockerfile
@@ -0,0 +1,26 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+ARG arch=amd64
+ARG go=1.15
+FROM ${arch}/golang:${go}-bullseye
+
+
+# TODO(kszucs):
+# 1. add the files required to install the dependencies to .dockerignore
+# 2. copy these files to their appropriate path
+# 3. download and compile the dependencies
diff --git a/src/arrow/ci/docker/debian-11-js.dockerfile b/src/arrow/ci/docker/debian-11-js.dockerfile
new file mode 100644
index 000000000..5bb31f2e3
--- /dev/null
+++ b/src/arrow/ci/docker/debian-11-js.dockerfile
@@ -0,0 +1,27 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+ARG arch=amd64
+ARG node=14
+FROM ${arch}/node:${node}
+
+ENV NODE_NO_WARNINGS=1
+
+# TODO(kszucs):
+# 1. add the files required to install the dependencies to .dockerignore
+# 2. copy these files to their appropriate path
+# 3. download and compile the dependencies
diff --git a/src/arrow/ci/docker/debian-9-java.dockerfile b/src/arrow/ci/docker/debian-9-java.dockerfile
new file mode 100644
index 000000000..2cc36e3ea
--- /dev/null
+++ b/src/arrow/ci/docker/debian-9-java.dockerfile
@@ -0,0 +1,28 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+ARG arch=amd64
+ARG jdk=8
+ARG maven=3.5.4
+FROM ${arch}/maven:${maven}-jdk-${jdk}
+
+ENV ARROW_JAVA_SHADE_FLATBUFS=ON
+
+# TODO(kszucs):
+# 1. add the files required to install the dependencies to .dockerignore
+# 2. copy these files to their appropriate path
+# 3. download and compile the dependencies
diff --git a/src/arrow/ci/docker/debian-go-cgo.dockerfile b/src/arrow/ci/docker/debian-go-cgo.dockerfile
new file mode 100644
index 000000000..a494d1e15
--- /dev/null
+++ b/src/arrow/ci/docker/debian-go-cgo.dockerfile
@@ -0,0 +1,32 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+ARG base
+FROM ${base}
+
+ENV DEBIAN_FRONTEND=noninteractive
+
+# Register the Apache Arrow APT source, then install libarrow-dev to link against with CGO.
+RUN apt-get update -y -q && \
+ apt-get install -y -q --no-install-recommends ca-certificates lsb-release wget && \
+ wget https://apache.jfrog.io/artifactory/arrow/$(lsb_release --id --short | tr 'A-Z' 'a-z')/apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb && \
+ apt-get install -y -q --no-install-recommends ./apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb && \
+ apt-get update -y -q && \
+ apt-get install -y -q --no-install-recommends \
+ cmake \
+ libarrow-dev && \
+ apt-get clean && rm -rf /var/lib/apt/lists/*
diff --git a/src/arrow/ci/docker/fedora-33-cpp.dockerfile b/src/arrow/ci/docker/fedora-33-cpp.dockerfile
new file mode 100644
index 000000000..61964a476
--- /dev/null
+++ b/src/arrow/ci/docker/fedora-33-cpp.dockerfile
@@ -0,0 +1,94 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+ARG arch
+FROM ${arch}/fedora:33
+ARG arch
+
+# install dependencies
+RUN dnf update -y && \
+ dnf install -y \
+ autoconf \
+ boost-devel \
+ brotli-devel \
+ bzip2-devel \
+ c-ares-devel \
+ ccache \
+ clang-devel \
+ cmake \
+ curl-devel \
+ flatbuffers-devel \
+ gcc \
+ gcc-c++ \
+ gflags-devel \
+ git \
+ glog-devel \
+ gmock-devel \
+ google-benchmark-devel \
+ grpc-devel \
+ grpc-plugins \
+ gtest-devel \
+ java-latest-openjdk-devel \
+ java-latest-openjdk-headless \
+ libzstd-devel \
+ llvm-devel \
+ llvm-static \
+ lz4-devel \
+ make \
+ ninja-build \
+ openssl-devel \
+ protobuf-devel \
+ python \
+ python-pip \
+ rapidjson-devel \
+ re2-devel \
+ snappy-devel \
+ thrift-devel \
+ utf8proc-devel \
+ wget \
+ which \
+ zlib-devel
+
+COPY ci/scripts/install_minio.sh /arrow/ci/scripts/
+RUN /arrow/ci/scripts/install_minio.sh ${arch} linux latest /usr/local
+COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts/
+RUN /arrow/ci/scripts/install_gcs_testbench.sh ${arch} default
+
+ENV ARROW_BUILD_TESTS=ON \
+ ARROW_DEPENDENCY_SOURCE=SYSTEM \
+ ARROW_DATASET=ON \
+ ARROW_FLIGHT=ON \
+ ARROW_GANDIVA_JAVA=ON \
+ ARROW_GANDIVA=ON \
+ ARROW_HOME=/usr/local \
+ ARROW_ORC=ON \
+ ARROW_PARQUET=ON \
+ ARROW_S3=ON \
+ ARROW_USE_CCACHE=ON \
+ ARROW_WITH_BROTLI=ON \
+ ARROW_WITH_BZ2=ON \
+ ARROW_WITH_LZ4=ON \
+ ARROW_WITH_SNAPPY=ON \
+ ARROW_WITH_ZLIB=ON \
+ ARROW_WITH_ZSTD=ON \
+ AWSSDK_SOURCE=BUNDLED \
+ CC=gcc \
+ CXX=g++ \
+ ORC_SOURCE=BUNDLED \
+ PARQUET_BUILD_EXECUTABLES=ON \
+ PARQUET_BUILD_EXAMPLES=ON \
+ PATH=/usr/lib/ccache/:$PATH
diff --git a/src/arrow/ci/docker/java-jni-manylinux-201x.dockerfile b/src/arrow/ci/docker/java-jni-manylinux-201x.dockerfile
new file mode 100644
index 000000000..021dab686
--- /dev/null
+++ b/src/arrow/ci/docker/java-jni-manylinux-201x.dockerfile
@@ -0,0 +1,37 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+ARG base
+FROM ${base}
+
+# Install the libraries required by Gandiva to run
+RUN vcpkg install --clean-after-build \
+ llvm \
+ boost-system \
+ boost-date-time \
+ boost-regex \
+ boost-predef \
+ boost-algorithm \
+ boost-locale \
+ boost-format \
+ boost-variant \
+ boost-multiprecision
+
+# Install Java
+ARG java=1.8.0
+RUN yum install -y java-$java-openjdk-devel && yum clean all
+ENV JAVA_HOME=/usr/lib/jvm/java-$java-openjdk/
diff --git a/src/arrow/ci/docker/linux-apt-c-glib.dockerfile b/src/arrow/ci/docker/linux-apt-c-glib.dockerfile
new file mode 100644
index 000000000..12c6e23a0
--- /dev/null
+++ b/src/arrow/ci/docker/linux-apt-c-glib.dockerfile
@@ -0,0 +1,65 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+ARG base
+FROM ${base}
+
+RUN apt-get update -y -q && \
+ apt-get install -y -q \
+ python3 \
+ python3-pip \
+ gtk-doc-tools \
+ libgirepository1.0-dev \
+ libglib2.0-doc \
+ lsb-release \
+ luarocks \
+ pkg-config \
+ ruby-dev && \
+ if [ "$(lsb_release --codename --short)" = "xenial" ]; then \
+ apt-get install -y -q --no-install-recommends -t xenial-backports \
+ ninja-build; \
+ fi && \
+ apt-get clean && \
+ rm -rf /var/lib/apt/lists/*
+
+RUN luarocks install lgi
+
+# pip on Ubuntu 20.04 may be buggy:
+#
+# Collecting meson
+# Downloading meson-0.53.2.tar.gz (1.6 MB)
+# Installing build dependencies: started
+# Installing build dependencies: finished with status 'done'
+# Getting requirements to build wheel: started
+# Getting requirements to build wheel: finished with status 'error'
+# ERROR: Command errored out with exit status 1:
+# command: /usr/bin/python3 /usr/share/python-wheels/pep517-0.7.0-py2.py3-none-any.whl/pep517/_in_process.py get_requires_for_build_wheel /tmp/tmpsk4jveay
+# cwd: /tmp/pip-install-jn79a_kh/meson
+# Complete output (1 lines):
+# /usr/bin/python3: can't find '__main__' module in '/usr/share/python-wheels/pep517-0.7.0-py2.py3-none-any.whl/pep517/_in_process.py'
+# ----------------------------------------
+# ERROR: Command errored out with exit status 1: /usr/bin/python3 /usr/share/python-wheels/pep517-0.7.0-py2.py3-none-any.whl/pep517/_in_process.py get_requires_for_build_wheel /tmp/tmpsk4jveay Check the logs for full command output.
+RUN (python3 -m pip install meson || \
+ python3 -m pip install --no-use-pep517 meson) && \
+ gem install --no-document bundler
+
+COPY c_glib/Gemfile /arrow/c_glib/
+RUN bundle install --gemfile /arrow/c_glib/Gemfile
+
+ENV ARROW_BUILD_TESTS=OFF \
+ ARROW_BUILD_UTILITIES=OFF \
+ ARROW_INSTALL_NAME_RPATH=OFF
diff --git a/src/arrow/ci/docker/linux-apt-docs.dockerfile b/src/arrow/ci/docker/linux-apt-docs.dockerfile
new file mode 100644
index 000000000..12c797f96
--- /dev/null
+++ b/src/arrow/ci/docker/linux-apt-docs.dockerfile
@@ -0,0 +1,110 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+ARG base
+FROM ${base}
+
+ARG r=4.1
+ARG jdk=8
+
+# See R install instructions at https://cloud.r-project.org/bin/linux/ubuntu/
+RUN apt-get update -y && \
+ apt-get install -y \
+ dirmngr \
+ apt-transport-https \
+ software-properties-common && \
+ apt-key adv \
+ --keyserver keyserver.ubuntu.com \
+ --recv-keys E298A3A825C0D65DFD57CBB651716619E084DAB9 && \
+ add-apt-repository 'deb https://cloud.r-project.org/bin/linux/ubuntu '$(lsb_release -cs)'-cran40/' && \
+ apt-get install -y --no-install-recommends \
+ autoconf-archive \
+ automake \
+ curl \
+ doxygen \
+ gobject-introspection \
+ gtk-doc-tools \
+ libcurl4-openssl-dev \
+ libfontconfig1-dev \
+ libfribidi-dev \
+ libgirepository1.0-dev \
+ libglib2.0-doc \
+ libharfbuzz-dev \
+ libtiff-dev \
+ libtool \
+ libxml2-dev \
+ ninja-build \
+ nvidia-cuda-toolkit \
+ openjdk-${jdk}-jdk-headless \
+ pandoc \
+ r-recommended=${r}* \
+ r-base=${r}* \
+ rsync \
+ ruby-dev \
+ wget && \
+ apt-get clean && \
+ rm -rf /var/lib/apt/lists/*
+
+ENV JAVA_HOME=/usr/lib/jvm/java-${jdk}-openjdk-amd64
+
+ARG maven=3.5.4
+COPY ci/scripts/util_download_apache.sh /arrow/ci/scripts/
+RUN /arrow/ci/scripts/util_download_apache.sh \
+ "maven/maven-3/${maven}/binaries/apache-maven-${maven}-bin.tar.gz" /opt
+ENV PATH=/opt/apache-maven-${maven}/bin:$PATH
+RUN mvn -version
+
+ARG node=14
+RUN wget -q -O - https://deb.nodesource.com/setup_${node}.x | bash - && \
+ apt-get install -y nodejs && \
+ apt-get clean && \
+ rm -rf /var/lib/apt/lists/* && \
+ npm install -g yarn
+
+# ARROW-13353: breathe >= 4.29.1 tries to parse template arguments,
+# but Sphinx can't parse constructs like `typename...`.
+RUN pip install \
+ meson \
+ breathe==4.29.0 \
+ ipython \
+ sphinx \
+ pydata-sphinx-theme
+
+COPY c_glib/Gemfile /arrow/c_glib/
+RUN gem install --no-document bundler && \
+ bundle install --gemfile /arrow/c_glib/Gemfile
+
+# Ensure parallel R package installation, set CRAN repo mirror,
+# and use pre-built binaries where possible
+COPY ci/etc/rprofile /arrow/ci/etc/
+RUN cat /arrow/ci/etc/rprofile >> $(R RHOME)/etc/Rprofile.site
+# Also ensure parallel compilation of C/C++ code
+RUN echo "MAKEFLAGS=-j$(R -s -e 'cat(parallel::detectCores())')" >> $(R RHOME)/etc/Renviron.site
+
+COPY ci/scripts/r_deps.sh /arrow/ci/scripts/
+COPY r/DESCRIPTION /arrow/r/
+RUN /arrow/ci/scripts/r_deps.sh /arrow && \
+ R -e "install.packages('pkgdown')"
+
+ENV ARROW_FLIGHT=ON \
+ ARROW_PYTHON=ON \
+ ARROW_S3=ON \
+ ARROW_BUILD_STATIC=OFF \
+ ARROW_BUILD_TESTS=OFF \
+ ARROW_BUILD_UTILITIES=OFF \
+ ARROW_USE_GLOG=OFF \
+ CMAKE_UNITY_BUILD=ON
diff --git a/src/arrow/ci/docker/linux-apt-jni.dockerfile b/src/arrow/ci/docker/linux-apt-jni.dockerfile
new file mode 100644
index 000000000..ddfa72e17
--- /dev/null
+++ b/src/arrow/ci/docker/linux-apt-jni.dockerfile
@@ -0,0 +1,87 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+ARG base
+FROM ${base}
+
+# pipefail is enabled for proper error detection in the `wget | apt-key add`
+# step
+SHELL ["/bin/bash", "-o", "pipefail", "-c"]
+
+ENV DEBIAN_FRONTEND=noninteractive
+
+ARG llvm
+RUN apt-get update -y -q && \
+ apt-get install -y -q --no-install-recommends \
+ apt-transport-https \
+ lsb-release \
+ software-properties-common \
+ wget && \
+ code_name=$(lsb_release --codename --short) && \
+ wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add - && \
+ apt-add-repository -y \
+ "deb https://apt.llvm.org/${code_name}/ llvm-toolchain-${code_name}-${llvm} main" && \
+ apt-get update -y -q && \
+ apt-get install -y -q --no-install-recommends \
+ ca-certificates \
+ ccache \
+ clang-${llvm} \
+ cmake \
+ git \
+ g++ \
+ gcc \
+ libboost-all-dev \
+ libgflags-dev \
+ libgoogle-glog-dev \
+ libgtest-dev \
+ liblz4-dev \
+ libre2-dev \
+ libsnappy-dev \
+ libssl-dev \
+ llvm-${llvm}-dev \
+ make \
+ ninja-build \
+ pkg-config \
+ protobuf-compiler \
+ rapidjson-dev \
+ tzdata \
+ zlib1g-dev && \
+ apt-get clean && \
+ rm -rf /var/lib/apt/lists/*
+
+ARG cmake=3.11.4
+RUN wget -nv -O - https://github.com/Kitware/CMake/releases/download/v${cmake}/cmake-${cmake}-Linux-x86_64.tar.gz | tar -xzf - -C /opt
+ENV PATH=/opt/cmake-${cmake}-Linux-x86_64/bin:$PATH
+
+ENV ARROW_BUILD_TESTS=OFF \
+ ARROW_DATASET=ON \
+ ARROW_FLIGHT=OFF \
+ ARROW_GANDIVA_JAVA=ON \
+ ARROW_GANDIVA=ON \
+ ARROW_HOME=/usr/local \
+ ARROW_JAVA_CDATA=ON \
+ ARROW_JNI=ON \
+ ARROW_ORC=ON \
+ ARROW_PARQUET=ON \
+ ARROW_PLASMA_JAVA_CLIENT=ON \
+ ARROW_PLASMA=ON \
+ ARROW_USE_CCACHE=ON \
+ CC=gcc \
+ CXX=g++ \
+ ORC_SOURCE=BUNDLED \
+ PATH=/usr/lib/ccache/:$PATH \
+ Protobuf_SOURCE=BUNDLED
diff --git a/src/arrow/ci/docker/linux-apt-lint.dockerfile b/src/arrow/ci/docker/linux-apt-lint.dockerfile
new file mode 100644
index 000000000..84de6b05f
--- /dev/null
+++ b/src/arrow/ci/docker/linux-apt-lint.dockerfile
@@ -0,0 +1,95 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+ARG base
+FROM hadolint/hadolint:v1.17.2 AS hadolint
+FROM ${base}
+
+ARG clang_tools
+RUN apt-get update && \
+ apt-get install -y -q \
+ clang-${clang_tools} \
+ clang-format-${clang_tools} \
+ clang-tidy-${clang_tools} \
+ clang-tools-${clang_tools} \
+ cmake \
+ curl \
+ libclang-${clang_tools}-dev \
+ llvm-${clang_tools}-dev \
+ openjdk-11-jdk-headless \
+ python3 \
+ python3-dev \
+ python3-pip \
+ ruby \
+ apt-transport-https \
+ software-properties-common \
+ && apt-get clean \
+ && rm -rf /var/lib/apt/lists/*
+
+ARG r=4.1
+RUN apt-key adv \
+ --keyserver keyserver.ubuntu.com \
+ --recv-keys E298A3A825C0D65DFD57CBB651716619E084DAB9 && \
+ # NOTE: R 3.5 and 3.6 are available in the repos with -cran35 suffix
+ # for trusty, xenial, bionic, and eoan (as of May 2020)
+ # -cran40 has 4.0 versions for bionic and focal
+ # R 3.2, 3.3, 3.4 are available without the suffix but only for trusty and xenial
+ # TODO: make sure OS version and R version are valid together and conditionally set repo suffix
+ # This is a hack to turn 3.6 into 35, and 4.0/4.1 into 40:
+ add-apt-repository 'deb https://cloud.r-project.org/bin/linux/ubuntu '$(lsb_release -cs)'-cran'$(echo "${r}" | tr -d . | tr 6 5 | tr 1 0)'/' && \
+ apt-get install -y \
+ r-base=${r}* \
+ r-recommended=${r}* \
+ libxml2-dev
+
+# Ensure parallel R package installation, set CRAN repo mirror,
+# and use pre-built binaries where possible
+COPY ci/etc/rprofile /arrow/ci/etc/
+RUN cat /arrow/ci/etc/rprofile >> $(R RHOME)/etc/Rprofile.site
+# Also ensure parallel compilation of C/C++ code
+RUN echo "MAKEFLAGS=-j$(R -s -e 'cat(parallel::detectCores())')" >> $(R RHOME)/etc/Renviron.site
+
+
+COPY ci/scripts/r_deps.sh /arrow/ci/scripts/
+COPY r/DESCRIPTION /arrow/r/
+# We need to install Arrow's dependencies in order for lintr's namespace searching to work.
+# This could be removed if lintr no longer loads the dependency namespaces (see issues/PRs below)
+RUN /arrow/ci/scripts/r_deps.sh /arrow
+# This fork has a number of changes that have PRs and Issues to resolve upstream:
+# https://github.com/jimhester/lintr/pull/843
+# https://github.com/jimhester/lintr/pull/841
+# https://github.com/jimhester/lintr/pull/845
+# https://github.com/jimhester/lintr/issues/842
+# https://github.com/jimhester/lintr/issues/846
+RUN R -e "remotes::install_github('jonkeane/lintr@arrow-branch')"
+
+# Docker linter
+COPY --from=hadolint /bin/hadolint /usr/bin/hadolint
+
+# Install IWYU (include-what-you-use)
+COPY ci/scripts/install_iwyu.sh /arrow/ci/scripts/
+RUN /arrow/ci/scripts/install_iwyu.sh /tmp/iwyu /usr/local ${clang_tools}
+
+# Use python3 by default in scripts
+RUN ln -s /usr/bin/python3 /usr/local/bin/python && \
+ ln -s /usr/bin/pip3 /usr/local/bin/pip
+
+COPY dev/archery/setup.py /arrow/dev/archery/
+RUN pip install -e "/arrow/dev/archery[lint]"
+
+ENV LC_ALL=C.UTF-8 \
+ LANG=C.UTF-8
diff --git a/src/arrow/ci/docker/linux-apt-python-3.dockerfile b/src/arrow/ci/docker/linux-apt-python-3.dockerfile
new file mode 100644
index 000000000..753ba0d3a
--- /dev/null
+++ b/src/arrow/ci/docker/linux-apt-python-3.dockerfile
@@ -0,0 +1,46 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+ARG base
+FROM ${base}
+
+RUN apt-get update -y -q && \
+ apt-get install -y -q \
+ python3 \
+ python3-pip \
+ python3-dev && \
+ apt-get clean && \
+ rm -rf /var/lib/apt/lists/*
+
+RUN ln -s /usr/bin/python3 /usr/local/bin/python && \
+ ln -s /usr/bin/pip3 /usr/local/bin/pip
+
+RUN pip install -U pip setuptools
+
+COPY python/requirements-build.txt \
+ python/requirements-test.txt \
+ /arrow/python/
+
+# Install from the absolute paths the requirement files were copied to, so the
+# step does not depend on the base image's working directory.
+RUN pip install \
+ -r /arrow/python/requirements-build.txt \
+ -r /arrow/python/requirements-test.txt
+
+# Environment defaults baked into the image. The last entry must not end with a
+# line continuation, otherwise anything appended to the file would be folded
+# into this ENV instruction.
+ENV ARROW_PYTHON=ON \
+ ARROW_BUILD_STATIC=OFF \
+ ARROW_BUILD_TESTS=OFF \
+ ARROW_BUILD_UTILITIES=OFF \
+ ARROW_USE_GLOG=OFF
diff --git a/src/arrow/ci/docker/linux-apt-r.dockerfile b/src/arrow/ci/docker/linux-apt-r.dockerfile
new file mode 100644
index 000000000..6d33d1800
--- /dev/null
+++ b/src/arrow/ci/docker/linux-apt-r.dockerfile
@@ -0,0 +1,114 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+ARG base
+FROM ${base}
+ARG arch
+
+ARG tz="UTC"
+ENV TZ=${tz}
+
+# Build R
+# [1] https://www.digitalocean.com/community/tutorials/how-to-install-r-on-ubuntu-18-04
+# [2] https://linuxize.com/post/how-to-install-r-on-ubuntu-18-04/#installing-r-packages-from-cran
+ARG r=3.6
+RUN apt-get update -y && \
+ apt-get install -y \
+ dirmngr \
+ apt-transport-https \
+ software-properties-common && \
+ apt-key adv \
+ --keyserver keyserver.ubuntu.com \
+ --recv-keys E298A3A825C0D65DFD57CBB651716619E084DAB9 && \
+ # NOTE: R 3.5 and 3.6 are available in the repos with -cran35 suffix
+ # for trusty, xenial, bionic, and eoan (as of May 2020)
+ # -cran40 has 4.0 versions for bionic and focal
+ # R 3.2, 3.3, 3.4 are available without the suffix but only for trusty and xenial
+ # TODO: make sure OS version and R version are valid together and conditionally set repo suffix
+ # This is a hack to turn 3.6 into 35, and 4.0/4.1 into 40:
+ add-apt-repository 'deb https://cloud.r-project.org/bin/linux/ubuntu '$(lsb_release -cs)'-cran'$(echo "${r}" | tr -d . | tr 6 5 | tr 1 0)'/' && \
+ apt-get install -y \
+ r-base=${r}* \
+ r-recommended=${r}* \
+ # system libs needed by core R packages
+ libxml2-dev \
+ libgit2-dev \
+ libssl-dev \
+ # install clang to mirror what was done on Travis
+ clang \
+ clang-format \
+ clang-tidy \
+ # R CMD CHECK --as-cran needs pdflatex to build the package manual
+ texlive-latex-base \
+ # Need locales so we can set UTF-8
+ locales \
+ # Need Python to check py-to-r bridge
+ python3 \
+ python3-pip \
+ python3-dev && \
+ locale-gen en_US.UTF-8 && \
+ apt-get clean && \
+ rm -rf /var/lib/apt/lists/*
+
+# Optionally select a specific GCC version as the default compiler. When the
+# gcc_version build argument is non-empty, gcc-<version>/g++-<version> are
+# registered as the gcc/g++ alternatives, and cc/c++ are pointed at gcc/g++.
+# With the default empty value this step does nothing.
+ARG gcc_version=""
+RUN if [ "${gcc_version}" != "" ]; then \
+ update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-${gcc_version} 100 && \
+ update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-${gcc_version} 100 && \
+ update-alternatives --install /usr/bin/cc cc /usr/bin/gcc 30 && \
+ update-alternatives --set cc /usr/bin/gcc && \
+ update-alternatives --install /usr/bin/c++ c++ /usr/bin/g++ 30 && \
+ update-alternatives --set c++ /usr/bin/g++; \
+ fi
+
+# Ensure parallel R package installation, set CRAN repo mirror,
+# and use pre-built binaries where possible
+COPY ci/etc/rprofile /arrow/ci/etc/
+RUN cat /arrow/ci/etc/rprofile >> $(R RHOME)/etc/Rprofile.site
+# Also ensure parallel compilation of C/C++ code
+RUN echo "MAKEFLAGS=-j$(R -s -e 'cat(parallel::detectCores())')" >> $(R RHOME)/etc/Renviron.site
+
+COPY ci/scripts/r_deps.sh /arrow/ci/scripts/
+COPY r/DESCRIPTION /arrow/r/
+RUN /arrow/ci/scripts/r_deps.sh /arrow
+
+COPY ci/scripts/install_minio.sh /arrow/ci/scripts/
+RUN /arrow/ci/scripts/install_minio.sh ${arch} linux latest /usr/local
+COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts/
+RUN /arrow/ci/scripts/install_gcs_testbench.sh ${arch} default
+
+# Set up Python 3 and its dependencies
+RUN ln -s /usr/bin/python3 /usr/local/bin/python && \
+ ln -s /usr/bin/pip3 /usr/local/bin/pip
+
+COPY python/requirements-build.txt /arrow/python/
+# Install from the absolute path used by the COPY above so the step does not
+# depend on the base image's working directory.
+RUN pip install -r /arrow/python/requirements-build.txt
+
+ENV \
+ ARROW_BUILD_STATIC=OFF \
+ ARROW_BUILD_TESTS=OFF \
+ ARROW_BUILD_UTILITIES=OFF \
+ ARROW_FLIGHT=OFF \
+ ARROW_GANDIVA=OFF \
+ ARROW_NO_DEPRECATED_API=ON \
+ ARROW_ORC=OFF \
+ ARROW_PARQUET=ON \
+ ARROW_PLASMA=OFF \
+ ARROW_PYTHON=ON \
+ ARROW_S3=ON \
+ ARROW_USE_CCACHE=ON \
+ ARROW_USE_GLOG=OFF \
+ LC_ALL=en_US.UTF-8
diff --git a/src/arrow/ci/docker/linux-apt-ruby.dockerfile b/src/arrow/ci/docker/linux-apt-ruby.dockerfile
new file mode 100644
index 000000000..58fd65bd5
--- /dev/null
+++ b/src/arrow/ci/docker/linux-apt-ruby.dockerfile
@@ -0,0 +1,27 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# depends on a C GLib image
+ARG base
+FROM ${base}
+
+COPY ruby/ /arrow/ruby/
+RUN bundle install --gemfile /arrow/ruby/Gemfile
+# Install the gems of every sub-package that ships its own Gemfile. Globbing on
+# the Gemfile itself skips plain files that live directly in /arrow/ruby, and
+# the explicit exit aborts the build on the first failed install instead of
+# letting the loop mask it (only the last iteration's status would count).
+RUN \
+ for gemfile in /arrow/ruby/*/Gemfile; do \
+ bundle install --gemfile "${gemfile}" || exit 1; \
+ done
diff --git a/src/arrow/ci/docker/linux-dnf-python-3.dockerfile b/src/arrow/ci/docker/linux-dnf-python-3.dockerfile
new file mode 100644
index 000000000..8c3c5c701
--- /dev/null
+++ b/src/arrow/ci/docker/linux-dnf-python-3.dockerfile
@@ -0,0 +1,41 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+ARG base
+FROM ${base}
+
+RUN dnf install -y \
+ python3 \
+ python3-pip \
+ python3-devel
+
+RUN ln -s /usr/bin/python3 /usr/local/bin/python && \
+ ln -s /usr/bin/pip3 /usr/local/bin/pip
+
+COPY python/requirements-build.txt \
+ python/requirements-test.txt \
+ /arrow/python/
+
+# Install from the absolute paths the requirement files were copied to, so the
+# step does not depend on the base image's working directory.
+RUN pip install \
+ -r /arrow/python/requirements-build.txt \
+ -r /arrow/python/requirements-test.txt
+
+# Environment defaults baked into the image. The last entry must not end with a
+# line continuation, otherwise anything appended to the file would be folded
+# into this ENV instruction.
+ENV ARROW_PYTHON=ON \
+ ARROW_BUILD_STATIC=OFF \
+ ARROW_BUILD_TESTS=OFF \
+ ARROW_BUILD_UTILITIES=OFF \
+ ARROW_USE_GLOG=OFF
diff --git a/src/arrow/ci/docker/linux-r.dockerfile b/src/arrow/ci/docker/linux-r.dockerfile
new file mode 100644
index 000000000..568b90c22
--- /dev/null
+++ b/src/arrow/ci/docker/linux-r.dockerfile
@@ -0,0 +1,48 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# General purpose Dockerfile to take a Docker image containing R
+# and install Arrow R package dependencies
+
+ARG base
+FROM ${base}
+
+ARG r_bin=R
+ENV R_BIN=${r_bin}
+
+ARG r_dev=FALSE
+ENV ARROW_R_DEV=${r_dev}
+
+ARG devtoolset_version=-1
+ENV DEVTOOLSET_VERSION=${devtoolset_version}
+
+ARG tz="UTC"
+ENV TZ=${tz}
+
+# Make sure R is on the path for the R-hub devel versions (where RPREFIX is set in its dockerfile)
+# Written in key=value form; the space-separated "ENV key value" syntax is deprecated.
+ENV PATH="${RPREFIX}/bin:${PATH}"
+
+# Patch up some of the docker images
+COPY ci/scripts/r_docker_configure.sh /arrow/ci/scripts/
+COPY ci/etc/rprofile /arrow/ci/etc/
+COPY ci/scripts/install_minio.sh /arrow/ci/scripts/
+COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts/
+RUN /arrow/ci/scripts/r_docker_configure.sh
+
+COPY ci/scripts/r_deps.sh /arrow/ci/scripts/
+COPY r/DESCRIPTION /arrow/r/
+RUN /arrow/ci/scripts/r_deps.sh /arrow
diff --git a/src/arrow/ci/docker/python-sdist.dockerfile b/src/arrow/ci/docker/python-sdist.dockerfile
new file mode 100644
index 000000000..853b532ab
--- /dev/null
+++ b/src/arrow/ci/docker/python-sdist.dockerfile
@@ -0,0 +1,36 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+FROM amd64/ubuntu:20.04
+
+SHELL ["/bin/bash", "-o", "pipefail", "-c"]
+
+RUN echo "debconf debconf/frontend select Noninteractive" | \
+ debconf-set-selections
+
+RUN apt-get update -y -q && \
+ apt-get install -y -q --no-install-recommends \
+ git \
+ python3-pip && \
+ apt-get clean && \
+ rm -rf /var/lib/apt/lists*
+
+COPY python/requirements-build.txt \
+ /arrow/python/requirements-build.txt
+RUN pip3 install --requirement /arrow/python/requirements-build.txt
+
+ENV PYTHON=/usr/bin/python3
diff --git a/src/arrow/ci/docker/python-wheel-manylinux-201x.dockerfile b/src/arrow/ci/docker/python-wheel-manylinux-201x.dockerfile
new file mode 100644
index 000000000..ae1b0a776
--- /dev/null
+++ b/src/arrow/ci/docker/python-wheel-manylinux-201x.dockerfile
@@ -0,0 +1,112 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+ARG base
+FROM ${base}
+
+ARG arch_alias
+ARG arch_short_alias
+
+RUN yum install -y git flex curl autoconf zip wget
+
+# Install CMake
+ARG cmake=3.19.3
+RUN wget -q https://github.com/Kitware/CMake/releases/download/v${cmake}/cmake-${cmake}-Linux-${arch_alias}.tar.gz -O - | \
+ tar -xzf - --directory /usr/local --strip-components=1
+
+# Install Ninja
+ARG ninja=1.10.2
+RUN mkdir /tmp/ninja && \
+ wget -q https://github.com/ninja-build/ninja/archive/v${ninja}.tar.gz -O - | \
+ tar -xzf - --directory /tmp/ninja --strip-components=1 && \
+ cd /tmp/ninja && \
+ ./configure.py --bootstrap && \
+ mv ninja /usr/local/bin && \
+ rm -rf /tmp/ninja
+
+# Install ccache
+ARG ccache=4.1
+RUN mkdir /tmp/ccache && \
+ wget -q https://github.com/ccache/ccache/archive/v${ccache}.tar.gz -O - | \
+ tar -xzf - --directory /tmp/ccache --strip-components=1 && \
+ cd /tmp/ccache && \
+ mkdir build && \
+ cd build && \
+ cmake -GNinja -DCMAKE_BUILD_TYPE=Release -DZSTD_FROM_INTERNET=ON .. && \
+ ninja install && \
+ rm -rf /tmp/ccache
+
+# Install vcpkg
+ARG vcpkg
+RUN git clone https://github.com/microsoft/vcpkg /opt/vcpkg && \
+ git -C /opt/vcpkg checkout ${vcpkg} && \
+ /opt/vcpkg/bootstrap-vcpkg.sh -useSystemBinaries -disableMetrics && \
+ ln -s /opt/vcpkg/vcpkg /usr/bin/vcpkg
+
+# Patch ports files as needed
+# The destination is absolute so it always matches the /arrow/ci/vcpkg path
+# used by `git apply` below and by VCPKG_OVERLAY_TRIPLETS, regardless of the
+# base image's working directory.
+COPY ci/vcpkg/*.patch \
+ ci/vcpkg/*linux*.cmake \
+ /arrow/ci/vcpkg/
+RUN cd /opt/vcpkg && git apply --ignore-whitespace /arrow/ci/vcpkg/ports.patch
+
+ARG build_type=release
+ENV CMAKE_BUILD_TYPE=${build_type} \
+ VCPKG_FORCE_SYSTEM_BINARIES=1 \
+ VCPKG_OVERLAY_TRIPLETS=/arrow/ci/vcpkg \
+ VCPKG_DEFAULT_TRIPLET=${arch_short_alias}-linux-static-${build_type} \
+ VCPKG_FEATURE_FLAGS=-manifests
+
+# boost-build needs to be installed before the boost packages, otherwise
+# vcpkg will raise an error.
+# TODO(kszucs): factor out the package enumeration to a text file and reuse it
+# from the windows image and potentially in a future macos wheel build
+RUN vcpkg install --clean-after-build \
+ boost-build:${arch_short_alias}-linux && \
+ vcpkg install --clean-after-build \
+ abseil \
+ aws-sdk-cpp[config,cognito-identity,core,identity-management,s3,sts,transfer] \
+ boost-filesystem \
+ brotli \
+ bzip2 \
+ c-ares \
+ curl \
+ flatbuffers \
+ gflags \
+ glog \
+ grpc \
+ lz4 \
+ openssl \
+ orc \
+ protobuf \
+ rapidjson \
+ re2 \
+ snappy \
+ thrift \
+ utf8proc \
+ zlib \
+ zstd
+
+ARG python=3.6
+ENV PYTHON_VERSION=${python}
+# Locate the interpreter directory for the requested version under /opt/python
+# (the dot is stripped, e.g. 3.6 -> cp36-*) and prepend its bin/ to PATH through
+# a profile script. The -name pattern is quoted so that find, not the shell,
+# expands the wildcard.
+RUN PYTHON_ROOT=$(find /opt/python -name "cp${PYTHON_VERSION/./}-*") && \
+ echo "export PATH=$PYTHON_ROOT/bin:\$PATH" >> /etc/profile.d/python.sh
+
+# Run the remaining RUN steps and the container entrypoint through an
+# interactive bash.
+# NOTE(review): presumably this is what gets /etc/profile.d/python.sh sourced so
+# the selected Python ends up on PATH -- confirm against the base image's
+# bash startup files.
+SHELL ["/bin/bash", "-i", "-c"]
+ENTRYPOINT ["/bin/bash", "-i", "-c"]
+
+COPY python/requirements-wheel-build.txt /arrow/python/
+RUN pip install -r /arrow/python/requirements-wheel-build.txt
diff --git a/src/arrow/ci/docker/python-wheel-manylinux-test.dockerfile b/src/arrow/ci/docker/python-wheel-manylinux-test.dockerfile
new file mode 100644
index 000000000..55c27d1d7
--- /dev/null
+++ b/src/arrow/ci/docker/python-wheel-manylinux-test.dockerfile
@@ -0,0 +1,27 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+ARG arch
+ARG python
+FROM ${arch}/python:${python}
+
+# RUN pip install --upgrade pip
+
+# pandas doesn't provide wheel for aarch64 yet, so cache the compiled
+# test dependencies in a docker image
+COPY python/requirements-wheel-test.txt /arrow/python/
+RUN pip install -r /arrow/python/requirements-wheel-test.txt
diff --git a/src/arrow/ci/docker/python-wheel-windows-vs2017.dockerfile b/src/arrow/ci/docker/python-wheel-windows-vs2017.dockerfile
new file mode 100644
index 000000000..9a2afb781
--- /dev/null
+++ b/src/arrow/ci/docker/python-wheel-windows-vs2017.dockerfile
@@ -0,0 +1,105 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# based on mcr.microsoft.com/windows/servercore:ltsc2019
+# contains choco and vs2017 preinstalled
+FROM abrarov/msvc-2017:2.10.0
+
+# Install CMake and Ninja
+RUN choco install --no-progress -r -y cmake --installargs 'ADD_CMAKE_TO_PATH=System' && \
+ choco install --no-progress -r -y gzip wget ninja
+
+# Add unix tools to path
+RUN setx path "%path%;C:\Program Files\Git\usr\bin"
+
+# Install vcpkg
+#
+# Compiling vcpkg itself from a git tag doesn't work anymore since vcpkg has
+# started to ship precompiled binaries for the vcpkg-tool.
+ARG vcpkg
+RUN git clone https://github.com/Microsoft/vcpkg && \
+ vcpkg\bootstrap-vcpkg.bat -disableMetrics && \
+ setx PATH "%PATH%;C:\vcpkg" && \
+ git -C vcpkg checkout %vcpkg%
+
+# Patch ports files as needed
+COPY ci/vcpkg/*.patch \
+ ci/vcpkg/*windows*.cmake \
+ arrow/ci/vcpkg/
+RUN cd vcpkg && git apply --ignore-whitespace C:/arrow/ci/vcpkg/ports.patch
+
+# Configure vcpkg and install dependencies
+# NOTE: use windows batch environment notation for build arguments in RUN
+# statements but bash notation in ENV statements
+# VCPKG_FORCE_SYSTEM_BINARIES=1 saves roughly 750MB of image size if the
+# system's cmake and ninja versions are recent enough
+ARG build_type=release
+ENV CMAKE_BUILD_TYPE=${build_type} \
+ VCPKG_OVERLAY_TRIPLETS=C:\\arrow\\ci\\vcpkg \
+ VCPKG_DEFAULT_TRIPLET=x64-windows-static-md-${build_type} \
+ VCPKG_FEATURE_FLAGS=-manifests
+
+RUN vcpkg install --clean-after-build \
+ abseil \
+ aws-sdk-cpp[config,cognito-identity,core,identity-management,s3,sts,transfer] \
+ boost-filesystem \
+ boost-multiprecision \
+ boost-system \
+ brotli \
+ bzip2 \
+ c-ares \
+ curl \
+ flatbuffers \
+ gflags \
+ glog \
+ grpc \
+ lz4 \
+ openssl \
+ orc \
+ protobuf \
+ rapidjson \
+ re2 \
+ snappy \
+ thrift \
+ utf8proc \
+ zlib \
+ zstd
+
+# Remove previous installations of python from the base image
+RUN wmic product where "name like 'python%%'" call uninstall /nointeractive && \
+ rm -rf Python*
+
+# Define the full version number otherwise choco falls back to patch number 0 (3.7 => 3.7.0)
+ARG python=3.6
+RUN (if "%python%"=="3.6" setx PYTHON_VERSION 3.6.8) & \
+ (if "%python%"=="3.7" setx PYTHON_VERSION 3.7.4) & \
+ (if "%python%"=="3.8" setx PYTHON_VERSION 3.8.6) & \
+ (if "%python%"=="3.9" setx PYTHON_VERSION 3.9.1) & \
+ (if "%python%"=="3.10" setx PYTHON_VERSION 3.10.0)
+RUN choco install -r -y --no-progress python --version=%PYTHON_VERSION%
+RUN python -m pip install -U pip
+
+COPY python/requirements-wheel-build.txt arrow/python/
+RUN pip install -r arrow/python/requirements-wheel-build.txt
+
+# TODO(kszucs): set clcache as the compiler
+ENV CLCACHE_DIR="C:\clcache"
+RUN if "%python%" NEQ "3.10" pip install clcache
+
+# For debugging purposes
+# RUN wget --no-check-certificate https://github.com/lucasg/Dependencies/releases/download/v1.10/Dependencies_x64_Release.zip
+# RUN unzip Dependencies_x64_Release.zip -d Dependencies && setx path "%path%;C:\Dependencies"
diff --git a/src/arrow/ci/docker/ubuntu-18.04-cpp.dockerfile b/src/arrow/ci/docker/ubuntu-18.04-cpp.dockerfile
new file mode 100644
index 000000000..0c05ac4ee
--- /dev/null
+++ b/src/arrow/ci/docker/ubuntu-18.04-cpp.dockerfile
@@ -0,0 +1,130 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+ARG base=amd64/ubuntu:18.04
+FROM ${base}
+
+# pipefail is enabled for proper error detection in the `wget | apt-key add`
+# step
+SHELL ["/bin/bash", "-o", "pipefail", "-c"]
+
+ENV DEBIAN_FRONTEND=noninteractive
+
+# Installs LLVM toolchain, for Gandiva and testing other compilers
+#
+# Note that this is installed before the base packages to improve iteration
+# while debugging package list with docker build.
+ARG clang_tools
+ARG llvm
+RUN apt-get update -y -q && \
+ apt-get install -y -q --no-install-recommends \
+ apt-transport-https \
+ ca-certificates \
+ gnupg \
+ wget && \
+ wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add - && \
+ echo "deb https://apt.llvm.org/bionic/ llvm-toolchain-bionic-${llvm} main" > \
+ /etc/apt/sources.list.d/llvm.list && \
+ if [ "${clang_tools}" != "${llvm}" -a "${clang_tools}" -ge 10 ]; then \
+ echo "deb https://apt.llvm.org/bionic/ llvm-toolchain-bionic-${clang_tools} main" > \
+ /etc/apt/sources.list.d/clang-tools.list; \
+ fi && \
+ apt-get update -y -q && \
+ apt-get install -y -q --no-install-recommends \
+ clang-${clang_tools} \
+ clang-${llvm} \
+ clang-format-${clang_tools} \
+ clang-tidy-${clang_tools} \
+ llvm-${llvm}-dev && \
+ apt-get clean && \
+ rm -rf /var/lib/apt/lists*
+
+# Installs C++ toolchain and dependencies
+RUN apt-get update -y -q && \
+ apt-get install -y -q --no-install-recommends \
+ autoconf \
+ ca-certificates \
+ ccache \
+ cmake \
+ g++ \
+ gcc \
+ gdb \
+ git \
+ libbenchmark-dev \
+ libboost-filesystem-dev \
+ libboost-system-dev \
+ libbrotli-dev \
+ libbz2-dev \
+ libc-ares-dev \
+ libcurl4-openssl-dev \
+ libgflags-dev \
+ libgoogle-glog-dev \
+ liblz4-dev \
+ libprotobuf-dev \
+ libprotoc-dev \
+ libre2-dev \
+ libsnappy-dev \
+ libssl-dev \
+ ninja-build \
+ pkg-config \
+ protobuf-compiler \
+ rapidjson-dev \
+ tzdata && \
+ apt-get clean && \
+ rm -rf /var/lib/apt/lists*
+
+# Prioritize system packages and local installation
+# The following dependencies will be downloaded due to missing/invalid packages
+# provided by the distribution:
+# - libc-ares-dev does not install CMake config files
+# - flatbuffer is not packaged
+# - libgtest-dev only provides sources
+# - libprotobuf-dev only provides sources
+# - thrift is too old
+# - utf8proc is too old (v2.1.0)
+# - s3 tests would require boost-asio that is included since Boost 1.66.0
+ENV ARROW_BUILD_TESTS=ON \
+ ARROW_DATASET=ON \
+ ARROW_DEPENDENCY_SOURCE=SYSTEM \
+ ARROW_FLIGHT=OFF \
+ ARROW_GANDIVA=ON \
+ ARROW_HDFS=ON \
+ ARROW_HOME=/usr/local \
+ ARROW_INSTALL_NAME_RPATH=OFF \
+ ARROW_NO_DEPRECATED_API=ON \
+ ARROW_ORC=ON \
+ ARROW_PARQUET=ON \
+ ARROW_PLASMA=ON \
+ ARROW_USE_ASAN=OFF \
+ ARROW_USE_CCACHE=ON \
+ ARROW_USE_TSAN=OFF \
+ ARROW_USE_UBSAN=OFF \
+ ARROW_WITH_BROTLI=ON \
+ ARROW_WITH_BZ2=ON \
+ ARROW_WITH_LZ4=ON \
+ ARROW_WITH_SNAPPY=ON \
+ ARROW_WITH_ZLIB=ON \
+ ARROW_WITH_ZSTD=ON \
+ AWSSDK_SOURCE=BUNDLED \
+ GTest_SOURCE=BUNDLED \
+ ORC_SOURCE=BUNDLED \
+ PARQUET_BUILD_EXAMPLES=ON \
+ PARQUET_BUILD_EXECUTABLES=ON \
+ PATH=/usr/lib/ccache/:$PATH \
+ Thrift_SOURCE=BUNDLED \
+ utf8proc_SOURCE=BUNDLED \
+ zstd_SOURCE=BUNDLED
diff --git a/src/arrow/ci/docker/ubuntu-18.04-csharp.dockerfile b/src/arrow/ci/docker/ubuntu-18.04-csharp.dockerfile
new file mode 100644
index 000000000..624ce259d
--- /dev/null
+++ b/src/arrow/ci/docker/ubuntu-18.04-csharp.dockerfile
@@ -0,0 +1,22 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+ARG platform=bionic
+ARG dotnet=3.1
+FROM mcr.microsoft.com/dotnet/core/sdk:${dotnet}-${platform}
+
+RUN dotnet tool install --tool-path /usr/local/bin sourcelink
diff --git a/src/arrow/ci/docker/ubuntu-20.04-cpp-minimal.dockerfile b/src/arrow/ci/docker/ubuntu-20.04-cpp-minimal.dockerfile
new file mode 100644
index 000000000..d7076b45b
--- /dev/null
+++ b/src/arrow/ci/docker/ubuntu-20.04-cpp-minimal.dockerfile
@@ -0,0 +1,69 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+ARG base=amd64/ubuntu:20.04
+FROM ${base}
+ARG arch
+
+SHELL ["/bin/bash", "-o", "pipefail", "-c"]
+
+RUN echo "debconf debconf/frontend select Noninteractive" | \
+ debconf-set-selections
+
+RUN apt-get update -y -q && \
+ apt-get install -y -q \
+ build-essential \
+ ccache \
+ cmake \
+ git \
+ libssl-dev \
+ libcurl4-openssl-dev \
+ python3-pip \
+ wget && \
+ apt-get clean && \
+ rm -rf /var/lib/apt/lists*
+
+COPY ci/scripts/install_minio.sh /arrow/ci/scripts/
+RUN /arrow/ci/scripts/install_minio.sh ${arch} linux latest /usr/local
+COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts/
+RUN /arrow/ci/scripts/install_gcs_testbench.sh ${arch} default
+
+ENV ARROW_BUILD_TESTS=ON \
+ ARROW_DATASET=ON \
+ ARROW_FLIGHT=ON \
+ ARROW_GANDIVA=OFF \
+ ARROW_GCS=ON \
+ ARROW_HDFS=ON \
+ ARROW_HOME=/usr/local \
+ ARROW_INSTALL_NAME_RPATH=OFF \
+ ARROW_NO_DEPRECATED_API=ON \
+ ARROW_ORC=ON \
+ ARROW_PARQUET=ON \
+ ARROW_PLASMA=ON \
+ ARROW_S3=ON \
+ ARROW_USE_CCACHE=ON \
+ ARROW_WITH_BROTLI=ON \
+ ARROW_WITH_BZ2=ON \
+ ARROW_WITH_LZ4=ON \
+ ARROW_WITH_SNAPPY=ON \
+ ARROW_WITH_ZLIB=ON \
+ ARROW_WITH_ZSTD=ON \
+ CMAKE_GENERATOR="Unix Makefiles" \
+ PARQUET_BUILD_EXAMPLES=ON \
+ PARQUET_BUILD_EXECUTABLES=ON \
+ PATH=/usr/lib/ccache/:$PATH \
+ PYTHON=python3
diff --git a/src/arrow/ci/docker/ubuntu-20.04-cpp.dockerfile b/src/arrow/ci/docker/ubuntu-20.04-cpp.dockerfile
new file mode 100644
index 000000000..de872da9a
--- /dev/null
+++ b/src/arrow/ci/docker/ubuntu-20.04-cpp.dockerfile
@@ -0,0 +1,143 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+ARG base=amd64/ubuntu:20.04
+FROM ${base}
+ARG arch
+
+SHELL ["/bin/bash", "-o", "pipefail", "-c"]
+
+RUN echo "debconf debconf/frontend select Noninteractive" | \
+ debconf-set-selections
+
+# Installs LLVM toolchain, for Gandiva and testing other compilers
+#
+# Note that this is installed before the base packages to improve iteration
+# while debugging package list with docker build.
+ARG clang_tools
+ARG llvm
+RUN if [ "${llvm}" -gt "10" ]; then \
+ apt-get update -y -q && \
+ apt-get install -y -q --no-install-recommends \
+ apt-transport-https \
+ ca-certificates \
+ gnupg \
+ wget && \
+ wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add - && \
+ echo "deb https://apt.llvm.org/focal/ llvm-toolchain-focal-${llvm} main" > \
+ /etc/apt/sources.list.d/llvm.list && \
+ if [ "${clang_tools}" != "${llvm}" -a "${clang_tools}" -gt 10 ]; then \
+ echo "deb https://apt.llvm.org/focal/ llvm-toolchain-focal-${clang_tools} main" > \
+ /etc/apt/sources.list.d/clang-tools.list; \
+ fi \
+ fi && \
+ apt-get update -y -q && \
+ apt-get install -y -q --no-install-recommends \
+ clang-${clang_tools} \
+ clang-${llvm} \
+ clang-format-${clang_tools} \
+ clang-tidy-${clang_tools} \
+ llvm-${llvm}-dev && \
+ apt-get clean && \
+ rm -rf /var/lib/apt/lists*
+
+# Installs C++ toolchain and dependencies
+RUN apt-get update -y -q && \
+ apt-get install -y -q --no-install-recommends \
+ autoconf \
+ ca-certificates \
+ ccache \
+ cmake \
+ g++ \
+ gcc \
+ gdb \
+ git \
+ libbenchmark-dev \
+ libboost-filesystem-dev \
+ libboost-system-dev \
+ libbrotli-dev \
+ libbz2-dev \
+ libc-ares-dev \
+ libcurl4-openssl-dev \
+ libgflags-dev \
+ libgoogle-glog-dev \
+ liblz4-dev \
+ libprotobuf-dev \
+ libprotoc-dev \
+ libre2-dev \
+ libsnappy-dev \
+ libssl-dev \
+ libthrift-dev \
+ libutf8proc-dev \
+ libzstd-dev \
+ make \
+ ninja-build \
+ pkg-config \
+ protobuf-compiler \
+ python3-pip \
+ rapidjson-dev \
+ tzdata \
+ wget && \
+ apt-get clean && \
+ rm -rf /var/lib/apt/lists*
+
+COPY ci/scripts/install_minio.sh /arrow/ci/scripts/
+RUN /arrow/ci/scripts/install_minio.sh ${arch} linux latest /usr/local
+COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts/
+RUN /arrow/ci/scripts/install_gcs_testbench.sh ${arch} default
+
+# Prioritize system packages and local installation
+# The following dependencies will be downloaded due to missing/invalid packages
+# provided by the distribution:
+# - libc-ares-dev does not install CMake config files
+# - flatbuffer is not packaged
+# - libgtest-dev only provides sources
+# - libprotobuf-dev only provides sources
+ENV ARROW_BUILD_TESTS=ON \
+ ARROW_DEPENDENCY_SOURCE=SYSTEM \
+ ARROW_DATASET=ON \
+ ARROW_FLIGHT=OFF \
+ ARROW_GANDIVA=ON \
+ ARROW_GCS=ON \
+ ARROW_HDFS=ON \
+ ARROW_HOME=/usr/local \
+ ARROW_INSTALL_NAME_RPATH=OFF \
+ ARROW_NO_DEPRECATED_API=ON \
+ ARROW_ORC=ON \
+ ARROW_PARQUET=ON \
+ ARROW_PLASMA=ON \
+ ARROW_S3=ON \
+ ARROW_USE_ASAN=OFF \
+ ARROW_USE_CCACHE=ON \
+ ARROW_USE_UBSAN=OFF \
+ ARROW_WITH_BROTLI=ON \
+ ARROW_WITH_BZ2=ON \
+ ARROW_WITH_LZ4=ON \
+ ARROW_WITH_SNAPPY=ON \
+ ARROW_WITH_ZLIB=ON \
+ ARROW_WITH_ZSTD=ON \
+ ASAN_SYMBOLIZER_PATH=/usr/lib/llvm-${llvm}/bin/llvm-symbolizer \
+ AWSSDK_SOURCE=BUNDLED \
+ google_cloud_cpp_storage_SOURCE=BUNDLED \
+ GTest_SOURCE=BUNDLED \
+ gRPC_SOURCE=BUNDLED \
+ ORC_SOURCE=BUNDLED \
+ PARQUET_BUILD_EXAMPLES=ON \
+ PARQUET_BUILD_EXECUTABLES=ON \
+ PATH=/usr/lib/ccache/:$PATH \
+ Protobuf_SOURCE=BUNDLED \
+ PYTHON=python3
diff --git a/src/arrow/ci/docker/ubuntu-20.10-cpp.dockerfile b/src/arrow/ci/docker/ubuntu-20.10-cpp.dockerfile
new file mode 100644
index 000000000..59f5fa4c8
--- /dev/null
+++ b/src/arrow/ci/docker/ubuntu-20.10-cpp.dockerfile
@@ -0,0 +1,140 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+ARG base=amd64/ubuntu:20.10
+FROM ${base}
+ARG arch
+
+SHELL ["/bin/bash", "-o", "pipefail", "-c"]
+
+RUN echo "debconf debconf/frontend select Noninteractive" | \
+ debconf-set-selections
+
+# Installs LLVM toolchain, for Gandiva and testing other compilers
+#
+# Note that this is installed before the base packages to improve iteration
+# while debugging package list with docker build.
+#
+# Build arguments (both are declared without a default):
+#   llvm        - LLVM major version; selects clang-${llvm} and llvm-${llvm}-dev
+#   clang_tools - major version used for clang, clang-format and clang-tidy
+#
+# When `llvm` is greater than 10 the apt.llvm.org "groovy" repository for that
+# version is registered before installing (presumably because the distribution
+# archive does not ship it -- confirm). A second repository is registered for
+# `clang_tools` when it differs from `llvm` and is also greater than 10.
+ARG clang_tools
+ARG llvm
+RUN if [ "${llvm}" -gt "10" ]; then \
+ apt-get update -y -q && \
+ apt-get install -y -q --no-install-recommends \
+ apt-transport-https \
+ ca-certificates \
+ gnupg \
+ wget && \
+ wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add - && \
+ echo "deb https://apt.llvm.org/groovy/ llvm-toolchain-groovy-${llvm} main" > \
+ /etc/apt/sources.list.d/llvm.list && \
+ if [ "${clang_tools}" != "${llvm}" -a "${clang_tools}" -gt 10 ]; then \
+ echo "deb https://apt.llvm.org/groovy/ llvm-toolchain-groovy-${clang_tools} main" > \
+ /etc/apt/sources.list.d/clang-tools.list; \
+ fi \
+ fi && \
+ apt-get update -y -q && \
+ apt-get install -y -q --no-install-recommends \
+ clang-${clang_tools} \
+ clang-${llvm} \
+ clang-format-${clang_tools} \
+ clang-tidy-${clang_tools} \
+ llvm-${llvm}-dev && \
+ apt-get clean && \
+ rm -rf /var/lib/apt/lists*
+
+# Installs C++ toolchain and dependencies
+RUN apt-get update -y -q && \
+ apt-get install -y -q --no-install-recommends \
+ autoconf \
+ ca-certificates \
+ ccache \
+ cmake \
+ g++ \
+ gcc \
+ gdb \
+ git \
+ libbenchmark-dev \
+ libboost-filesystem-dev \
+ libboost-system-dev \
+ libbrotli-dev \
+ libbz2-dev \
+ libc-ares-dev \
+ libcurl4-openssl-dev \
+ libgflags-dev \
+ libgoogle-glog-dev \
+ libgrpc++-dev \
+ liblz4-dev \
+ libprotobuf-dev \
+ libprotoc-dev \
+ libre2-dev \
+ libsnappy-dev \
+ libssl-dev \
+ libthrift-dev \
+ libutf8proc-dev \
+ libzstd-dev \
+ make \
+ ninja-build \
+ pkg-config \
+ protobuf-compiler \
+ protobuf-compiler-grpc \
+ python3-pip \
+ rapidjson-dev \
+ tzdata \
+ wget && \
+ apt-get clean && \
+ rm -rf /var/lib/apt/lists*
+
+COPY ci/scripts/install_minio.sh /arrow/ci/scripts/
+RUN /arrow/ci/scripts/install_minio.sh ${arch} linux latest /usr/local
+COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts/
+RUN /arrow/ci/scripts/install_gcs_testbench.sh ${arch} default
+
+# Prioritize system packages and local installation
+# The following dependencies will be downloaded due to missing/invalid packages
+# provided by the distribution:
+# - libc-ares-dev does not install CMake config files
+# - flatbuffer is not packaged
+# - libgtest-dev only provides sources
+# - libprotobuf-dev only provides sources
+ENV ARROW_BUILD_TESTS=ON \
+ ARROW_DEPENDENCY_SOURCE=SYSTEM \
+ ARROW_DATASET=ON \
+ ARROW_FLIGHT=OFF \
+ ARROW_GANDIVA=ON \
+ ARROW_HDFS=ON \
+ ARROW_HOME=/usr/local \
+ ARROW_INSTALL_NAME_RPATH=OFF \
+ ARROW_NO_DEPRECATED_API=ON \
+ ARROW_ORC=ON \
+ ARROW_PARQUET=ON \
+ ARROW_PLASMA=ON \
+ ARROW_S3=ON \
+ ARROW_USE_ASAN=OFF \
+ ARROW_USE_CCACHE=ON \
+ ARROW_USE_UBSAN=OFF \
+ ARROW_WITH_BROTLI=ON \
+ ARROW_WITH_BZ2=ON \
+ ARROW_WITH_LZ4=ON \
+ ARROW_WITH_SNAPPY=ON \
+ ARROW_WITH_ZLIB=ON \
+ ARROW_WITH_ZSTD=ON \
+ AWSSDK_SOURCE=BUNDLED \
+ GTest_SOURCE=BUNDLED \
+ ORC_SOURCE=BUNDLED \
+ PARQUET_BUILD_EXAMPLES=ON \
+ PARQUET_BUILD_EXECUTABLES=ON \
+ PATH=/usr/lib/ccache/:$PATH \
+ PYTHON=python3
diff --git a/src/arrow/ci/docker/ubuntu-21.04-cpp.dockerfile b/src/arrow/ci/docker/ubuntu-21.04-cpp.dockerfile
new file mode 100644
index 000000000..9fc857c62
--- /dev/null
+++ b/src/arrow/ci/docker/ubuntu-21.04-cpp.dockerfile
@@ -0,0 +1,162 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Default to the 21.04 (hirsute) image so the base release matches the
+# apt.llvm.org "hirsute" repositories this file configures. Callers can still
+# override it with `--build-arg base=...`.
+ARG base=amd64/ubuntu:21.04
+FROM ${base}
+ARG arch
+
+SHELL ["/bin/bash", "-o", "pipefail", "-c"]
+
+RUN echo "debconf debconf/frontend select Noninteractive" | \
+ debconf-set-selections
+
+# Installs LLVM toolchain, for Gandiva and testing other compilers
+#
+# Note that this is installed before the base packages to improve iteration
+# while debugging package list with docker build.
+#
+# Build arguments (both are declared without a default):
+#   llvm        - LLVM major version; selects clang-${llvm} and llvm-${llvm}-dev
+#   clang_tools - major version used for clang, clang-format and clang-tidy
+#
+# When `llvm` is greater than 10 the apt.llvm.org "hirsute" repository for that
+# version is registered before installing (presumably because the distribution
+# archive does not ship it -- confirm). A second repository is registered for
+# `clang_tools` when it differs from `llvm` and is also greater than 10.
+ARG clang_tools
+ARG llvm
+RUN if [ "${llvm}" -gt "10" ]; then \
+ apt-get update -y -q && \
+ apt-get install -y -q --no-install-recommends \
+ apt-transport-https \
+ ca-certificates \
+ gnupg \
+ wget && \
+ wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add - && \
+ echo "deb https://apt.llvm.org/hirsute/ llvm-toolchain-hirsute-${llvm} main" > \
+ /etc/apt/sources.list.d/llvm.list && \
+ if [ "${clang_tools}" != "${llvm}" -a "${clang_tools}" -gt 10 ]; then \
+ echo "deb https://apt.llvm.org/hirsute/ llvm-toolchain-hirsute-${clang_tools} main" > \
+ /etc/apt/sources.list.d/clang-tools.list; \
+ fi \
+ fi && \
+ apt-get update -y -q && \
+ apt-get install -y -q --no-install-recommends \
+ clang-${clang_tools} \
+ clang-${llvm} \
+ clang-format-${clang_tools} \
+ clang-tidy-${clang_tools} \
+ llvm-${llvm}-dev && \
+ apt-get clean && \
+ rm -rf /var/lib/apt/lists*
+
+# Installs C++ toolchain and dependencies
+RUN apt-get update -y -q && \
+ apt-get install -y -q --no-install-recommends \
+ autoconf \
+ ca-certificates \
+ ccache \
+ cmake \
+ gdb \
+ git \
+ libbenchmark-dev \
+ libboost-filesystem-dev \
+ libboost-system-dev \
+ libbrotli-dev \
+ libbz2-dev \
+ libc-ares-dev \
+ libcurl4-openssl-dev \
+ libgflags-dev \
+ libgoogle-glog-dev \
+ libgrpc++-dev \
+ liblz4-dev \
+ libprotobuf-dev \
+ libprotoc-dev \
+ libre2-dev \
+ libsnappy-dev \
+ libssl-dev \
+ libthrift-dev \
+ libutf8proc-dev \
+ libzstd-dev \
+ make \
+ ninja-build \
+ pkg-config \
+ protobuf-compiler \
+ protobuf-compiler-grpc \
+ python3-pip \
+ rapidjson-dev \
+ tzdata \
+ wget && \
+ apt-get clean && \
+ rm -rf /var/lib/apt/lists*
+
+COPY ci/scripts/install_minio.sh /arrow/ci/scripts/
+RUN /arrow/ci/scripts/install_minio.sh ${arch} linux latest /usr/local
+COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts/
+RUN /arrow/ci/scripts/install_gcs_testbench.sh ${arch} default
+
+# Prioritize system packages and local installation
+# The following dependencies will be downloaded due to missing/invalid packages
+# provided by the distribution:
+# - libc-ares-dev does not install CMake config files
+# - flatbuffer is not packaged
+# - libgtest-dev only provides sources
+# - libprotobuf-dev only provides sources
+ENV ARROW_BUILD_TESTS=ON \
+ ARROW_DEPENDENCY_SOURCE=SYSTEM \
+ ARROW_DATASET=ON \
+ ARROW_FLIGHT=OFF \
+ ARROW_GANDIVA=ON \
+ ARROW_HDFS=ON \
+ ARROW_HOME=/usr/local \
+ ARROW_INSTALL_NAME_RPATH=OFF \
+ ARROW_NO_DEPRECATED_API=ON \
+ ARROW_ORC=ON \
+ ARROW_PARQUET=ON \
+ ARROW_PLASMA=ON \
+ ARROW_S3=ON \
+ ARROW_USE_ASAN=OFF \
+ ARROW_USE_CCACHE=ON \
+ ARROW_USE_UBSAN=OFF \
+ ARROW_WITH_BROTLI=ON \
+ ARROW_WITH_BZ2=ON \
+ ARROW_WITH_LZ4=ON \
+ ARROW_WITH_SNAPPY=ON \
+ ARROW_WITH_ZLIB=ON \
+ ARROW_WITH_ZSTD=ON \
+ AWSSDK_SOURCE=BUNDLED \
+ GTest_SOURCE=BUNDLED \
+ ORC_SOURCE=BUNDLED \
+ PARQUET_BUILD_EXAMPLES=ON \
+ PARQUET_BUILD_EXECUTABLES=ON \
+ PATH=/usr/lib/ccache/:$PATH \
+ PYTHON=python3
+
+# Installs GCC.
+#
+# With the default (empty) `gcc_version` the unversioned gcc and g++ packages
+# are installed. Otherwise gcc-${gcc_version} and g++-${gcc_version} are
+# installed and registered through update-alternatives as gcc, g++, cc and
+# c++. For versions greater than 10 the ubuntu-toolchain-r/volatile PPA is
+# added before the install.
+ARG gcc_version=""
+RUN if [ "${gcc_version}" = "" ]; then \
+ apt-get update -y -q && \
+ apt-get install -y -q --no-install-recommends \
+ g++ \
+ gcc; \
+ else \
+ if [ "${gcc_version}" -gt "10" ]; then \
+ apt-get update -y -q && \
+ apt-get install -y -q --no-install-recommends software-properties-common && \
+ add-apt-repository ppa:ubuntu-toolchain-r/volatile; \
+ fi; \
+ apt-get update -y -q && \
+ apt-get install -y -q --no-install-recommends \
+ g++-${gcc_version} \
+ gcc-${gcc_version} && \
+ update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-${gcc_version} 100 && \
+ update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-${gcc_version} 100 && \
+ update-alternatives --install /usr/bin/cc cc /usr/bin/gcc 100 && \
+ update-alternatives --set cc /usr/bin/gcc && \
+ update-alternatives --install /usr/bin/c++ c++ /usr/bin/g++ 100 && \
+ update-alternatives --set c++ /usr/bin/g++; \
+ fi
diff --git a/src/arrow/ci/etc/hdfs-site.xml b/src/arrow/ci/etc/hdfs-site.xml
new file mode 100644
index 000000000..97214337f
--- /dev/null
+++ b/src/arrow/ci/etc/hdfs-site.xml
@@ -0,0 +1,52 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<!--
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. See accompanying LICENSE file.
+-->
+
+<!-- This is the client configuration for the HDFS integration tests. -->
+
+<configuration>
+ <property>
+ <name>dfs.replication</name>
+ <value>2</value>
+ </property>
+ <property>
+ <name>dfs.datanode.data.dir</name>
+ <value>file:///data/dfs/data</value>
+ </property>
+ <property>
+ <name>dfs.namenode.name.dir</name>
+ <value>file:///data/dfs/name</value>
+ </property>
+ <property>
+ <!-- Checkpoint directory; the value carries no trailing whitespace so it is a clean file URI. -->
+ <name>dfs.namenode.checkpoint.dir</name>
+ <value>file:///data/dfs/namesecondary</value>
+ </property>
+ <property>
+ <name>dfs.namenode.datanode.registration.ip-hostname-check</name>
+ <value>false</value>
+ </property>
+ <property>
+ <name>dfs.default.replica</name>
+ <value>1</value>
+ </property>
+ <property>
+ <name>dfs.support.append</name>
+ <value>true</value>
+ </property>
+ <property>
+ <name>dfs.client.block.write.replace-datanode-on-failure.enable</name>
+ <value>false</value>
+ </property>
+</configuration>
diff --git a/src/arrow/ci/etc/rprofile b/src/arrow/ci/etc/rprofile
new file mode 100644
index 000000000..5ef1dca8f
--- /dev/null
+++ b/src/arrow/ci/etc/rprofile
@@ -0,0 +1,62 @@
+ local({
+ .pick_cran <- function() {
+ # Return a CRAN repo URL, preferring RSPM binaries if available for this OS
+ #
+ # Three sources are probed in order: the `lsb_release` command,
+ # /etc/os-release and /etc/system-release. The first one that yields a name
+ # listed in `supported_os` is substituted into `rspm_template`. When none
+ # matches, the plain https://cloud.r-project.org mirror is returned.
+ rspm_template <- "https://packagemanager.rstudio.com/cran/__linux__/%s/latest"
+ supported_os <- c("focal", "xenial", "bionic", "centos7", "centos8", "opensuse42", "opensuse15", "opensuse152")
+
+ # 1. Codename printed by `lsb_release -cs`, lower-cased
+ if (nzchar(Sys.which("lsb_release"))) {
+ os <- tolower(system("lsb_release -cs", intern = TRUE))
+ if (os %in% supported_os) {
+ return(sprintf(rspm_template, os))
+ }
+ }
+ # 2. /etc/os-release: keep the text after "=" on every line and look for a
+ #    supported name among those values
+ if (file.exists("/etc/os-release")) {
+ os_release <- readLines("/etc/os-release")
+ vals <- sub("^.*=(.*)$", "\\1", os_release)
+ os <- intersect(vals, supported_os)
+ if (length(os)) {
+ # e.g. "bionic"
+ return(sprintf(rspm_template, os))
+ } else {
+ # No direct hit: name each value by the text before "=" so that ID and
+ # VERSION_ID can be looked up by key
+ names(vals) <- sub("^(.*)=.*$", "\\1", os_release)
+ if (vals["ID"] == "opensuse") {
+ # Keep only the leading run of digits of VERSION_ID (an optional
+ # opening quote is skipped)
+ version <- sub('^"?([0-9]+).*"?.*$', "\\1", vals["VERSION_ID"])
+ os <- paste0("opensuse", version)
+ if (os %in% supported_os) {
+ return(sprintf(rspm_template, os))
+ }
+ }
+ }
+ }
+ # 3. /etc/system-release: only the first line is inspected
+ if (file.exists("/etc/system-release")) {
+ # Something like "CentOS Linux release 7.7.1908 (Core)"
+ system_release <- tolower(utils::head(readLines("/etc/system-release"), 1))
+ # Extract from that the distro and the major version number
+ os <- sub("^([a-z]+) .* ([0-9]+).*$", "\\1\\2", system_release)
+ if (os %in% supported_os) {
+ return(sprintf(rspm_template, os))
+ }
+ }
+
+ # Nothing matched: use the generic mirror
+ return("https://cloud.r-project.org")
+ }
+
+ options(
+ Ncpus = parallel::detectCores(),
+ repos = tryCatch(.pick_cran(), error = function(e) "https://cloud.r-project.org"),
+ HTTPUserAgent = sprintf(
+ 'R/%s R (%s)',
+ getRversion(),
+ paste(getRversion(), R.version$platform, R.version$arch, R.version$os)
+ )
+ )
+
+ # there's a bug in 3.5 that will warn/error on these, so only set it around that
+ if (getRversion() >= "3.6.0" || getRversion() < "3.5.0") {
+ options(
+ warnPartialMatchAttr = TRUE,
+ warnPartialMatchDollar = TRUE,
+ warnPartialMatchArgs = TRUE
+ )
+ }
+})
diff --git a/src/arrow/ci/etc/valgrind-cran.supp b/src/arrow/ci/etc/valgrind-cran.supp
new file mode 100644
index 000000000..4d2922026
--- /dev/null
+++ b/src/arrow/ci/etc/valgrind-cran.supp
@@ -0,0 +1,34 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+{
+ # `testthat::skip()`s cause a valgrind error that does not show up on CRAN.
+ <testthat_skip_error>
+ Memcheck:Cond
+ fun:gregexpr_Regexc
+ fun:do_regexpr
+ fun:bcEval
+ fun:Rf_eval
+ fun:R_execClosure
+ fun:Rf_applyClosure
+ fun:bcEval
+ fun:Rf_eval
+ fun:forcePromise
+ fun:FORCE_PROMISE
+ fun:getvar
+ fun:bcEval
+}
diff --git a/src/arrow/ci/scripts/PKGBUILD b/src/arrow/ci/scripts/PKGBUILD
new file mode 100644
index 000000000..975d1514f
--- /dev/null
+++ b/src/arrow/ci/scripts/PKGBUILD
@@ -0,0 +1,138 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+_realname=arrow
+pkgbase=mingw-w64-${_realname}
+pkgname="${MINGW_PACKAGE_PREFIX}-${_realname}"
+pkgver=6.0.1
+pkgrel=8000
+pkgdesc="Apache Arrow is a cross-language development platform for in-memory data (mingw-w64)"
+arch=("any")
+url="https://arrow.apache.org/"
+license=("Apache-2.0")
+depends=("${MINGW_PACKAGE_PREFIX}-aws-sdk-cpp"
+ "${MINGW_PACKAGE_PREFIX}-libutf8proc"
+ "${MINGW_PACKAGE_PREFIX}-re2"
+ "${MINGW_PACKAGE_PREFIX}-thrift"
+ "${MINGW_PACKAGE_PREFIX}-snappy"
+ "${MINGW_PACKAGE_PREFIX}-zlib"
+ "${MINGW_PACKAGE_PREFIX}-lz4"
+ "${MINGW_PACKAGE_PREFIX}-zstd")
+makedepends=("${MINGW_PACKAGE_PREFIX}-ccache"
+ "${MINGW_PACKAGE_PREFIX}-cmake"
+ "${MINGW_PACKAGE_PREFIX}-gcc")
+options=("staticlibs" "strip" "!buildflags")
+
+# For installing from a local checkout, set source_dir to . and don't include
+# a "source" param below
+source_dir="$ARROW_HOME"
+# else
+# source_dir=apache-${_realname}-${pkgver}
+
+# For released version:
+#source=("https://archive.apache.org/dist/arrow/arrow-${pkgver}/apache-arrow-${pkgver}.tar.gz")
+#sha256sums=("ac2a77dd9168e9892e432c474611e86ded0be6dfe15f689c948751d37f81391a")
+# For github dev version:
+# Append `#commit=54b1b2f688e5e84b4c664b1e12a95f93b94ab2f3` to the URL to select a revision
+# source=("${source_dir}"::"git+https://github.com/apache/arrow")
+# sha256sums=("SKIP")
+# source_dir="${APPVEYOR_BUILD_FOLDER}/${source_dir}"
+
+cpp_build_dir=build-${CARCH}-cpp
+
+pkgver() {
+ # The only purpose of this here is to cause the job to error if the
+ # version in pkgver is different from what is in r/DESCRIPTION.
+ # The match is anchored on the "Version:" field so that other lines that
+ # merely contain the word "Version" cannot add extra output.
+ grep '^Version:' "${source_dir}/r/DESCRIPTION" | cut -d " " -f 2
+}
+
+build() {
+ ARROW_CPP_DIR="${source_dir}/cpp"
+ [[ -d ${cpp_build_dir} ]] && rm -rf ${cpp_build_dir}
+ mkdir -p ${cpp_build_dir}
+ pushd ${cpp_build_dir}
+
+ # The Rtools libutf8proc is a static lib, but Findutf8proc.cmake doesn't
+ # set the appropriate compiler definition.
+ export CPPFLAGS="-DUTF8PROC_STATIC"
+
+ # This is the difference between rtools-packages and rtools-backports
+ # Remove this when submitting to rtools-packages
+ if [ "$RTOOLS_VERSION" = "35" ]; then
+ export CC="/C/Rtools${MINGW_PREFIX/mingw/mingw_}/bin/gcc"
+ export CXX="/C/Rtools${MINGW_PREFIX/mingw/mingw_}/bin/g++"
+ export PATH="/C/Rtools${MINGW_PREFIX/mingw/mingw_}/bin:$PATH"
+ export CPPFLAGS="${CPPFLAGS} -I${MINGW_PREFIX}/include"
+ export LIBS="-L${MINGW_PREFIX}/libs"
+ export ARROW_S3=OFF
+ export ARROW_WITH_RE2=OFF
+ # Without this, some dataset functionality segfaults
+ export CMAKE_UNITY_BUILD=ON
+ else
+ export ARROW_S3=ON
+ export ARROW_WITH_RE2=ON
+ # Without this, some compute functionality segfaults in tests
+ export CMAKE_UNITY_BUILD=OFF
+ fi
+
+ MSYS2_ARG_CONV_EXCL="-DCMAKE_INSTALL_PREFIX=" \
+ ${MINGW_PREFIX}/bin/cmake.exe \
+ ${ARROW_CPP_DIR} \
+ -G "MSYS Makefiles" \
+ -DARROW_BUILD_SHARED=OFF \
+ -DARROW_BUILD_STATIC=ON \
+ -DARROW_BUILD_UTILITIES=OFF \
+ -DARROW_COMPUTE=ON \
+ -DARROW_CSV=ON \
+ -DARROW_DATASET=ON \
+ -DARROW_FILESYSTEM=ON \
+ -DARROW_HDFS=OFF \
+ -DARROW_JEMALLOC=OFF \
+ -DARROW_JSON=ON \
+ -DARROW_LZ4_USE_SHARED=OFF \
+ -DARROW_MIMALLOC=ON \
+ -DARROW_PACKAGE_PREFIX="${MINGW_PREFIX}" \
+ -DARROW_PARQUET=ON \
+ -DARROW_S3="${ARROW_S3}" \
+ -DARROW_SNAPPY_USE_SHARED=OFF \
+ -DARROW_USE_GLOG=OFF \
+ -DARROW_WITH_LZ4=ON \
+ -DARROW_WITH_RE2="${ARROW_WITH_RE2}" \
+ -DARROW_WITH_SNAPPY=ON \
+ -DARROW_WITH_ZLIB=ON \
+ -DARROW_WITH_ZSTD=ON \
+ -DARROW_ZSTD_USE_SHARED=OFF \
+ -DARROW_CXXFLAGS="${CPPFLAGS}" \
+ -DCMAKE_BUILD_TYPE="release" \
+ -DCMAKE_INSTALL_PREFIX=${MINGW_PREFIX} \
+ -DCMAKE_UNITY_BUILD=${CMAKE_UNITY_BUILD} \
+ -DCMAKE_VERBOSE_MAKEFILE=ON
+
+ make -j3
+ popd
+}
+
+package() {
+ # Install the previously built C++ tree into the packaging root.
+ make -C "${cpp_build_dir}" DESTDIR="${pkgdir}" install
+
+ # Replace the `cygpath -am` form of the prefix with ${MINGW_PREFIX} in every
+ # installed pkg-config file. Declaration and assignment are separate so a
+ # failing cygpath is not hidden behind the exit status of `local`.
+ local PREFIX_DEPS
+ PREFIX_DEPS=$(cygpath -am "${MINGW_PREFIX}")
+ pushd "${pkgdir}${MINGW_PREFIX}/lib/pkgconfig"
+ for pc in *.pc; do
+ sed -s "s|${PREFIX_DEPS}|${MINGW_PREFIX}|g" -i "$pc"
+ done
+ popd
+}
diff --git a/src/arrow/ci/scripts/c_glib_build.sh b/src/arrow/ci/scripts/c_glib_build.sh
new file mode 100755
index 000000000..ce3cea18e
--- /dev/null
+++ b/src/arrow/ci/scripts/c_glib_build.sh
@@ -0,0 +1,45 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -ex
+
+source_dir=${1}/c_glib
+build_dir=${2}/c_glib
+: ${ARROW_GLIB_GTK_DOC:=false}
+: ${ARROW_GLIB_DEVELOPMENT_MODE:=false}
+
+export PKG_CONFIG_PATH=${ARROW_HOME}/lib/pkgconfig
+
+export CFLAGS="-DARROW_NO_DEPRECATED_API"
+export CXXFLAGS="-DARROW_NO_DEPRECATED_API"
+
+mkdir -p ${build_dir}
+
+# Build with Meson
+meson --prefix=$ARROW_HOME \
+ --libdir=lib \
+ -Ddevelopment_mode=${ARROW_GLIB_DEVELOPMENT_MODE} \
+ -Dgtk_doc=${ARROW_GLIB_GTK_DOC} \
+ ${build_dir} \
+ ${source_dir}
+
+pushd ${build_dir}
+ninja
+ninja install
+popd
diff --git a/src/arrow/ci/scripts/c_glib_test.sh b/src/arrow/ci/scripts/c_glib_test.sh
new file mode 100755
index 000000000..25c54138e
--- /dev/null
+++ b/src/arrow/ci/scripts/c_glib_test.sh
@@ -0,0 +1,48 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -ex
+
+source_dir=${1}/c_glib
+build_dir=${2}/c_glib
+
+# Append the previous LD_LIBRARY_PATH only when it is non-empty. A trailing ":"
+# would create an empty entry, which the dynamic loader interprets as the
+# current working directory.
+export LD_LIBRARY_PATH=${ARROW_HOME}/lib${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}
+export PKG_CONFIG_PATH=${ARROW_HOME}/lib/pkgconfig
+export GI_TYPELIB_PATH=${ARROW_HOME}/lib/girepository-1.0
+
+pushd ${source_dir}
+
+ruby test/run-test.rb
+
+if [[ "$(uname -s)" == "Linux" ]]; then
+ # TODO(kszucs): on osx it fails to load 'lgi.corelgilua51' despite that lgi
+ # was installed by luarocks
+ pushd example/lua
+ lua write-batch.lua
+ lua read-batch.lua
+ lua write-stream.lua
+ lua read-stream.lua
+ popd
+fi
+
+popd
+
+pushd ${build_dir}
+example/extension-type
+popd
diff --git a/src/arrow/ci/scripts/ccache_setup.sh b/src/arrow/ci/scripts/ccache_setup.sh
new file mode 100755
index 000000000..f77fbb373
--- /dev/null
+++ b/src/arrow/ci/scripts/ccache_setup.sh
@@ -0,0 +1,26 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -eux
+
+# Append the ccache settings to the file named by $GITHUB_ENV. The redirect
+# target is quoted so that a path containing whitespace stays one word, and
+# the file is opened once for the whole group.
+{
+ echo "ARROW_USE_CCACHE=ON"
+ echo "CCACHE_COMPILERCHECK=content"
+ echo "CCACHE_COMPRESS=1"
+ echo "CCACHE_COMPRESSLEVEL=6"
+ echo "CCACHE_MAXSIZE=500M"
+} >> "$GITHUB_ENV"
diff --git a/src/arrow/ci/scripts/cpp_build.sh b/src/arrow/ci/scripts/cpp_build.sh
new file mode 100755
index 000000000..a11dd23b7
--- /dev/null
+++ b/src/arrow/ci/scripts/cpp_build.sh
@@ -0,0 +1,164 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -ex
+
+source_dir=${1}/cpp
+build_dir=${2}/cpp
+with_docs=${3:-false}
+
+: ${ARROW_USE_CCACHE:=OFF}
+
+# TODO(kszucs): consider to move these to CMake
+if [ ! -z "${CONDA_PREFIX}" ]; then
+ echo -e "===\n=== Conda environment for build\n==="
+ conda list
+
+ export CMAKE_ARGS="${CMAKE_ARGS} -DCMAKE_AR=${AR} -DCMAKE_RANLIB=${RANLIB}"
+ export ARROW_GANDIVA_PC_CXX_FLAGS=$(echo | ${CXX} -E -Wp,-v -xc++ - 2>&1 | grep '^ ' | awk '{print "-isystem;" substr($1, 1)}' | tr '\n' ';')
+elif [ -x "$(command -v xcrun)" ]; then
+ export ARROW_GANDIVA_PC_CXX_FLAGS="-isysroot;$(xcrun --show-sdk-path)"
+fi
+
+if [ "${ARROW_USE_CCACHE}" == "ON" ]; then
+ echo -e "===\n=== ccache statistics before build\n==="
+ ccache -s
+fi
+
+# Abort before configuring when ARROW_USE_TSAN is ON but ASAN_SYMBOLIZER_PATH
+# does not point at an executable file.
+# NOTE(review): the guard is keyed on ARROW_USE_TSAN although the variable is
+# the ASAN symbolizer path -- confirm whether ARROW_USE_ASAN builds should be
+# checked here as well.
+if [ "${ARROW_USE_TSAN}" == "ON" ] && [ ! -x "${ASAN_SYMBOLIZER_PATH}" ]; then
+ echo -e "Invalid value for \$ASAN_SYMBOLIZER_PATH: ${ASAN_SYMBOLIZER_PATH}"
+ exit 1
+fi
+
+mkdir -p ${build_dir}
+pushd ${build_dir}
+
+cmake -G "${CMAKE_GENERATOR:-Ninja}" \
+ -DARROW_BOOST_USE_SHARED=${ARROW_BOOST_USE_SHARED:-ON} \
+ -DARROW_BUILD_BENCHMARKS=${ARROW_BUILD_BENCHMARKS:-OFF} \
+ -DARROW_BUILD_BENCHMARKS_REFERENCE=${ARROW_BUILD_BENCHMARKS:-OFF} \
+ -DARROW_BUILD_EXAMPLES=${ARROW_BUILD_EXAMPLES:-OFF} \
+ -DARROW_BUILD_INTEGRATION=${ARROW_BUILD_INTEGRATION:-OFF} \
+ -DARROW_BUILD_SHARED=${ARROW_BUILD_SHARED:-ON} \
+ -DARROW_BUILD_STATIC=${ARROW_BUILD_STATIC:-ON} \
+ -DARROW_BUILD_TESTS=${ARROW_BUILD_TESTS:-OFF} \
+ -DARROW_BUILD_UTILITIES=${ARROW_BUILD_UTILITIES:-ON} \
+ -DARROW_COMPUTE=${ARROW_COMPUTE:-ON} \
+ -DARROW_CSV=${ARROW_CSV:-ON} \
+ -DARROW_CUDA=${ARROW_CUDA:-OFF} \
+ -DARROW_CXXFLAGS=${ARROW_CXXFLAGS:-} \
+ -DARROW_DATASET=${ARROW_DATASET:-ON} \
+ -DARROW_ENGINE=${ARROW_ENGINE:-ON} \
+ -DARROW_DEPENDENCY_SOURCE=${ARROW_DEPENDENCY_SOURCE:-AUTO} \
+ -DARROW_EXTRA_ERROR_CONTEXT=${ARROW_EXTRA_ERROR_CONTEXT:-OFF} \
+ -DARROW_ENABLE_TIMING_TESTS=${ARROW_ENABLE_TIMING_TESTS:-ON} \
+ -DARROW_FILESYSTEM=${ARROW_FILESYSTEM:-ON} \
+ -DARROW_FLIGHT=${ARROW_FLIGHT:-OFF} \
+ -DARROW_FUZZING=${ARROW_FUZZING:-OFF} \
+ -DARROW_GANDIVA_JAVA=${ARROW_GANDIVA_JAVA:-OFF} \
+ -DARROW_GANDIVA_PC_CXX_FLAGS=${ARROW_GANDIVA_PC_CXX_FLAGS:-} \
+ -DARROW_GANDIVA=${ARROW_GANDIVA:-OFF} \
+ -DARROW_GCS=${ARROW_GCS:-OFF} \
+ -DARROW_HDFS=${ARROW_HDFS:-ON} \
+ -DARROW_HIVESERVER2=${ARROW_HIVESERVER2:-OFF} \
+ -DARROW_INSTALL_NAME_RPATH=${ARROW_INSTALL_NAME_RPATH:-ON} \
+ -DARROW_JEMALLOC=${ARROW_JEMALLOC:-ON} \
+ -DARROW_JNI=${ARROW_JNI:-OFF} \
+ -DARROW_JSON=${ARROW_JSON:-ON} \
+ -DARROW_LARGE_MEMORY_TESTS=${ARROW_LARGE_MEMORY_TESTS:-OFF} \
+ -DARROW_MIMALLOC=${ARROW_MIMALLOC:-OFF} \
+ -DARROW_NO_DEPRECATED_API=${ARROW_NO_DEPRECATED_API:-OFF} \
+ -DARROW_ORC=${ARROW_ORC:-OFF} \
+ -DARROW_PARQUET=${ARROW_PARQUET:-OFF} \
+ -DARROW_PLASMA_JAVA_CLIENT=${ARROW_PLASMA_JAVA_CLIENT:-OFF} \
+ -DARROW_PLASMA=${ARROW_PLASMA:-OFF} \
+ -DARROW_PYTHON=${ARROW_PYTHON:-OFF} \
+ -DARROW_RUNTIME_SIMD_LEVEL=${ARROW_RUNTIME_SIMD_LEVEL:-MAX} \
+ -DARROW_S3=${ARROW_S3:-OFF} \
+ -DARROW_TEST_LINKAGE=${ARROW_TEST_LINKAGE:-shared} \
+ -DARROW_TEST_MEMCHECK=${ARROW_TEST_MEMCHECK:-OFF} \
+ -DARROW_USE_ASAN=${ARROW_USE_ASAN:-OFF} \
+ -DARROW_USE_CCACHE=${ARROW_USE_CCACHE:-ON} \
+ -DARROW_USE_GLOG=${ARROW_USE_GLOG:-OFF} \
+ -DARROW_USE_LD_GOLD=${ARROW_USE_LD_GOLD:-OFF} \
+ -DARROW_USE_PRECOMPILED_HEADERS=${ARROW_USE_PRECOMPILED_HEADERS:-OFF} \
+ -DARROW_USE_STATIC_CRT=${ARROW_USE_STATIC_CRT:-OFF} \
+ -DARROW_USE_TSAN=${ARROW_USE_TSAN:-OFF} \
+ -DARROW_USE_UBSAN=${ARROW_USE_UBSAN:-OFF} \
+ -DARROW_VERBOSE_THIRDPARTY_BUILD=${ARROW_VERBOSE_THIRDPARTY_BUILD:-OFF} \
+ -DARROW_WITH_BROTLI=${ARROW_WITH_BROTLI:-OFF} \
+ -DARROW_WITH_BZ2=${ARROW_WITH_BZ2:-OFF} \
+ -DARROW_WITH_LZ4=${ARROW_WITH_LZ4:-OFF} \
+ -DARROW_WITH_SNAPPY=${ARROW_WITH_SNAPPY:-OFF} \
+ -DARROW_WITH_UTF8PROC=${ARROW_WITH_UTF8PROC:-ON} \
+ -DARROW_WITH_ZLIB=${ARROW_WITH_ZLIB:-OFF} \
+ -DARROW_WITH_ZSTD=${ARROW_WITH_ZSTD:-OFF} \
+ -DAWSSDK_SOURCE=${AWSSDK_SOURCE:-} \
+ -Dbenchmark_SOURCE=${benchmark_SOURCE:-} \
+ -DBOOST_SOURCE=${BOOST_SOURCE:-} \
+ -DBrotli_SOURCE=${Brotli_SOURCE:-} \
+ -DBUILD_WARNING_LEVEL=${BUILD_WARNING_LEVEL:-CHECKIN} \
+ -Dc-ares_SOURCE=${cares_SOURCE:-} \
+ -DCMAKE_BUILD_TYPE=${ARROW_BUILD_TYPE:-debug} \
+ -DCMAKE_C_FLAGS="${CFLAGS:-}" \
+ -DCMAKE_CXX_FLAGS="${CXXFLAGS:-}" \
+ -DCMAKE_INSTALL_LIBDIR=${CMAKE_INSTALL_LIBDIR:-lib} \
+ -DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX:-${ARROW_HOME}} \
+ -DCMAKE_UNITY_BUILD=${CMAKE_UNITY_BUILD:-OFF} \
+ -Dgflags_SOURCE=${gflags_SOURCE:-} \
+ -Dgoogle_cloud_cpp_storage_SOURCE=${google_cloud_cpp_storage_SOURCE:-} \
+ -DgRPC_SOURCE=${gRPC_SOURCE:-} \
+ -DGTest_SOURCE=${GTest_SOURCE:-} \
+ -DLz4_SOURCE=${Lz4_SOURCE:-} \
+ -DORC_SOURCE=${ORC_SOURCE:-} \
+ -DPARQUET_BUILD_EXECUTABLES=${PARQUET_BUILD_EXECUTABLES:-OFF} \
+ -DPARQUET_BUILD_EXAMPLES=${PARQUET_BUILD_EXAMPLES:-OFF} \
+ -DPARQUET_REQUIRE_ENCRYPTION=${PARQUET_REQUIRE_ENCRYPTION:-ON} \
+ -DProtobuf_SOURCE=${Protobuf_SOURCE:-} \
+ -DRapidJSON_SOURCE=${RapidJSON_SOURCE:-} \
+ -Dre2_SOURCE=${re2_SOURCE:-} \
+ -DSnappy_SOURCE=${Snappy_SOURCE:-} \
+ -DThrift_SOURCE=${Thrift_SOURCE:-} \
+ -Dutf8proc_SOURCE=${utf8proc_SOURCE:-} \
+ -Dzstd_SOURCE=${zstd_SOURCE:-} \
+ ${CMAKE_ARGS} \
+ ${source_dir}
+
+if [ ! -z "${CPP_MAKE_PARALLELISM}" ]; then
+ time cmake --build . --target install -- -j${CPP_MAKE_PARALLELISM}
+else
+ time cmake --build . --target install
+fi
+
+popd
+
+if [ -x "$(command -v ldconfig)" ]; then
+ ldconfig
+fi
+
+if [ "${ARROW_USE_CCACHE}" == "ON" ]; then
+ echo -e "===\n=== ccache statistics after build\n==="
+ ccache -s
+fi
+
+if [ "${with_docs}" == "true" ]; then
+ pushd ${source_dir}/apidoc
+ doxygen
+ popd
+fi
diff --git a/src/arrow/ci/scripts/cpp_test.sh b/src/arrow/ci/scripts/cpp_test.sh
new file mode 100755
index 000000000..822557f25
--- /dev/null
+++ b/src/arrow/ci/scripts/cpp_test.sh
@@ -0,0 +1,118 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -ex
+
+if [[ $# -lt 2 ]]; then # numeric test: "<" inside [[ ]] compares strings, so 10+ args ("10" < "2") wrongly failed
+ echo "Usage: $0 <Arrow dir> <build dir> [ctest args ...]"
+ exit 1
+fi
+
+arrow_dir=${1}; shift
+build_dir=${1}/cpp; shift
+source_dir=${arrow_dir}/cpp
+binary_output_dir=${build_dir}/${ARROW_BUILD_TYPE:-debug}
+
+export ARROW_TEST_DATA=${arrow_dir}/testing/data
+export PARQUET_TEST_DATA=${source_dir}/submodules/parquet-testing/data
+export LD_LIBRARY_PATH=${ARROW_HOME}/${CMAKE_INSTALL_LIBDIR:-lib}:${LD_LIBRARY_PATH}
+
+# By default, aws-sdk tries to contact a non-existing local ip host
+# to retrieve metadata. Disable this so that S3FileSystem tests run faster.
+export AWS_EC2_METADATA_DISABLED=TRUE
+
+ctest_options=()
+case "$(uname)" in
+ Linux)
+ n_jobs=$(nproc)
+ ;;
+ Darwin)
+ n_jobs=$(sysctl -n hw.ncpu)
+ ;;
+ MINGW*)
+ n_jobs=${NUMBER_OF_PROCESSORS:-1}
+ # TODO: Enable these crashed tests.
+ # https://issues.apache.org/jira/browse/ARROW-9072
+ exclude_tests="gandiva-internals-test"
+ exclude_tests="${exclude_tests}|gandiva-projector-test"
+ exclude_tests="${exclude_tests}|gandiva-utf8-test"
+ if [ "${MSYSTEM}" = "MINGW32" ]; then
+ exclude_tests="${exclude_tests}|gandiva-projector-test"
+ exclude_tests="${exclude_tests}|gandiva-binary-test"
+ exclude_tests="${exclude_tests}|gandiva-boolean-expr-test"
+ exclude_tests="${exclude_tests}|gandiva-date-time-test"
+ exclude_tests="${exclude_tests}|gandiva-decimal-single-test"
+ exclude_tests="${exclude_tests}|gandiva-decimal-test"
+ exclude_tests="${exclude_tests}|gandiva-filter-project-test"
+ exclude_tests="${exclude_tests}|gandiva-filter-test"
+ exclude_tests="${exclude_tests}|gandiva-hash-test"
+ exclude_tests="${exclude_tests}|gandiva-if-expr-test"
+ exclude_tests="${exclude_tests}|gandiva-in-expr-test"
+ exclude_tests="${exclude_tests}|gandiva-literal-test"
+ exclude_tests="${exclude_tests}|gandiva-null-validity-test"
+ fi
+ ctest_options+=(--exclude-regex "${exclude_tests}")
+ ;;
+ *)
+ n_jobs=${NPROC:-1}
+ ;;
+esac
+
+pushd ${build_dir}
+
+if ! which python > /dev/null 2>&1; then
+ export PYTHON=python3
+fi
+ctest \
+ --label-regex unittest \
+ --output-on-failure \
+ --parallel ${n_jobs} \
+ --timeout 300 \
+ "${ctest_options[@]}" \
+ "$@" # quoted so forwarded ctest arguments containing spaces or glob characters are passed through intact
+
+if [ "${ARROW_BUILD_EXAMPLES}" == "ON" ]; then
+ examples=$(find ${binary_output_dir} -executable -name "*example")
+ if [ "${examples}" == "" ]; then
+ echo "=================="
+ echo "No examples found!"
+ echo "=================="
+ exit 1
+ fi
+ for ex in ${examples}
+ do
+ echo "=================="
+ echo "Executing ${ex}"
+ echo "=================="
+ ${ex}
+ done
+fi
+
+if [ "${ARROW_FUZZING}" == "ON" ]; then
+ # Fuzzing regression tests
+ ${binary_output_dir}/arrow-ipc-stream-fuzz ${ARROW_TEST_DATA}/arrow-ipc-stream/crash-*
+ ${binary_output_dir}/arrow-ipc-stream-fuzz ${ARROW_TEST_DATA}/arrow-ipc-stream/*-testcase-*
+ ${binary_output_dir}/arrow-ipc-file-fuzz ${ARROW_TEST_DATA}/arrow-ipc-file/*-testcase-*
+ ${binary_output_dir}/arrow-ipc-tensor-stream-fuzz ${ARROW_TEST_DATA}/arrow-ipc-tensor-stream/*-testcase-*
+ if [ "${ARROW_PARQUET}" == "ON" ]; then
+ ${binary_output_dir}/parquet-arrow-fuzz ${ARROW_TEST_DATA}/parquet/fuzzing/*-testcase-*
+ fi
+fi
+
+popd
diff --git a/src/arrow/ci/scripts/csharp_build.sh b/src/arrow/ci/scripts/csharp_build.sh
new file mode 100755
index 000000000..5a3976794
--- /dev/null
+++ b/src/arrow/ci/scripts/csharp_build.sh
@@ -0,0 +1,26 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -ex
+
+source_dir=${1}/csharp
+
+pushd ${source_dir}
+dotnet build
+popd
diff --git a/src/arrow/ci/scripts/csharp_pack.sh b/src/arrow/ci/scripts/csharp_pack.sh
new file mode 100755
index 000000000..e9dfc664e
--- /dev/null
+++ b/src/arrow/ci/scripts/csharp_pack.sh
@@ -0,0 +1,26 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -eux
+
+source_dir=${1}/csharp
+
+pushd ${source_dir}
+dotnet pack -c Release
+popd
diff --git a/src/arrow/ci/scripts/csharp_test.sh b/src/arrow/ci/scripts/csharp_test.sh
new file mode 100755
index 000000000..9e4e35dd4
--- /dev/null
+++ b/src/arrow/ci/scripts/csharp_test.sh
@@ -0,0 +1,29 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -ex
+
+source_dir=${1}/csharp
+
+pushd ${source_dir}
+dotnet test
+for pdb in artifacts/Apache.Arrow/*/*/Apache.Arrow.pdb; do
+ sourcelink test ${pdb}
+done
+popd
diff --git a/src/arrow/ci/scripts/docs_build.sh b/src/arrow/ci/scripts/docs_build.sh
new file mode 100755
index 000000000..e6ee768ee
--- /dev/null
+++ b/src/arrow/ci/scripts/docs_build.sh
@@ -0,0 +1,48 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+set -ex
+
+arrow_dir=${1}
+build_dir=${2}/docs
+
+export LD_LIBRARY_PATH=${ARROW_HOME}/lib:${LD_LIBRARY_PATH}
+export PKG_CONFIG_PATH=${ARROW_HOME}/lib/pkgconfig:${PKG_CONFIG_PATH}
+export GI_TYPELIB_PATH=${ARROW_HOME}/lib/girepository-1.0
+export CFLAGS="-DARROW_NO_DEPRECATED_API"
+export CXXFLAGS="-DARROW_NO_DEPRECATED_API"
+
+ncpus=$(python3 -c "import os; print(os.cpu_count())")
+
+# Sphinx docs
+sphinx-build -b html -j ${ncpus} ${arrow_dir}/docs/source ${build_dir}
+
+# C++ - original doxygen
+# rsync -a ${arrow_dir}/cpp/apidoc/ ${build_dir}/cpp
+
+# R
+rsync -a ${arrow_dir}/r/docs/ ${build_dir}/r
+
+# C GLib
+rsync -a ${ARROW_HOME}/share/gtk-doc/html/ ${build_dir}/c_glib
+
+# Java
+rsync -a ${arrow_dir}/java/target/site/apidocs/ ${build_dir}/java/reference
+
+# Javascript
+rsync -a ${arrow_dir}/js/doc/ ${build_dir}/js
diff --git a/src/arrow/ci/scripts/go_build.sh b/src/arrow/ci/scripts/go_build.sh
new file mode 100755
index 000000000..267f78e59
--- /dev/null
+++ b/src/arrow/ci/scripts/go_build.sh
@@ -0,0 +1,40 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -ex
+
+source_dir=${1}/go
+
+pushd ${source_dir}/arrow
+
+if [[ -n "${ARROW_GO_TESTCGO}" ]]; then
+ TAGS="-tags ccalloc"
+fi
+
+go get -d -t -v ./...
+go install $TAGS -v ./...
+
+popd
+
+pushd ${source_dir}/parquet
+
+go get -d -t -v ./...
+go install -v ./...
+
+popd
diff --git a/src/arrow/ci/scripts/go_cgo_python_test.sh b/src/arrow/ci/scripts/go_cgo_python_test.sh
new file mode 100755
index 000000000..5f2032fba
--- /dev/null
+++ b/src/arrow/ci/scripts/go_cgo_python_test.sh
@@ -0,0 +1,45 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -ex
+
+source_dir=${1}/go
+
+pushd ${source_dir}/arrow/cdata/test
+
+case "$(uname)" in
+ Linux)
+ testlib="cgotest.so"
+ ;;
+ Darwin)
+ testlib="cgotest.so"
+ ;;
+ MINGW*)
+ testlib="cgotest.dll"
+ ;;
+esac
+
+go build -tags cdata_test,assert -buildmode=c-shared -o $testlib .
+
+python test_export_to_cgo.py
+
+rm $testlib
+rm "${testlib%.*}.h"
+
+popd
diff --git a/src/arrow/ci/scripts/go_test.sh b/src/arrow/ci/scripts/go_test.sh
new file mode 100755
index 000000000..f7b2cd963
--- /dev/null
+++ b/src/arrow/ci/scripts/go_test.sh
@@ -0,0 +1,60 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -ex
+
+source_dir=${1}/go
+
+testargs="-race"
+case "$(uname)" in
+ MINGW*)
+ # -race doesn't work on windows currently
+ testargs=""
+ ;;
+esac
+
+if [[ "$(go env GOHOSTARCH)" = "s390x" ]]; then
+ testargs="" # -race not supported on s390x
+fi
+
+pushd ${source_dir}/arrow
+
+TAGS="assert,test"
+if [[ -n "${ARROW_GO_TESTCGO}" ]]; then
+ TAGS="${TAGS},ccalloc"
+fi
+
+
+# the cgo implementation of the c data interface requires the "test"
+# tag in order to run its tests so that the testing functions implemented
+# in .c files don't get included in non-test builds.
+
+for d in $(go list ./... | grep -v vendor); do
+ go test $testargs -tags $TAGS $d
+done
+
+popd
+
+pushd ${source_dir}/parquet
+
+for d in $(go list ./... | grep -v vendor); do
+ go test $testargs -tags assert $d
+done
+
+popd
diff --git a/src/arrow/ci/scripts/install_conda.sh b/src/arrow/ci/scripts/install_conda.sh
new file mode 100755
index 000000000..f4d313b63
--- /dev/null
+++ b/src/arrow/ci/scripts/install_conda.sh
@@ -0,0 +1,66 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -e
+
+declare -A archs
+archs=([amd64]=x86_64
+ [arm32v7]=armv7l
+ [ppc64le]=ppc64le
+ [i386]=x86)
+
+declare -A platforms
+platforms=([windows]=Windows
+ [macos]=MacOSX
+ [linux]=Linux)
+
+if [ "$#" -ne 4 ]; then
+ echo "Usage: $0 <architecture> <platform> <version> <prefix>"
+ exit 1
+elif [[ -z ${archs[$1]} ]]; then
+ echo "Unexpected architecture: ${1}"
+ exit 1
+elif [[ -z ${platforms[$2]} ]]; then
+ echo "Unexpected platform: ${2}"
+ exit 1
+fi
+
+arch=${archs[$1]}
+platform=${platforms[$2]}
+version=$3
+prefix=$4
+
+echo "Downloading Miniconda installer..."
+wget -nv https://repo.continuum.io/miniconda/Miniconda3-${version}-${platform}-${arch}.sh -O /tmp/miniconda.sh
+bash /tmp/miniconda.sh -b -p ${prefix}
+rm /tmp/miniconda.sh
+
+# Like "conda init", but for POSIX sh rather than bash
+ln -s ${prefix}/etc/profile.d/conda.sh /etc/profile.d/conda.sh
+
+# Configure
+source /etc/profile.d/conda.sh
+conda config --add channels conda-forge
+conda config --set channel_priority strict
+conda config --set show_channel_urls True
+conda config --set remote_connect_timeout_secs 12
+
+# Update and clean
+conda update --all -y
+conda clean --all -y
diff --git a/src/arrow/ci/scripts/install_dask.sh b/src/arrow/ci/scripts/install_dask.sh
new file mode 100755
index 000000000..954ce3249
--- /dev/null
+++ b/src/arrow/ci/scripts/install_dask.sh
@@ -0,0 +1,36 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -e
+
+if [ "$#" -ne 1 ]; then
+ echo "Usage: $0 <dask version>"
+ exit 1
+fi
+
+dask=$1
+
+if [ "${dask}" = "master" ]; then
+ pip install https://github.com/dask/dask/archive/main.tar.gz#egg=dask[dataframe]
+elif [ "${dask}" = "latest" ]; then
+ conda install -q dask
+else
+ conda install -q dask=${dask}
+fi
+conda clean --all
diff --git a/src/arrow/ci/scripts/install_gcs_testbench.sh b/src/arrow/ci/scripts/install_gcs_testbench.sh
new file mode 100755
index 000000000..579a78944
--- /dev/null
+++ b/src/arrow/ci/scripts/install_gcs_testbench.sh
@@ -0,0 +1,38 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -e
+
+if [ "$#" -ne 2 ]; then
+ echo "Usage: $0 <arch> <storage-testbench version>"
+ exit 1
+fi
+
+arch=$1
+if [ "${arch}" != "amd64" ]; then
+ echo "GCS testbench won't install on non-x86 architecture"
+ exit 0
+fi
+
+version=$2
+if [[ "${version}" = "default" ]]; then # string comparison; -eq evaluates both sides arithmetically and mis-handles real versions
+ version="v0.7.0"
+fi
+
+pip install "https://github.com/googleapis/storage-testbench/archive/${version}.tar.gz"
diff --git a/src/arrow/ci/scripts/install_iwyu.sh b/src/arrow/ci/scripts/install_iwyu.sh
new file mode 100755
index 000000000..3cd2cbc95
--- /dev/null
+++ b/src/arrow/ci/scripts/install_iwyu.sh
@@ -0,0 +1,48 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+set -eu
+
+source_dir=${1:-/tmp/iwyu}
+install_prefix=${2:-/usr/local}
+clang_tools_version=${3:-8}
+
+iwyu_branch_name="clang_${clang_tools_version}"
+if [ ${clang_tools_version} -lt 10 ]; then
+ iwyu_branch_name="${iwyu_branch_name}.0"
+fi
+
+git clone --single-branch --branch ${iwyu_branch_name} \
+ https://github.com/include-what-you-use/include-what-you-use.git ${source_dir}
+
+mkdir -p ${source_dir}/build
+pushd ${source_dir}/build
+
+# Build IWYU for current Clang
+export CC=clang-${clang_tools_version}
+export CXX=clang++-${clang_tools_version}
+
+cmake -DCMAKE_PREFIX_PATH=/usr/lib/llvm-${clang_tools_version} \
+ -DCMAKE_INSTALL_PREFIX=${install_prefix} \
+ ${source_dir}
+make -j4
+make install
+
+popd
+
+rm -rf ${source_dir}
diff --git a/src/arrow/ci/scripts/install_kartothek.sh b/src/arrow/ci/scripts/install_kartothek.sh
new file mode 100755
index 000000000..4d88943b6
--- /dev/null
+++ b/src/arrow/ci/scripts/install_kartothek.sh
@@ -0,0 +1,41 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -e
+
+if [ "$#" -ne 2 ]; then
+ echo "Usage: $0 <kartothek version> <target directory>"
+ exit 1
+fi
+
+kartothek=$1 # requested version: "master", "latest", or a git ref; the name must match the tests below
+target=$2
+
+git clone --recurse-submodules https://github.com/JDASoftwareGroup/kartothek "${target}"
+if [ "${kartothek}" = "master" ]; then
+ git -C "${target}" checkout master;
+elif [ "${kartothek}" = "latest" ]; then
+ git -C "${target}" checkout $(git -C "${target}" describe --tags); # describe the clone, not the caller's working directory
+else
+ git -C "${target}" checkout ${kartothek};
+fi
+
+pushd "${target}"
+pip install --no-deps .
+popd
diff --git a/src/arrow/ci/scripts/install_minio.sh b/src/arrow/ci/scripts/install_minio.sh
new file mode 100755
index 000000000..5cda46e59
--- /dev/null
+++ b/src/arrow/ci/scripts/install_minio.sh
@@ -0,0 +1,62 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -e
+
+declare -A archs
+archs=([amd64]=amd64
+ [arm64v8]=arm64
+ [arm32v7]=arm
+ [s390x]=s390x)
+
+declare -A platforms
+platforms=([linux]=linux
+ [macos]=darwin)
+
+if [ "$#" -ne 4 ]; then # check the argument count first: indexing the maps with an empty $1/$2 is a bad-subscript error
+ echo "Usage: $0 <architecture> <platform> <version> <prefix>"
+ exit 1
+fi
+arch=${archs[$1]} # Docker-style architecture name -> minio download architecture
+platform=${platforms[$2]} # CI platform name -> minio download platform
+version=$3
+prefix=$4
+if [[ -z ${arch} ]]; then
+ echo "Unexpected architecture: ${1}"
+ exit 1
+elif [[ -z ${platform} ]]; then
+ echo "Unexpected platform: ${2}"
+ exit 1
+elif [[ ${version} != "latest" ]]; then
+ echo "Cannot fetch specific versions of minio, only latest is supported."
+ exit 1
+fi
+
+if [[ ! -x ${prefix}/bin/minio ]]; then
+ url="https://dl.min.io/server/minio/release/${platform}-${arch}/minio"
+ echo "Fetching ${url}..."
+ wget -nv -P ${prefix}/bin ${url}
+ chmod +x ${prefix}/bin/minio
+fi
+if [[ ! -x ${prefix}/bin/mc ]]; then
+ url="https://dl.min.io/client/mc/release/${platform}-${arch}/mc"
+ echo "Fetching ${url}..."
+ wget -nv -P ${prefix}/bin ${url}
+ chmod +x ${prefix}/bin/mc
+fi
diff --git a/src/arrow/ci/scripts/install_osx_sdk.sh b/src/arrow/ci/scripts/install_osx_sdk.sh
new file mode 100755
index 000000000..896d084e0
--- /dev/null
+++ b/src/arrow/ci/scripts/install_osx_sdk.sh
@@ -0,0 +1,41 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -ex
+
+if [ ${using_homebrew} != "yes" ]; then
+ export MACOSX_DEPLOYMENT_TARGET="10.9"
+ export CONDA_BUILD_SYSROOT="$(xcode-select -p)/Platforms/MacOSX.platform/Developer/SDKs/MacOSX${MACOSX_DEPLOYMENT_TARGET}.sdk"
+
+ if [[ ! -d ${CONDA_BUILD_SYSROOT} || "$OSX_FORCE_SDK_DOWNLOAD" == "1" ]]; then
+ echo "downloading ${MACOSX_DEPLOYMENT_TARGET} sdk" # shell variables are case-sensitive; the lower-case spelling was never set
+ curl -L -O https://github.com/phracker/MacOSX-SDKs/releases/download/10.13/MacOSX${MACOSX_DEPLOYMENT_TARGET}.sdk.tar.xz
+ tar -xf MacOSX${MACOSX_DEPLOYMENT_TARGET}.sdk.tar.xz -C "$(dirname "$CONDA_BUILD_SYSROOT")"
+ # set minimum sdk version to our target
+ plutil -replace MinimumSDKVersion -string ${MACOSX_DEPLOYMENT_TARGET} $(xcode-select -p)/Platforms/MacOSX.platform/Info.plist
+ plutil -replace DTSDKName -string macosx${MACOSX_DEPLOYMENT_TARGET}internal $(xcode-select -p)/Platforms/MacOSX.platform/Info.plist
+ fi
+
+ if [ -d "${CONDA_BUILD_SYSROOT}" ]; then
+ echo "Found CONDA_BUILD_SYSROOT: ${CONDA_BUILD_SYSROOT}"
+ else
+ echo "Missing CONDA_BUILD_SYSROOT: ${CONDA_BUILD_SYSROOT}"
+ exit 1
+ fi
+fi
diff --git a/src/arrow/ci/scripts/install_pandas.sh b/src/arrow/ci/scripts/install_pandas.sh
new file mode 100755
index 000000000..5aca65f82
--- /dev/null
+++ b/src/arrow/ci/scripts/install_pandas.sh
@@ -0,0 +1,46 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -e
+
+if [ "$#" -lt 1 ]; then
+ echo "Usage: $0 <pandas version> <optional numpy version = latest>"
+ exit 1
+fi
+
+pandas=$1
+numpy=${2:-"latest"}
+
+if [ "${numpy}" = "nightly" ]; then
+ pip install --extra-index-url https://pypi.anaconda.org/scipy-wheels-nightly/simple --pre numpy
+elif [ "${numpy}" = "latest" ]; then
+ pip install numpy
+else
+ pip install numpy==${numpy}
+fi
+
+if [ "${pandas}" = "master" ]; then
+ pip install git+https://github.com/pandas-dev/pandas.git --no-build-isolation
+elif [ "${pandas}" = "nightly" ]; then
+ pip install --extra-index-url https://pypi.anaconda.org/scipy-wheels-nightly/simple --pre pandas
+elif [ "${pandas}" = "latest" ]; then
+ pip install pandas
+else
+ pip install pandas==${pandas}
+fi
diff --git a/src/arrow/ci/scripts/install_python.sh b/src/arrow/ci/scripts/install_python.sh
new file mode 100755
index 000000000..babb2c1e8
--- /dev/null
+++ b/src/arrow/ci/scripts/install_python.sh
@@ -0,0 +1,68 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -eu
+
+declare -A platforms
+platforms=([windows]=Windows
+ [macos]=MacOSX
+ [linux]=Linux)
+
+declare -A versions
+versions=([3.6]=3.6.8
+ [3.7]=3.7.9
+ [3.8]=3.8.10
+ [3.9]=3.9.6
+ [3.10]=3.10.0)
+
+if [ "$#" -ne 2 ]; then
+ echo "Usage: $0 <platform> <version>"
+ exit 1
+elif [[ -z ${platforms[$1]:-} ]]; then # ":-" stops "set -u" from aborting on an unknown key before the message below
+ echo "Unexpected platform: ${1}"
+ exit 1
+fi
+
+platform=${platforms[$1]}
+version=$2
+full_version=${versions[$2]:?Unexpected Python version: $2} # clear error for versions missing from the table above
+
+if [ $platform = "MacOSX" ]; then
+ echo "Downloading Python installer..."
+
+ if [ "$(uname -m)" = "arm64" ] || [ "$version" = "3.10" ]; then
+ fname="python-${full_version}-macos11.pkg"
+ else
+ fname="python-${full_version}-macosx10.9.pkg"
+ fi
+ wget "https://www.python.org/ftp/python/${full_version}/${fname}"
+
+ echo "Installing Python..."
+ installer -pkg $fname -target /
+ rm $fname
+
+ echo "Installing Pip..."
+ python="/Library/Frameworks/Python.framework/Versions/${version}/bin/python${version}"
+ pip="${python} -m pip"
+
+ $python -m ensurepip
+ $pip install -U pip setuptools virtualenv
+else
+ echo "Unsupported platform: $platform"
+fi
diff --git a/src/arrow/ci/scripts/install_spark.sh b/src/arrow/ci/scripts/install_spark.sh
new file mode 100755
index 000000000..936313fd8
--- /dev/null
+++ b/src/arrow/ci/scripts/install_spark.sh
@@ -0,0 +1,31 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -e
+
+if [ "$#" -ne 2 ]; then
+ echo "Usage: $0 <spark version> <target directory>"
+ exit 1
+fi
+
+spark=$1
+target=$2
+
+git clone https://github.com/apache/spark "${target}"
+git -C "${target}" checkout "${spark}"
diff --git a/src/arrow/ci/scripts/install_turbodbc.sh b/src/arrow/ci/scripts/install_turbodbc.sh
new file mode 100755
index 000000000..3e644a3e2
--- /dev/null
+++ b/src/arrow/ci/scripts/install_turbodbc.sh
@@ -0,0 +1,43 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -e
+
+if [ "$#" -ne 2 ]; then
+ echo "Usage: $0 <turbodbc version> <target directory>"
+ exit 1
+fi
+
+turbodbc=$1
+target=$2
+
+git clone --recurse-submodules https://github.com/blue-yonder/turbodbc "${target}"
+if [ "${turbodbc}" = "master" ]; then
+ git -C "${target}" checkout master;
+elif [ "${turbodbc}" = "latest" ]; then
+ git -C "${target}" checkout $(git describe --tags);
+else
+ git -C "${target}" checkout ${turbodbc};
+fi
+
+pushd ${target}
+wget -q https://github.com/pybind/pybind11/archive/v2.6.2.tar.gz
+tar xvf v2.6.2.tar.gz
+mv pybind11-2.6.2 pybind11
+popd
diff --git a/src/arrow/ci/scripts/install_vcpkg.sh b/src/arrow/ci/scripts/install_vcpkg.sh
new file mode 100755
index 000000000..fe99a7fea
--- /dev/null
+++ b/src/arrow/ci/scripts/install_vcpkg.sh
@@ -0,0 +1,39 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -e
+
+if [ "$#" -ne 2 ]; then
+ echo "Usage: $0 <vcpkg version> <target directory>"
+ exit 1
+fi
+
+vcpkg_version=$1
+vcpkg_destination=$2
+vcpkg_patch=$(realpath $(dirname "${0}")/../vcpkg/ports.patch)
+
+git clone --depth 1 --branch ${vcpkg_version} https://github.com/microsoft/vcpkg ${vcpkg_destination}
+
+pushd ${vcpkg_destination}
+
+./bootstrap-vcpkg.sh -useSystemBinaries -disableMetrics
+git apply --ignore-whitespace ${vcpkg_patch}
+echo "Patch successfully applied!"
+
+popd
diff --git a/src/arrow/ci/scripts/integration_arrow.sh b/src/arrow/ci/scripts/integration_arrow.sh
new file mode 100755
index 000000000..30cbb2d63
--- /dev/null
+++ b/src/arrow/ci/scripts/integration_arrow.sh
@@ -0,0 +1,40 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Install archery from the Arrow checkout given as $1 and run the
+# cross-implementation integration tests, including the gold files stored
+# under testing/data/arrow-ipc-stream/integration.
+
+set -ex
+
+arrow_dir=${1}
+gold_dir=${arrow_dir}/testing/data/arrow-ipc-stream/integration
+
+pip install -e "${arrow_dir}/dev/archery"
+
+# Rust can be enabled by exporting ARCHERY_INTEGRATION_WITH_RUST=1
+# NOTE: the last argument must not end with a line continuation, otherwise
+# anything appended to this file would be swallowed as an extra argument.
+archery integration \
+  --run-flight \
+  --with-cpp=1 \
+  --with-csharp=1 \
+  --with-java=1 \
+  --with-js=1 \
+  --with-go=1 \
+  --gold-dirs="${gold_dir}/0.14.1" \
+  --gold-dirs="${gold_dir}/0.17.1" \
+  --gold-dirs="${gold_dir}/1.0.0-bigendian" \
+  --gold-dirs="${gold_dir}/1.0.0-littleendian" \
+  --gold-dirs="${gold_dir}/2.0.0-compression" \
+  --gold-dirs="${gold_dir}/4.0.0-shareddict"
diff --git a/src/arrow/ci/scripts/integration_dask.sh b/src/arrow/ci/scripts/integration_dask.sh
new file mode 100755
index 000000000..e67a02945
--- /dev/null
+++ b/src/arrow/ci/scripts/integration_dask.sh
@@ -0,0 +1,40 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -e
+
+# check that optional pyarrow modules are available
+# because pytest would just skip the dask tests
+python -c "import pyarrow.orc"
+python -c "import pyarrow.parquet"
+
+# check that dask.dataframe is correctly installed
+python -c "import dask.dataframe"
+
+# TODO(kszucs): the following tests are also uses pyarrow
+# pytest -sv --pyargs dask.bytes.tests.test_s3
+# pytest -sv --pyargs dask.bytes.tests.test_hdfs
+# pytest -sv --pyargs dask.bytes.tests.test_local
+
+# skip failing pickle test, see https://github.com/dask/dask/issues/6374
+pytest -v --pyargs dask.dataframe.tests.test_dataframe -k "not test_dataframe_picklable and not test_describe_empty"
+pytest -v --pyargs dask.dataframe.io.tests.test_orc
+# skip failing parquet tests, see https://github.com/dask/dask/issues/6243
+pytest -v --pyargs dask.dataframe.io.tests.test_parquet \
+ -k "not test_to_parquet_pyarrow_w_inconsistent_schema_by_partition_fails_by_default and not test_timeseries_nulls_in_schema"
diff --git a/src/arrow/ci/scripts/integration_hdfs.sh b/src/arrow/ci/scripts/integration_hdfs.sh
new file mode 100755
index 000000000..c95449379
--- /dev/null
+++ b/src/arrow/ci/scripts/integration_hdfs.sh
@@ -0,0 +1,69 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Run the C++ and Python HDFS tests twice each: once locating libhdfs through
+# HADOOP_HOME and once through ARROW_LIBHDFS_DIR.
+
+set -e
+
+source_dir=${1}/cpp
+build_dir=${2}/cpp
+
+export CLASSPATH=$($HADOOP_HOME/bin/hadoop classpath --glob)
+export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
+export LIBHDFS3_CONF=$HADOOP_CONF_DIR/hdfs-site.xml
+export ARROW_LIBHDFS3_DIR=$CONDA_PREFIX/lib
+
+# Remember both locations up front; HADOOP_HOME gets unset and restored below.
+libhdfs_dir=$HADOOP_HOME/lib/native
+hadoop_home=$HADOOP_HOME
+
+# Switch to the HADOOP_HOME based configuration.
+function use_hadoop_home() {
+  unset ARROW_LIBHDFS_DIR
+  export HADOOP_HOME=$hadoop_home
+}
+
+# Switch to the ARROW_LIBHDFS_DIR based configuration.
+function use_libhdfs_dir() {
+  unset HADOOP_HOME
+  export ARROW_LIBHDFS_DIR=$libhdfs_dir
+}
+
+# Run the C++ HDFS test binaries (paths relative to the C++ build directory).
+function run_cpp_hdfs_tests() {
+  debug/arrow-io-hdfs-test
+  debug/arrow-hdfs-test
+}
+
+# Run the pyarrow filesystem and HDFS test modules.
+function run_python_hdfs_tests() {
+  pytest -vs --pyargs pyarrow.tests.test_fs
+  pytest -vs --pyargs pyarrow.tests.test_hdfs
+}
+
+# execute cpp tests
+export ARROW_HDFS_TEST_LIBHDFS_REQUIRE=ON
+pushd ${build_dir}
+
+run_cpp_hdfs_tests
+
+use_libhdfs_dir
+run_cpp_hdfs_tests
+use_hadoop_home
+
+popd
+
+# cannot use --pyargs with custom arguments like --hdfs or --only-hdfs, because
+# pytest ignores them, see https://github.com/pytest-dev/pytest/issues/3517
+export PYARROW_TEST_HDFS=ON
+
+export PYARROW_HDFS_TEST_LIBHDFS_REQUIRE=ON
+
+run_python_hdfs_tests
+
+use_libhdfs_dir
+run_python_hdfs_tests
+use_hadoop_home
diff --git a/src/arrow/ci/scripts/integration_hiveserver2.sh b/src/arrow/ci/scripts/integration_hiveserver2.sh
new file mode 100755
index 000000000..36fba5ca8
--- /dev/null
+++ b/src/arrow/ci/scripts/integration_hiveserver2.sh
@@ -0,0 +1,32 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Wait for the impala service and then run the hiveserver2 C++ test binary
+# from the C++ build directory.
+
+set -e
+
+arrow_dir=${1}
+source_dir=${arrow_dir}/cpp
+build_dir=${2}/cpp
+
+# Helper that blocks until impala:21050 is reachable, then runs the echo.
+wait_for_it=${arrow_dir}/ci/scripts/util_wait_for_it.sh
+${wait_for_it} impala:21050 -t 300 -s -- echo "impala is up"
+
+pushd ${build_dir}
+
+# ninja hiveserver2-test
+debug/hiveserver2-test
+
+popd
diff --git a/src/arrow/ci/scripts/integration_kartothek.sh b/src/arrow/ci/scripts/integration_kartothek.sh
new file mode 100755
index 000000000..379569b9c
--- /dev/null
+++ b/src/arrow/ci/scripts/integration_kartothek.sh
@@ -0,0 +1,31 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -e
+
+# pytest would just skip the pyarrow tests if the optional pyarrow module
+# were missing, so verify it can be imported
+python -c "import pyarrow.parquet"
+
+# verify that kartothek itself is importable
+python -c "import kartothek"
+
+# See ARROW-12314, test_load_dataframes_columns_raises_missing skipped because of changed error message
+deselected_tests="not test_load_dataframes_columns_raises_missing"
+
+pushd /kartothek
+pytest -n0 --ignore tests/cli/test_query.py -k "${deselected_tests}"
diff --git a/src/arrow/ci/scripts/integration_spark.sh b/src/arrow/ci/scripts/integration_spark.sh
new file mode 100755
index 000000000..90ecbce39
--- /dev/null
+++ b/src/arrow/ci/scripts/integration_spark.sh
@@ -0,0 +1,87 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# exit on any error
+set -eu
+
+source_dir=${1}
+spark_dir=${2}
+
+# Test Spark with latest PyArrow only, don't build with latest Arrow Java
+test_pyarrow_only=${3:-false}
+
+# Spark branch to checkout
+spark_version=${SPARK_VERSION:-master}
+
+# Use old behavior that always dropped tiemzones.
+export PYARROW_IGNORE_TIMEZONE=1
+
+if [ "${SPARK_VERSION:0:2}" == "2." ]; then
+ # https://github.com/apache/spark/blob/master/docs/sql-pyspark-pandas-with-arrow.md#compatibility-setting-for-pyarrow--0150-and-spark-23x-24x
+ export ARROW_PRE_0_15_IPC_FORMAT=1
+fi
+
+# Get Arrow Java version
+pushd ${source_dir}/java
+ arrow_version=`mvn org.apache.maven.plugins:maven-help-plugin:2.1.1:evaluate -Dexpression=project.version | sed -n -e '/^\[.*\]/ !{ /^[0-9]/ { p; q } }'`
+popd
+
+export MAVEN_OPTS="-Xss256m -Xmx2g -XX:ReservedCodeCacheSize=1g -Dorg.slf4j.simpleLogger.defaultLogLevel=warn"
+export MAVEN_OPTS="${MAVEN_OPTS} -Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer.Slf4jMavenTransferListener=warn"
+
+pushd ${spark_dir}
+
+ if [ "${test_pyarrow_only}" == "true" ]; then
+ echo "Building Spark ${SPARK_VERSION} to test pyarrow only"
+
+ # Build Spark only
+ build/mvn -B -DskipTests package
+
+ else
+
+ # Update Spark pom with the Arrow version just installed and build Spark, need package phase for pyspark
+ echo "Building Spark ${SPARK_VERSION} with Arrow ${arrow_version}"
+ build/mvn versions:set-property -Dproperty=arrow.version -DnewVersion=${arrow_version}
+
+ # Build Spark with new Arrow Java
+ build/mvn -B -DskipTests package
+
+ spark_scala_tests=(
+ "org.apache.spark.sql.execution.arrow"
+ "org.apache.spark.sql.execution.vectorized.ColumnarBatchSuite"
+ "org.apache.spark.sql.execution.vectorized.ArrowColumnVectorSuite")
+
+ (echo "Testing Spark:"; IFS=$'\n'; echo "${spark_scala_tests[*]}")
+
+ # TODO: should be able to only build spark-sql tests with adding "-pl sql/core" but not currently working
+ build/mvn -B -Dtest=none -DwildcardSuites=$(IFS=,; echo "${spark_scala_tests[*]}") test
+ fi
+
+ # Run pyarrow related Python tests only
+ spark_python_tests=(
+ "pyspark.sql.tests.test_arrow"
+ "pyspark.sql.tests.test_pandas_map"
+ "pyspark.sql.tests.test_pandas_cogrouped_map"
+ "pyspark.sql.tests.test_pandas_grouped_map"
+ "pyspark.sql.tests.test_pandas_udf"
+ "pyspark.sql.tests.test_pandas_udf_scalar"
+ "pyspark.sql.tests.test_pandas_udf_grouped_agg"
+ "pyspark.sql.tests.test_pandas_udf_window")
+
+ (echo "Testing PySpark:"; IFS=$'\n'; echo "${spark_python_tests[*]}")
+ python/run-tests --testnames "$(IFS=,; echo "${spark_python_tests[*]}")" --python-executables python
+popd
diff --git a/src/arrow/ci/scripts/integration_turbodbc.sh b/src/arrow/ci/scripts/integration_turbodbc.sh
new file mode 100755
index 000000000..f0fafd512
--- /dev/null
+++ b/src/arrow/ci/scripts/integration_turbodbc.sh
@@ -0,0 +1,47 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Configure, build and install the project in $1 (presumably the turbodbc
+# checkout) with CMake/Ninja, then run its ctest suite against a local
+# PostgreSQL service.
+#
+# Arguments:
+#   $1  source directory handed to cmake
+#   $2  build root; the build happens in $2/turbodbc
+
+set -ex
+
+source_dir=${1}
+build_dir=${2}/turbodbc
+
+# check that optional pyarrow modules are available
+# because pytest would just skip the pyarrow tests
+python -c "import pyarrow.orc"
+python -c "import pyarrow.parquet"
+
+mkdir -p "${build_dir}"
+pushd "${build_dir}"
+
+cmake -DCMAKE_INSTALL_PREFIX="${ARROW_HOME}" \
+      -DCMAKE_CXX_FLAGS="${CXXFLAGS}" \
+      -DPYTHON_EXECUTABLE="$(which python)" \
+      -GNinja \
+      "${source_dir}"
+ninja install
+
+# TODO(ARROW-5074)
+# Append the previous value only when there is one: a trailing ":" would add
+# an empty entry, which the dynamic loader treats as the current directory.
+export LD_LIBRARY_PATH="${ARROW_HOME}/lib${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
+export ODBCSYSINI="${source_dir}/earthly/odbc/"
+
+service postgresql start
+ctest --output-on-failure
+
+popd
diff --git a/src/arrow/ci/scripts/java_build.sh b/src/arrow/ci/scripts/java_build.sh
new file mode 100755
index 000000000..1ba37606d
--- /dev/null
+++ b/src/arrow/ci/scripts/java_build.sh
@@ -0,0 +1,107 @@
+#!/usr/bin/env bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -ex
+
+arrow_dir=${1}
+source_dir=${1}/java
+cpp_build_dir=${2}/cpp/${ARROW_BUILD_TYPE:-debug}
+cdata_dist_dir=${2}/java/c
+with_docs=${3:-false}
+
+if [[ "$(uname -s)" == "Linux" ]] && [[ "$(uname -m)" == "s390x" ]]; then
+ # Since some files for s390_64 are not available at maven central,
+ # download pre-build files from Artifactory and install them explicitly
+ mvn_install="mvn install:install-file"
+ wget="wget"
+ artifactory_base_url="https://apache.jfrog.io/artifactory/arrow"
+
+ artifactory_dir="protoc-binary"
+ group="com.google.protobuf"
+ artifact="protoc"
+ ver="3.7.1"
+ classifier="linux-s390_64"
+ extension="exe"
+ target=${artifact}-${ver}-${classifier}.${extension}
+ ${wget} ${artifactory_base_url}/${artifactory_dir}/${ver}/${target}
+ ${mvn_install} -DgroupId=${group} -DartifactId=${artifact} -Dversion=${ver} -Dclassifier=${classifier} -Dpackaging=${extension} -Dfile=$(pwd)/${target}
+ # protoc requires libprotoc.so.18 libprotobuf.so.18
+ ${wget} ${artifactory_base_url}/${artifactory_dir}/${ver}/libprotoc.so.18
+ ${wget} ${artifactory_base_url}/${artifactory_dir}/${ver}/libprotobuf.so.18
+ mkdir -p ${ARROW_HOME}/lib
+ cp lib*.so.18 ${ARROW_HOME}/lib
+ export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:${ARROW_HOME}/lib
+
+ artifactory_dir="protoc-gen-grpc-java-binary"
+ group="io.grpc"
+ artifact="protoc-gen-grpc-java"
+ ver="1.30.2"
+ classifier="linux-s390_64"
+ extension="exe"
+ target=${artifact}-${ver}-${classifier}.${extension}
+ ${wget} ${artifactory_base_url}/${artifactory_dir}/${ver}/${target}
+ ${mvn_install} -DgroupId=${group} -DartifactId=${artifact} -Dversion=${ver} -Dclassifier=${classifier} -Dpackaging=${extension} -Dfile=$(pwd)/${target}
+
+ artifactory_dir="netty-binary"
+ group="io.netty"
+ artifact="netty-transport-native-unix-common"
+ ver="4.1.48.Final"
+ classifier="linux-s390_64"
+ extension="jar"
+ target=${artifact}-${ver}-${classifier}.${extension}
+ ${wget} ${artifactory_base_url}/${artifactory_dir}/${ver}/${target}
+ ${mvn_install} -DgroupId=${group} -DartifactId=${artifact} -Dversion=${ver} -Dclassifier=${classifier} -Dpackaging=${extension} -Dfile=$(pwd)/${target}
+ artifact="netty-transport-native-epoll"
+ extension="jar"
+ target=${artifact}-${ver}-${classifier}.${extension}
+ ${wget} ${artifactory_base_url}/${artifactory_dir}/${ver}/${target}
+ ${mvn_install} -DgroupId=${group} -DartifactId=${artifact} -Dversion=${ver} -Dclassifier=${classifier} -Dpackaging=${extension} -Dfile=$(pwd)/${target}
+fi
+
+mvn="mvn -B -DskipTests -Drat.skip=true -Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer.Slf4jMavenTransferListener=warn"
+# Use `2 * ncores` threads
+mvn="${mvn} -T 2C"
+
+pushd ${source_dir}
+
+${mvn} install
+
+if [ "${ARROW_JAVA_SHADE_FLATBUFFERS}" == "ON" ]; then
+ ${mvn} -Pshade-flatbuffers install
+fi
+
+if [ "${ARROW_JAVA_CDATA}" = "ON" ]; then
+ ${mvn} -Darrow.c.jni.dist.dir=${cdata_dist_dir} -Parrow-c-data install
+fi
+
+if [ "${ARROW_GANDIVA_JAVA}" = "ON" ]; then
+ ${mvn} -Darrow.cpp.build.dir=${cpp_build_dir} -Parrow-jni install
+fi
+
+if [ "${ARROW_PLASMA}" = "ON" ]; then
+ pushd ${source_dir}/plasma
+ ${mvn} clean install
+ popd
+fi
+
+if [ "${with_docs}" == "true" ]; then
+ # HTTP pooling is turned of to avoid download issues https://issues.apache.org/jira/browse/ARROW-11633
+ ${mvn} -Dcheckstyle.skip=true -Dhttp.keepAlive=false -Dmaven.wagon.http.pool=false install site
+fi
+
+popd
diff --git a/src/arrow/ci/scripts/java_cdata_build.sh b/src/arrow/ci/scripts/java_cdata_build.sh
new file mode 100755
index 000000000..730c775d4
--- /dev/null
+++ b/src/arrow/ci/scripts/java_cdata_build.sh
@@ -0,0 +1,45 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -ex
+
+arrow_dir=${1}
+build_dir=${2}
+# The directory where the final binaries will be stored when scripts finish
+dist_dir=${3}
+
+echo "=== Clear output directories and leftovers ==="
+# Clear output directories and leftovers
+rm -rf ${build_dir}
+
+echo "=== Building Arrow Java C Data Interface native library ==="
+mkdir -p "${build_dir}"
+pushd "${build_dir}"
+
+cmake \
+ -DCMAKE_BUILD_TYPE=${ARROW_BUILD_TYPE:-release} \
+ -DCMAKE_INSTALL_LIBDIR=lib \
+ -DCMAKE_INSTALL_PREFIX=${build_dir} \
+ ${arrow_dir}/java/c
+cmake --build . --target install --config ${ARROW_BUILD_TYPE:-release}
+popd
+
+echo "=== Copying libraries to the distribution folder ==="
+mkdir -p "${dist_dir}"
+cp -L ${build_dir}/lib/*arrow_cdata_jni.* ${dist_dir}
diff --git a/src/arrow/ci/scripts/java_full_build.sh b/src/arrow/ci/scripts/java_full_build.sh
new file mode 100755
index 000000000..e452b8098
--- /dev/null
+++ b/src/arrow/ci/scripts/java_full_build.sh
@@ -0,0 +1,42 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Run a full Maven build of Arrow Java with the C Data and JNI profiles, then
+# collect the jar and pom files found under ~/.m2/repository/org/apache/arrow
+# in the distribution directory.
+#
+# Arguments:
+#   $1  Arrow source directory
+#   $2  distribution directory; also passed to Maven as the location of the
+#       native libraries
+
+set -e
+
+arrow_root=${1}
+output_dir=${2}
+
+export ARROW_TEST_DATA=${arrow_root}/testing/data
+
+pushd ${arrow_root}/java
+
+# build the entire project
+mvn clean install -Parrow-c-data -Parrow-jni \
+  -Darrow.cpp.build.dir=${output_dir} \
+  -Darrow.c.jni.dist.dir=${output_dir}
+
+# copy all jars and pom files to the distribution folder, echoing each path
+find ~/.m2/repository/org/apache/arrow \
+  \( -name "*.jar" -o -name "*.pom" \) \
+  -exec echo {} \; \
+  -exec cp {} ${output_dir} \;
+
+popd
diff --git a/src/arrow/ci/scripts/java_jni_macos_build.sh b/src/arrow/ci/scripts/java_jni_macos_build.sh
new file mode 100755
index 000000000..218d2d396
--- /dev/null
+++ b/src/arrow/ci/scripts/java_jni_macos_build.sh
@@ -0,0 +1,115 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Build the Arrow C++ libraries with the JNI bindings enabled, copy the three
+# JNI dylibs to the distribution directory and check their shared-library
+# dependencies with archery.
+#
+# Arguments:
+#   $1  Arrow source directory
+#   $2  build directory; removed and recreated on every run, also used as
+#       the CMake install prefix
+#   $3  distribution directory receiving the JNI libraries
+
+set -ex
+
+arrow_dir=${1}
+build_dir=${2}
+# The directory where the final binaries will be stored when scripts finish
+dist_dir=${3}
+
+echo "=== Clear output directories and leftovers ==="
+# Clear output directories and leftovers
+rm -rf ${build_dir}
+
+echo "=== Building Arrow C++ libraries ==="
+# `: ${VAR:=default}` assigns the default only when VAR is unset or empty, so
+# each of these settings can be overridden from the environment.
+: ${ARROW_BUILD_TESTS:=OFF}
+: ${ARROW_DATASET:=ON}
+: ${ARROW_FILESYSTEM:=ON}
+: ${ARROW_GANDIVA_JAVA:=ON}
+: ${ARROW_GANDIVA:=ON}
+: ${ARROW_ORC:=ON}
+: ${ARROW_PARQUET:=ON}
+: ${ARROW_PLASMA_JAVA_CLIENT:=ON}
+: ${ARROW_PLASMA:=ON}
+: ${ARROW_PYTHON:=OFF}
+: ${CMAKE_BUILD_TYPE:=Release}
+: ${CMAKE_UNITY_BUILD:=ON}
+
+export ARROW_TEST_DATA="${arrow_dir}/testing/data"
+export PARQUET_TEST_DATA="${arrow_dir}/cpp/submodules/parquet-testing/data"
+export AWS_EC2_METADATA_DISABLED=TRUE
+
+mkdir -p "${build_dir}"
+pushd "${build_dir}"
+
+# Every *_USE_SHARED option is forced OFF; the overridable settings above are
+# forwarded as-is.
+cmake \
+ -DARROW_BOOST_USE_SHARED=OFF \
+ -DARROW_BROTLI_USE_SHARED=OFF \
+ -DARROW_BUILD_TESTS=${ARROW_BUILD_TESTS} \
+ -DARROW_BUILD_UTILITIES=OFF \
+ -DARROW_BZ2_USE_SHARED=OFF \
+ -DARROW_DATASET=${ARROW_DATASET} \
+ -DARROW_FILESYSTEM=${ARROW_FILESYSTEM} \
+ -DARROW_GANDIVA_JAVA=${ARROW_GANDIVA_JAVA} \
+ -DARROW_GANDIVA_STATIC_LIBSTDCPP=ON \
+ -DARROW_GANDIVA=${ARROW_GANDIVA} \
+ -DARROW_GFLAGS_USE_SHARED=OFF \
+ -DARROW_GRPC_USE_SHARED=OFF \
+ -DARROW_JNI=ON \
+ -DARROW_LZ4_USE_SHARED=OFF \
+ -DARROW_OPENSSL_USE_SHARED=OFF \
+ -DARROW_ORC=${ARROW_ORC} \
+ -DARROW_PARQUET=${ARROW_PARQUET} \
+ -DARROW_PLASMA_JAVA_CLIENT=${ARROW_PLASMA_JAVA_CLIENT} \
+ -DARROW_PLASMA=${ARROW_PLASMA} \
+ -DARROW_PROTOBUF_USE_SHARED=OFF \
+ -DARROW_PYTHON=${ARROW_PYTHON} \
+ -DARROW_SNAPPY_USE_SHARED=OFF \
+ -DARROW_THRIFT_USE_SHARED=OFF \
+ -DARROW_UTF8PROC_USE_SHARED=OFF \
+ -DARROW_ZSTD_USE_SHARED=OFF \
+ -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} \
+ -DCMAKE_INSTALL_LIBDIR=lib \
+ -DCMAKE_INSTALL_PREFIX=${build_dir} \
+ -DCMAKE_UNITY_BUILD=${CMAKE_UNITY_BUILD} \
+ -DPARQUET_BUILD_EXAMPLES=OFF \
+ -DPARQUET_BUILD_EXECUTABLES=OFF \
+ -DPARQUET_REQUIRE_ENCRYPTION=OFF \
+ -Dre2_SOURCE=BUNDLED \
+ ${arrow_dir}/cpp
+cmake --build . --target install
+
+# Tests run only when they were built
+if [ "${ARROW_BUILD_TESTS}" == "ON" ]; then
+ ctest
+fi
+
+popd
+
+echo "=== Copying libraries to the distribution folder ==="
+mkdir -p "${dist_dir}"
+# -L copies the file a symlink points to rather than the link itself
+cp -L ${build_dir}/lib/libgandiva_jni.dylib ${dist_dir}
+cp -L ${build_dir}/lib/libarrow_dataset_jni.dylib ${dist_dir}
+cp -L ${build_dir}/lib/libarrow_orc_jni.dylib ${dist_dir}
+
+echo "=== Checking shared dependencies for libraries ==="
+
+pushd ${dist_dir}
+# Each --allow names a shared library the JNI dylibs are permitted to
+# depend on
+archery linking check-dependencies \
+ --allow libarrow_dataset_jni \
+ --allow libarrow_orc_jni \
+ --allow libc++ \
+ --allow libgandiva_jni \
+ --allow libncurses \
+ --allow libSystem \
+ --allow libz \
+ libgandiva_jni.dylib \
+ libarrow_dataset_jni.dylib \
+ libarrow_orc_jni.dylib
+popd
diff --git a/src/arrow/ci/scripts/java_jni_manylinux_build.sh b/src/arrow/ci/scripts/java_jni_manylinux_build.sh
new file mode 100755
index 000000000..396c8fc19
--- /dev/null
+++ b/src/arrow/ci/scripts/java_jni_manylinux_build.sh
@@ -0,0 +1,137 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Build the Arrow C++ libraries with the JNI bindings enabled, taking
+# dependencies from vcpkg, then copy the three JNI shared objects to the
+# distribution directory and check their shared-library dependencies with
+# archery.
+#
+# Arguments:
+#   $1  Arrow source directory
+#   $2  build directory; removed and recreated on every run, also used as
+#       the CMake install prefix
+#   $3  distribution directory receiving the JNI libraries
+
+set -ex
+
+arrow_dir=${1}
+build_dir=${2}
+# The directory where the final binaries will be stored when scripts finish
+dist_dir=${3}
+
+echo "=== Clear output directories and leftovers ==="
+# Clear output directories and leftovers
+rm -rf ${build_dir}
+
+echo "=== Building Arrow C++ libraries ==="
+# Ask rpm for the installed devtoolset-*-gcc package version and keep only its
+# leading digits; they select the matching C++ include directory below.
+devtoolset_version=$(rpm -qa "devtoolset-*-gcc" --queryformat %{VERSION} | \
+ grep -o "^[0-9]*")
+devtoolset_include_cpp="/opt/rh/devtoolset-${devtoolset_version}/root/usr/include/c++/${devtoolset_version}"
+# `: ${VAR:=default}` assigns the default only when VAR is unset or empty, so
+# each of these settings can be overridden from the environment.
+: ${ARROW_DATASET:=ON}
+: ${ARROW_GANDIVA:=ON}
+: ${ARROW_GANDIVA_JAVA:=ON}
+: ${ARROW_FILESYSTEM:=ON}
+: ${ARROW_JEMALLOC:=ON}
+: ${ARROW_RPATH_ORIGIN:=ON}
+: ${ARROW_ORC:=ON}
+: ${ARROW_PARQUET:=ON}
+: ${ARROW_PLASMA:=ON}
+: ${ARROW_PLASMA_JAVA_CLIENT:=ON}
+: ${ARROW_PYTHON:=OFF}
+: ${ARROW_BUILD_TESTS:=OFF}
+: ${CMAKE_BUILD_TYPE:=Release}
+: ${CMAKE_UNITY_BUILD:=ON}
+: ${VCPKG_FEATURE_FLAGS:=-manifests}
+# The triplet falls back to VCPKG_DEFAULT_TRIPLET and then to a static
+# triplet named after the build type.
+: ${VCPKG_TARGET_TRIPLET:=${VCPKG_DEFAULT_TRIPLET:-x64-linux-static-${CMAKE_BUILD_TYPE}}}
+# Semicolon-separated flag list pointing at the devtoolset C++ headers
+: ${GANDIVA_CXX_FLAGS:=-isystem;${devtoolset_include_cpp};-isystem;${devtoolset_include_cpp}/x86_64-redhat-linux;-isystem;-lpthread}
+
+export ARROW_TEST_DATA="${arrow_dir}/testing/data"
+export PARQUET_TEST_DATA="${arrow_dir}/cpp/submodules/parquet-testing/data"
+export AWS_EC2_METADATA_DISABLED=TRUE
+
+mkdir -p "${build_dir}"
+pushd "${build_dir}"
+
+# Every *_USE_SHARED option is forced OFF; the overridable settings above are
+# forwarded as-is.
+cmake \
+ -DARROW_BOOST_USE_SHARED=OFF \
+ -DARROW_BROTLI_USE_SHARED=OFF \
+ -DARROW_BUILD_SHARED=ON \
+ -DARROW_BUILD_TESTS=${ARROW_BUILD_TESTS} \
+ -DARROW_BUILD_UTILITIES=OFF \
+ -DARROW_BZ2_USE_SHARED=OFF \
+ -DARROW_DATASET=${ARROW_DATASET} \
+ -DARROW_DEPENDENCY_SOURCE="VCPKG" \
+ -DARROW_FILESYSTEM=${ARROW_FILESYSTEM} \
+ -DARROW_GANDIVA_JAVA=${ARROW_GANDIVA_JAVA} \
+ -DARROW_GANDIVA_PC_CXX_FLAGS=${GANDIVA_CXX_FLAGS} \
+ -DARROW_GANDIVA=${ARROW_GANDIVA} \
+ -DARROW_GRPC_USE_SHARED=OFF \
+ -DARROW_JEMALLOC=${ARROW_JEMALLOC} \
+ -DARROW_JNI=ON \
+ -DARROW_LZ4_USE_SHARED=OFF \
+ -DARROW_OPENSSL_USE_SHARED=OFF \
+ -DARROW_ORC=${ARROW_ORC} \
+ -DARROW_PARQUET=${ARROW_PARQUET} \
+ -DARROW_PLASMA_JAVA_CLIENT=${ARROW_PLASMA_JAVA_CLIENT} \
+ -DARROW_PLASMA=${ARROW_PLASMA} \
+ -DARROW_PROTOBUF_USE_SHARED=OFF \
+ -DARROW_PYTHON=${ARROW_PYTHON} \
+ -DARROW_RPATH_ORIGIN=${ARROW_RPATH_ORIGIN} \
+ -DARROW_SNAPPY_USE_SHARED=OFF \
+ -DARROW_THRIFT_USE_SHARED=OFF \
+ -DARROW_UTF8PROC_USE_SHARED=OFF \
+ -DARROW_ZSTD_USE_SHARED=OFF \
+ -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} \
+ -DCMAKE_INSTALL_LIBDIR=lib \
+ -DCMAKE_INSTALL_PREFIX=${build_dir} \
+ -DCMAKE_UNITY_BUILD=${CMAKE_UNITY_BUILD} \
+ -DPARQUET_BUILD_EXAMPLES=OFF \
+ -DPARQUET_BUILD_EXECUTABLES=OFF \
+ -DPARQUET_REQUIRE_ENCRYPTION=OFF \
+ -DPythonInterp_FIND_VERSION_MAJOR=3 \
+ -DPythonInterp_FIND_VERSION=ON \
+ -DVCPKG_MANIFEST_MODE=OFF \
+ -DVCPKG_TARGET_TRIPLET=${VCPKG_TARGET_TRIPLET} \
+ -GNinja \
+ ${arrow_dir}/cpp
+ninja install
+
+# Tests run only when they were built. ARROW_BUILD_TESTS is never empty here
+# because a default was assigned above.
+if [ $ARROW_BUILD_TESTS = "ON" ]; then
+ ctest \
+ --label-regex unittest \
+ --output-on-failure \
+ --parallel $(nproc) \
+ --timeout 300
+fi
+
+popd
+
+echo "=== Copying libraries to the distribution folder ==="
+mkdir -p "${dist_dir}"
+# -L copies the file a symlink points to rather than the link itself
+cp -L ${build_dir}/lib/libgandiva_jni.so ${dist_dir}
+cp -L ${build_dir}/lib/libarrow_dataset_jni.so ${dist_dir}
+cp -L ${build_dir}/lib/libarrow_orc_jni.so ${dist_dir}
+
+echo "=== Checking shared dependencies for libraries ==="
+
+pushd ${dist_dir}
+# Each --allow names a shared library the JNI libraries are permitted to
+# depend on
+archery linking check-dependencies \
+ --allow ld-linux-x86-64 \
+ --allow libc \
+ --allow libdl \
+ --allow libgcc_s \
+ --allow libm \
+ --allow libpthread \
+ --allow librt \
+ --allow libstdc++ \
+ --allow libz \
+ --allow linux-vdso \
+ libgandiva_jni.so \
+ libarrow_dataset_jni.so \
+ libarrow_orc_jni.so
+popd
diff --git a/src/arrow/ci/scripts/java_test.sh b/src/arrow/ci/scripts/java_test.sh
new file mode 100755
index 000000000..0e755bcaf
--- /dev/null
+++ b/src/arrow/ci/scripts/java_test.sh
@@ -0,0 +1,54 @@
+#!/usr/bin/env bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -ex
+
+arrow_dir=${1}
+source_dir=${1}/java
+cpp_build_dir=${2}/cpp/${ARROW_BUILD_TYPE:-debug}
+cdata_dist_dir=${2}/java/c
+
+# For JNI and Plasma tests
+export LD_LIBRARY_PATH=${ARROW_HOME}/lib:${LD_LIBRARY_PATH}
+export PLASMA_STORE=${ARROW_HOME}/bin/plasma-store-server
+
+mvn="mvn -B -Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer.Slf4jMavenTransferListener=warn"
+# Use `2 * ncores` threads
+mvn="${mvn} -T 2C"
+
+pushd ${source_dir}
+
+${mvn} test
+
+if [ "${ARROW_JNI}" = "ON" ]; then
+ ${mvn} test -Parrow-jni -pl adapter/orc,gandiva,dataset -Darrow.cpp.build.dir=${cpp_build_dir}
+fi
+
+if [ "${ARROW_JAVA_CDATA}" = "ON" ]; then
+ ${mvn} test -Parrow-c-data -pl c -Darrow.c.jni.dist.dir=${cdata_dist_dir}
+fi
+
+if [ "${ARROW_PLASMA}" = "ON" ]; then
+ pushd ${source_dir}/plasma
+ java -cp target/test-classes:target/classes \
+ -Djava.library.path=${cpp_build_dir} \
+ org.apache.arrow.plasma.PlasmaClientTest
+ popd
+fi
+
+popd
diff --git a/src/arrow/ci/scripts/js_build.sh b/src/arrow/ci/scripts/js_build.sh
new file mode 100755
index 000000000..10ceb41ee
--- /dev/null
+++ b/src/arrow/ci/scripts/js_build.sh
@@ -0,0 +1,36 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -ex
+
+source_dir=${1}/js
+with_docs=${2:-false}
+
+pushd ${source_dir}
+
+yarn --frozen-lockfile
+# TODO(kszucs): linting should be moved to archery
+yarn lint:ci
+yarn build
+
+if [ "${with_docs}" == "true" ]; then
+ yarn doc
+fi
+
+popd
diff --git a/src/arrow/ci/scripts/js_test.sh b/src/arrow/ci/scripts/js_test.sh
new file mode 100755
index 000000000..345d6cb81
--- /dev/null
+++ b/src/arrow/ci/scripts/js_test.sh
@@ -0,0 +1,29 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -ex
+
+source_dir=${1}/js
+
+pushd ${source_dir}
+
+yarn lint
+yarn test
+
+popd
diff --git a/src/arrow/ci/scripts/matlab_build.sh b/src/arrow/ci/scripts/matlab_build.sh
new file mode 100755
index 000000000..5e9bdd2a9
--- /dev/null
+++ b/src/arrow/ci/scripts/matlab_build.sh
@@ -0,0 +1,29 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Exit on error (-e) and print all commands (-x).
+set -ex
+
+base_dir=${1}
+source_dir=${base_dir}/matlab
+build_dir=${base_dir}/matlab/build
+
+cmake -S ${source_dir} -B ${build_dir} -G Ninja -D MATLAB_BUILD_TESTS=ON
+cmake --build ${build_dir} --config Release
+ctest --test-dir ${build_dir}
diff --git a/src/arrow/ci/scripts/msys2_setup.sh b/src/arrow/ci/scripts/msys2_setup.sh
new file mode 100755
index 000000000..6f6012c87
--- /dev/null
+++ b/src/arrow/ci/scripts/msys2_setup.sh
@@ -0,0 +1,79 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -eux
+
+target=$1
+
+packages=()
+case "${target}" in
+ cpp|c_glib|ruby)
+ packages+=(${MINGW_PACKAGE_PREFIX}-aws-sdk-cpp)
+ packages+=(${MINGW_PACKAGE_PREFIX}-boost)
+ packages+=(${MINGW_PACKAGE_PREFIX}-brotli)
+ packages+=(${MINGW_PACKAGE_PREFIX}-ccache)
+ packages+=(${MINGW_PACKAGE_PREFIX}-clang)
+ packages+=(${MINGW_PACKAGE_PREFIX}-cmake)
+ packages+=(${MINGW_PACKAGE_PREFIX}-gcc)
+ packages+=(${MINGW_PACKAGE_PREFIX}-gflags)
+ packages+=(${MINGW_PACKAGE_PREFIX}-grpc)
+ packages+=(${MINGW_PACKAGE_PREFIX}-gtest)
+ packages+=(${MINGW_PACKAGE_PREFIX}-libutf8proc)
+ packages+=(${MINGW_PACKAGE_PREFIX}-libxml2)
+ packages+=(${MINGW_PACKAGE_PREFIX}-llvm)
+ packages+=(${MINGW_PACKAGE_PREFIX}-lz4)
+ packages+=(${MINGW_PACKAGE_PREFIX}-make)
+ packages+=(${MINGW_PACKAGE_PREFIX}-mlir)
+ packages+=(${MINGW_PACKAGE_PREFIX}-ninja)
+ packages+=(${MINGW_PACKAGE_PREFIX}-polly)
+ packages+=(${MINGW_PACKAGE_PREFIX}-protobuf)
+ packages+=(${MINGW_PACKAGE_PREFIX}-python3-numpy)
+ packages+=(${MINGW_PACKAGE_PREFIX}-rapidjson)
+ packages+=(${MINGW_PACKAGE_PREFIX}-re2)
+ packages+=(${MINGW_PACKAGE_PREFIX}-snappy)
+ packages+=(${MINGW_PACKAGE_PREFIX}-thrift)
+ packages+=(${MINGW_PACKAGE_PREFIX}-zlib)
+ packages+=(${MINGW_PACKAGE_PREFIX}-zstd)
+ ;;
+esac
+
+case "${target}" in
+ c_glib|ruby)
+ packages+=(${MINGW_PACKAGE_PREFIX}-gobject-introspection)
+ packages+=(${MINGW_PACKAGE_PREFIX}-gtk-doc)
+ packages+=(${MINGW_PACKAGE_PREFIX}-meson)
+ ;;
+esac
+
+case "${target}" in
+ cgo)
+ packages+=(${MINGW_PACKAGE_PREFIX}-arrow)
+ packages+=(${MINGW_PACKAGE_PREFIX}-gcc)
+ ;;
+esac
+
+pacman \
+ --needed \
+ --noconfirm \
+ --refresh \
+ --sync \
+ "${packages[@]}"
+
+"$(dirname "$0")/ccache_setup.sh"
+echo "CCACHE_DIR=$(cygpath --absolute --windows ccache)" >> "${GITHUB_ENV}"
diff --git a/src/arrow/ci/scripts/msys2_system_clean.sh b/src/arrow/ci/scripts/msys2_system_clean.sh
new file mode 100755
index 000000000..a356aee66
--- /dev/null
+++ b/src/arrow/ci/scripts/msys2_system_clean.sh
@@ -0,0 +1,33 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -eux
+
+pacman \
+ --cascade \
+ --noconfirm \
+ --nosave \
+ --recursive \
+ --remove \
+ ${MINGW_PACKAGE_PREFIX}-clang-tools-extra \
+ ${MINGW_PACKAGE_PREFIX}-gcc-ada \
+ ${MINGW_PACKAGE_PREFIX}-gcc-fortran \
+ ${MINGW_PACKAGE_PREFIX}-gcc-libgfortran \
+ ${MINGW_PACKAGE_PREFIX}-gcc-objc \
+ ${MINGW_PACKAGE_PREFIX}-libgccjit
diff --git a/src/arrow/ci/scripts/msys2_system_upgrade.sh b/src/arrow/ci/scripts/msys2_system_upgrade.sh
new file mode 100755
index 000000000..646428fbb
--- /dev/null
+++ b/src/arrow/ci/scripts/msys2_system_upgrade.sh
@@ -0,0 +1,28 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -eux
+
+pacman \
+ --noconfirm \
+ --refresh \
+ --refresh \
+ --sync \
+ --sysupgrade \
+ --sysupgrade
diff --git a/src/arrow/ci/scripts/python_benchmark.sh b/src/arrow/ci/scripts/python_benchmark.sh
new file mode 100755
index 000000000..3a35298dc
--- /dev/null
+++ b/src/arrow/ci/scripts/python_benchmark.sh
@@ -0,0 +1,40 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Check the ASV benchmarking setup.
+# Unfortunately this won't ensure that all benchmarks succeed
+# (see https://github.com/airspeed-velocity/asv/issues/449)
+source deactivate
+conda create -y -q -n pyarrow_asv python=$PYTHON_VERSION
+conda activate pyarrow_asv
+pip install -q git+https://github.com/pitrou/asv.git@customize_commands
+
+export PYARROW_WITH_PARQUET=1
+export PYARROW_WITH_PLASMA=1
+export PYARROW_WITH_ORC=0
+export PYARROW_WITH_GANDIVA=0
+
+pushd $ARROW_PYTHON_DIR
+# Workaround for https://github.com/airspeed-velocity/asv/issues/631
+git fetch --depth=100 origin master:master
+# Generate machine information (mandatory)
+asv machine --yes
+# Run benchmarks on the changeset being tested
+asv run --no-pull --show-stderr --quick HEAD^!
+popd # $ARROW_PYTHON_DIR
diff --git a/src/arrow/ci/scripts/python_build.sh b/src/arrow/ci/scripts/python_build.sh
new file mode 100755
index 000000000..ec6d723b2
--- /dev/null
+++ b/src/arrow/ci/scripts/python_build.sh
@@ -0,0 +1,54 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -ex
+
+source_dir=${1}/python
+build_dir=${2}/python
+
+if [ ! -z "${CONDA_PREFIX}" ]; then
+ echo -e "===\n=== Conda environment for build\n==="
+ conda list
+fi
+
+export PYARROW_CMAKE_GENERATOR=${CMAKE_GENERATOR:-Ninja}
+export PYARROW_BUILD_TYPE=${CMAKE_BUILD_TYPE:-debug}
+export PYARROW_WITH_S3=${ARROW_S3:-OFF}
+export PYARROW_WITH_ORC=${ARROW_ORC:-OFF}
+export PYARROW_WITH_CUDA=${ARROW_CUDA:-OFF}
+export PYARROW_WITH_HDFS=${ARROW_HDFS:-OFF}
+export PYARROW_WITH_FLIGHT=${ARROW_FLIGHT:-OFF}
+export PYARROW_WITH_PLASMA=${ARROW_PLASMA:-OFF}
+export PYARROW_WITH_GANDIVA=${ARROW_GANDIVA:-OFF}
+export PYARROW_WITH_PARQUET=${ARROW_PARQUET:-OFF}
+export PYARROW_WITH_DATASET=${ARROW_DATASET:-OFF}
+
+export LD_LIBRARY_PATH=${ARROW_HOME}/lib:${LD_LIBRARY_PATH}
+
+pushd ${source_dir}
+
+relative_build_dir=$(realpath --relative-to=. $build_dir)
+
+# not nice, but prevents mutating the mounted source directory for docker
+${PYTHON:-python} \
+ setup.py build --build-base $build_dir \
+ install --single-version-externally-managed \
+ --record $relative_build_dir/record.txt
+
+popd
diff --git a/src/arrow/ci/scripts/python_sdist_build.sh b/src/arrow/ci/scripts/python_sdist_build.sh
new file mode 100755
index 000000000..f9e9359b6
--- /dev/null
+++ b/src/arrow/ci/scripts/python_sdist_build.sh
@@ -0,0 +1,27 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -eux
+
+source_dir=${1}/python
+
+pushd ${source_dir}
+export SETUPTOOLS_SCM_PRETEND_VERSION=${PYARROW_VERSION:-}
+${PYTHON:-python} setup.py sdist
+popd
diff --git a/src/arrow/ci/scripts/python_sdist_test.sh b/src/arrow/ci/scripts/python_sdist_test.sh
new file mode 100755
index 000000000..3dd7d7ddd
--- /dev/null
+++ b/src/arrow/ci/scripts/python_sdist_test.sh
@@ -0,0 +1,58 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -eux
+
+arrow_dir=${1}
+
+export ARROW_SOURCE_DIR=${arrow_dir}
+export ARROW_TEST_DATA=${arrow_dir}/testing/data
+export PARQUET_TEST_DATA=${arrow_dir}/cpp/submodules/parquet-testing/data
+
+export PYARROW_CMAKE_GENERATOR=${CMAKE_GENERATOR:-Ninja}
+export PYARROW_BUILD_TYPE=${CMAKE_BUILD_TYPE:-debug}
+export PYARROW_WITH_S3=${ARROW_S3:-OFF}
+export PYARROW_WITH_ORC=${ARROW_ORC:-OFF}
+export PYARROW_WITH_CUDA=${ARROW_CUDA:-OFF}
+export PYARROW_WITH_HDFS=${ARROW_HDFS:-OFF}
+export PYARROW_WITH_FLIGHT=${ARROW_FLIGHT:-OFF}
+export PYARROW_WITH_PLASMA=${ARROW_PLASMA:-OFF}
+export PYARROW_WITH_GANDIVA=${ARROW_GANDIVA:-OFF}
+export PYARROW_WITH_PARQUET=${ARROW_PARQUET:-OFF}
+export PYARROW_WITH_DATASET=${ARROW_DATASET:-OFF}
+
+# TODO: Users should not require ARROW_HOME and pkg-config to find Arrow C++.
+# Related: ARROW-9171
+# unset ARROW_HOME
+# apt purge -y pkg-config
+
+# ARROW-12619
+if command -v git &> /dev/null; then
+ echo "Git exists, remove it from PATH before executing this script."
+ exit 1
+fi
+
+if [ -n "${PYARROW_VERSION:-}" ]; then
+ sdist="${arrow_dir}/python/dist/pyarrow-${PYARROW_VERSION}.tar.gz"
+else
+ sdist=$(ls ${arrow_dir}/python/dist/pyarrow-*.tar.gz | sort -r | head -n1)
+fi
+${PYTHON:-python} -m pip install ${sdist}
+
+pytest -r s ${PYTEST_ARGS:-} --pyargs pyarrow
diff --git a/src/arrow/ci/scripts/python_test.sh b/src/arrow/ci/scripts/python_test.sh
new file mode 100755
index 000000000..6e05af89a
--- /dev/null
+++ b/src/arrow/ci/scripts/python_test.sh
@@ -0,0 +1,32 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -ex
+
+arrow_dir=${1}
+
+export ARROW_SOURCE_DIR=${arrow_dir}
+export ARROW_TEST_DATA=${arrow_dir}/testing/data
+export PARQUET_TEST_DATA=${arrow_dir}/cpp/submodules/parquet-testing/data
+export LD_LIBRARY_PATH=${ARROW_HOME}/lib:${LD_LIBRARY_PATH}
+
+# Enable some checks inside Python itself
+export PYTHONDEVMODE=1
+
+pytest -r s -v ${PYTEST_ARGS} --pyargs pyarrow
diff --git a/src/arrow/ci/scripts/python_wheel_macos_build.sh b/src/arrow/ci/scripts/python_wheel_macos_build.sh
new file mode 100755
index 000000000..1a52a2ad5
--- /dev/null
+++ b/src/arrow/ci/scripts/python_wheel_macos_build.sh
@@ -0,0 +1,166 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -ex
+
+arch=${1}
+source_dir=${2}
+build_dir=${3}
+
+echo "=== (${PYTHON_VERSION}) Clear output directories and leftovers ==="
+# Clear output directories and leftovers
+rm -rf ${build_dir}/install
+rm -rf ${source_dir}/python/dist
+rm -rf ${source_dir}/python/build
+rm -rf ${source_dir}/python/repaired_wheels
+rm -rf ${source_dir}/python/pyarrow/*.so
+rm -rf ${source_dir}/python/pyarrow/*.so.*
+
+echo "=== (${PYTHON_VERSION}) Set SDK, C++ and Wheel flags ==="
+export MACOSX_DEPLOYMENT_TARGET=${MACOSX_DEPLOYMENT_TARGET:-10.9}  # apply the default first: it is interpolated into the platform tag below
+export _PYTHON_HOST_PLATFORM="macosx-${MACOSX_DEPLOYMENT_TARGET}-${arch}"
+export SDKROOT=${SDKROOT:-$(xcrun --sdk macosx --show-sdk-path)}
+
+if [ $arch = "arm64" ]; then
+ export CMAKE_OSX_ARCHITECTURES="arm64"
+elif [ $arch = "x86_64" ]; then
+ export CMAKE_OSX_ARCHITECTURES="x86_64"
+elif [ $arch = "universal2" ]; then
+ export CMAKE_OSX_ARCHITECTURES="x86_64;arm64"
+else
+ echo "Unexpected architecture: $arch"
+ exit 1
+fi
+
+echo "=== (${PYTHON_VERSION}) Install Python build dependencies ==="
+export PIP_SITE_PACKAGES=$(python -c 'import site; print(site.getsitepackages()[0])')
+export PIP_TARGET_PLATFORM="macosx_${MACOSX_DEPLOYMENT_TARGET//./_}_${arch}"
+
+pip install \
+ --upgrade \
+ --only-binary=:all: \
+ --target $PIP_SITE_PACKAGES \
+ --platform $PIP_TARGET_PLATFORM \
+ -r ${source_dir}/python/requirements-wheel-build.txt
+pip install "delocate>=0.9"
+
+echo "=== (${PYTHON_VERSION}) Building Arrow C++ libraries ==="
+: ${ARROW_DATASET:=ON}
+: ${ARROW_FLIGHT:=ON}
+: ${ARROW_GANDIVA:=OFF}
+: ${ARROW_HDFS:=ON}
+: ${ARROW_JEMALLOC:=ON}
+: ${ARROW_MIMALLOC:=ON}
+: ${ARROW_ORC:=ON}
+: ${ARROW_PARQUET:=ON}
+: ${ARROW_PLASMA:=ON}
+: ${ARROW_S3:=ON}
+: ${ARROW_SIMD_LEVEL:="SSE4_2"}
+: ${ARROW_TENSORFLOW:=ON}
+: ${ARROW_WITH_BROTLI:=ON}
+: ${ARROW_WITH_BZ2:=ON}
+: ${ARROW_WITH_LZ4:=ON}
+: ${ARROW_WITH_SNAPPY:=ON}
+: ${ARROW_WITH_ZLIB:=ON}
+: ${ARROW_WITH_ZSTD:=ON}
+: ${CMAKE_BUILD_TYPE:=release}
+: ${CMAKE_GENERATOR:=Ninja}
+: ${CMAKE_UNITY_BUILD:=ON}
+: ${VCPKG_FEATURE_FLAGS:=-manifests}
+: ${VCPKG_TARGET_TRIPLET:=${VCPKG_DEFAULT_TRIPLET:-x64-osx-static-${CMAKE_BUILD_TYPE}}}
+
+mkdir -p ${build_dir}/build
+pushd ${build_dir}/build
+
+cmake \
+ -DARROW_BUILD_SHARED=ON \
+ -DCMAKE_APPLE_SILICON_PROCESSOR=arm64 \
+ -DCMAKE_OSX_ARCHITECTURES=${CMAKE_OSX_ARCHITECTURES} \
+ -DARROW_BUILD_STATIC=OFF \
+ -DARROW_BUILD_TESTS=OFF \
+ -DARROW_DATASET=${ARROW_DATASET} \
+ -DARROW_DEPENDENCY_SOURCE="VCPKG" \
+ -DARROW_DEPENDENCY_USE_SHARED=OFF \
+ -DARROW_FLIGHT=${ARROW_FLIGHT} \
+ -DARROW_GANDIVA=${ARROW_GANDIVA} \
+ -DARROW_HDFS=${ARROW_HDFS} \
+ -DARROW_JEMALLOC=${ARROW_JEMALLOC} \
+ -DARROW_MIMALLOC=${ARROW_MIMALLOC} \
+ -DARROW_ORC=${ARROW_ORC} \
+ -DARROW_PACKAGE_KIND="python-wheel-macos" \
+ -DARROW_PARQUET=${ARROW_PARQUET} \
+ -DARROW_PLASMA=${ARROW_PLASMA} \
+ -DARROW_PYTHON=ON \
+ -DARROW_RPATH_ORIGIN=ON \
+ -DARROW_S3=${ARROW_S3} \
+ -DARROW_SIMD_LEVEL=${ARROW_SIMD_LEVEL} \
+ -DARROW_TENSORFLOW=${ARROW_TENSORFLOW} \
+ -DARROW_USE_CCACHE=ON \
+ -DARROW_WITH_BROTLI=${ARROW_WITH_BROTLI} \
+ -DARROW_WITH_BZ2=${ARROW_WITH_BZ2} \
+ -DARROW_WITH_LZ4=${ARROW_WITH_LZ4} \
+ -DARROW_WITH_SNAPPY=${ARROW_WITH_SNAPPY} \
+ -DARROW_WITH_ZLIB=${ARROW_WITH_ZLIB} \
+ -DARROW_WITH_ZSTD=${ARROW_WITH_ZSTD} \
+ -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} \
+ -DCMAKE_INSTALL_LIBDIR=lib \
+ -DCMAKE_INSTALL_PREFIX=${build_dir}/install \
+ -DCMAKE_UNITY_BUILD=${CMAKE_UNITY_BUILD} \
+ -DOPENSSL_USE_STATIC_LIBS=ON \
+ -DVCPKG_MANIFEST_MODE=OFF \
+ -DVCPKG_TARGET_TRIPLET=${VCPKG_TARGET_TRIPLET} \
+ -G ${CMAKE_GENERATOR} \
+ ${source_dir}/cpp
+cmake --build . --target install
+popd
+
+echo "=== (${PYTHON_VERSION}) Building wheel ==="
+export PYARROW_BUILD_TYPE=${CMAKE_BUILD_TYPE}
+export PYARROW_BUNDLE_ARROW_CPP=1
+export PYARROW_CMAKE_GENERATOR=${CMAKE_GENERATOR}
+export PYARROW_INSTALL_TESTS=1
+export PYARROW_WITH_DATASET=${ARROW_DATASET}
+export PYARROW_WITH_FLIGHT=${ARROW_FLIGHT}
+export PYARROW_WITH_GANDIVA=${ARROW_GANDIVA}
+export PYARROW_WITH_HDFS=${ARROW_HDFS}
+export PYARROW_WITH_ORC=${ARROW_ORC}
+export PYARROW_WITH_PARQUET=${ARROW_PARQUET}
+export PYARROW_WITH_PLASMA=${ARROW_PLASMA}
+export PYARROW_WITH_S3=${ARROW_S3}
+export PYARROW_CMAKE_OPTIONS="-DCMAKE_OSX_ARCHITECTURES=${CMAKE_OSX_ARCHITECTURES} -DARROW_SIMD_LEVEL=${ARROW_SIMD_LEVEL}"
+# PyArrow build configuration
+export PKG_CONFIG_PATH=/usr/lib/pkgconfig:${build_dir}/install/lib/pkgconfig
+# Set PyArrow version explicitly
+export SETUPTOOLS_SCM_PRETEND_VERSION=${PYARROW_VERSION}
+
+pushd ${source_dir}/python
+python setup.py bdist_wheel
+popd
+
+echo "=== (${PYTHON_VERSION}) Show dynamic libraries the wheel depend on ==="
+deps=$(delocate-listdeps ${source_dir}/python/dist/*.whl)
+
+if echo "${deps}" | grep -v "^pyarrow/lib\(arrow\|gandiva\|parquet\|plasma\)"; then  # quoted: keep one dependency per line so every entry is checked
+ echo "There are non-bundled shared library dependencies."
+ exit 1
+fi
+
+# Move the verified wheels
+mkdir -p ${source_dir}/python/repaired_wheels
+mv ${source_dir}/python/dist/*.whl ${source_dir}/python/repaired_wheels/
diff --git a/src/arrow/ci/scripts/python_wheel_manylinux_build.sh b/src/arrow/ci/scripts/python_wheel_manylinux_build.sh
new file mode 100755
index 000000000..434605cf2
--- /dev/null
+++ b/src/arrow/ci/scripts/python_wheel_manylinux_build.sh
@@ -0,0 +1,149 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -ex
+
+function check_arrow_visibility {
+ # Fail the build if /tmp/arrow-dist/lib/libarrow.so exports ' T ' symbols that are not Arrow's own.
+ nm --demangle --dynamic /tmp/arrow-dist/lib/libarrow.so > nm_arrow.log
+ # Filter out every symbol line mentioning 'arrow' and see if anything remains.
+ # '_init' and '_fini' symbols may or may not be present; we don't care.
+ # (the trailing `cat -` hides grep's non-zero exit status when no match is found, which would otherwise abort the script under `set -e`)
+ grep ' T ' nm_arrow.log | grep -v -E '(arrow|\b_init\b|\b_fini\b)' | cat - > visible_symbols.log
+ # The check passes only when the filtered symbol list exists and is empty.
+ if [[ -f visible_symbols.log && `cat visible_symbols.log | wc -l` -eq 0 ]]; then
+ return 0
+ else
+ echo "== Unexpected symbols exported by libarrow.so =="
+ cat visible_symbols.log
+ echo "================================================"
+ # `exit` rather than `return`: terminate the whole script, not just this function.
+ exit 1
+ fi
+}
+
+echo "=== (${PYTHON_VERSION}) Clear output directories and leftovers ==="
+# Clear output directories and leftovers
+rm -rf /tmp/arrow-build
+rm -rf /arrow/python/dist
+rm -rf /arrow/python/build
+rm -rf /arrow/python/repaired_wheels
+rm -rf /arrow/python/pyarrow/*.so
+rm -rf /arrow/python/pyarrow/*.so.*
+
+echo "=== (${PYTHON_VERSION}) Building Arrow C++ libraries ==="
+: ${ARROW_DATASET:=ON}
+: ${ARROW_FLIGHT:=ON}
+: ${ARROW_GANDIVA:=OFF}
+: ${ARROW_HDFS:=ON}
+: ${ARROW_JEMALLOC:=ON}
+: ${ARROW_MIMALLOC:=ON}
+: ${ARROW_ORC:=ON}
+: ${ARROW_PARQUET:=ON}
+: ${ARROW_PLASMA:=ON}
+: ${ARROW_S3:=ON}
+: ${ARROW_TENSORFLOW:=ON}
+: ${ARROW_WITH_BROTLI:=ON}
+: ${ARROW_WITH_BZ2:=ON}
+: ${ARROW_WITH_LZ4:=ON}
+: ${ARROW_WITH_SNAPPY:=ON}
+: ${ARROW_WITH_ZLIB:=ON}
+: ${ARROW_WITH_ZSTD:=ON}
+: ${CMAKE_BUILD_TYPE:=release}
+: ${CMAKE_UNITY_BUILD:=ON}
+: ${CMAKE_GENERATOR:=Ninja}
+: ${VCPKG_FEATURE_FLAGS:=-manifests}
+: ${VCPKG_TARGET_TRIPLET:=${VCPKG_DEFAULT_TRIPLET:-x64-linux-static-${CMAKE_BUILD_TYPE}}}
+
+if [[ "$(uname -m)" == arm* ]] || [[ "$(uname -m)" == aarch* ]]; then
+ # Build jemalloc --with-lg-page=16 in order to make the wheel work on both
+ # 4k and 64k page arm64 systems. For more context see
+ # https://github.com/apache/arrow/issues/10929
+ export ARROW_EXTRA_CMAKE_FLAGS="-DARROW_JEMALLOC_LG_PAGE=16"
+fi
+
+mkdir /tmp/arrow-build
+pushd /tmp/arrow-build
+cmake \
+ -DARROW_BROTLI_USE_SHARED=OFF \
+ -DARROW_BUILD_SHARED=ON \
+ -DARROW_BUILD_STATIC=OFF \
+ -DARROW_BUILD_TESTS=OFF \
+ -DARROW_DATASET=${ARROW_DATASET} \
+ -DARROW_DEPENDENCY_SOURCE="VCPKG" \
+ -DARROW_DEPENDENCY_USE_SHARED=OFF \
+ -DARROW_FLIGHT=${ARROW_FLIGHT} \
+ -DARROW_GANDIVA=${ARROW_GANDIVA} \
+ -DARROW_HDFS=${ARROW_HDFS} \
+ -DARROW_JEMALLOC=${ARROW_JEMALLOC} \
+ -DARROW_MIMALLOC=${ARROW_MIMALLOC} \
+ -DARROW_ORC=${ARROW_ORC} \
+ -DARROW_PACKAGE_KIND="python-wheel-manylinux${MANYLINUX_VERSION}" \
+ -DARROW_PARQUET=${ARROW_PARQUET} \
+ -DARROW_PLASMA=${ARROW_PLASMA} \
+ -DARROW_PYTHON=ON \
+ -DARROW_RPATH_ORIGIN=ON \
+ -DARROW_S3=${ARROW_S3} \
+ -DARROW_TENSORFLOW=${ARROW_TENSORFLOW} \
+ -DARROW_USE_CCACHE=ON \
+ -DARROW_UTF8PROC_USE_SHARED=OFF \
+ -DARROW_WITH_BROTLI=${ARROW_WITH_BROTLI} \
+ -DARROW_WITH_BZ2=${ARROW_WITH_BZ2} \
+ -DARROW_WITH_LZ4=${ARROW_WITH_LZ4} \
+ -DARROW_WITH_SNAPPY=${ARROW_WITH_SNAPPY} \
+ -DARROW_WITH_ZLIB=${ARROW_WITH_ZLIB} \
+ -DARROW_WITH_ZSTD=${ARROW_WITH_ZSTD} \
+ -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} \
+ -DCMAKE_INSTALL_LIBDIR=lib \
+ -DCMAKE_INSTALL_PREFIX=/tmp/arrow-dist \
+ -DCMAKE_UNITY_BUILD=${CMAKE_UNITY_BUILD} \
+ -DOPENSSL_USE_STATIC_LIBS=ON \
+ -DVCPKG_MANIFEST_MODE=OFF \
+ -DVCPKG_TARGET_TRIPLET=${VCPKG_TARGET_TRIPLET} \
+ ${ARROW_EXTRA_CMAKE_FLAGS} \
+ -G ${CMAKE_GENERATOR} \
+ /arrow/cpp
+cmake --build . --target install
+popd
+
+# Check that we don't expose any unwanted symbols
+check_arrow_visibility
+
+echo "=== (${PYTHON_VERSION}) Building wheel ==="
+export PYARROW_BUILD_TYPE=${CMAKE_BUILD_TYPE}
+export PYARROW_BUNDLE_ARROW_CPP=1
+export PYARROW_CMAKE_GENERATOR=${CMAKE_GENERATOR}
+export PYARROW_INSTALL_TESTS=1
+export PYARROW_WITH_DATASET=${ARROW_DATASET}
+export PYARROW_WITH_FLIGHT=${ARROW_FLIGHT}
+export PYARROW_WITH_GANDIVA=${ARROW_GANDIVA}
+export PYARROW_WITH_HDFS=${ARROW_HDFS}
+export PYARROW_WITH_ORC=${ARROW_ORC}
+export PYARROW_WITH_PARQUET=${ARROW_PARQUET}
+export PYARROW_WITH_PLASMA=${ARROW_PLASMA}
+export PYARROW_WITH_S3=${ARROW_S3}
+# PyArrow build configuration
+export PKG_CONFIG_PATH=/usr/lib/pkgconfig:/tmp/arrow-dist/lib/pkgconfig
+
+pushd /arrow/python
+python setup.py bdist_wheel
+
+echo "=== (${PYTHON_VERSION}) Tag the wheel with manylinux${MANYLINUX_VERSION} ==="
+auditwheel repair -L . dist/pyarrow-*.whl -w repaired_wheels
+popd
diff --git a/src/arrow/ci/scripts/python_wheel_unix_test.sh b/src/arrow/ci/scripts/python_wheel_unix_test.sh
new file mode 100755
index 000000000..ec703abfc
--- /dev/null
+++ b/src/arrow/ci/scripts/python_wheel_unix_test.sh
@@ -0,0 +1,84 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Abort on the first failing command (also inside pipelines) and trace every
+# command so the CI log shows exactly what ran.
+set -e
+set -x
+set -o pipefail
+
+# Exactly one argument is accepted: the Arrow source directory.
+if [ "$#" -ne 1 ]; then
+  echo "Usage: $0 <arrow-src-dir>"
+  exit 1
+fi
+
+source_dir="${1}"
+
+# Toggles that default to ON unless the caller already set them.
+: "${ARROW_FLIGHT:=ON}"
+: "${ARROW_S3:=ON}"
+: "${CHECK_IMPORTS:=ON}"
+: "${CHECK_UNITTESTS:=ON}"
+: "${INSTALL_PYARROW:=ON}"
+
+# Exported PYARROW_TEST_* switches; FLIGHT and S3 follow the toggles above.
+export PYARROW_TEST_CYTHON=OFF
+export PYARROW_TEST_DATASET=ON
+export PYARROW_TEST_FLIGHT=${ARROW_FLIGHT}
+export PYARROW_TEST_GANDIVA=OFF
+export PYARROW_TEST_HDFS=ON
+export PYARROW_TEST_ORC=ON
+export PYARROW_TEST_PANDAS=ON
+export PYARROW_TEST_PARQUET=ON
+export PYARROW_TEST_PLASMA=ON
+export PYARROW_TEST_S3=${ARROW_S3}
+export PYARROW_TEST_TENSORFLOW=ON
+
+# Test data locations inside the source checkout.
+export ARROW_TEST_DATA="${source_dir}/testing/data"
+export PARQUET_TEST_DATA="${source_dir}/submodules/parquet-testing/data"
+
+if [ "${INSTALL_PYARROW}" == "ON" ]; then
+  # Install the built wheels. The directory part is quoted so a checkout path
+  # containing spaces stays a single argument; the *.whl glob is left
+  # unquoted so that it still expands.
+  pip install --force-reinstall "${source_dir}"/python/repaired_wheels/*.whl
+fi
+
+# Smoke test: every module listed below must be importable from the
+# installed package.
+if [[ "${CHECK_IMPORTS}" = "ON" ]]; then
+  python -c "
+import pyarrow
+import pyarrow._hdfs
+import pyarrow.csv
+import pyarrow.dataset
+import pyarrow.fs
+import pyarrow.json
+import pyarrow.orc
+import pyarrow.parquet
+import pyarrow.plasma
+"
+  # Optional components are only imported when their test switch is ON.
+  if [[ "${PYARROW_TEST_S3}" = "ON" ]]; then
+    python -c "import pyarrow._s3fs"
+  fi
+  if [[ "${PYARROW_TEST_FLIGHT}" = "ON" ]]; then
+    python -c "import pyarrow.flight"
+  fi
+fi
+
+if [ "${CHECK_UNITTESTS}" == "ON" ]; then
+  # Install testing dependencies. The path is quoted so that a source
+  # directory containing spaces is still passed to pip as one argument.
+  pip install -U -r "${source_dir}/python/requirements-wheel-test.txt"
+  # Execute unittest, test dependencies must be installed
+  python -c 'import pyarrow; pyarrow.create_library_symlinks()'
+  python -m pytest -r s --pyargs pyarrow
+fi
diff --git a/src/arrow/ci/scripts/python_wheel_windows_build.bat b/src/arrow/ci/scripts/python_wheel_windows_build.bat
new file mode 100644
index 000000000..23be7f512
--- /dev/null
+++ b/src/arrow/ci/scripts/python_wheel_windows_build.bat
@@ -0,0 +1,109 @@
+@rem Licensed to the Apache Software Foundation (ASF) under one
+@rem or more contributor license agreements. See the NOTICE file
+@rem distributed with this work for additional information
+@rem regarding copyright ownership. The ASF licenses this file
+@rem to you under the Apache License, Version 2.0 (the
+@rem "License"); you may not use this file except in compliance
+@rem with the License. You may obtain a copy of the License at
+@rem
+@rem http://www.apache.org/licenses/LICENSE-2.0
+@rem
+@rem Unless required by applicable law or agreed to in writing,
+@rem software distributed under the License is distributed on an
+@rem "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+@rem KIND, either express or implied. See the License for the
+@rem specific language governing permissions and limitations
+@rem under the License.
+
+@rem Echo each command as it runs so the CI log shows what was executed.
+@echo on
+
+echo "Building windows wheel..."
+
+@rem Run the Visual Studio 2017 vcvars64.bat script (presumably sets up the
+@rem 64-bit MSVC toolchain environment for the commands below).
+call "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Auxiliary\Build\vcvars64.bat"
+
+echo "=== (%PYTHON_VERSION%) Clear output directories and leftovers ==="
+@rem del /s /q deletes matching files recursively without prompting.
+del /s /q C:\arrow-build
+del /s /q C:\arrow-dist
+del /s /q C:\arrow\python\dist
+del /s /q C:\arrow\python\build
+del /s /q C:\arrow\python\pyarrow\*.so
+del /s /q C:\arrow\python\pyarrow\*.so.*
+
+echo "=== (%PYTHON_VERSION%) Building Arrow C++ libraries ==="
+@rem Feature switches for this build. They are forwarded to cmake below and
+@rem several are reused for the PYARROW_WITH_* settings later in the script.
+set ARROW_DATASET=ON
+set ARROW_FLIGHT=ON
+set ARROW_GANDIVA=OFF
+set ARROW_HDFS=ON
+set ARROW_ORC=OFF
+set ARROW_PARQUET=ON
+set ARROW_MIMALLOC=ON
+set ARROW_S3=ON
+set ARROW_TENSORFLOW=ON
+set ARROW_WITH_BROTLI=ON
+set ARROW_WITH_BZ2=ON
+set ARROW_WITH_LZ4=ON
+set ARROW_WITH_SNAPPY=ON
+set ARROW_WITH_ZLIB=ON
+set ARROW_WITH_ZSTD=ON
+set CMAKE_UNITY_BUILD=ON
+set CMAKE_GENERATOR=Visual Studio 15 2017 Win64
+set VCPKG_FEATURE_FLAGS=-manifests
+
+@rem Configure in C:\arrow-build and install into C:\arrow-dist.
+@rem CMAKE_BUILD_TYPE is not set in this script; it must come from the
+@rem caller's environment.
+mkdir C:\arrow-build
+pushd C:\arrow-build
+cmake ^
+ -DARROW_BUILD_SHARED=ON ^
+ -DARROW_BUILD_STATIC=OFF ^
+ -DARROW_BUILD_TESTS=OFF ^
+ -DARROW_CXXFLAGS="/MP" ^
+ -DARROW_DATASET=%ARROW_DATASET% ^
+ -DARROW_DEPENDENCY_SOURCE=VCPKG ^
+ -DARROW_DEPENDENCY_USE_SHARED=OFF ^
+ -DARROW_FLIGHT=%ARROW_FLIGHT% ^
+ -DARROW_GANDIVA=%ARROW_GANDIVA% ^
+ -DARROW_HDFS=%ARROW_HDFS% ^
+ -DARROW_MIMALLOC=%ARROW_MIMALLOC% ^
+ -DARROW_ORC=%ARROW_ORC% ^
+ -DARROW_PACKAGE_KIND="python-wheel-windows" ^
+ -DARROW_PARQUET=%ARROW_PARQUET% ^
+ -DARROW_PYTHON=ON ^
+ -DARROW_S3=%ARROW_S3% ^
+ -DARROW_TENSORFLOW=%ARROW_TENSORFLOW% ^
+ -DARROW_WITH_BROTLI=%ARROW_WITH_BROTLI% ^
+ -DARROW_WITH_BZ2=%ARROW_WITH_BZ2% ^
+ -DARROW_WITH_LZ4=%ARROW_WITH_LZ4% ^
+ -DARROW_WITH_SNAPPY=%ARROW_WITH_SNAPPY% ^
+ -DARROW_WITH_ZLIB=%ARROW_WITH_ZLIB% ^
+ -DARROW_WITH_ZSTD=%ARROW_WITH_ZSTD% ^
+ -DCMAKE_BUILD_TYPE=%CMAKE_BUILD_TYPE% ^
+ -DCMAKE_CXX_COMPILER=clcache ^
+ -DCMAKE_INSTALL_PREFIX=C:\arrow-dist ^
+ -DCMAKE_UNITY_BUILD=%CMAKE_UNITY_BUILD% ^
+ -DMSVC_LINK_VERBOSE=ON ^
+ -DVCPKG_MANIFEST_MODE=OFF ^
+ -DVCPKG_TARGET_TRIPLET=x64-windows-static-md-%CMAKE_BUILD_TYPE% ^
+ -G "%CMAKE_GENERATOR%" ^
+ C:\arrow\cpp || exit /B
+@rem Build and install; "|| exit /B" stops the script when cmake fails.
+cmake --build . --config %CMAKE_BUILD_TYPE% --target install || exit /B
+popd
+
+echo "=== (%PYTHON_VERSION%) Building wheel ==="
+@rem PyArrow build settings; the PYARROW_WITH_* values mirror the ARROW_*
+@rem switches used for the C++ build above.
+set PYARROW_BUILD_TYPE=%CMAKE_BUILD_TYPE%
+set PYARROW_BUNDLE_ARROW_CPP=ON
+set PYARROW_BUNDLE_BOOST=OFF
+set PYARROW_CMAKE_GENERATOR=%CMAKE_GENERATOR%
+set PYARROW_INSTALL_TESTS=ON
+set PYARROW_WITH_DATASET=%ARROW_DATASET%
+set PYARROW_WITH_FLIGHT=%ARROW_FLIGHT%
+set PYARROW_WITH_GANDIVA=%ARROW_GANDIVA%
+set PYARROW_WITH_HDFS=%ARROW_HDFS%
+set PYARROW_WITH_ORC=%ARROW_ORC%
+set PYARROW_WITH_PARQUET=%ARROW_PARQUET%
+set PYARROW_WITH_S3=%ARROW_S3%
+set ARROW_HOME=C:\arrow-dist
+
+pushd C:\arrow\python
+@REM bundle the msvc runtime; stop if the copy fails so that a wheel without
+@REM msvcp140.dll is never produced silently
+cp "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Redist\MSVC\14.16.27012\x64\Microsoft.VC141.CRT\msvcp140.dll" pyarrow\ || exit /B
+python setup.py bdist_wheel || exit /B
+popd
diff --git a/src/arrow/ci/scripts/python_wheel_windows_test.bat b/src/arrow/ci/scripts/python_wheel_windows_test.bat
new file mode 100755
index 000000000..1ea0f8acd
--- /dev/null
+++ b/src/arrow/ci/scripts/python_wheel_windows_test.bat
@@ -0,0 +1,55 @@
+@rem Licensed to the Apache Software Foundation (ASF) under one
+@rem or more contributor license agreements. See the NOTICE file
+@rem distributed with this work for additional information
+@rem regarding copyright ownership. The ASF licenses this file
+@rem to you under the Apache License, Version 2.0 (the
+@rem "License"); you may not use this file except in compliance
+@rem with the License. You may obtain a copy of the License at
+@rem
+@rem http://www.apache.org/licenses/LICENSE-2.0
+@rem
+@rem Unless required by applicable law or agreed to in writing,
+@rem software distributed under the License is distributed on an
+@rem "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+@rem KIND, either express or implied. See the License for the
+@rem specific language governing permissions and limitations
+@rem under the License.
+
+@rem Echo each command as it runs so the CI log shows what was executed.
+@echo on
+
+@rem PYARROW_TEST_* switches set for the test run at the end of this script.
+set PYARROW_TEST_CYTHON=OFF
+set PYARROW_TEST_DATASET=ON
+set PYARROW_TEST_FLIGHT=ON
+set PYARROW_TEST_GANDIVA=OFF
+set PYARROW_TEST_HDFS=ON
+set PYARROW_TEST_ORC=OFF
+set PYARROW_TEST_PARQUET=ON
+set PYARROW_TEST_PLASMA=OFF
+set PYARROW_TEST_S3=OFF
+set PYARROW_TEST_TENSORFLOW=ON
+
+@REM Enable again once https://github.com/scipy/oldest-supported-numpy/pull/27 gets merged
+@REM set PYARROW_TEST_PANDAS=ON
+
+@rem Test data locations inside the C:\arrow checkout.
+set ARROW_TEST_DATA=C:\arrow\testing\data
+set PARQUET_TEST_DATA=C:\arrow\submodules\parquet-testing\data
+
+@REM Install testing dependencies
+pip install -r C:\arrow\python\requirements-wheel-test.txt || exit /B
+
+@REM Install the built wheels (--no-index: only from the local dist directory)
+python -m pip install --no-index --find-links=C:\arrow\python\dist\ pyarrow || exit /B
+
+@REM Test that the modules are importable; stop at the first failing import
+@REM instead of carrying on as if the wheel were usable
+python -c "import pyarrow" || exit /B
+python -c "import pyarrow._hdfs" || exit /B
+python -c "import pyarrow._s3fs" || exit /B
+python -c "import pyarrow.csv" || exit /B
+python -c "import pyarrow.dataset" || exit /B
+python -c "import pyarrow.flight" || exit /B
+python -c "import pyarrow.fs" || exit /B
+python -c "import pyarrow.json" || exit /B
+python -c "import pyarrow.parquet" || exit /B
+
+@REM Execute unittest
+pytest -r s --pyargs pyarrow || exit /B
diff --git a/src/arrow/ci/scripts/r_build.sh b/src/arrow/ci/scripts/r_build.sh
new file mode 100755
index 000000000..2a2b9d7d1
--- /dev/null
+++ b/src/arrow/ci/scripts/r_build.sh
@@ -0,0 +1,33 @@
+#!/usr/bin/env bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -e -x
+
+# R executable to drive; falls back to plain "R" when unset or empty.
+R_BIN=${R_BIN:-R}
+# First argument: Arrow source root (the R package lives in its r/ subdir).
+pkg_dir=${1}/r
+# Second argument (optional): "true" also builds the pkgdown site.
+build_docs=${2:-false}
+
+pushd ${pkg_dir}
+
+# INSTALL_ARGS is deliberately unquoted so it can carry several options.
+${R_BIN} CMD INSTALL ${INSTALL_ARGS} .
+
+if [[ "${build_docs}" = "true" ]]; then
+  ${R_BIN} -e "pkgdown::build_site(install = FALSE)"
+fi
+
+popd
\ No newline at end of file
diff --git a/src/arrow/ci/scripts/r_deps.sh b/src/arrow/ci/scripts/r_deps.sh
new file mode 100755
index 000000000..ad1b5ecc1
--- /dev/null
+++ b/src/arrow/ci/scripts/r_deps.sh
@@ -0,0 +1,45 @@
+#!/usr/bin/env bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -ex
+
+# R executable to drive; defaults to plain "R".
+: ${R_BIN:=R}
+
+# First argument: Arrow source root (the R package lives in its r/ subdir).
+source_dir=${1}/r
+
+pushd "${source_dir}"
+
+# R_BIN is quoted so the test stays well-formed whatever the variable holds.
+if [ "${R_BIN}" = "RDsan" ]; then
+  # To prevent the build from timing out, let's prune some optional deps (and their possible version requirements)
+  ${R_BIN} -e 'd <- read.dcf("DESCRIPTION")
+ to_prune <- c("duckdb", "DBI", "dbplyr", "decor", "knitr", "rmarkdown", "pkgload", "reticulate")
+ pattern <- paste0("\\n?", to_prune, " (\\\\(.*\\\\))?,?", collapse = "|")
+ d[,"Suggests"] <- gsub(pattern, "", d[,"Suggests"])
+ write.dcf(d, "DESCRIPTION")'
+fi
+
+# Install R package dependencies
+# install.packages() emits warnings if packages fail to install,
+# but we want to error/fail the build.
+# options(warn=2) turns warnings into errors
+${R_BIN} -e "options(warn=2); install.packages('remotes'); remotes::install_cran(c('glue', 'rcmdcheck', 'sys')); remotes::install_deps(INSTALL_opts = '"${INSTALL_ARGS}"')"
+# Separately install the optional/test dependencies but don't error on them,
+# they're not available everywhere and that's ok
+${R_BIN} -e "remotes::install_deps(dependencies = TRUE, INSTALL_opts = '"${INSTALL_ARGS}"')"
+
+popd
diff --git a/src/arrow/ci/scripts/r_docker_configure.sh b/src/arrow/ci/scripts/r_docker_configure.sh
new file mode 100755
index 000000000..20c987085
--- /dev/null
+++ b/src/arrow/ci/scripts/r_docker_configure.sh
@@ -0,0 +1,81 @@
+#!/usr/bin/env bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -ex
+
+# R executable to configure; defaults to plain "R".
+: ${R_BIN:=R}
+
+# The Dockerfile should have put this file here
+if [ -f "/arrow/ci/etc/rprofile" ]; then
+  # Ensure parallel R package installation, set CRAN repo mirror,
+  # and use pre-built binaries where possible
+  cat /arrow/ci/etc/rprofile >> $(${R_BIN} RHOME)/etc/Rprofile.site
+fi
+
+# Ensure parallel compilation of C/C++ code.
+# The target Renviron.site is located through ${R_BIN}, like every other
+# RHOME lookup in this script, so the setting lands in the R installation
+# that is actually being configured rather than whichever "R" is on PATH.
+echo "MAKEFLAGS=-j$(${R_BIN} -s -e 'cat(parallel::detectCores())')" >> $(${R_BIN} RHOME)/etc/Renviron.site
+
+# Special hacking to try to reproduce quirks on fedora-clang-devel on CRAN
+# which uses a bespoke clang compiled to use libc++
+# https://www.stats.ox.ac.uk/pub/bdr/Rconfig/r-devel-linux-x86_64-fedora-clang
+if [ "$RHUB_PLATFORM" = "linux-x86_64-fedora-clang" ]; then
+ dnf install -y libcxx-devel
+ # Append -stdlib=libc++ to Makeconf lines containing "CXX =", "CXX1 =" or
+ # "CXX11 ="; sed keeps a .bak copy, which is removed right after.
+ sed -i.bak -E -e 's/(CXX1?1? =.*)/\1 -stdlib=libc++/g' $(${R_BIN} RHOME)/etc/Makeconf
+ rm -rf $(${R_BIN} RHOME)/etc/Makeconf.bak
+
+ # Replace whatever follows "CXXFLAGS = " in Makeconf with a fixed flag set.
+ sed -i.bak -E -e 's/(CXXFLAGS = )(.*)/\1 -g -O3 -Wall -pedantic -frtti -fPIC/' $(${R_BIN} RHOME)/etc/Makeconf
+ rm -rf $(${R_BIN} RHOME)/etc/Makeconf.bak
+fi
+
+# CentOS with a non-default toolchain: when DEVTOOLSET_VERSION is a positive
+# number, install the matching devtoolset from the SCL repository.
+if [[ "$DEVTOOLSET_VERSION" -gt 0 ]]; then
+  # Use dnf when `which` can find it, otherwise fall back to yum.
+  if [ -n "$(which dnf)" ]; then
+    installer=dnf
+  else
+    installer=yum
+  fi
+  ${installer} install -y centos-release-scl
+  ${installer} install -y "devtoolset-$DEVTOOLSET_VERSION"
+fi
+
+# S3 support needs the curl and OpenSSL development packages; install them
+# with whichever package manager this image provides.
+if [[ "$ARROW_S3" == "ON" || "$ARROW_R_DEV" == "TRUE" ]]; then
+  if [ -n "$(which dnf)" ]; then
+    dnf install -y libcurl-devel openssl-devel
+  elif [ -n "$(which yum)" ]; then
+    yum install -y libcurl-devel openssl-devel
+  elif [ -n "$(which zypper)" ]; then
+    zypper install -y libcurl-devel libopenssl-devel
+  else
+    apt-get update
+    apt-get install -y libcurl4-openssl-dev libssl-dev
+  fi
+
+  # The helper scripts are expected to have been copied in by the Dockerfile;
+  # each one only runs when the tool checked next to it (wget / pip) is found.
+  if [[ -f "/arrow/ci/scripts/install_minio.sh" && -n "$(which wget)" ]]; then
+    /arrow/ci/scripts/install_minio.sh amd64 linux latest /usr/local
+  fi
+
+  if [[ -f "/arrow/ci/scripts/install_gcs_testbench.sh" && -n "$(which pip)" ]]; then
+    /arrow/ci/scripts/install_gcs_testbench.sh amd64 default
+  fi
+fi
+
+# Create R's html doc directory when it is missing, to avoid an html help
+# install failure; see https://github.com/r-lib/devtools/issues/2084#issuecomment-530912786
+Rscript -e 'x <- file.path(R.home("doc"), "html"); if (!file.exists(x)) {dir.create(x, recursive=TRUE); file.copy(system.file("html/R.css", package="stats"), x)}'
diff --git a/src/arrow/ci/scripts/r_pkgdown_check.sh b/src/arrow/ci/scripts/r_pkgdown_check.sh
new file mode 100755
index 000000000..327480a6b
--- /dev/null
+++ b/src/arrow/ci/scripts/r_pkgdown_check.sh
@@ -0,0 +1,41 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Use this script to prevent errors in the pkgdown site being rendered due to missing YAML entries
+#
+# It compares the topics documented under ./r/man with the entries listed in
+# the reference section of ./r/_pkgdown.yml and fails when a non-internal
+# topic is not listed. Run it from the repository root.
+
+# all .Rd files in the repo, reduced to bare topic names. The sed patterns are
+# anchored and the dots escaped so only the "./r/man/" prefix and the ".Rd"
+# suffix are removed, never a lookalike substring inside a name.
+all_rd_files=$(find ./r/man -maxdepth 1 -name "*.Rd" | sed -e 's|^\./r/man/||' -e 's|\.Rd$||' | sort)
+
+# .Rd files to exclude from search (i.e. are internal). -F matches the Rd
+# macro as a fixed string; the --include glob is quoted so the shell cannot
+# expand it.
+exclusions=$(grep -rlF '\keyword{internal}' ./r/man --include='*.Rd' | sed -e 's|^\./r/man/||' -e 's|\.Rd$||' | sort)
+
+# .Rd files to check against pkgdown.yml: names that occur exactly once in
+# (exclusions + all files), i.e. everything that is not excluded
+rd_files=$(echo ${exclusions} ${all_rd_files} | tr ' ' '\n' | sort | uniq -u)
+
+# pkgdown sections
+pkgdown_sections=$(awk '/^[^ ]/{ f=/reference:/; next } f{ if (sub(/:$/,"")) pkg=$2; else print pkg, $2 }' ./r/_pkgdown.yml | grep -v "title:" | sort)
+
+# get things that appear in man files that don't appear in pkgdown sections
+# (the sections are listed twice so that uniq -u can never report them)
+pkgdown_missing=$(echo ${pkgdown_sections} ${pkgdown_sections} ${rd_files} | tr ' ' '\n' | sort | uniq -u)
+
+# if any sections are missing raise an error
+if [ -n "${pkgdown_missing}" ]; then
+  echo "Error! $pkgdown_missing missing from ./r/_pkgdown.yml"
+  exit 1
+fi
diff --git a/src/arrow/ci/scripts/r_revdepcheck.sh b/src/arrow/ci/scripts/r_revdepcheck.sh
new file mode 100755
index 000000000..b0a2bab64
--- /dev/null
+++ b/src/arrow/ci/scripts/r_revdepcheck.sh
@@ -0,0 +1,88 @@
+#!/usr/bin/env bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -ex
+
+# R executable to drive; defaults to plain "R".
+: ${R_BIN:=R}
+
+# First argument: Arrow source root (the R package lives in its r/ subdir).
+source_dir=${1}/r
+
+# apt-get is used instead of apt: apt warns that its command-line interface
+# is not stable and is not meant for use in scripts.
+
+# cpp building dependencies
+apt-get install -y cmake
+
+# system dependencies needed for arrow's reverse dependencies
+apt-get install -y libxml2-dev \
+ libfontconfig1-dev \
+ libcairo2-dev \
+ libglpk-dev \
+ libmysqlclient-dev \
+ unixodbc-dev \
+ libpq-dev \
+ coinor-libsymphony-dev \
+ coinor-libcgl-dev \
+ coinor-symphony \
+ libzmq3-dev \
+ libudunits2-dev \
+ libgdal-dev \
+ libgeos-dev \
+ libproj-dev
+
+pushd ${source_dir}
+
+# Dump the environment into the log for debugging.
+printenv
+
+# By default, aws-sdk tries to contact a non-existing local ip host
+# to retrieve metadata. Disable this so that S3FileSystem tests run faster.
+export AWS_EC2_METADATA_DISABLED=TRUE
+
+# Set crancache dir so we can cache it
+export CRANCACHE_DIR="/arrow/.crancache"
+
+# R program piped into ${R_BIN} below. It is a double-quoted shell string, so
+# $ARROW_R_DEV is substituted by the shell before R ever sees the text.
+SCRIPT="
+ # We can't use RSPM binaries because we need source packages
+ options('repos' = c(CRAN = 'https://packagemanager.rstudio.com/all/latest'))
+ remotes::install_github('r-lib/revdepcheck')
+
+ # zoo is needed by RcisTarget tests, though only listed in enhances so not installed by revdepcheck
+ install.packages('zoo')
+
+ # actually run revdepcheck
+ revdepcheck::revdep_check(
+ quiet = FALSE,
+ timeout = as.difftime(120, units = 'mins'),
+ num_workers = 1,
+ env = c(
+ ARROW_R_DEV = '$ARROW_R_DEV',
+ LIBARROW_DOWNLOAD = TRUE,
+ LIBARROW_MINIMAL = FALSE,
+ revdepcheck::revdep_env_vars()
+ ))
+ revdepcheck::revdep_report(all = TRUE)
+
+ # Go through the summary and fail if any of the statuses include -
+ summary <- revdepcheck::revdep_summary()
+ failed <- lapply(summary, function(check) grepl('-', check[['status']]))
+
+ if (any(unlist(failed))) {
+ quit(status = 1)
+ }
+ "
+
+# Run the program; R is the last command of the pipeline, so a non-zero
+# quit() status becomes the pipeline status and "set -e" stops the script.
+echo "$SCRIPT" | ${R_BIN} --no-save
+
+popd
diff --git a/src/arrow/ci/scripts/r_sanitize.sh b/src/arrow/ci/scripts/r_sanitize.sh
new file mode 100755
index 000000000..6c79c0851
--- /dev/null
+++ b/src/arrow/ci/scripts/r_sanitize.sh
@@ -0,0 +1,46 @@
+#!/usr/bin/env bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -ex
+
+# R executable to drive; defaults to "RDsan".
+: ${R_BIN:=RDsan}
+
+# First argument: Arrow source root (the R package lives in its r/ subdir).
+source_dir=${1}/r
+
+pushd ${source_dir}
+
+# Unity builds were causing the CI job to run out of memory
+export CMAKE_UNITY_BUILD=OFF
+# Make installation verbose so that the CI job doesn't time out due to silence
+export ARROW_R_DEV=TRUE
+${R_BIN} CMD INSTALL ${INSTALL_ARGS} .
+# But unset the env var so that it doesn't cause us to run extra dev tests
+unset ARROW_R_DEV
+
+export UBSAN_OPTIONS="print_stacktrace=1,suppressions=/arrow/r/tools/ubsan.supp"
+
+# The unit tests run from tests/ and log to tests/testthat.out.
+pushd tests
+${R_BIN} < testthat.R > testthat.out 2>&1 || { cat testthat.out; exit 1; }
+popd
+# The examples run from the package root, so the same log is addressed by its
+# path. Appending to a bare "testthat.out" here would start a second file and
+# leave the unit-test output out of the "runtime error" scan below.
+${R_BIN} -e 'library(arrow); testthat::test_examples(".")' >> tests/testthat.out 2>&1 || { cat tests/testthat.out; exit 1; }
+
+# Print the combined log and fail if any "runtime error" line was recorded,
+# even though both R runs exited successfully.
+cat tests/testthat.out
+if grep -q "runtime error" tests/testthat.out; then
+  exit 1
+fi
+popd
diff --git a/src/arrow/ci/scripts/r_test.sh b/src/arrow/ci/scripts/r_test.sh
new file mode 100755
index 000000000..62e423cf5
--- /dev/null
+++ b/src/arrow/ci/scripts/r_test.sh
@@ -0,0 +1,106 @@
+#!/usr/bin/env bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -ex
+
+# R executable to drive; defaults to plain "R".
+: ${R_BIN:=R}
+
+# First argument: Arrow source root (the R package lives in its r/ subdir).
+source_dir=${1}/r
+
+pushd ${source_dir}
+
+# Dump the environment into the log for debugging.
+printenv
+
+if [ "$ARROW_USE_PKG_CONFIG" != "false" ]; then
+  # Put Arrow's lib directory first on the loader path. The ":" separator is
+  # only emitted when LD_LIBRARY_PATH already has a value: a trailing ":"
+  # would add an empty entry, which the dynamic loader treats as the current
+  # directory.
+  export LD_LIBRARY_PATH=${ARROW_HOME}/lib${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}
+  export R_LD_LIBRARY_PATH=${LD_LIBRARY_PATH}
+fi
+# Start the _R_CHECK_COMPILATION_FLAGS_KNOWN_ list from ARROW_R_CXXFLAGS
+# (which may be empty).
+export _R_CHECK_COMPILATION_FLAGS_KNOWN_=${ARROW_R_CXXFLAGS}
+if [ "$ARROW_R_DEV" = "TRUE" ]; then
+ # These are sometimes used in the Arrow C++ build and are not a problem
+ export _R_CHECK_COMPILATION_FLAGS_KNOWN_="${_R_CHECK_COMPILATION_FLAGS_KNOWN_} -Wno-attributes -msse4.2 -Wno-noexcept-type -Wno-subobject-linkage"
+ if [ "$NOT_CRAN" = "" ]; then
+ # Note that NOT_CRAN=true means (among other things) that optional dependencies are built
+ # You can set NOT_CRAN=false for the CRAN build and then
+ # ARROW_R_DEV=TRUE just adds verbosity
+ export NOT_CRAN=true
+ fi
+fi
+
+export _R_CHECK_CRAN_INCOMING_REMOTE_=FALSE
+if [ "$TEST_R_WITHOUT_LIBARROW" != "TRUE" ]; then
+ # --run-donttest was used in R < 4.0, this is used now
+ export _R_CHECK_DONTTEST_EXAMPLES_=TRUE
+fi
+# Not all Suggested packages are needed for checking, so in case they aren't installed don't fail
+export _R_CHECK_FORCE_SUGGESTS_=FALSE
+export _R_CHECK_LIMIT_CORES_=FALSE
+export _R_CHECK_TESTS_NLINES_=0
+
+# By default, aws-sdk tries to contact a non-existing local ip host
+# to retrieve metadata. Disable this so that S3FileSystem tests run faster.
+export AWS_EC2_METADATA_DISABLED=TRUE
+
+# Hack so that texlive2020 doesn't pollute the home dir
+export TEXMFCONFIG=/tmp/texmf-config
+export TEXMFVAR=/tmp/texmf-var
+
+# Only taken when DEVTOOLSET_VERSION is a positive number.
+if [[ "$DEVTOOLSET_VERSION" -gt 0 ]]; then
+ # enable the devtoolset version to use it
+ source /opt/rh/devtoolset-$DEVTOOLSET_VERSION/enable
+fi
+
+# Make sure we aren't writing to the home dir (CRAN _hates_ this but there is no official check)
+# Snapshot the home directory listing now; it is compared with a second
+# snapshot taken after the check has run.
+BEFORE=$(ls -alh ~/)
+
+# R program piped into ${R_BIN} below. It picks the rcmdcheck arguments from
+# NOT_CRAN, _R_CHECK_DONTTEST_EXAMPLES_ and INSTALL_ARGS, starts a minio
+# server when the binary is on PATH (non-CRAN mode only), and runs rcmdcheck
+# with error_on = 'warning'.
+SCRIPT="as_cran <- !identical(tolower(Sys.getenv('NOT_CRAN')), 'true')
+ if (as_cran) {
+ args <- '--as-cran'
+ build_args <- character()
+ } else {
+ args <- c('--no-manual', '--ignore-vignettes')
+ build_args <- '--no-build-vignettes'
+
+ if (nzchar(Sys.which('minio'))) {
+ message('Running minio for S3 tests (if build supports them)')
+ minio_dir <- tempfile()
+ dir.create(minio_dir)
+ pid <- sys::exec_background('minio', c('server', minio_dir))
+ on.exit(tools::pskill(pid))
+ }
+ }
+
+ run_donttest <- identical(tolower(Sys.getenv('_R_CHECK_DONTTEST_EXAMPLES_', 'true')), 'true')
+ if (run_donttest) {
+ args <- c(args, '--run-donttest')
+ }
+
+ install_args <- Sys.getenv('INSTALL_ARGS')
+ if (nzchar(install_args)) {
+ args <- c(args, paste0('--install-args=\"', install_args, '\"'))
+ }
+
+ rcmdcheck::rcmdcheck(build_args = build_args, args = args, error_on = 'warning', check_dir = 'check', timeout = 3600)"
+echo "$SCRIPT" | ${R_BIN} --no-save
+
+# Unless NOT_CRAN is "true", fail when the home directory listing changed
+# while the check ran; ~/.cmake/packages is listed first as a diagnostic.
+AFTER=$(ls -alh ~/)
+if [ "$NOT_CRAN" != "true" ] && [ "$BEFORE" != "$AFTER" ]; then
+ ls -alh ~/.cmake/packages
+ exit 1
+fi
+popd
diff --git a/src/arrow/ci/scripts/r_valgrind.sh b/src/arrow/ci/scripts/r_valgrind.sh
new file mode 100755
index 000000000..772d8f44e
--- /dev/null
+++ b/src/arrow/ci/scripts/r_valgrind.sh
@@ -0,0 +1,46 @@
+#!/usr/bin/env bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -ex
+
+# R executable to drive; defaults to "RDvalgrind".
+: ${R_BIN:=RDvalgrind}
+
+# First argument: Arrow source root (the R package lives in its r/ subdir).
+source_dir=${1}/r
+
+export CMAKE_BUILD_TYPE=RelWithDebInfo
+
+${R_BIN} CMD INSTALL ${INSTALL_ARGS} ${source_dir}
+pushd ${source_dir}/tests
+
+# to generate suppression files run:
+# ${R_BIN} --vanilla -d "valgrind --tool=memcheck --leak-check=full --track-origins=yes --gen-suppressions=all --log-file=memcheck.log" -f testtthat.supp
+# Run the test suite under valgrind; the output is shown and also saved to
+# testthat.out for the check below.
+${R_BIN} --vanilla -d "valgrind --tool=memcheck --leak-check=full --track-origins=yes --suppressions=/${1}/ci/etc/valgrind-cran.supp" -f testthat.R |& tee testthat.out
+
+# valgrind --error-exitcode=1 should return an erroring exit code that we can catch,
+# but R eats that and returns 0, so we need to look at the output and make sure that
+# we have 0 errors instead.
+# The log must contain exactly one "ERROR SUMMARY: 0 errors" line to pass.
+if [ $(grep -c "ERROR SUMMARY: 0 errors" testthat.out) != 1 ]; then
+ cat testthat.out
+ echo "Found Valgrind errors"
+ exit 1
+fi
+
+# We might also consider using the greps that LightGBM uses:
+# https://github.com/microsoft/LightGBM/blob/fa6d356555f9ef888acf5f5e259dca958ca24f6d/.ci/test_r_package_valgrind.sh#L20-L85
+
+popd
diff --git a/src/arrow/ci/scripts/r_windows_build.sh b/src/arrow/ci/scripts/r_windows_build.sh
new file mode 100755
index 000000000..5bb58c760
--- /dev/null
+++ b/src/arrow/ci/scripts/r_windows_build.sh
@@ -0,0 +1,116 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -ex
+
+# ARROW_HOME defaults to the current directory.
+: ${ARROW_HOME:=$(pwd)}
+# Make sure it is absolute and exported
+export ARROW_HOME="$(cd "${ARROW_HOME}" && pwd)"
+
+if [ "$RTOOLS_VERSION" = "35" ]; then
+  # Use rtools-backports if building with rtools35.
+  # --fail makes curl exit non-zero on an HTTP error, so "set -e" stops the
+  # script instead of continuing with an error page saved as pacman.conf.
+  curl --fail https://raw.githubusercontent.com/r-windows/rtools-backports/master/pacman.conf > /etc/pacman.conf
+  pacman --noconfirm -Syy
+  # lib-4.9.3 is for libraries compiled with gcc 4.9 (Rtools 3.5)
+  RWINLIB_LIB_DIR="lib-4.9.3"
+  # This is the default (will build for each arch) but we can set up CI to
+  # do these in parallel
+  : ${MINGW_ARCH:="mingw32 mingw64"}
+else
+  # Uncomment the next four commands if you're testing a new rtools dependency that hasn't yet sync'd to CRAN
+  # curl https://raw.githubusercontent.com/r-windows/rtools-packages/master/pacman.conf > /etc/pacman.conf
+  # curl -OSsl "http://repo.msys2.org/msys/x86_64/msys2-keyring-r21.b39fb11-1-any.pkg.tar.xz"
+  # pacman -U --noconfirm msys2-keyring-r21.b39fb11-1-any.pkg.tar.xz && rm msys2-keyring-r21.b39fb11-1-any.pkg.tar.xz
+  # pacman --noconfirm -Scc
+
+  pacman --noconfirm -Syy
+  RWINLIB_LIB_DIR="lib"
+  : ${MINGW_ARCH:="mingw32 mingw64 ucrt64"}
+fi
+
+# Exported so that child processes (makepkg-mingw below) can see it.
+export MINGW_ARCH
+
+# Build the packages from the PKGBUILD copied into the current directory.
+cp $ARROW_HOME/ci/scripts/PKGBUILD .
+printenv
+makepkg-mingw --noconfirm --noprogressbar --skippgpcheck --nocheck --syncdeps --cleanbuild
+
+# Take the second space-separated field of the line(s) containing "Version"
+# in the R package DESCRIPTION and use it to name the artifact directory.
+VERSION=$(grep Version $ARROW_HOME/r/DESCRIPTION | cut -d " " -f 2)
+DST_DIR="arrow-$VERSION"
+
+# Collect the build artifacts and make the shape of zip file that rwinlib expects
+ls
+mkdir -p build
+mv mingw* build
+cd build
+
+# This may vary by system/CI provider
+MSYS_LIB_DIR="/c/rtools40"
+
+# Untar the builds we made
+ls *.xz | xargs -n 1 tar -xJf
+mkdir -p $DST_DIR
+# Grab the headers from one, either one is fine
+# (if we're building twice to combine old and new toolchains, this may already exist)
+# The first entry of MINGW_ARCH decides which build's include/ is used.
+if [ ! -d $DST_DIR/include ]; then
+ mv $(echo $MINGW_ARCH | cut -d ' ' -f 1)/include $DST_DIR
+fi
+
+# Directory name mapping used in the zip layout:
+# mingw64 -> x64
+# mingw32 -> i386
+# ucrt64 -> x64-ucrt
+
+# Each section below only runs when the corresponding build was unpacked.
+if [ -d mingw64/lib/ ]; then
+ ls $MSYS_LIB_DIR/mingw64/lib/
+ # Make the rest of the directory structure
+ # lib-4.9.3 is for libraries compiled with gcc 4.9 (Rtools 3.5)
+ mkdir -p $DST_DIR/${RWINLIB_LIB_DIR}/x64
+ # lib is for the new gcc 8 toolchain (Rtools 4.0)
+ mkdir -p $DST_DIR/lib/x64
+ # Move the 64-bit versions of libarrow into the expected location
+ mv mingw64/lib/*.a $DST_DIR/${RWINLIB_LIB_DIR}/x64
+ # These may be from https://dl.bintray.com/rtools/backports/
+ cp $MSYS_LIB_DIR/mingw64/lib/lib{thrift,snappy}.a $DST_DIR/${RWINLIB_LIB_DIR}/x64
+ # These are from https://dl.bintray.com/rtools/mingw{32,64}/
+ cp $MSYS_LIB_DIR/mingw64/lib/lib{zstd,lz4,crypto,utf8proc,re2,aws*}.a $DST_DIR/lib/x64
+fi
+
+# Same for the 32-bit versions
+if [ -d mingw32/lib/ ]; then
+ ls $MSYS_LIB_DIR/mingw32/lib/
+ mkdir -p $DST_DIR/${RWINLIB_LIB_DIR}/i386
+ mkdir -p $DST_DIR/lib/i386
+ mv mingw32/lib/*.a $DST_DIR/${RWINLIB_LIB_DIR}/i386
+ cp $MSYS_LIB_DIR/mingw32/lib/lib{thrift,snappy}.a $DST_DIR/${RWINLIB_LIB_DIR}/i386
+ cp $MSYS_LIB_DIR/mingw32/lib/lib{zstd,lz4,crypto,utf8proc,re2,aws*}.a $DST_DIR/lib/i386
+fi
+
+# Do the same also for ucrt64
+# (here every library goes into the single lib/x64-ucrt directory)
+if [ -d ucrt64/lib/ ]; then
+ ls $MSYS_LIB_DIR/ucrt64/lib/
+ mkdir -p $DST_DIR/lib/x64-ucrt
+ mv ucrt64/lib/*.a $DST_DIR/lib/x64-ucrt
+ cp $MSYS_LIB_DIR/ucrt64/lib/lib{thrift,snappy,zstd,lz4,crypto,utf8proc,re2,aws*}.a $DST_DIR/lib/x64-ucrt
+fi
+
+# Create build artifact
+zip -r ${DST_DIR}.zip $DST_DIR
+
+# Copy that to a file name/path that does not vary by version number so we
+# can easily find it in the R package tests on CI
+cp ${DST_DIR}.zip ../libarrow.zip
diff --git a/src/arrow/ci/scripts/release_test.sh b/src/arrow/ci/scripts/release_test.sh
new file mode 100755
index 000000000..ae2ab3288
--- /dev/null
+++ b/src/arrow/ci/scripts/release_test.sh
@@ -0,0 +1,28 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -eux
+
+arrow_dir=${1}
+
+pushd ${arrow_dir}
+
+dev/release/run-test.rb
+
+popd
diff --git a/src/arrow/ci/scripts/ruby_test.sh b/src/arrow/ci/scripts/ruby_test.sh
new file mode 100755
index 000000000..03d20e198
--- /dev/null
+++ b/src/arrow/ci/scripts/ruby_test.sh
@@ -0,0 +1,29 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -ex
+
+source_dir=${1}/ruby
+build_dir=${2}/ruby
+
+export LD_LIBRARY_PATH=${ARROW_HOME}/lib:${LD_LIBRARY_PATH}
+export PKG_CONFIG_PATH=${ARROW_HOME}/lib/pkgconfig
+export GI_TYPELIB_PATH=${ARROW_HOME}/lib/girepository-1.0
+
+rake -f ${source_dir}/Rakefile BUILD_DIR=${build_dir} USE_BUNDLER=yes
diff --git a/src/arrow/ci/scripts/rust_build.sh b/src/arrow/ci/scripts/rust_build.sh
new file mode 100755
index 000000000..3532ea3d5
--- /dev/null
+++ b/src/arrow/ci/scripts/rust_build.sh
@@ -0,0 +1,59 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -e
+
+arrow_dir=${1}
+source_dir=${1}/rust
+
+# This file is used to build the rust binaries needed for the archery
+# integration tests. Testing of the rust implementation in normal CI is handled
+# by github workflows in the arrow-rs repository.
+
+# Disable full debug symbol generation to speed up CI build / reduce memory required
+export RUSTFLAGS="-C debuginfo=1"
+
+export ARROW_TEST_DATA=${arrow_dir}/testing/data
+export PARQUET_TEST_DATA=${arrow_dir}/cpp/submodules/parquet-testing/data
+
+if [ "${ARCHERY_INTEGRATION_WITH_RUST}" -eq "0" ]; then
+ echo "====================================================================="
+ echo "Not building the Rust implementation."
+ echo "====================================================================="
+ exit 0;
+elif [ ! -d "${source_dir}" ]; then
+ echo "====================================================================="
+ echo "The Rust source is missing. Please clone the arrow-rs repository"
+ echo "to arrow/rust before running the integration tests:"
+ echo " git clone https://github.com/apache/arrow-rs.git path/to/arrow/rust"
+ echo "====================================================================="
+ exit 1;
+fi
+
+set -x
+
+# show activated toolchain
+rustup show
+
+pushd ${source_dir}
+
+# build only the integration testing binaries
+cargo build -p arrow-integration-testing
+
+popd
diff --git a/src/arrow/ci/scripts/util_checkout.sh b/src/arrow/ci/scripts/util_checkout.sh
new file mode 100755
index 000000000..25fe69aa1
--- /dev/null
+++ b/src/arrow/ci/scripts/util_checkout.sh
@@ -0,0 +1,28 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# This script is specific to GitHub Actions; it checks out the submodules and tags.
+
+# TODO(kszucs): remove it once the "submodules: recursive" feature is released
+auth_header="$(git config --local --get http.https://github.com/.extraheader)"
+git submodule sync --recursive
+git -c "http.extraheader=$auth_header" -c protocol.version=2 submodule update --init --force --recursive --depth=1
+
+# fetch all the tags
+git fetch --depth=1 origin +refs/tags/*:refs/tags/*
diff --git a/src/arrow/ci/scripts/util_cleanup.sh b/src/arrow/ci/scripts/util_cleanup.sh
new file mode 100755
index 000000000..3a13a1a78
--- /dev/null
+++ b/src/arrow/ci/scripts/util_cleanup.sh
@@ -0,0 +1,54 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# This script is Github Actions-specific to free up disk space,
+# to avoid disk full errors on some builds
+
+if [ "${RUNNER_OS}" = "Linux" ]; then # quoted so an unset/empty RUNNER_OS is a clean "false"
+ df -h
+
+ # remove swap
+ sudo swapoff -a
+ sudo rm -f /swapfile
+
+ # clean apt cache
+ sudo apt clean
+
+ # remove haskell, consumes 8.6 GB
+ sudo rm -rf /opt/ghc
+
+ # remove linuxbrew, around 1 GB
+ sudo rm -rf /home/linuxbrew/.linuxbrew
+
+ # remove CodeQL from the hosted tool cache, 1+ GB
+ sudo rm -rf /opt/hostedtoolcache/CodeQL
+
+ # remove swift, 1+ GB
+ sudo rm -rf /usr/share/swift
+
+ # 12 GB, but takes a lot of time to delete
+ #sudo rm -rf /usr/local/lib/android
+
+ # remove cached docker images, around 13 GB
+ docker rmi $(docker image ls -aq)
+
+ # NOTE: /usr/share/dotnet is 25 GB
+fi
+
+df -h
diff --git a/src/arrow/ci/scripts/util_download_apache.sh b/src/arrow/ci/scripts/util_download_apache.sh
new file mode 100755
index 000000000..d8e9b6ca7
--- /dev/null
+++ b/src/arrow/ci/scripts/util_download_apache.sh
@@ -0,0 +1,55 @@
+#!/usr/bin/env bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -x
+
+if [ "$#" -ne 2 ]; then
+ echo "Usage: $0 <apache tarball path> <target directory>"
+ exit 1
+fi
+
+tarball_path=$1
+target_dir=$2
+
+APACHE_MIRRORS=(
+ "http://www.apache.org/dyn/closer.cgi?action=download&filename="
+ "https://downloads.apache.org"
+ "https://apache.claz.org"
+ "https://apache.cs.utah.edu"
+ "https://apache.mirrors.lucidnetworks.net"
+ "https://apache.osuosl.org"
+ "https://ftp.wayne.edu/apache"
+ "https://mirror.olnevhost.net/pub/apache"
+ "https://mirrors.gigenet.com/apache"
+ "https://mirrors.koehn.com/apache"
+ "https://mirrors.ocf.berkeley.edu/apache"
+ "https://mirrors.sonic.net/apache"
+ "https://us.mirrors.quenda.co/apache"
+)
+
+mkdir -p "${target_dir}"
+
+for mirror in "${APACHE_MIRRORS[@]}" # quoted [@]: one word per mirror, no splitting/globbing of URLs containing ? or &
+do
+ curl -SL "${mirror}/${tarball_path}" | tar -xzf - -C "${target_dir}"
+ if [ $? -eq 0 ]; then # pipefail is not set, so $? is tar's exit status
+ exit 0
+ fi
+done
+
+exit 1
diff --git a/src/arrow/ci/scripts/util_wait_for_it.sh b/src/arrow/ci/scripts/util_wait_for_it.sh
new file mode 100755
index 000000000..51ce816eb
--- /dev/null
+++ b/src/arrow/ci/scripts/util_wait_for_it.sh
@@ -0,0 +1,199 @@
+#!/usr/bin/env bash
+
+# The MIT License (MIT)
+# Copyright (c) 2016 Giles Hall
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy of
+# this software and associated documentation files (the "Software"), to deal in
+# the Software without restriction, including without limitation the rights to
+# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+# of the Software, and to permit persons to whom the Software is furnished to do
+# so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+#
+# Use this script to test if a given TCP host/port are available
+
+cmdname=$(basename $0)
+
+echoerr() { if [[ $QUIET -ne 1 ]]; then echo "$@" 1>&2; fi }
+
+usage()
+{
+ cat << USAGE >&2
+Usage:
+ $cmdname host:port [-s] [-t timeout] [-- command args]
+ -h HOST | --host=HOST Host or IP under test
+ -p PORT | --port=PORT TCP port under test
+ Alternatively, you specify the host and port as host:port
+ -s | --strict Only execute subcommand if the test succeeds
+ -q | --quiet Don't output any status messages
+ -t TIMEOUT | --timeout=TIMEOUT
+ Timeout in seconds, zero for no timeout
+ -- COMMAND ARGS Execute command with args after the test finishes
+USAGE
+ exit 1
+}
+
+wait_for()
+{
+ if [[ $TIMEOUT -gt 0 ]]; then
+ echoerr "$cmdname: waiting $TIMEOUT seconds for $HOST:$PORT"
+ else
+ echoerr "$cmdname: waiting for $HOST:$PORT without a timeout"
+ fi
+ start_ts=$(date +%s)
+ while :
+ do
+ if [[ $ISBUSY -eq 1 ]]; then
+ nc -z $HOST $PORT
+ result=$?
+ else
+ (echo > /dev/tcp/$HOST/$PORT) >/dev/null 2>&1
+ result=$?
+ fi
+ if [[ $result -eq 0 ]]; then
+ end_ts=$(date +%s)
+ echoerr "$cmdname: $HOST:$PORT is available after $((end_ts - start_ts)) seconds"
+ break
+ fi
+ sleep 1
+ done
+ return $result
+}
+
+wait_for_wrapper()
+{
+ # In order to support SIGINT during timeout: http://unix.stackexchange.com/a/57692
+ if [[ $QUIET -eq 1 ]]; then
+ timeout $BUSYTIMEFLAG $TIMEOUT $0 --quiet --child --host=$HOST --port=$PORT --timeout=$TIMEOUT &
+ else
+ timeout $BUSYTIMEFLAG $TIMEOUT $0 --child --host=$HOST --port=$PORT --timeout=$TIMEOUT &
+ fi
+ PID=$!
+ trap "kill -INT -$PID" INT
+ wait $PID
+ RESULT=$?
+ if [[ $RESULT -ne 0 ]]; then
+ echoerr "$cmdname: timeout occurred after waiting $TIMEOUT seconds for $HOST:$PORT"
+ fi
+ return $RESULT
+}
+
+# process arguments
+while [[ $# -gt 0 ]]
+do
+ case "$1" in
+ *:* )
+ hostport=(${1//:/ })
+ HOST=${hostport[0]}
+ PORT=${hostport[1]}
+ shift 1
+ ;;
+ --child)
+ CHILD=1
+ shift 1
+ ;;
+ -q | --quiet)
+ QUIET=1
+ shift 1
+ ;;
+ -s | --strict)
+ STRICT=1
+ shift 1
+ ;;
+ -h)
+ HOST="$2"
+ if [[ $HOST == "" ]]; then break; fi
+ shift 2
+ ;;
+ --host=*)
+ HOST="${1#*=}"
+ shift 1
+ ;;
+ -p)
+ PORT="$2"
+ if [[ $PORT == "" ]]; then break; fi
+ shift 2
+ ;;
+ --port=*)
+ PORT="${1#*=}"
+ shift 1
+ ;;
+ -t)
+ TIMEOUT="$2"
+ if [[ $TIMEOUT == "" ]]; then break; fi
+ shift 2
+ ;;
+ --timeout=*)
+ TIMEOUT="${1#*=}"
+ shift 1
+ ;;
+ --)
+ shift
+ CLI=("$@")
+ break
+ ;;
+ --help)
+ usage
+ ;;
+ *)
+ echoerr "Unknown argument: $1"
+ usage
+ ;;
+ esac
+done
+
+if [[ "$HOST" == "" || "$PORT" == "" ]]; then
+ echoerr "Error: you need to provide a host and port to test."
+ usage
+fi
+
+TIMEOUT=${TIMEOUT:-15}
+STRICT=${STRICT:-0}
+CHILD=${CHILD:-0}
+QUIET=${QUIET:-0}
+
+# check to see if timeout is from busybox?
+# (busybox's timeout is invoked with the extra -t flag, see BUSYTIMEFLAG below)
+TIMEOUT_PATH=$(realpath $(which timeout))
+if [[ $TIMEOUT_PATH =~ "busybox" ]]; then
+ ISBUSY=1
+ BUSYTIMEFLAG="-t"
+else
+ ISBUSY=0
+ BUSYTIMEFLAG=""
+fi
+
+if [[ $CHILD -gt 0 ]]; then
+ wait_for
+ RESULT=$?
+ exit $RESULT
+else
+ if [[ $TIMEOUT -gt 0 ]]; then
+ wait_for_wrapper
+ RESULT=$?
+ else
+ wait_for
+ RESULT=$?
+ fi
+fi
+
+if [[ $CLI != "" ]]; then
+ if [[ $RESULT -ne 0 && $STRICT -eq 1 ]]; then
+ echoerr "$cmdname: strict mode, refusing to execute subprocess"
+ exit $RESULT
+ fi
+ exec "${CLI[@]}"
+else
+ exit $RESULT
+fi
diff --git a/src/arrow/ci/vcpkg/arm64-linux-static-debug.cmake b/src/arrow/ci/vcpkg/arm64-linux-static-debug.cmake
new file mode 100644
index 000000000..6fea43694
--- /dev/null
+++ b/src/arrow/ci/vcpkg/arm64-linux-static-debug.cmake
@@ -0,0 +1,28 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set(VCPKG_TARGET_ARCHITECTURE arm64)
+set(VCPKG_CRT_LINKAGE dynamic)
+set(VCPKG_LIBRARY_LINKAGE static)
+set(VCPKG_CMAKE_SYSTEM_NAME Linux)
+set(VCPKG_BUILD_TYPE debug)
+
+if(NOT CMAKE_HOST_SYSTEM_PROCESSOR)
+ execute_process(COMMAND "uname" "-m"
+ OUTPUT_VARIABLE CMAKE_HOST_SYSTEM_PROCESSOR
+ OUTPUT_STRIP_TRAILING_WHITESPACE)
+endif()
diff --git a/src/arrow/ci/vcpkg/arm64-linux-static-release.cmake b/src/arrow/ci/vcpkg/arm64-linux-static-release.cmake
new file mode 100644
index 000000000..4012848b8
--- /dev/null
+++ b/src/arrow/ci/vcpkg/arm64-linux-static-release.cmake
@@ -0,0 +1,28 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set(VCPKG_TARGET_ARCHITECTURE arm64)
+set(VCPKG_CRT_LINKAGE dynamic)
+set(VCPKG_LIBRARY_LINKAGE static)
+set(VCPKG_CMAKE_SYSTEM_NAME Linux)
+set(VCPKG_BUILD_TYPE release)
+
+if(NOT CMAKE_HOST_SYSTEM_PROCESSOR)
+ execute_process(COMMAND "uname" "-m"
+ OUTPUT_VARIABLE CMAKE_HOST_SYSTEM_PROCESSOR
+ OUTPUT_STRIP_TRAILING_WHITESPACE)
+endif()
diff --git a/src/arrow/ci/vcpkg/arm64-osx-static-debug.cmake b/src/arrow/ci/vcpkg/arm64-osx-static-debug.cmake
new file mode 100644
index 000000000..f511819a2
--- /dev/null
+++ b/src/arrow/ci/vcpkg/arm64-osx-static-debug.cmake
@@ -0,0 +1,26 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set(VCPKG_TARGET_ARCHITECTURE arm64)
+set(VCPKG_CRT_LINKAGE dynamic)
+set(VCPKG_LIBRARY_LINKAGE static)
+
+set(VCPKG_CMAKE_SYSTEM_NAME Darwin)
+set(VCPKG_OSX_ARCHITECTURES arm64)
+set(VCPKG_OSX_DEPLOYMENT_TARGET "11.0")
+
+set(VCPKG_BUILD_TYPE debug)
diff --git a/src/arrow/ci/vcpkg/arm64-osx-static-release.cmake b/src/arrow/ci/vcpkg/arm64-osx-static-release.cmake
new file mode 100644
index 000000000..43d65efb2
--- /dev/null
+++ b/src/arrow/ci/vcpkg/arm64-osx-static-release.cmake
@@ -0,0 +1,26 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set(VCPKG_TARGET_ARCHITECTURE arm64)
+set(VCPKG_CRT_LINKAGE dynamic)
+set(VCPKG_LIBRARY_LINKAGE static)
+
+set(VCPKG_CMAKE_SYSTEM_NAME Darwin)
+set(VCPKG_OSX_ARCHITECTURES arm64)
+set(VCPKG_OSX_DEPLOYMENT_TARGET "11.0")
+
+set(VCPKG_BUILD_TYPE release)
diff --git a/src/arrow/ci/vcpkg/ports.patch b/src/arrow/ci/vcpkg/ports.patch
new file mode 100644
index 000000000..7bcba49c1
--- /dev/null
+++ b/src/arrow/ci/vcpkg/ports.patch
@@ -0,0 +1,63 @@
+diff --git a/ports/aws-c-common/portfile.cmake b/ports/aws-c-common/portfile.cmake
+index f3704ef05b..3af543058d 100644
+--- a/ports/aws-c-common/portfile.cmake
++++ b/ports/aws-c-common/portfile.cmake
+@@ -1,8 +1,8 @@
+ vcpkg_from_github(
+ OUT_SOURCE_PATH SOURCE_PATH
+ REPO awslabs/aws-c-common
+- REF 4a21a1c0757083a16497fea27886f5f20ccdf334 # v0.4.56
+- SHA512 68898a8ac15d5490f45676eabfbe0df9e45370a74c543a28909fd0d85fed48dfcf4bcd6ea2d01d1a036dd352e2e4e0b08c48c63ab2a2b477fe150b46a827136e
++ REF 13adef72b7813ec878817c6d50a7a3f241015d8a # v0.4.57
++ SHA512 28256522ac6af544d7464e3e7dcd4dc802ae2b09728bf8f167f86a6487bb756d0cad5eb4a2480610b2967b9c24c4a7f70621894517aa2828ffdeb0479453803b
+ HEAD_REF master
+ PATCHES
+ disable-error-4068.patch # This patch fixes dependency port compilation failure
+diff --git a/ports/curl/portfile.cmake b/ports/curl/portfile.cmake
+index be66d452be..a5ce325e9d 100644
+--- a/ports/curl/portfile.cmake
++++ b/ports/curl/portfile.cmake
+@@ -94,6 +94,8 @@ vcpkg_configure_cmake(
+ -DCMAKE_DISABLE_FIND_PACKAGE_Perl=ON
+ -DENABLE_DEBUG=ON
+ -DCURL_CA_FALLBACK=ON
++ -DCURL_CA_PATH=none
++ -DCURL_CA_BUNDLE=none
+ OPTIONS_DEBUG
+ ${EXTRA_ARGS_DEBUG}
+ OPTIONS_RELEASE
+diff --git a/ports/snappy/portfile.cmake b/ports/snappy/portfile.cmake
+index 75dd133027..84345c7caa 100644
+--- a/ports/snappy/portfile.cmake
++++ b/ports/snappy/portfile.cmake
+@@ -4,6 +4,7 @@ vcpkg_from_github(
+ REF 537f4ad6240e586970fe554614542e9717df7902 # 1.1.8
+ SHA512 555d3b69a6759592736cbaae8f41654f0cf14e8be693b5dde37640191e53daec189f895872557b173e905d10681ef502f3e6ed8566811add963ffef96ce4016d
+ HEAD_REF master
++ PATCHES "snappy-disable-bmi.patch"
+ )
+
+ vcpkg_configure_cmake(
+diff --git a/ports/snappy/snappy-disable-bmi.patch b/ports/snappy/snappy-disable-bmi.patch
+new file mode 100644
+index 0000000000..2cbb1533a8
+--- /dev/null
++++ b/ports/snappy/snappy-disable-bmi.patch
+@@ -0,0 +1,17 @@
++--- snappy.cc 2020-06-27 17:38:49.718993748 -0500
+++++ snappy.cc 2020-06-27 17:37:57.543268213 -0500
++@@ -717,14 +717,10 @@
++ static inline uint32 ExtractLowBytes(uint32 v, int n) {
++ assert(n >= 0);
++ assert(n <= 4);
++-#if SNAPPY_HAVE_BMI2
++- return _bzhi_u32(v, 8 * n);
++-#else
++ // This needs to be wider than uint32 otherwise `mask << 32` will be
++ // undefined.
++ uint64 mask = 0xffffffff;
++ return v & ~(mask << (8 * n));
++-#endif
++ }
++
++ static inline bool LeftShiftOverflows(uint8 value, uint32 shift) {
diff --git a/src/arrow/ci/vcpkg/universal2-osx-static-debug.cmake b/src/arrow/ci/vcpkg/universal2-osx-static-debug.cmake
new file mode 100644
index 000000000..706ac47a7
--- /dev/null
+++ b/src/arrow/ci/vcpkg/universal2-osx-static-debug.cmake
@@ -0,0 +1,26 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set(VCPKG_TARGET_ARCHITECTURE arm64)
+set(VCPKG_CRT_LINKAGE dynamic)
+set(VCPKG_LIBRARY_LINKAGE static)
+
+set(VCPKG_CMAKE_SYSTEM_NAME Darwin)
+set(VCPKG_OSX_ARCHITECTURES "x86_64\;arm64")
+set(VCPKG_OSX_DEPLOYMENT_TARGET "10.13")
+
+set(VCPKG_BUILD_TYPE debug)
diff --git a/src/arrow/ci/vcpkg/universal2-osx-static-release.cmake b/src/arrow/ci/vcpkg/universal2-osx-static-release.cmake
new file mode 100644
index 000000000..867069017
--- /dev/null
+++ b/src/arrow/ci/vcpkg/universal2-osx-static-release.cmake
@@ -0,0 +1,26 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set(VCPKG_TARGET_ARCHITECTURE arm64)
+set(VCPKG_CRT_LINKAGE dynamic)
+set(VCPKG_LIBRARY_LINKAGE static)
+
+set(VCPKG_CMAKE_SYSTEM_NAME Darwin)
+set(VCPKG_OSX_ARCHITECTURES "x86_64\;arm64")
+set(VCPKG_OSX_DEPLOYMENT_TARGET "10.13")
+
+set(VCPKG_BUILD_TYPE release)
diff --git a/src/arrow/ci/vcpkg/x64-linux-static-debug.cmake b/src/arrow/ci/vcpkg/x64-linux-static-debug.cmake
new file mode 100644
index 000000000..3acee2ee4
--- /dev/null
+++ b/src/arrow/ci/vcpkg/x64-linux-static-debug.cmake
@@ -0,0 +1,24 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set(VCPKG_TARGET_ARCHITECTURE x64)
+set(VCPKG_CRT_LINKAGE dynamic)
+set(VCPKG_LIBRARY_LINKAGE static)
+
+set(VCPKG_CMAKE_SYSTEM_NAME Linux)
+
+set(VCPKG_BUILD_TYPE debug)
diff --git a/src/arrow/ci/vcpkg/x64-linux-static-release.cmake b/src/arrow/ci/vcpkg/x64-linux-static-release.cmake
new file mode 100644
index 000000000..c2caa49fa
--- /dev/null
+++ b/src/arrow/ci/vcpkg/x64-linux-static-release.cmake
@@ -0,0 +1,24 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set(VCPKG_TARGET_ARCHITECTURE x64)
+set(VCPKG_CRT_LINKAGE dynamic)
+set(VCPKG_LIBRARY_LINKAGE static)
+
+set(VCPKG_CMAKE_SYSTEM_NAME Linux)
+
+set(VCPKG_BUILD_TYPE release)
diff --git a/src/arrow/ci/vcpkg/x64-osx-static-debug.cmake b/src/arrow/ci/vcpkg/x64-osx-static-debug.cmake
new file mode 100644
index 000000000..e8a321ec7
--- /dev/null
+++ b/src/arrow/ci/vcpkg/x64-osx-static-debug.cmake
@@ -0,0 +1,25 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set(VCPKG_TARGET_ARCHITECTURE x64)
+set(VCPKG_CRT_LINKAGE dynamic)
+set(VCPKG_LIBRARY_LINKAGE static)
+
+set(VCPKG_CMAKE_SYSTEM_NAME Darwin)
+set(VCPKG_OSX_ARCHITECTURES x86_64)
+
+set(VCPKG_BUILD_TYPE debug)
diff --git a/src/arrow/ci/vcpkg/x64-osx-static-release.cmake b/src/arrow/ci/vcpkg/x64-osx-static-release.cmake
new file mode 100644
index 000000000..956d5b92e
--- /dev/null
+++ b/src/arrow/ci/vcpkg/x64-osx-static-release.cmake
@@ -0,0 +1,25 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set(VCPKG_TARGET_ARCHITECTURE x64)
+set(VCPKG_CRT_LINKAGE dynamic)
+set(VCPKG_LIBRARY_LINKAGE static)
+
+set(VCPKG_CMAKE_SYSTEM_NAME Darwin)
+set(VCPKG_OSX_ARCHITECTURES x86_64)
+
+set(VCPKG_BUILD_TYPE release)
diff --git a/src/arrow/ci/vcpkg/x64-windows-static-md-debug.cmake b/src/arrow/ci/vcpkg/x64-windows-static-md-debug.cmake
new file mode 100644
index 000000000..3eae3cfda
--- /dev/null
+++ b/src/arrow/ci/vcpkg/x64-windows-static-md-debug.cmake
@@ -0,0 +1,22 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set(VCPKG_TARGET_ARCHITECTURE x64)
+set(VCPKG_CRT_LINKAGE dynamic)
+set(VCPKG_LIBRARY_LINKAGE static)
+
+set(VCPKG_BUILD_TYPE debug)
diff --git a/src/arrow/ci/vcpkg/x64-windows-static-md-release.cmake b/src/arrow/ci/vcpkg/x64-windows-static-md-release.cmake
new file mode 100644
index 000000000..b8dfbc884
--- /dev/null
+++ b/src/arrow/ci/vcpkg/x64-windows-static-md-release.cmake
@@ -0,0 +1,22 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set(VCPKG_TARGET_ARCHITECTURE x64)
+set(VCPKG_CRT_LINKAGE dynamic)
+set(VCPKG_LIBRARY_LINKAGE static)
+
+set(VCPKG_BUILD_TYPE release)