summaryrefslogtreecommitdiffstats
path: root/src/arrow/ci/scripts
diff options
context:
space:
mode:
Diffstat (limited to 'src/arrow/ci/scripts')
-rw-r--r--src/arrow/ci/scripts/PKGBUILD138
-rwxr-xr-xsrc/arrow/ci/scripts/c_glib_build.sh45
-rwxr-xr-xsrc/arrow/ci/scripts/c_glib_test.sh48
-rwxr-xr-xsrc/arrow/ci/scripts/ccache_setup.sh26
-rwxr-xr-xsrc/arrow/ci/scripts/cpp_build.sh164
-rwxr-xr-xsrc/arrow/ci/scripts/cpp_test.sh118
-rwxr-xr-xsrc/arrow/ci/scripts/csharp_build.sh26
-rwxr-xr-xsrc/arrow/ci/scripts/csharp_pack.sh26
-rwxr-xr-xsrc/arrow/ci/scripts/csharp_test.sh29
-rwxr-xr-xsrc/arrow/ci/scripts/docs_build.sh48
-rwxr-xr-xsrc/arrow/ci/scripts/go_build.sh40
-rwxr-xr-xsrc/arrow/ci/scripts/go_cgo_python_test.sh45
-rwxr-xr-xsrc/arrow/ci/scripts/go_test.sh60
-rwxr-xr-xsrc/arrow/ci/scripts/install_conda.sh66
-rwxr-xr-xsrc/arrow/ci/scripts/install_dask.sh36
-rwxr-xr-xsrc/arrow/ci/scripts/install_gcs_testbench.sh38
-rwxr-xr-xsrc/arrow/ci/scripts/install_iwyu.sh48
-rwxr-xr-xsrc/arrow/ci/scripts/install_kartothek.sh41
-rwxr-xr-xsrc/arrow/ci/scripts/install_minio.sh62
-rwxr-xr-xsrc/arrow/ci/scripts/install_osx_sdk.sh41
-rwxr-xr-xsrc/arrow/ci/scripts/install_pandas.sh46
-rwxr-xr-xsrc/arrow/ci/scripts/install_python.sh68
-rwxr-xr-xsrc/arrow/ci/scripts/install_spark.sh31
-rwxr-xr-xsrc/arrow/ci/scripts/install_turbodbc.sh43
-rwxr-xr-xsrc/arrow/ci/scripts/install_vcpkg.sh39
-rwxr-xr-xsrc/arrow/ci/scripts/integration_arrow.sh40
-rwxr-xr-xsrc/arrow/ci/scripts/integration_dask.sh40
-rwxr-xr-xsrc/arrow/ci/scripts/integration_hdfs.sh69
-rwxr-xr-xsrc/arrow/ci/scripts/integration_hiveserver2.sh32
-rwxr-xr-xsrc/arrow/ci/scripts/integration_kartothek.sh31
-rwxr-xr-xsrc/arrow/ci/scripts/integration_spark.sh87
-rwxr-xr-xsrc/arrow/ci/scripts/integration_turbodbc.sh47
-rwxr-xr-xsrc/arrow/ci/scripts/java_build.sh107
-rwxr-xr-xsrc/arrow/ci/scripts/java_cdata_build.sh45
-rwxr-xr-xsrc/arrow/ci/scripts/java_full_build.sh42
-rwxr-xr-xsrc/arrow/ci/scripts/java_jni_macos_build.sh115
-rwxr-xr-xsrc/arrow/ci/scripts/java_jni_manylinux_build.sh137
-rwxr-xr-xsrc/arrow/ci/scripts/java_test.sh54
-rwxr-xr-xsrc/arrow/ci/scripts/js_build.sh36
-rwxr-xr-xsrc/arrow/ci/scripts/js_test.sh29
-rwxr-xr-xsrc/arrow/ci/scripts/matlab_build.sh29
-rwxr-xr-xsrc/arrow/ci/scripts/msys2_setup.sh79
-rwxr-xr-xsrc/arrow/ci/scripts/msys2_system_clean.sh33
-rwxr-xr-xsrc/arrow/ci/scripts/msys2_system_upgrade.sh28
-rwxr-xr-xsrc/arrow/ci/scripts/python_benchmark.sh40
-rwxr-xr-xsrc/arrow/ci/scripts/python_build.sh54
-rwxr-xr-xsrc/arrow/ci/scripts/python_sdist_build.sh27
-rwxr-xr-xsrc/arrow/ci/scripts/python_sdist_test.sh58
-rwxr-xr-xsrc/arrow/ci/scripts/python_test.sh32
-rwxr-xr-xsrc/arrow/ci/scripts/python_wheel_macos_build.sh166
-rwxr-xr-xsrc/arrow/ci/scripts/python_wheel_manylinux_build.sh149
-rwxr-xr-xsrc/arrow/ci/scripts/python_wheel_unix_test.sh84
-rw-r--r--src/arrow/ci/scripts/python_wheel_windows_build.bat109
-rwxr-xr-xsrc/arrow/ci/scripts/python_wheel_windows_test.bat55
-rwxr-xr-xsrc/arrow/ci/scripts/r_build.sh33
-rwxr-xr-xsrc/arrow/ci/scripts/r_deps.sh45
-rwxr-xr-xsrc/arrow/ci/scripts/r_docker_configure.sh81
-rwxr-xr-xsrc/arrow/ci/scripts/r_pkgdown_check.sh41
-rwxr-xr-xsrc/arrow/ci/scripts/r_revdepcheck.sh88
-rwxr-xr-xsrc/arrow/ci/scripts/r_sanitize.sh46
-rwxr-xr-xsrc/arrow/ci/scripts/r_test.sh106
-rwxr-xr-xsrc/arrow/ci/scripts/r_valgrind.sh46
-rwxr-xr-xsrc/arrow/ci/scripts/r_windows_build.sh116
-rwxr-xr-xsrc/arrow/ci/scripts/release_test.sh28
-rwxr-xr-xsrc/arrow/ci/scripts/ruby_test.sh29
-rwxr-xr-xsrc/arrow/ci/scripts/rust_build.sh59
-rwxr-xr-xsrc/arrow/ci/scripts/util_checkout.sh28
-rwxr-xr-xsrc/arrow/ci/scripts/util_cleanup.sh54
-rwxr-xr-xsrc/arrow/ci/scripts/util_download_apache.sh55
-rwxr-xr-xsrc/arrow/ci/scripts/util_wait_for_it.sh199
70 files changed, 4280 insertions, 0 deletions
diff --git a/src/arrow/ci/scripts/PKGBUILD b/src/arrow/ci/scripts/PKGBUILD
new file mode 100644
index 000000000..975d1514f
--- /dev/null
+++ b/src/arrow/ci/scripts/PKGBUILD
@@ -0,0 +1,138 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+_realname=arrow
+pkgbase=mingw-w64-${_realname}
+pkgname="${MINGW_PACKAGE_PREFIX}-${_realname}"
+pkgver=6.0.1
+pkgrel=8000
+pkgdesc="Apache Arrow is a cross-language development platform for in-memory data (mingw-w64)"
+arch=("any")
+url="https://arrow.apache.org/"
+license=("Apache-2.0")
+depends=("${MINGW_PACKAGE_PREFIX}-aws-sdk-cpp"
+ "${MINGW_PACKAGE_PREFIX}-libutf8proc"
+ "${MINGW_PACKAGE_PREFIX}-re2"
+ "${MINGW_PACKAGE_PREFIX}-thrift"
+ "${MINGW_PACKAGE_PREFIX}-snappy"
+ "${MINGW_PACKAGE_PREFIX}-zlib"
+ "${MINGW_PACKAGE_PREFIX}-lz4"
+ "${MINGW_PACKAGE_PREFIX}-zstd")
+makedepends=("${MINGW_PACKAGE_PREFIX}-ccache"
+ "${MINGW_PACKAGE_PREFIX}-cmake"
+ "${MINGW_PACKAGE_PREFIX}-gcc")
+options=("staticlibs" "strip" "!buildflags") # makepkg options; "!" disables one
+
+# For installing from a local checkout, point source_dir at the checkout
+# (here: $ARROW_HOME) and don't define a "source" array below
+source_dir="$ARROW_HOME"
+# else, when one of the "source" arrays below is enabled:
+# source_dir=apache-${_realname}-${pkgver}
+
+# For a released version (download and verify the release tarball):
+#source=("https://archive.apache.org/dist/arrow/arrow-${pkgver}/apache-arrow-${pkgver}.tar.gz")
+#sha256sums=("ac2a77dd9168e9892e432c474611e86ded0be6dfe15f689c948751d37f81391a")
+# For the GitHub development version:
+# Append `#commit=54b1b2f688e5e84b4c664b1e12a95f93b94ab2f3` to the URL to select a revision
+# source=("${source_dir}"::"git+https://github.com/apache/arrow")
+# sha256sums=("SKIP")
+# source_dir="${APPVEYOR_BUILD_FOLDER}/${source_dir}"
+
+cpp_build_dir=build-${CARCH}-cpp # created by build(), installed from by package()
+
+pkgver() {
+ # Prints the Version field of r/DESCRIPTION. Its only purpose is to make the
+ # job error if pkgver above differs from the version in r/DESCRIPTION.
+ grep '^Version:' "${source_dir}/r/DESCRIPTION" | cut -d " " -f 2
+}
+
+build() { # configure and compile the static Arrow C++ libraries with CMake
+ ARROW_CPP_DIR="${source_dir}/cpp"
+ [[ -d ${cpp_build_dir} ]] && rm -rf "${cpp_build_dir}"
+ mkdir -p "${cpp_build_dir}"
+ pushd "${cpp_build_dir}"
+
+ # The Rtools libutf8proc is a static lib, but Findutf8proc.cmake doesn't
+ # set the appropriate compiler definition.
+ export CPPFLAGS="-DUTF8PROC_STATIC"
+
+ # This is the difference between rtools-packages and rtools-backports
+ # Remove this when submitting to rtools-packages
+ if [ "$RTOOLS_VERSION" = "35" ]; then
+ export CC="/C/Rtools${MINGW_PREFIX/mingw/mingw_}/bin/gcc"
+ export CXX="/C/Rtools${MINGW_PREFIX/mingw/mingw_}/bin/g++"
+ export PATH="/C/Rtools${MINGW_PREFIX/mingw/mingw_}/bin:$PATH"
+ export CPPFLAGS="${CPPFLAGS} -I${MINGW_PREFIX}/include"
+ export LIBS="-L${MINGW_PREFIX}/libs"
+ export ARROW_S3=OFF
+ export ARROW_WITH_RE2=OFF
+ # Without this, some dataset functionality segfaults
+ export CMAKE_UNITY_BUILD=ON
+ else
+ export ARROW_S3=ON
+ export ARROW_WITH_RE2=ON
+ # Without this, some compute functionality segfaults in tests
+ export CMAKE_UNITY_BUILD=OFF
+ fi
+ # MSYS2_ARG_CONV_EXCL keeps MSYS2 from path-converting the install prefix argument
+ MSYS2_ARG_CONV_EXCL="-DCMAKE_INSTALL_PREFIX=" \
+ "${MINGW_PREFIX}/bin/cmake.exe" \
+ "${ARROW_CPP_DIR}" \
+ -G "MSYS Makefiles" \
+ -DARROW_BUILD_SHARED=OFF \
+ -DARROW_BUILD_STATIC=ON \
+ -DARROW_BUILD_UTILITIES=OFF \
+ -DARROW_COMPUTE=ON \
+ -DARROW_CSV=ON \
+ -DARROW_DATASET=ON \
+ -DARROW_FILESYSTEM=ON \
+ -DARROW_HDFS=OFF \
+ -DARROW_JEMALLOC=OFF \
+ -DARROW_JSON=ON \
+ -DARROW_LZ4_USE_SHARED=OFF \
+ -DARROW_MIMALLOC=ON \
+ -DARROW_PACKAGE_PREFIX="${MINGW_PREFIX}" \
+ -DARROW_PARQUET=ON \
+ -DARROW_S3="${ARROW_S3}" \
+ -DARROW_SNAPPY_USE_SHARED=OFF \
+ -DARROW_USE_GLOG=OFF \
+ -DARROW_WITH_LZ4=ON \
+ -DARROW_WITH_RE2="${ARROW_WITH_RE2}" \
+ -DARROW_WITH_SNAPPY=ON \
+ -DARROW_WITH_ZLIB=ON \
+ -DARROW_WITH_ZSTD=ON \
+ -DARROW_ZSTD_USE_SHARED=OFF \
+ -DARROW_CXXFLAGS="${CPPFLAGS}" \
+ -DCMAKE_BUILD_TYPE="release" \
+ -DCMAKE_INSTALL_PREFIX="${MINGW_PREFIX}" \
+ -DCMAKE_UNITY_BUILD="${CMAKE_UNITY_BUILD}" \
+ -DCMAKE_VERBOSE_MAKEFILE=ON
+
+ make -j3
+ popd
+}
+
+package() { # install into ${pkgdir}, then fix the prefix in the .pc files
+ make -C "${cpp_build_dir}" DESTDIR="${pkgdir}" install
+ # Replace the "cygpath -am" form of the prefix written into the .pc files
+ local PREFIX_DEPS pc; PREFIX_DEPS=$(cygpath -am "${MINGW_PREFIX}")
+ pushd "${pkgdir}${MINGW_PREFIX}/lib/pkgconfig"
+ for pc in *.pc; do
+ sed -s "s|${PREFIX_DEPS}|${MINGW_PREFIX}|g" -i "$pc"
+ done
+ popd
+}
diff --git a/src/arrow/ci/scripts/c_glib_build.sh b/src/arrow/ci/scripts/c_glib_build.sh
new file mode 100755
index 000000000..ce3cea18e
--- /dev/null
+++ b/src/arrow/ci/scripts/c_glib_build.sh
@@ -0,0 +1,45 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -ex
+
+source_dir=${1:?Arrow source directory required}/c_glib
+build_dir=${2:?build directory required}/c_glib
+: "${ARROW_GLIB_GTK_DOC:=false}"
+: "${ARROW_GLIB_DEVELOPMENT_MODE:=false}"
+
+export PKG_CONFIG_PATH=${ARROW_HOME}/lib/pkgconfig
+
+export CFLAGS="-DARROW_NO_DEPRECATED_API"
+export CXXFLAGS="-DARROW_NO_DEPRECATED_API"
+
+mkdir -p "${build_dir}"
+
+# Configure with Meson, installing under $ARROW_HOME
+meson --prefix="$ARROW_HOME" \
+ --libdir=lib \
+ -Ddevelopment_mode="${ARROW_GLIB_DEVELOPMENT_MODE}" \
+ -Dgtk_doc="${ARROW_GLIB_GTK_DOC}" \
+ "${build_dir}" \
+ "${source_dir}"
+
+pushd "${build_dir}"
+ninja
+ninja install
+popd
diff --git a/src/arrow/ci/scripts/c_glib_test.sh b/src/arrow/ci/scripts/c_glib_test.sh
new file mode 100755
index 000000000..25c54138e
--- /dev/null
+++ b/src/arrow/ci/scripts/c_glib_test.sh
@@ -0,0 +1,48 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -ex
+
+source_dir=${1}/c_glib
+build_dir=${2}/c_glib
+# Append the old LD_LIBRARY_PATH only when non-empty: an empty entry means "."
+export LD_LIBRARY_PATH=${ARROW_HOME}/lib${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}
+export PKG_CONFIG_PATH=${ARROW_HOME}/lib/pkgconfig
+export GI_TYPELIB_PATH=${ARROW_HOME}/lib/girepository-1.0
+
+pushd "${source_dir}"
+
+ruby test/run-test.rb
+
+if [[ "$(uname -s)" == "Linux" ]]; then
+ # TODO(kszucs): on osx it fails to load 'lgi.corelgilua51' despite that lgi
+ # was installed by luarocks
+ pushd example/lua
+ lua write-batch.lua
+ lua read-batch.lua
+ lua write-stream.lua
+ lua read-stream.lua
+ popd
+fi
+
+popd
+
+pushd "${build_dir}"
+example/extension-type
+popd
diff --git a/src/arrow/ci/scripts/ccache_setup.sh b/src/arrow/ci/scripts/ccache_setup.sh
new file mode 100755
index 000000000..f77fbb373
--- /dev/null
+++ b/src/arrow/ci/scripts/ccache_setup.sh
@@ -0,0 +1,26 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -eux
+# Append the ccache settings to the file named by $GITHUB_ENV
+echo "ARROW_USE_CCACHE=ON" >> "$GITHUB_ENV"
+echo "CCACHE_COMPILERCHECK=content" >> "$GITHUB_ENV"
+echo "CCACHE_COMPRESS=1" >> "$GITHUB_ENV"
+echo "CCACHE_COMPRESSLEVEL=6" >> "$GITHUB_ENV"
+echo "CCACHE_MAXSIZE=500M" >> "$GITHUB_ENV"
diff --git a/src/arrow/ci/scripts/cpp_build.sh b/src/arrow/ci/scripts/cpp_build.sh
new file mode 100755
index 000000000..a11dd23b7
--- /dev/null
+++ b/src/arrow/ci/scripts/cpp_build.sh
@@ -0,0 +1,164 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -ex
+
+source_dir=${1:?Arrow source directory required}/cpp
+build_dir=${2:?build directory required}/cpp
+with_docs=${3:-false} # "true" additionally runs doxygen after the build
+
+: "${ARROW_USE_CCACHE:=OFF}"
+
+# TODO(kszucs): consider to move these to CMake
+if [ -n "${CONDA_PREFIX}" ]; then
+ echo -e "===\n=== Conda environment for build\n==="
+ conda list
+
+ export CMAKE_ARGS="${CMAKE_ARGS} -DCMAKE_AR=${AR} -DCMAKE_RANLIB=${RANLIB}"
+ export ARROW_GANDIVA_PC_CXX_FLAGS=$(echo | ${CXX} -E -Wp,-v -xc++ - 2>&1 | grep '^ ' | awk '{print "-isystem;" substr($1, 1)}' | tr '\n' ';')
+elif [ -x "$(command -v xcrun)" ]; then
+ export ARROW_GANDIVA_PC_CXX_FLAGS="-isysroot;$(xcrun --show-sdk-path)"
+fi
+
+if [ "${ARROW_USE_CCACHE}" == "ON" ]; then
+ echo -e "===\n=== ccache statistics before build\n==="
+ ccache -s
+fi
+
+if [ "${ARROW_USE_TSAN}" == "ON" ] && [ ! -x "${ASAN_SYMBOLIZER_PATH}" ]; then
+ echo -e "Invalid value for \$ASAN_SYMBOLIZER_PATH: ${ASAN_SYMBOLIZER_PATH}" >&2
+ exit 1
+fi
+
+mkdir -p "${build_dir}"
+pushd "${build_dir}"
+# Each option takes its value from the same-named env var, else the default after ":-"
+cmake -G "${CMAKE_GENERATOR:-Ninja}" \
+ -DARROW_BOOST_USE_SHARED=${ARROW_BOOST_USE_SHARED:-ON} \
+ -DARROW_BUILD_BENCHMARKS=${ARROW_BUILD_BENCHMARKS:-OFF} \
+ -DARROW_BUILD_BENCHMARKS_REFERENCE=${ARROW_BUILD_BENCHMARKS:-OFF} \
+ -DARROW_BUILD_EXAMPLES=${ARROW_BUILD_EXAMPLES:-OFF} \
+ -DARROW_BUILD_INTEGRATION=${ARROW_BUILD_INTEGRATION:-OFF} \
+ -DARROW_BUILD_SHARED=${ARROW_BUILD_SHARED:-ON} \
+ -DARROW_BUILD_STATIC=${ARROW_BUILD_STATIC:-ON} \
+ -DARROW_BUILD_TESTS=${ARROW_BUILD_TESTS:-OFF} \
+ -DARROW_BUILD_UTILITIES=${ARROW_BUILD_UTILITIES:-ON} \
+ -DARROW_COMPUTE=${ARROW_COMPUTE:-ON} \
+ -DARROW_CSV=${ARROW_CSV:-ON} \
+ -DARROW_CUDA=${ARROW_CUDA:-OFF} \
+ -DARROW_CXXFLAGS="${ARROW_CXXFLAGS:-}" \
+ -DARROW_DATASET=${ARROW_DATASET:-ON} \
+ -DARROW_ENGINE=${ARROW_ENGINE:-ON} \
+ -DARROW_DEPENDENCY_SOURCE=${ARROW_DEPENDENCY_SOURCE:-AUTO} \
+ -DARROW_EXTRA_ERROR_CONTEXT=${ARROW_EXTRA_ERROR_CONTEXT:-OFF} \
+ -DARROW_ENABLE_TIMING_TESTS=${ARROW_ENABLE_TIMING_TESTS:-ON} \
+ -DARROW_FILESYSTEM=${ARROW_FILESYSTEM:-ON} \
+ -DARROW_FLIGHT=${ARROW_FLIGHT:-OFF} \
+ -DARROW_FUZZING=${ARROW_FUZZING:-OFF} \
+ -DARROW_GANDIVA_JAVA=${ARROW_GANDIVA_JAVA:-OFF} \
+ -DARROW_GANDIVA_PC_CXX_FLAGS="${ARROW_GANDIVA_PC_CXX_FLAGS:-}" \
+ -DARROW_GANDIVA=${ARROW_GANDIVA:-OFF} \
+ -DARROW_GCS=${ARROW_GCS:-OFF} \
+ -DARROW_HDFS=${ARROW_HDFS:-ON} \
+ -DARROW_HIVESERVER2=${ARROW_HIVESERVER2:-OFF} \
+ -DARROW_INSTALL_NAME_RPATH=${ARROW_INSTALL_NAME_RPATH:-ON} \
+ -DARROW_JEMALLOC=${ARROW_JEMALLOC:-ON} \
+ -DARROW_JNI=${ARROW_JNI:-OFF} \
+ -DARROW_JSON=${ARROW_JSON:-ON} \
+ -DARROW_LARGE_MEMORY_TESTS=${ARROW_LARGE_MEMORY_TESTS:-OFF} \
+ -DARROW_MIMALLOC=${ARROW_MIMALLOC:-OFF} \
+ -DARROW_NO_DEPRECATED_API=${ARROW_NO_DEPRECATED_API:-OFF} \
+ -DARROW_ORC=${ARROW_ORC:-OFF} \
+ -DARROW_PARQUET=${ARROW_PARQUET:-OFF} \
+ -DARROW_PLASMA_JAVA_CLIENT=${ARROW_PLASMA_JAVA_CLIENT:-OFF} \
+ -DARROW_PLASMA=${ARROW_PLASMA:-OFF} \
+ -DARROW_PYTHON=${ARROW_PYTHON:-OFF} \
+ -DARROW_RUNTIME_SIMD_LEVEL=${ARROW_RUNTIME_SIMD_LEVEL:-MAX} \
+ -DARROW_S3=${ARROW_S3:-OFF} \
+ -DARROW_TEST_LINKAGE=${ARROW_TEST_LINKAGE:-shared} \
+ -DARROW_TEST_MEMCHECK=${ARROW_TEST_MEMCHECK:-OFF} \
+ -DARROW_USE_ASAN=${ARROW_USE_ASAN:-OFF} \
+ -DARROW_USE_CCACHE=${ARROW_USE_CCACHE:-ON} \
+ -DARROW_USE_GLOG=${ARROW_USE_GLOG:-OFF} \
+ -DARROW_USE_LD_GOLD=${ARROW_USE_LD_GOLD:-OFF} \
+ -DARROW_USE_PRECOMPILED_HEADERS=${ARROW_USE_PRECOMPILED_HEADERS:-OFF} \
+ -DARROW_USE_STATIC_CRT=${ARROW_USE_STATIC_CRT:-OFF} \
+ -DARROW_USE_TSAN=${ARROW_USE_TSAN:-OFF} \
+ -DARROW_USE_UBSAN=${ARROW_USE_UBSAN:-OFF} \
+ -DARROW_VERBOSE_THIRDPARTY_BUILD=${ARROW_VERBOSE_THIRDPARTY_BUILD:-OFF} \
+ -DARROW_WITH_BROTLI=${ARROW_WITH_BROTLI:-OFF} \
+ -DARROW_WITH_BZ2=${ARROW_WITH_BZ2:-OFF} \
+ -DARROW_WITH_LZ4=${ARROW_WITH_LZ4:-OFF} \
+ -DARROW_WITH_SNAPPY=${ARROW_WITH_SNAPPY:-OFF} \
+ -DARROW_WITH_UTF8PROC=${ARROW_WITH_UTF8PROC:-ON} \
+ -DARROW_WITH_ZLIB=${ARROW_WITH_ZLIB:-OFF} \
+ -DARROW_WITH_ZSTD=${ARROW_WITH_ZSTD:-OFF} \
+ -DAWSSDK_SOURCE=${AWSSDK_SOURCE:-} \
+ -Dbenchmark_SOURCE=${benchmark_SOURCE:-} \
+ -DBOOST_SOURCE=${BOOST_SOURCE:-} \
+ -DBrotli_SOURCE=${Brotli_SOURCE:-} \
+ -DBUILD_WARNING_LEVEL=${BUILD_WARNING_LEVEL:-CHECKIN} \
+ -Dc-ares_SOURCE=${cares_SOURCE:-} \
+ -DCMAKE_BUILD_TYPE=${ARROW_BUILD_TYPE:-debug} \
+ -DCMAKE_C_FLAGS="${CFLAGS:-}" \
+ -DCMAKE_CXX_FLAGS="${CXXFLAGS:-}" \
+ -DCMAKE_INSTALL_LIBDIR=${CMAKE_INSTALL_LIBDIR:-lib} \
+ -DCMAKE_INSTALL_PREFIX="${CMAKE_INSTALL_PREFIX:-${ARROW_HOME}}" \
+ -DCMAKE_UNITY_BUILD=${CMAKE_UNITY_BUILD:-OFF} \
+ -Dgflags_SOURCE=${gflags_SOURCE:-} \
+ -Dgoogle_cloud_cpp_storage_SOURCE=${google_cloud_cpp_storage_SOURCE:-} \
+ -DgRPC_SOURCE=${gRPC_SOURCE:-} \
+ -DGTest_SOURCE=${GTest_SOURCE:-} \
+ -DLz4_SOURCE=${Lz4_SOURCE:-} \
+ -DORC_SOURCE=${ORC_SOURCE:-} \
+ -DPARQUET_BUILD_EXECUTABLES=${PARQUET_BUILD_EXECUTABLES:-OFF} \
+ -DPARQUET_BUILD_EXAMPLES=${PARQUET_BUILD_EXAMPLES:-OFF} \
+ -DPARQUET_REQUIRE_ENCRYPTION=${PARQUET_REQUIRE_ENCRYPTION:-ON} \
+ -DProtobuf_SOURCE=${Protobuf_SOURCE:-} \
+ -DRapidJSON_SOURCE=${RapidJSON_SOURCE:-} \
+ -Dre2_SOURCE=${re2_SOURCE:-} \
+ -DSnappy_SOURCE=${Snappy_SOURCE:-} \
+ -DThrift_SOURCE=${Thrift_SOURCE:-} \
+ -Dutf8proc_SOURCE=${utf8proc_SOURCE:-} \
+ -Dzstd_SOURCE=${zstd_SOURCE:-} \
+ ${CMAKE_ARGS} \
+ "${source_dir}"
+
+if [ -n "${CPP_MAKE_PARALLELISM}" ]; then
+ time cmake --build . --target install -- -j"${CPP_MAKE_PARALLELISM}"
+else
+ time cmake --build . --target install
+fi
+
+popd
+
+if [ -x "$(command -v ldconfig)" ]; then
+ ldconfig
+fi
+
+if [ "${ARROW_USE_CCACHE}" == "ON" ]; then
+ echo -e "===\n=== ccache statistics after build\n==="
+ ccache -s
+fi
+
+if [ "${with_docs}" == "true" ]; then
+ pushd "${source_dir}/apidoc"
+ doxygen
+ popd
+fi
diff --git a/src/arrow/ci/scripts/cpp_test.sh b/src/arrow/ci/scripts/cpp_test.sh
new file mode 100755
index 000000000..822557f25
--- /dev/null
+++ b/src/arrow/ci/scripts/cpp_test.sh
@@ -0,0 +1,118 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -ex
+
+if (( $# < 2 )); then # numeric test: [[ $# < 2 ]] compares as strings
+ echo "Usage: $0 <Arrow dir> <build dir> [ctest args ...]" >&2
+ exit 1
+fi
+
+arrow_dir=${1}; shift
+build_dir=${1}/cpp; shift
+source_dir=${arrow_dir}/cpp
+binary_output_dir=${build_dir}/${ARROW_BUILD_TYPE:-debug}
+
+export ARROW_TEST_DATA=${arrow_dir}/testing/data
+export PARQUET_TEST_DATA=${source_dir}/submodules/parquet-testing/data
+export LD_LIBRARY_PATH=${ARROW_HOME}/${CMAKE_INSTALL_LIBDIR:-lib}${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}
+
+# By default, aws-sdk tries to contact a non-existing local ip host
+# to retrieve metadata. Disable this so that S3FileSystem tests run faster.
+export AWS_EC2_METADATA_DISABLED=TRUE
+
+ctest_options=()
+case "$(uname)" in
+ Linux)
+ n_jobs=$(nproc)
+ ;;
+ Darwin)
+ n_jobs=$(sysctl -n hw.ncpu)
+ ;;
+ MINGW*)
+ n_jobs=${NUMBER_OF_PROCESSORS:-1}
+ # TODO: Enable these crashed tests.
+ # https://issues.apache.org/jira/browse/ARROW-9072
+ exclude_tests="gandiva-internals-test"
+ exclude_tests="${exclude_tests}|gandiva-projector-test"
+ exclude_tests="${exclude_tests}|gandiva-utf8-test"
+ if [ "${MSYSTEM}" = "MINGW32" ]; then
+ # Additional exclusions applied only when MSYSTEM is MINGW32
+ exclude_tests="${exclude_tests}|gandiva-binary-test"
+ exclude_tests="${exclude_tests}|gandiva-boolean-expr-test"
+ exclude_tests="${exclude_tests}|gandiva-date-time-test"
+ exclude_tests="${exclude_tests}|gandiva-decimal-single-test"
+ exclude_tests="${exclude_tests}|gandiva-decimal-test"
+ exclude_tests="${exclude_tests}|gandiva-filter-project-test"
+ exclude_tests="${exclude_tests}|gandiva-filter-test"
+ exclude_tests="${exclude_tests}|gandiva-hash-test"
+ exclude_tests="${exclude_tests}|gandiva-if-expr-test"
+ exclude_tests="${exclude_tests}|gandiva-in-expr-test"
+ exclude_tests="${exclude_tests}|gandiva-literal-test"
+ exclude_tests="${exclude_tests}|gandiva-null-validity-test"
+ fi
+ ctest_options+=(--exclude-regex "${exclude_tests}")
+ ;;
+ *)
+ n_jobs=${NPROC:-1}
+ ;;
+esac
+
+pushd "${build_dir}"
+
+if ! command -v python > /dev/null 2>&1; then
+ export PYTHON=python3
+fi
+ctest \
+ --label-regex unittest \
+ --output-on-failure \
+ --parallel "${n_jobs}" \
+ --timeout 300 \
+ "${ctest_options[@]}" \
+ "$@"
+
+if [ "${ARROW_BUILD_EXAMPLES}" == "ON" ]; then
+ examples=$(find "${binary_output_dir}" -executable -name "*example")
+ if [ "${examples}" == "" ]; then
+ echo "=================="
+ echo "No examples found!"
+ echo "=================="
+ exit 1
+ fi
+ for ex in ${examples}
+ do
+ echo "=================="
+ echo "Executing ${ex}"
+ echo "=================="
+ ${ex}
+ done
+fi
+
+if [ "${ARROW_FUZZING}" == "ON" ]; then
+ # Fuzzing regression tests (the globs stay outside the quotes so they expand)
+ "${binary_output_dir}"/arrow-ipc-stream-fuzz "${ARROW_TEST_DATA}"/arrow-ipc-stream/crash-*
+ "${binary_output_dir}"/arrow-ipc-stream-fuzz "${ARROW_TEST_DATA}"/arrow-ipc-stream/*-testcase-*
+ "${binary_output_dir}"/arrow-ipc-file-fuzz "${ARROW_TEST_DATA}"/arrow-ipc-file/*-testcase-*
+ "${binary_output_dir}"/arrow-ipc-tensor-stream-fuzz "${ARROW_TEST_DATA}"/arrow-ipc-tensor-stream/*-testcase-*
+ if [ "${ARROW_PARQUET}" == "ON" ]; then
+ "${binary_output_dir}"/parquet-arrow-fuzz "${ARROW_TEST_DATA}"/parquet/fuzzing/*-testcase-*
+ fi
+fi
+
+popd
diff --git a/src/arrow/ci/scripts/csharp_build.sh b/src/arrow/ci/scripts/csharp_build.sh
new file mode 100755
index 000000000..5a3976794
--- /dev/null
+++ b/src/arrow/ci/scripts/csharp_build.sh
@@ -0,0 +1,26 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -ex
+
+source_dir=${1}/csharp # $1: root of the Arrow checkout
+
+pushd "${source_dir}"
+dotnet build
+popd
diff --git a/src/arrow/ci/scripts/csharp_pack.sh b/src/arrow/ci/scripts/csharp_pack.sh
new file mode 100755
index 000000000..e9dfc664e
--- /dev/null
+++ b/src/arrow/ci/scripts/csharp_pack.sh
@@ -0,0 +1,26 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -eux
+
+source_dir=${1}/csharp # $1: root of the Arrow checkout
+
+pushd "${source_dir}"
+dotnet pack -c Release
+popd
diff --git a/src/arrow/ci/scripts/csharp_test.sh b/src/arrow/ci/scripts/csharp_test.sh
new file mode 100755
index 000000000..9e4e35dd4
--- /dev/null
+++ b/src/arrow/ci/scripts/csharp_test.sh
@@ -0,0 +1,29 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -ex
+
+source_dir=${1}/csharp # $1: root of the Arrow checkout
+
+pushd "${source_dir}"
+dotnet test
+for pdb in artifacts/Apache.Arrow/*/*/Apache.Arrow.pdb; do
+ sourcelink test "${pdb}"
+done
+popd
diff --git a/src/arrow/ci/scripts/docs_build.sh b/src/arrow/ci/scripts/docs_build.sh
new file mode 100755
index 000000000..e6ee768ee
--- /dev/null
+++ b/src/arrow/ci/scripts/docs_build.sh
@@ -0,0 +1,48 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+set -ex
+
+arrow_dir=${1:?Arrow source directory required}
+build_dir=${2:?build directory required}/docs
+
+export LD_LIBRARY_PATH=${ARROW_HOME}/lib${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}
+export PKG_CONFIG_PATH=${ARROW_HOME}/lib/pkgconfig:${PKG_CONFIG_PATH}
+export GI_TYPELIB_PATH=${ARROW_HOME}/lib/girepository-1.0
+export CFLAGS="-DARROW_NO_DEPRECATED_API"
+export CXXFLAGS="-DARROW_NO_DEPRECATED_API"
+
+ncpus=$(python3 -c "import os; print(os.cpu_count())")
+
+# Sphinx docs
+sphinx-build -b html -j "${ncpus}" "${arrow_dir}/docs/source" "${build_dir}"
+
+# C++ - original doxygen
+# rsync -a ${arrow_dir}/cpp/apidoc/ ${build_dir}/cpp
+
+# R
+rsync -a "${arrow_dir}/r/docs/" "${build_dir}/r"
+
+# C GLib
+rsync -a "${ARROW_HOME}/share/gtk-doc/html/" "${build_dir}/c_glib"
+
+# Java
+rsync -a "${arrow_dir}/java/target/site/apidocs/" "${build_dir}/java/reference"
+
+# Javascript
+rsync -a "${arrow_dir}/js/doc/" "${build_dir}/js"
diff --git a/src/arrow/ci/scripts/go_build.sh b/src/arrow/ci/scripts/go_build.sh
new file mode 100755
index 000000000..267f78e59
--- /dev/null
+++ b/src/arrow/ci/scripts/go_build.sh
@@ -0,0 +1,40 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -ex
+
+source_dir=${1}/go
+
+pushd "${source_dir}/arrow"
+
+# Reset TAGS so a value inherited from the environment cannot leak in
+TAGS=""
+if [[ -n "${ARROW_GO_TESTCGO}" ]]; then TAGS="-tags ccalloc"; fi
+
+go get -d -t -v ./...
+go install $TAGS -v ./... # unquoted on purpose: expands to zero or two words
+
+popd
+
+pushd "${source_dir}/parquet"
+
+go get -d -t -v ./...
+go install -v ./...
+
+popd
diff --git a/src/arrow/ci/scripts/go_cgo_python_test.sh b/src/arrow/ci/scripts/go_cgo_python_test.sh
new file mode 100755
index 000000000..5f2032fba
--- /dev/null
+++ b/src/arrow/ci/scripts/go_cgo_python_test.sh
@@ -0,0 +1,45 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -ex
+
+source_dir=${1}/go
+
+pushd "${source_dir}/arrow/cdata/test"
+
+# Name of the c-shared test library that "go build" writes below
+case "$(uname)" in
+ MINGW*)
+ testlib="cgotest.dll"
+ ;;
+ *)
+ # Linux, Darwin and any other host
+ testlib="cgotest.so"
+ ;;
+esac
+
+go build -tags cdata_test,assert -buildmode=c-shared -o "$testlib" .
+
+python test_export_to_cgo.py
+
+# Remove the library and the matching .h header
+rm -- "$testlib"
+rm -- "${testlib%.*}.h"
+
+popd
diff --git a/src/arrow/ci/scripts/go_test.sh b/src/arrow/ci/scripts/go_test.sh
new file mode 100755
index 000000000..f7b2cd963
--- /dev/null
+++ b/src/arrow/ci/scripts/go_test.sh
@@ -0,0 +1,60 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -ex # exit on the first failing command (-e) and echo each command (-x)
+
+source_dir=${1}/go # the go/ directory under the path given as first argument
+
+testargs="-race" # default: pass -race to every "go test" below
+case "$(uname)" in
+ MINGW*)
+ # -race doesn't work on windows currently
+ testargs=""
+ ;;
+esac
+
+if [[ "$(go env GOHOSTARCH)" = "s390x" ]]; then
+ testargs="" # -race not supported on s390x
+fi
+
+pushd ${source_dir}/arrow
+
+TAGS="assert,test"
+if [[ -n "${ARROW_GO_TESTCGO}" ]]; then # any non-empty value adds the ccalloc build tag
+ TAGS="${TAGS},ccalloc"
+fi
+
+
+# the cgo implementation of the c data interface requires the "test"
+# tag in order to run its tests so that the testing functions implemented
+# in .c files don't get included in non-test builds.
+
+for d in $(go list ./... | grep -v vendor); do # every listed package whose path does not contain "vendor"
+ go test $testargs -tags $TAGS $d
+done
+
+popd
+
+pushd ${source_dir}/parquet
+
+for d in $(go list ./... | grep -v vendor); do
+ go test $testargs -tags assert $d # parquet packages are tested with the assert tag only
+done
+
+popd
diff --git a/src/arrow/ci/scripts/install_conda.sh b/src/arrow/ci/scripts/install_conda.sh
new file mode 100755
index 000000000..f4d313b63
--- /dev/null
+++ b/src/arrow/ci/scripts/install_conda.sh
@@ -0,0 +1,66 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -e # exit on the first failing command
+
+declare -A archs # accepted <architecture> argument -> name used in the installer file name
+archs=([amd64]=x86_64
+ [arm32v7]=armv7l
+ [ppc64le]=ppc64le
+ [i386]=x86)
+
+declare -A platforms # accepted <platform> argument -> name used in the installer file name
+platforms=([windows]=Windows
+ [macos]=MacOSX
+ [linux]=Linux)
+
+if [ "$#" -ne 4 ]; then
+ echo "Usage: $0 <architecture> <platform> <version> <prefix>"
+ exit 1
+elif [[ -z ${archs[$1]} ]]; then # key not present in the archs map
+ echo "Unexpected architecture: ${1}"
+ exit 1
+elif [[ -z ${platforms[$2]} ]]; then # key not present in the platforms map
+ echo "Unexpected platform: ${2}"
+ exit 1
+fi
+
+arch=${archs[$1]}
+platform=${platforms[$2]}
+version=$3
+prefix=$4
+
+echo "Downloading Miniconda installer..."
+wget -nv https://repo.continuum.io/miniconda/Miniconda3-${version}-${platform}-${arch}.sh -O /tmp/miniconda.sh
+bash /tmp/miniconda.sh -b -p ${prefix} # run the downloaded installer, passing the requested prefix
+rm /tmp/miniconda.sh
+
+# Like "conda init", but for POSIX sh rather than bash
+ln -s ${prefix}/etc/profile.d/conda.sh /etc/profile.d/conda.sh
+
+# Configure
+source /etc/profile.d/conda.sh
+conda config --add channels conda-forge
+conda config --set channel_priority strict
+conda config --set show_channel_urls True
+conda config --set remote_connect_timeout_secs 12
+
+# Update and clean
+conda update --all -y
+conda clean --all -y
diff --git a/src/arrow/ci/scripts/install_dask.sh b/src/arrow/ci/scripts/install_dask.sh
new file mode 100755
index 000000000..954ce3249
--- /dev/null
+++ b/src/arrow/ci/scripts/install_dask.sh
@@ -0,0 +1,36 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -e # exit on the first failing command
+
+if [ "$#" -ne 1 ]; then
+ echo "Usage: $0 <dask version>"
+ exit 1
+fi
+
+dask=$1 # "master", "latest", or a version passed to conda as dask=<version>
+
+if [ "${dask}" = "master" ]; then
+ pip install https://github.com/dask/dask/archive/main.tar.gz#egg=dask[dataframe] # "master" installs the archive of the main branch via pip
+elif [ "${dask}" = "latest" ]; then
+ conda install -q dask
+else
+ conda install -q dask=${dask}
+fi
+conda clean --all # NOTE(review): no -y here, unlike the conda calls in install_conda.sh
diff --git a/src/arrow/ci/scripts/install_gcs_testbench.sh b/src/arrow/ci/scripts/install_gcs_testbench.sh
new file mode 100755
index 000000000..579a78944
--- /dev/null
+++ b/src/arrow/ci/scripts/install_gcs_testbench.sh
@@ -0,0 +1,38 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -e # exit on the first failing command
+
+if [ "$#" -ne 2 ]; then
+ echo "Usage: $0 <arch> <storage-testbench version>"
+ exit 1
+fi
+
+arch=$1
+if [ "${arch}" != "amd64" ]; then
+ echo "GCS testbench won't install on non-x86 architecture"
+ exit 0 # skip without failing the caller
+fi
+
+version=$2 # "default" or the name of a storage-testbench archive (e.g. a tag)
+if [[ "${version}" = "default" ]]; then # string comparison; -eq would evaluate both sides as integers
+ version="v0.7.0"
+fi
+
+pip install "https://github.com/googleapis/storage-testbench/archive/${version}.tar.gz"
diff --git a/src/arrow/ci/scripts/install_iwyu.sh b/src/arrow/ci/scripts/install_iwyu.sh
new file mode 100755
index 000000000..3cd2cbc95
--- /dev/null
+++ b/src/arrow/ci/scripts/install_iwyu.sh
@@ -0,0 +1,48 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+set -eu # exit on the first failing command and on use of an unset variable
+
+source_dir=${1:-/tmp/iwyu} # clone and build directory; removed at the end of the script
+install_prefix=${2:-/usr/local} # passed to cmake as CMAKE_INSTALL_PREFIX
+clang_tools_version=${3:-8} # used in the branch name, the compiler names and the LLVM path
+
+iwyu_branch_name="clang_${clang_tools_version}"
+if [ ${clang_tools_version} -lt 10 ]; then # versions below 10 get a ".0" suffix on the branch name
+ iwyu_branch_name="${iwyu_branch_name}.0"
+fi
+
+git clone --single-branch --branch ${iwyu_branch_name} \
+ https://github.com/include-what-you-use/include-what-you-use.git ${source_dir}
+
+mkdir -p ${source_dir}/build
+pushd ${source_dir}/build
+
+# Build IWYU for current Clang
+export CC=clang-${clang_tools_version}
+export CXX=clang++-${clang_tools_version}
+
+cmake -DCMAKE_PREFIX_PATH=/usr/lib/llvm-${clang_tools_version} \
+ -DCMAKE_INSTALL_PREFIX=${install_prefix} \
+ ${source_dir}
+make -j4
+make install
+
+popd
+
+rm -rf ${source_dir} # the checkout (including build/) is discarded after installing
diff --git a/src/arrow/ci/scripts/install_kartothek.sh b/src/arrow/ci/scripts/install_kartothek.sh
new file mode 100755
index 000000000..4d88943b6
--- /dev/null
+++ b/src/arrow/ci/scripts/install_kartothek.sh
@@ -0,0 +1,41 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -e # exit on the first failing command
+
+if [ "$#" -ne 2 ]; then
+ echo "Usage: $0 <kartothek version> <target directory>"
+ exit 1
+fi
+
+kartothek=$1 # "master", "latest", or any revision accepted by git checkout
+target=$2 # directory the repository is cloned into
+
+git clone --recurse-submodules https://github.com/JDASoftwareGroup/kartothek "${target}"
+if [ "${kartothek}" = "master" ]; then
+ git -C "${target}" checkout master;
+elif [ "${kartothek}" = "latest" ]; then
+ git -C "${target}" checkout "$(git -C "${target}" describe --tags)"; # describe the clone, not the caller's directory
+else
+ git -C "${target}" checkout "${kartothek}";
+fi
+
+pushd "${target}"
+pip install --no-deps .
+popd
diff --git a/src/arrow/ci/scripts/install_minio.sh b/src/arrow/ci/scripts/install_minio.sh
new file mode 100755
index 000000000..5cda46e59
--- /dev/null
+++ b/src/arrow/ci/scripts/install_minio.sh
@@ -0,0 +1,62 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -e # exit on the first failing command
+
+declare -A archs # accepted <architecture> argument -> name used in the download URL
+archs=([amd64]=amd64
+ [arm64v8]=arm64
+ [arm32v7]=arm
+ [s390x]=s390x)
+
+declare -A platforms # accepted <platform> argument -> name used in the download URL
+platforms=([linux]=linux
+ [macos]=darwin)
+
+if [ "$#" -ne 4 ]; then # validate the argument count before reading any positional parameter
+ echo "Usage: $0 <architecture> <platform> <version> <prefix>"
+ exit 1
+fi
+
+arch=${archs[$1]}
+platform=${platforms[$2]}
+version=$3
+prefix=$4
+if [[ -z ${arch} ]]; then # key not present in the archs map
+ echo "Unexpected architecture: ${1}"
+ exit 1
+elif [[ -z ${platform} ]]; then # key not present in the platforms map
+ echo "Unexpected platform: ${2}"
+ exit 1
+elif [[ ${version} != "latest" ]]; then
+ echo "Cannot fetch specific versions of minio, only latest is supported."
+ exit 1
+fi
+
+if [[ ! -x ${prefix}/bin/minio ]]; then # download only when no executable is already present
+ url="https://dl.min.io/server/minio/release/${platform}-${arch}/minio"
+ echo "Fetching ${url}..."
+ wget -nv -P ${prefix}/bin ${url}
+ chmod +x ${prefix}/bin/minio
+fi
+if [[ ! -x ${prefix}/bin/mc ]]; then # same for the mc client binary
+ url="https://dl.min.io/client/mc/release/${platform}-${arch}/mc"
+ echo "Fetching ${url}..."
+ wget -nv -P ${prefix}/bin ${url}
+ chmod +x ${prefix}/bin/mc
+fi
diff --git a/src/arrow/ci/scripts/install_osx_sdk.sh b/src/arrow/ci/scripts/install_osx_sdk.sh
new file mode 100755
index 000000000..896d084e0
--- /dev/null
+++ b/src/arrow/ci/scripts/install_osx_sdk.sh
@@ -0,0 +1,41 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -ex # exit on the first failing command (-e) and echo each command (-x)
+
+if [ ${using_homebrew} != "yes" ]; then # NOTE(review): unquoted; confirm the intended behaviour when using_homebrew is unset
+ export MACOSX_DEPLOYMENT_TARGET="10.9"
+ export CONDA_BUILD_SYSROOT="$(xcode-select -p)/Platforms/MacOSX.platform/Developer/SDKs/MacOSX${MACOSX_DEPLOYMENT_TARGET}.sdk"
+
+ if [[ ! -d ${CONDA_BUILD_SYSROOT} || "$OSX_FORCE_SDK_DOWNLOAD" == "1" ]]; then
+ echo "downloading ${MACOSX_DEPLOYMENT_TARGET} sdk"
+ curl -L -O https://github.com/phracker/MacOSX-SDKs/releases/download/10.13/MacOSX${MACOSX_DEPLOYMENT_TARGET}.sdk.tar.xz
+ tar -xf MacOSX${MACOSX_DEPLOYMENT_TARGET}.sdk.tar.xz -C "$(dirname "$CONDA_BUILD_SYSROOT")"
+ # set minimum sdk version to our target
+ plutil -replace MinimumSDKVersion -string ${MACOSX_DEPLOYMENT_TARGET} $(xcode-select -p)/Platforms/MacOSX.platform/Info.plist
+ plutil -replace DTSDKName -string macosx${MACOSX_DEPLOYMENT_TARGET}internal $(xcode-select -p)/Platforms/MacOSX.platform/Info.plist
+ fi
+
+ if [ -d "${CONDA_BUILD_SYSROOT}" ]; then # final check that the SDK directory now exists
+ echo "Found CONDA_BUILD_SYSROOT: ${CONDA_BUILD_SYSROOT}"
+ else
+ echo "Missing CONDA_BUILD_SYSROOT: ${CONDA_BUILD_SYSROOT}"
+ exit 1
+ fi
+fi
diff --git a/src/arrow/ci/scripts/install_pandas.sh b/src/arrow/ci/scripts/install_pandas.sh
new file mode 100755
index 000000000..5aca65f82
--- /dev/null
+++ b/src/arrow/ci/scripts/install_pandas.sh
@@ -0,0 +1,46 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -e # exit on the first failing command
+
+if [ "$#" -lt 1 ]; then
+ echo "Usage: $0 <pandas version> <optional numpy version = latest>"
+ exit 1
+fi
+
+pandas=$1 # "master", "nightly", "latest", or an exact version for pip's ==
+numpy=${2:-"latest"} # "nightly", "latest", or an exact version; defaults to "latest"
+
+if [ "${numpy}" = "nightly" ]; then # numpy is installed first, before pandas
+ pip install --extra-index-url https://pypi.anaconda.org/scipy-wheels-nightly/simple --pre numpy
+elif [ "${numpy}" = "latest" ]; then
+ pip install numpy
+else
+ pip install numpy==${numpy}
+fi
+
+if [ "${pandas}" = "master" ]; then
+ pip install git+https://github.com/pandas-dev/pandas.git --no-build-isolation # install from the git repository
+elif [ "${pandas}" = "nightly" ]; then
+ pip install --extra-index-url https://pypi.anaconda.org/scipy-wheels-nightly/simple --pre pandas
+elif [ "${pandas}" = "latest" ]; then
+ pip install pandas
+else
+ pip install pandas==${pandas}
+fi
diff --git a/src/arrow/ci/scripts/install_python.sh b/src/arrow/ci/scripts/install_python.sh
new file mode 100755
index 000000000..babb2c1e8
--- /dev/null
+++ b/src/arrow/ci/scripts/install_python.sh
@@ -0,0 +1,68 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -eu # exit on the first failing command and on use of an unset variable
+
+declare -A platforms # accepted <platform> argument -> platform name used below
+platforms=([windows]=Windows
+ [macos]=MacOSX
+ [linux]=Linux)
+
+declare -A versions # accepted <version> argument -> full release number to download
+versions=([3.6]=3.6.8
+ [3.7]=3.7.9
+ [3.8]=3.8.10
+ [3.9]=3.9.6
+ [3.10]=3.10.0)
+
+if [ "$#" -ne 2 ]; then
+ echo "Usage: $0 <platform> <version>"
+ exit 1
+elif [[ -z ${platforms[$1]:-} ]]; then # ":-" lets an unknown key reach the message instead of tripping set -u
+ echo "Unexpected platform: ${1}"
+ exit 1
+fi
+
+platform=${platforms[$1]}
+version=$2
+full_version=${versions[$2]:?unsupported Python version} # abort with a clear message for an unknown version
+
+if [ "$platform" = "MacOSX" ]; then
+ echo "Downloading Python installer..."
+
+ if [ "$(uname -m)" = "arm64" ] || [ "$version" = "3.10" ]; then # arm64 hosts and 3.10 use the macos11 package name
+ fname="python-${full_version}-macos11.pkg"
+ else
+ fname="python-${full_version}-macosx10.9.pkg"
+ fi
+ wget "https://www.python.org/ftp/python/${full_version}/${fname}"
+
+ echo "Installing Python..."
+ installer -pkg "$fname" -target /
+ rm "$fname"
+
+ echo "Installing Pip..."
+ python="/Library/Frameworks/Python.framework/Versions/${version}/bin/python${version}"
+ pip="${python} -m pip"
+
+ $python -m ensurepip
+ $pip install -U pip setuptools virtualenv # $pip is left unquoted on purpose: it holds a command plus arguments
+else
+ echo "Unsupported platform: $platform" # NOTE(review): only reports; the script still exits with status 0
+fi
diff --git a/src/arrow/ci/scripts/install_spark.sh b/src/arrow/ci/scripts/install_spark.sh
new file mode 100755
index 000000000..936313fd8
--- /dev/null
+++ b/src/arrow/ci/scripts/install_spark.sh
@@ -0,0 +1,31 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -e # exit on the first failing command
+
+if [ "$#" -ne 2 ]; then
+ echo "Usage: $0 <spark version> <target directory>"
+ exit 1
+fi
+
+spark=$1 # revision handed to git checkout
+target=$2 # directory the repository is cloned into
+
+git clone https://github.com/apache/spark "${target}"
+git -C "${target}" checkout "${spark}"
diff --git a/src/arrow/ci/scripts/install_turbodbc.sh b/src/arrow/ci/scripts/install_turbodbc.sh
new file mode 100755
index 000000000..3e644a3e2
--- /dev/null
+++ b/src/arrow/ci/scripts/install_turbodbc.sh
@@ -0,0 +1,43 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -e # exit on the first failing command
+
+if [ "$#" -ne 2 ]; then
+ echo "Usage: $0 <turbodbc version> <target directory>"
+ exit 1
+fi
+
+turbodbc=$1 # "master", "latest", or any revision accepted by git checkout
+target=$2 # directory the repository is cloned into
+
+git clone --recurse-submodules https://github.com/blue-yonder/turbodbc "${target}"
+if [ "${turbodbc}" = "master" ]; then
+ git -C "${target}" checkout master;
+elif [ "${turbodbc}" = "latest" ]; then
+ git -C "${target}" checkout "$(git -C "${target}" describe --tags)"; # describe the clone, not the caller's directory
+else
+ git -C "${target}" checkout "${turbodbc}";
+fi
+
+pushd "${target}"
+wget -q https://github.com/pybind/pybind11/archive/v2.6.2.tar.gz
+tar xvf v2.6.2.tar.gz
+mv pybind11-2.6.2 pybind11 # expose the unpacked sources as ./pybind11 inside the checkout
+popd
diff --git a/src/arrow/ci/scripts/install_vcpkg.sh b/src/arrow/ci/scripts/install_vcpkg.sh
new file mode 100755
index 000000000..fe99a7fea
--- /dev/null
+++ b/src/arrow/ci/scripts/install_vcpkg.sh
@@ -0,0 +1,39 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -e # exit on the first failing command
+
+if [ "$#" -ne 2 ]; then
+ echo "Usage: $0 <vcpkg version> <target directory>"
+ exit 1
+fi
+
+vcpkg_version=$1 # passed to git clone --branch
+vcpkg_destination=$2 # directory the repository is cloned into
+vcpkg_patch=$(realpath $(dirname "${0}")/../vcpkg/ports.patch) # absolute path, resolved before the pushd below
+
+git clone --depth 1 --branch ${vcpkg_version} https://github.com/microsoft/vcpkg ${vcpkg_destination}
+
+pushd ${vcpkg_destination}
+
+./bootstrap-vcpkg.sh -useSystemBinaries -disableMetrics
+git apply --ignore-whitespace ${vcpkg_patch}
+echo "Patch successfully applied!" # only reached when git apply succeeded, because of set -e
+
+popd
diff --git a/src/arrow/ci/scripts/integration_arrow.sh b/src/arrow/ci/scripts/integration_arrow.sh
new file mode 100755
index 000000000..30cbb2d63
--- /dev/null
+++ b/src/arrow/ci/scripts/integration_arrow.sh
@@ -0,0 +1,40 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -ex # exit on the first failing command (-e) and echo each command (-x)
+
+arrow_dir=${1} # first argument; dev/archery and testing/data are resolved under it
+gold_dir=$arrow_dir/testing/data/arrow-ipc-stream/integration
+
+pip install -e $arrow_dir/dev/archery
+
+# Rust can be enabled by exporting ARCHERY_INTEGRATION_WITH_RUST=1
+archery integration \
+ --run-flight \
+ --with-cpp=1 \
+ --with-csharp=1 \
+ --with-java=1 \
+ --with-js=1 \
+ --with-go=1 \
+ --gold-dirs=$gold_dir/0.14.1 \
+ --gold-dirs=$gold_dir/0.17.1 \
+ --gold-dirs=$gold_dir/1.0.0-bigendian \
+ --gold-dirs=$gold_dir/1.0.0-littleendian \
+ --gold-dirs=$gold_dir/2.0.0-compression \
+ --gold-dirs=$gold_dir/4.0.0-shareddict
diff --git a/src/arrow/ci/scripts/integration_dask.sh b/src/arrow/ci/scripts/integration_dask.sh
new file mode 100755
index 000000000..e67a02945
--- /dev/null
+++ b/src/arrow/ci/scripts/integration_dask.sh
@@ -0,0 +1,40 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -e # exit on the first failing command, so a failed import below aborts the run
+
+# check that optional pyarrow modules are available
+# because pytest would just skip the dask tests
+python -c "import pyarrow.orc"
+python -c "import pyarrow.parquet"
+
+# check that dask.dataframe is correctly installed
+python -c "import dask.dataframe"
+
+# TODO(kszucs): the following tests also use pyarrow
+# pytest -sv --pyargs dask.bytes.tests.test_s3
+# pytest -sv --pyargs dask.bytes.tests.test_hdfs
+# pytest -sv --pyargs dask.bytes.tests.test_local
+
+# skip failing pickle test, see https://github.com/dask/dask/issues/6374
+pytest -v --pyargs dask.dataframe.tests.test_dataframe -k "not test_dataframe_picklable and not test_describe_empty"
+pytest -v --pyargs dask.dataframe.io.tests.test_orc # run in full, no -k filter
+# skip failing parquet tests, see https://github.com/dask/dask/issues/6243
+pytest -v --pyargs dask.dataframe.io.tests.test_parquet \
+ -k "not test_to_parquet_pyarrow_w_inconsistent_schema_by_partition_fails_by_default and not test_timeseries_nulls_in_schema"
diff --git a/src/arrow/ci/scripts/integration_hdfs.sh b/src/arrow/ci/scripts/integration_hdfs.sh
new file mode 100755
index 000000000..c95449379
--- /dev/null
+++ b/src/arrow/ci/scripts/integration_hdfs.sh
@@ -0,0 +1,69 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -e # exit on the first failing command
+
+source_dir=${1}/cpp # NOTE(review): assigned but not referenced again in this script
+build_dir=${2}/cpp # directory that contains the debug/ test binaries run below
+
+export CLASSPATH=$($HADOOP_HOME/bin/hadoop classpath --glob)
+export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
+export LIBHDFS3_CONF=$HADOOP_CONF_DIR/hdfs-site.xml
+export ARROW_LIBHDFS3_DIR=$CONDA_PREFIX/lib
+
+libhdfs_dir=$HADOOP_HOME/lib/native
+hadoop_home=$HADOOP_HOME # saved so use_hadoop_home can restore it after use_libhdfs_dir unsets it
+
+function use_hadoop_home() { # switch to HADOOP_HOME being set and ARROW_LIBHDFS_DIR unset
+ unset ARROW_LIBHDFS_DIR
+ export HADOOP_HOME=$hadoop_home
+}
+
+function use_libhdfs_dir() { # switch to ARROW_LIBHDFS_DIR being set and HADOOP_HOME unset
+ unset HADOOP_HOME
+ export ARROW_LIBHDFS_DIR=$libhdfs_dir
+}
+
+# execute cpp tests
+export ARROW_HDFS_TEST_LIBHDFS_REQUIRE=ON
+pushd ${build_dir}
+
+debug/arrow-io-hdfs-test
+debug/arrow-hdfs-test
+
+use_libhdfs_dir # repeat the same two binaries with only ARROW_LIBHDFS_DIR set
+debug/arrow-io-hdfs-test
+debug/arrow-hdfs-test
+use_hadoop_home
+
+popd
+
+# cannot use --pyargs with custom arguments like --hdfs or --only-hdfs, because
+# pytest ignores them, see https://github.com/pytest-dev/pytest/issues/3517
+export PYARROW_TEST_HDFS=ON
+
+export PYARROW_HDFS_TEST_LIBHDFS_REQUIRE=ON
+
+pytest -vs --pyargs pyarrow.tests.test_fs
+pytest -vs --pyargs pyarrow.tests.test_hdfs
+
+use_libhdfs_dir # repeat the same two modules with only ARROW_LIBHDFS_DIR set
+pytest -vs --pyargs pyarrow.tests.test_fs
+pytest -vs --pyargs pyarrow.tests.test_hdfs
+use_hadoop_home
diff --git a/src/arrow/ci/scripts/integration_hiveserver2.sh b/src/arrow/ci/scripts/integration_hiveserver2.sh
new file mode 100755
index 000000000..36fba5ca8
--- /dev/null
+++ b/src/arrow/ci/scripts/integration_hiveserver2.sh
@@ -0,0 +1,32 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+set -e # exit on the first failing command
+
+arrow_dir=${1} # first argument; ci/scripts is resolved under it
+source_dir=${1}/cpp # NOTE(review): assigned but not referenced again in this script
+build_dir=${2}/cpp # directory that contains the debug/ test binary run below
+
+${arrow_dir}/ci/scripts/util_wait_for_it.sh impala:21050 -t 300 -s -- echo "impala is up"
+
+pushd ${build_dir}
+
+# ninja hiveserver2-test
+debug/hiveserver2-test
+
+popd
diff --git a/src/arrow/ci/scripts/integration_kartothek.sh b/src/arrow/ci/scripts/integration_kartothek.sh
new file mode 100755
index 000000000..379569b9c
--- /dev/null
+++ b/src/arrow/ci/scripts/integration_kartothek.sh
@@ -0,0 +1,31 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -e # exit on the first failing command, so a failed import below aborts the run
+
+# check that optional pyarrow modules are available
+# because pytest would just skip the pyarrow tests
+python -c "import pyarrow.parquet"
+
+# check that kartothek is correctly installed
+python -c "import kartothek"
+
+pushd /kartothek
+# See ARROW-12314, test_load_dataframes_columns_raises_missing skipped because of changed error message
+pytest -n0 --ignore tests/cli/test_query.py -k "not test_load_dataframes_columns_raises_missing"
diff --git a/src/arrow/ci/scripts/integration_spark.sh b/src/arrow/ci/scripts/integration_spark.sh
new file mode 100755
index 000000000..90ecbce39
--- /dev/null
+++ b/src/arrow/ci/scripts/integration_spark.sh
@@ -0,0 +1,87 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# exit on any error (-e) or on expansion of an unset variable (-u)
+set -eu
+
+source_dir=${1}
+spark_dir=${2}
+
+# Test Spark with latest PyArrow only, don't build with latest Arrow Java
+test_pyarrow_only=${3:-false}
+
+# Spark version under test (default: master); use this instead of SPARK_VERSION, which may be unset and would abort under `set -u`
+spark_version=${SPARK_VERSION:-master}
+
+# Use old behavior that always dropped timezones.
+export PYARROW_IGNORE_TIMEZONE=1
+
+if [ "${spark_version:0:2}" == "2." ]; then
+ # https://github.com/apache/spark/blob/master/docs/sql-pyspark-pandas-with-arrow.md#compatibility-setting-for-pyarrow--0150-and-spark-23x-24x
+ export ARROW_PRE_0_15_IPC_FORMAT=1
+fi
+
+# Get Arrow Java version: the first Maven output line that starts with a digit and is not a "[...]" log line
+pushd "${source_dir}/java"
+ arrow_version=$(mvn org.apache.maven.plugins:maven-help-plugin:2.1.1:evaluate -Dexpression=project.version | sed -n -e '/^\[.*\]/ !{ /^[0-9]/ { p; q } }')
+popd
+
+export MAVEN_OPTS="-Xss256m -Xmx2g -XX:ReservedCodeCacheSize=1g -Dorg.slf4j.simpleLogger.defaultLogLevel=warn"
+export MAVEN_OPTS="${MAVEN_OPTS} -Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer.Slf4jMavenTransferListener=warn"
+
+pushd "${spark_dir}"
+
+ if [ "${test_pyarrow_only}" == "true" ]; then
+ echo "Building Spark ${spark_version} to test pyarrow only"
+
+ # Build Spark only
+ build/mvn -B -DskipTests package
+
+ else
+
+ # Update Spark pom with the Arrow version just installed and build Spark, need package phase for pyspark
+ echo "Building Spark ${spark_version} with Arrow ${arrow_version}"
+ build/mvn versions:set-property -Dproperty=arrow.version -DnewVersion="${arrow_version}"
+
+ # Build Spark with new Arrow Java
+ build/mvn -B -DskipTests package
+
+ spark_scala_tests=(
+ "org.apache.spark.sql.execution.arrow"
+ "org.apache.spark.sql.execution.vectorized.ColumnarBatchSuite"
+ "org.apache.spark.sql.execution.vectorized.ArrowColumnVectorSuite")
+
+ (echo "Testing Spark:"; IFS=$'\n'; echo "${spark_scala_tests[*]}")
+
+ # TODO: should be able to only build spark-sql tests with adding "-pl sql/core" but not currently working
+ build/mvn -B -Dtest=none -DwildcardSuites="$(IFS=,; echo "${spark_scala_tests[*]}")" test
+ fi
+
+ # Run pyarrow related Python tests only
+ spark_python_tests=(
+ "pyspark.sql.tests.test_arrow"
+ "pyspark.sql.tests.test_pandas_map"
+ "pyspark.sql.tests.test_pandas_cogrouped_map"
+ "pyspark.sql.tests.test_pandas_grouped_map"
+ "pyspark.sql.tests.test_pandas_udf"
+ "pyspark.sql.tests.test_pandas_udf_scalar"
+ "pyspark.sql.tests.test_pandas_udf_grouped_agg"
+ "pyspark.sql.tests.test_pandas_udf_window")
+
+ (echo "Testing PySpark:"; IFS=$'\n'; echo "${spark_python_tests[*]}")
+ python/run-tests --testnames "$(IFS=,; echo "${spark_python_tests[*]}")" --python-executables python
+popd
diff --git a/src/arrow/ci/scripts/integration_turbodbc.sh b/src/arrow/ci/scripts/integration_turbodbc.sh
new file mode 100755
index 000000000..f0fafd512
--- /dev/null
+++ b/src/arrow/ci/scripts/integration_turbodbc.sh
@@ -0,0 +1,47 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -ex
+
+source_dir=${1}
+build_dir=${2}/turbodbc
+
+# check that optional pyarrow modules are available
+# because pytest would just skip the pyarrow tests
+python -c "import pyarrow.orc"
+python -c "import pyarrow.parquet"
+
+mkdir -p "${build_dir}"
+pushd "${build_dir}"
+
+cmake -DCMAKE_INSTALL_PREFIX="${ARROW_HOME}" \
+ -DCMAKE_CXX_FLAGS="${CXXFLAGS}" \
+ -DPYTHON_EXECUTABLE="$(command -v python)" \
+ -GNinja \
+ "${source_dir}"
+ninja install
+
+# TODO(ARROW-5074); the previous LD_LIBRARY_PATH is appended only when non-empty, so no empty (= current directory) entry is created
+export LD_LIBRARY_PATH="${ARROW_HOME}/lib${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
+export ODBCSYSINI="${source_dir}/earthly/odbc/"
+
+service postgresql start
+ctest --output-on-failure
+
+popd
diff --git a/src/arrow/ci/scripts/java_build.sh b/src/arrow/ci/scripts/java_build.sh
new file mode 100755
index 000000000..1ba37606d
--- /dev/null
+++ b/src/arrow/ci/scripts/java_build.sh
@@ -0,0 +1,107 @@
+#!/usr/bin/env bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -ex
+
+arrow_dir=${1}
+source_dir=${1}/java
+cpp_build_dir=${2}/cpp/${ARROW_BUILD_TYPE:-debug}
+cdata_dist_dir=${2}/java/c
+with_docs=${3:-false}
+
+if [[ "$(uname -s)" == "Linux" ]] && [[ "$(uname -m)" == "s390x" ]]; then
+ # Since some files for s390_64 are not available at maven central,
+ # download pre-built files from Artifactory and install them explicitly
+ mvn_install="mvn install:install-file"
+ wget="wget"
+ artifactory_base_url="https://apache.jfrog.io/artifactory/arrow"
+
+ artifactory_dir="protoc-binary"
+ group="com.google.protobuf"
+ artifact="protoc"
+ ver="3.7.1"
+ classifier="linux-s390_64"
+ extension="exe"
+ target=${artifact}-${ver}-${classifier}.${extension}
+ ${wget} ${artifactory_base_url}/${artifactory_dir}/${ver}/${target}
+ ${mvn_install} -DgroupId=${group} -DartifactId=${artifact} -Dversion=${ver} -Dclassifier=${classifier} -Dpackaging=${extension} -Dfile="$(pwd)/${target}"
+ # protoc requires libprotoc.so.18 libprotobuf.so.18
+ ${wget} ${artifactory_base_url}/${artifactory_dir}/${ver}/libprotoc.so.18
+ ${wget} ${artifactory_base_url}/${artifactory_dir}/${ver}/libprotobuf.so.18
+ mkdir -p "${ARROW_HOME}/lib"
+ cp lib*.so.18 "${ARROW_HOME}/lib"
+ export LD_LIBRARY_PATH="${LD_LIBRARY_PATH:+${LD_LIBRARY_PATH}:}${ARROW_HOME}/lib"
+
+ artifactory_dir="protoc-gen-grpc-java-binary"
+ group="io.grpc"
+ artifact="protoc-gen-grpc-java"
+ ver="1.30.2"
+ classifier="linux-s390_64"
+ extension="exe"
+ target=${artifact}-${ver}-${classifier}.${extension}
+ ${wget} ${artifactory_base_url}/${artifactory_dir}/${ver}/${target}
+ ${mvn_install} -DgroupId=${group} -DartifactId=${artifact} -Dversion=${ver} -Dclassifier=${classifier} -Dpackaging=${extension} -Dfile="$(pwd)/${target}"
+
+ artifactory_dir="netty-binary"
+ group="io.netty"
+ artifact="netty-transport-native-unix-common"
+ ver="4.1.48.Final"
+ classifier="linux-s390_64"
+ extension="jar"
+ target=${artifact}-${ver}-${classifier}.${extension}
+ ${wget} ${artifactory_base_url}/${artifactory_dir}/${ver}/${target}
+ ${mvn_install} -DgroupId=${group} -DartifactId=${artifact} -Dversion=${ver} -Dclassifier=${classifier} -Dpackaging=${extension} -Dfile="$(pwd)/${target}"
+ artifact="netty-transport-native-epoll"
+ extension="jar"
+ target=${artifact}-${ver}-${classifier}.${extension}
+ ${wget} ${artifactory_base_url}/${artifactory_dir}/${ver}/${target}
+ ${mvn_install} -DgroupId=${group} -DartifactId=${artifact} -Dversion=${ver} -Dclassifier=${classifier} -Dpackaging=${extension} -Dfile="$(pwd)/${target}"
+fi
+
+mvn="mvn -B -DskipTests -Drat.skip=true -Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer.Slf4jMavenTransferListener=warn"
+# Use `2 * ncores` threads
+mvn="${mvn} -T 2C"
+
+pushd "${source_dir}"
+# NOTE: ${mvn} (like ${wget} and ${mvn_install} above) stays unquoted so it word-splits into the command plus its flags
+${mvn} install
+
+if [ "${ARROW_JAVA_SHADE_FLATBUFFERS}" == "ON" ]; then
+ ${mvn} -Pshade-flatbuffers install
+fi
+
+if [ "${ARROW_JAVA_CDATA}" = "ON" ]; then
+ ${mvn} -Darrow.c.jni.dist.dir="${cdata_dist_dir}" -Parrow-c-data install
+fi
+
+if [ "${ARROW_GANDIVA_JAVA}" = "ON" ]; then
+ ${mvn} -Darrow.cpp.build.dir="${cpp_build_dir}" -Parrow-jni install
+fi
+
+if [ "${ARROW_PLASMA}" = "ON" ]; then
+ pushd "${source_dir}/plasma"
+ ${mvn} clean install
+ popd
+fi
+
+if [ "${with_docs}" == "true" ]; then
+ # HTTP pooling is turned off to avoid download issues https://issues.apache.org/jira/browse/ARROW-11633
+ ${mvn} -Dcheckstyle.skip=true -Dhttp.keepAlive=false -Dmaven.wagon.http.pool=false install site
+fi
+
+popd
diff --git a/src/arrow/ci/scripts/java_cdata_build.sh b/src/arrow/ci/scripts/java_cdata_build.sh
new file mode 100755
index 000000000..730c775d4
--- /dev/null
+++ b/src/arrow/ci/scripts/java_cdata_build.sh
@@ -0,0 +1,45 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -ex
+
+arrow_dir=${1}
+build_dir=${2}
+# The directory where the final binaries will be stored when scripts finish
+dist_dir=${3}
+
+echo "=== Clear output directories and leftovers ==="
+# Clear output directories and leftovers; ${build_dir:?} aborts if the path is empty instead of running a bare rm -rf
+rm -rf -- "${build_dir:?}"
+
+echo "=== Building Arrow Java C Data Interface native library ==="
+mkdir -p "${build_dir}"
+pushd "${build_dir}"
+
+cmake \
+ -DCMAKE_BUILD_TYPE="${ARROW_BUILD_TYPE:-release}" \
+ -DCMAKE_INSTALL_LIBDIR=lib \
+ -DCMAKE_INSTALL_PREFIX="${build_dir}" \
+ "${arrow_dir}/java/c"
+cmake --build . --target install --config "${ARROW_BUILD_TYPE:-release}"
+popd
+
+echo "=== Copying libraries to the distribution folder ==="
+mkdir -p "${dist_dir}"
+cp -L "${build_dir}"/lib/*arrow_cdata_jni.* "${dist_dir}"
diff --git a/src/arrow/ci/scripts/java_full_build.sh b/src/arrow/ci/scripts/java_full_build.sh
new file mode 100755
index 000000000..e452b8098
--- /dev/null
+++ b/src/arrow/ci/scripts/java_full_build.sh
@@ -0,0 +1,42 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -e
+
+arrow_dir=${1}
+dist_dir=${2}
+
+export ARROW_TEST_DATA="${arrow_dir}/testing/data"
+
+pushd "${arrow_dir}/java"
+
+# build the entire project; both native-library properties point at ${dist_dir}
+mvn clean install \
+ -Parrow-c-data \
+ -Parrow-jni \
+ -Darrow.cpp.build.dir="${dist_dir}" \
+ -Darrow.c.jni.dist.dir="${dist_dir}"
+
+# copy all jars and pom files to the distribution folder (each copied path is echoed first)
+find ~/.m2/repository/org/apache/arrow \
+ "(" -name "*.jar" -o -name "*.pom" ")" \
+ -exec echo {} ";" \
+ -exec cp {} "${dist_dir}" ";"
+
+popd
diff --git a/src/arrow/ci/scripts/java_jni_macos_build.sh b/src/arrow/ci/scripts/java_jni_macos_build.sh
new file mode 100755
index 000000000..218d2d396
--- /dev/null
+++ b/src/arrow/ci/scripts/java_jni_macos_build.sh
@@ -0,0 +1,115 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -ex
+
+arrow_dir=${1}
+build_dir=${2}
+# The directory where the final binaries will be stored when scripts finish
+dist_dir=${3}
+
+echo "=== Clear output directories and leftovers ==="
+# Clear output directories and leftovers; ${build_dir:?} aborts if the path is empty instead of running a bare rm -rf
+rm -rf -- "${build_dir:?}"
+
+echo "=== Building Arrow C++ libraries ==="
+: ${ARROW_BUILD_TESTS:=OFF}
+: ${ARROW_DATASET:=ON}
+: ${ARROW_FILESYSTEM:=ON}
+: ${ARROW_GANDIVA_JAVA:=ON}
+: ${ARROW_GANDIVA:=ON}
+: ${ARROW_ORC:=ON}
+: ${ARROW_PARQUET:=ON}
+: ${ARROW_PLASMA_JAVA_CLIENT:=ON}
+: ${ARROW_PLASMA:=ON}
+: ${ARROW_PYTHON:=OFF}
+: ${CMAKE_BUILD_TYPE:=Release}
+: ${CMAKE_UNITY_BUILD:=ON}
+# (the `: ${VAR:=value}` lines above assign a default only when VAR is unset or empty)
+export ARROW_TEST_DATA="${arrow_dir}/testing/data"
+export PARQUET_TEST_DATA="${arrow_dir}/cpp/submodules/parquet-testing/data"
+export AWS_EC2_METADATA_DISABLED=TRUE
+
+mkdir -p "${build_dir}"
+pushd "${build_dir}"
+
+cmake \
+ -DARROW_BOOST_USE_SHARED=OFF \
+ -DARROW_BROTLI_USE_SHARED=OFF \
+ -DARROW_BUILD_TESTS=${ARROW_BUILD_TESTS} \
+ -DARROW_BUILD_UTILITIES=OFF \
+ -DARROW_BZ2_USE_SHARED=OFF \
+ -DARROW_DATASET=${ARROW_DATASET} \
+ -DARROW_FILESYSTEM=${ARROW_FILESYSTEM} \
+ -DARROW_GANDIVA_JAVA=${ARROW_GANDIVA_JAVA} \
+ -DARROW_GANDIVA_STATIC_LIBSTDCPP=ON \
+ -DARROW_GANDIVA=${ARROW_GANDIVA} \
+ -DARROW_GFLAGS_USE_SHARED=OFF \
+ -DARROW_GRPC_USE_SHARED=OFF \
+ -DARROW_JNI=ON \
+ -DARROW_LZ4_USE_SHARED=OFF \
+ -DARROW_OPENSSL_USE_SHARED=OFF \
+ -DARROW_ORC=${ARROW_ORC} \
+ -DARROW_PARQUET=${ARROW_PARQUET} \
+ -DARROW_PLASMA_JAVA_CLIENT=${ARROW_PLASMA_JAVA_CLIENT} \
+ -DARROW_PLASMA=${ARROW_PLASMA} \
+ -DARROW_PROTOBUF_USE_SHARED=OFF \
+ -DARROW_PYTHON=${ARROW_PYTHON} \
+ -DARROW_SNAPPY_USE_SHARED=OFF \
+ -DARROW_THRIFT_USE_SHARED=OFF \
+ -DARROW_UTF8PROC_USE_SHARED=OFF \
+ -DARROW_ZSTD_USE_SHARED=OFF \
+ -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} \
+ -DCMAKE_INSTALL_LIBDIR=lib \
+ -DCMAKE_INSTALL_PREFIX="${build_dir}" \
+ -DCMAKE_UNITY_BUILD=${CMAKE_UNITY_BUILD} \
+ -DPARQUET_BUILD_EXAMPLES=OFF \
+ -DPARQUET_BUILD_EXECUTABLES=OFF \
+ -DPARQUET_REQUIRE_ENCRYPTION=OFF \
+ -Dre2_SOURCE=BUNDLED \
+ "${arrow_dir}/cpp"
+cmake --build . --target install
+
+if [ "${ARROW_BUILD_TESTS}" == "ON" ]; then
+ ctest
+fi
+
+popd
+
+echo "=== Copying libraries to the distribution folder ==="
+mkdir -p "${dist_dir}"
+cp -L "${build_dir}/lib/libgandiva_jni.dylib" "${dist_dir}"
+cp -L "${build_dir}/lib/libarrow_dataset_jni.dylib" "${dist_dir}"
+cp -L "${build_dir}/lib/libarrow_orc_jni.dylib" "${dist_dir}"
+
+echo "=== Checking shared dependencies for libraries ==="
+# NOTE(review): --allow presumably lists the only shared dependencies the copied libraries may have - confirm against archery
+pushd "${dist_dir}"
+archery linking check-dependencies \
+ --allow libarrow_dataset_jni \
+ --allow libarrow_orc_jni \
+ --allow libc++ \
+ --allow libgandiva_jni \
+ --allow libncurses \
+ --allow libSystem \
+ --allow libz \
+ libgandiva_jni.dylib \
+ libarrow_dataset_jni.dylib \
+ libarrow_orc_jni.dylib
+popd
diff --git a/src/arrow/ci/scripts/java_jni_manylinux_build.sh b/src/arrow/ci/scripts/java_jni_manylinux_build.sh
new file mode 100755
index 000000000..396c8fc19
--- /dev/null
+++ b/src/arrow/ci/scripts/java_jni_manylinux_build.sh
@@ -0,0 +1,137 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -ex
+
+arrow_dir=${1}
+build_dir=${2}
+# The directory where the final binaries will be stored when scripts finish
+dist_dir=${3}
+
+echo "=== Clear output directories and leftovers ==="
+# Clear output directories and leftovers; ${build_dir:?} aborts if the path is empty instead of running a bare rm -rf
+rm -rf -- "${build_dir:?}"
+
+echo "=== Building Arrow C++ libraries ==="
+devtoolset_version=$(rpm -qa "devtoolset-*-gcc" --queryformat %{VERSION} | \
+ grep -o "^[0-9]*")
+devtoolset_include_cpp="/opt/rh/devtoolset-${devtoolset_version}/root/usr/include/c++/${devtoolset_version}"
+: ${ARROW_DATASET:=ON}
+: ${ARROW_GANDIVA:=ON}
+: ${ARROW_GANDIVA_JAVA:=ON}
+: ${ARROW_FILESYSTEM:=ON}
+: ${ARROW_JEMALLOC:=ON}
+: ${ARROW_RPATH_ORIGIN:=ON}
+: ${ARROW_ORC:=ON}
+: ${ARROW_PARQUET:=ON}
+: ${ARROW_PLASMA:=ON}
+: ${ARROW_PLASMA_JAVA_CLIENT:=ON}
+: ${ARROW_PYTHON:=OFF}
+: ${ARROW_BUILD_TESTS:=OFF}
+: ${CMAKE_BUILD_TYPE:=Release}
+: ${CMAKE_UNITY_BUILD:=ON}
+: ${VCPKG_FEATURE_FLAGS:=-manifests}
+: ${VCPKG_TARGET_TRIPLET:=${VCPKG_DEFAULT_TRIPLET:-x64-linux-static-${CMAKE_BUILD_TYPE}}}
+: ${GANDIVA_CXX_FLAGS:=-isystem;${devtoolset_include_cpp};-isystem;${devtoolset_include_cpp}/x86_64-redhat-linux;-isystem;-lpthread}
+# (the `: ${VAR:=value}` lines above assign a default only when VAR is unset or empty)
+export ARROW_TEST_DATA="${arrow_dir}/testing/data"
+export PARQUET_TEST_DATA="${arrow_dir}/cpp/submodules/parquet-testing/data"
+export AWS_EC2_METADATA_DISABLED=TRUE
+
+mkdir -p "${build_dir}"
+pushd "${build_dir}"
+
+cmake \
+ -DARROW_BOOST_USE_SHARED=OFF \
+ -DARROW_BROTLI_USE_SHARED=OFF \
+ -DARROW_BUILD_SHARED=ON \
+ -DARROW_BUILD_TESTS=${ARROW_BUILD_TESTS} \
+ -DARROW_BUILD_UTILITIES=OFF \
+ -DARROW_BZ2_USE_SHARED=OFF \
+ -DARROW_DATASET=${ARROW_DATASET} \
+ -DARROW_DEPENDENCY_SOURCE="VCPKG" \
+ -DARROW_FILESYSTEM=${ARROW_FILESYSTEM} \
+ -DARROW_GANDIVA_JAVA=${ARROW_GANDIVA_JAVA} \
+ -DARROW_GANDIVA_PC_CXX_FLAGS="${GANDIVA_CXX_FLAGS}" \
+ -DARROW_GANDIVA=${ARROW_GANDIVA} \
+ -DARROW_GRPC_USE_SHARED=OFF \
+ -DARROW_JEMALLOC=${ARROW_JEMALLOC} \
+ -DARROW_JNI=ON \
+ -DARROW_LZ4_USE_SHARED=OFF \
+ -DARROW_OPENSSL_USE_SHARED=OFF \
+ -DARROW_ORC=${ARROW_ORC} \
+ -DARROW_PARQUET=${ARROW_PARQUET} \
+ -DARROW_PLASMA_JAVA_CLIENT=${ARROW_PLASMA_JAVA_CLIENT} \
+ -DARROW_PLASMA=${ARROW_PLASMA} \
+ -DARROW_PROTOBUF_USE_SHARED=OFF \
+ -DARROW_PYTHON=${ARROW_PYTHON} \
+ -DARROW_RPATH_ORIGIN=${ARROW_RPATH_ORIGIN} \
+ -DARROW_SNAPPY_USE_SHARED=OFF \
+ -DARROW_THRIFT_USE_SHARED=OFF \
+ -DARROW_UTF8PROC_USE_SHARED=OFF \
+ -DARROW_ZSTD_USE_SHARED=OFF \
+ -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} \
+ -DCMAKE_INSTALL_LIBDIR=lib \
+ -DCMAKE_INSTALL_PREFIX="${build_dir}" \
+ -DCMAKE_UNITY_BUILD=${CMAKE_UNITY_BUILD} \
+ -DPARQUET_BUILD_EXAMPLES=OFF \
+ -DPARQUET_BUILD_EXECUTABLES=OFF \
+ -DPARQUET_REQUIRE_ENCRYPTION=OFF \
+ -DPythonInterp_FIND_VERSION_MAJOR=3 \
+ -DPythonInterp_FIND_VERSION=ON \
+ -DVCPKG_MANIFEST_MODE=OFF \
+ -DVCPKG_TARGET_TRIPLET="${VCPKG_TARGET_TRIPLET}" \
+ -GNinja \
+ "${arrow_dir}/cpp"
+ninja install
+
+if [ "${ARROW_BUILD_TESTS}" = "ON" ]; then
+ ctest \
+ --label-regex unittest \
+ --output-on-failure \
+ --parallel "$(nproc)" \
+ --timeout 300
+fi
+
+popd
+
+echo "=== Copying libraries to the distribution folder ==="
+mkdir -p "${dist_dir}"
+cp -L "${build_dir}/lib/libgandiva_jni.so" "${dist_dir}"
+cp -L "${build_dir}/lib/libarrow_dataset_jni.so" "${dist_dir}"
+cp -L "${build_dir}/lib/libarrow_orc_jni.so" "${dist_dir}"
+
+echo "=== Checking shared dependencies for libraries ==="
+# NOTE(review): --allow presumably lists the only shared dependencies the copied libraries may have - confirm against archery
+pushd "${dist_dir}"
+archery linking check-dependencies \
+ --allow ld-linux-x86-64 \
+ --allow libc \
+ --allow libdl \
+ --allow libgcc_s \
+ --allow libm \
+ --allow libpthread \
+ --allow librt \
+ --allow libstdc++ \
+ --allow libz \
+ --allow linux-vdso \
+ libgandiva_jni.so \
+ libarrow_dataset_jni.so \
+ libarrow_orc_jni.so
+popd
diff --git a/src/arrow/ci/scripts/java_test.sh b/src/arrow/ci/scripts/java_test.sh
new file mode 100755
index 000000000..0e755bcaf
--- /dev/null
+++ b/src/arrow/ci/scripts/java_test.sh
@@ -0,0 +1,54 @@
+#!/usr/bin/env bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -ex
+
+arrow_dir=${1}
+source_dir=${1}/java
+cpp_build_dir=${2}/cpp/${ARROW_BUILD_TYPE:-debug}
+cdata_dist_dir=${2}/java/c
+
+# For JNI and Plasma tests; the old LD_LIBRARY_PATH is appended only when non-empty to avoid an empty (= current directory) entry
+export LD_LIBRARY_PATH="${ARROW_HOME}/lib${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
+export PLASMA_STORE="${ARROW_HOME}/bin/plasma-store-server"
+
+mvn="mvn -B -Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer.Slf4jMavenTransferListener=warn"
+# Use `2 * ncores` threads
+mvn="${mvn} -T 2C"
+
+pushd "${source_dir}"
+# NOTE: ${mvn} stays unquoted so it word-splits into the command plus its flags
+${mvn} test
+
+if [ "${ARROW_JNI}" = "ON" ]; then
+ ${mvn} test -Parrow-jni -pl adapter/orc,gandiva,dataset -Darrow.cpp.build.dir="${cpp_build_dir}"
+fi
+
+if [ "${ARROW_JAVA_CDATA}" = "ON" ]; then
+ ${mvn} test -Parrow-c-data -pl c -Darrow.c.jni.dist.dir="${cdata_dist_dir}"
+fi
+
+if [ "${ARROW_PLASMA}" = "ON" ]; then
+ pushd "${source_dir}/plasma"
+ java -cp target/test-classes:target/classes \
+ -Djava.library.path="${cpp_build_dir}" \
+ org.apache.arrow.plasma.PlasmaClientTest
+ popd
+fi
+
+popd
diff --git a/src/arrow/ci/scripts/js_build.sh b/src/arrow/ci/scripts/js_build.sh
new file mode 100755
index 000000000..10ceb41ee
--- /dev/null
+++ b/src/arrow/ci/scripts/js_build.sh
@@ -0,0 +1,36 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -ex
+# Usage: js_build.sh <arrow_dir> [with_docs]; installs dependencies, lints and builds <arrow_dir>/js
+source_dir=${1}/js
+with_docs=${2:-false}
+
+pushd "${source_dir}"
+
+yarn --frozen-lockfile
+# TODO(kszucs): linting should be moved to archery
+yarn lint:ci
+yarn build
+
+if [ "${with_docs}" == "true" ]; then
+ yarn doc
+fi
+
+popd
diff --git a/src/arrow/ci/scripts/js_test.sh b/src/arrow/ci/scripts/js_test.sh
new file mode 100755
index 000000000..345d6cb81
--- /dev/null
+++ b/src/arrow/ci/scripts/js_test.sh
@@ -0,0 +1,29 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -ex
+# Usage: js_test.sh <arrow_dir>; runs the lint and test yarn scripts in <arrow_dir>/js
+source_dir=${1}/js
+
+pushd "${source_dir}"
+
+yarn lint
+yarn test
+
+popd
diff --git a/src/arrow/ci/scripts/matlab_build.sh b/src/arrow/ci/scripts/matlab_build.sh
new file mode 100755
index 000000000..5e9bdd2a9
--- /dev/null
+++ b/src/arrow/ci/scripts/matlab_build.sh
@@ -0,0 +1,29 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Exit on error (-e) and print all commands (-x).
+set -ex
+# Usage: matlab_build.sh <arrow_dir>; configures, builds and tests <arrow_dir>/matlab in <arrow_dir>/matlab/build
+base_dir=${1}
+source_dir=${base_dir}/matlab
+build_dir=${base_dir}/matlab/build
+
+cmake -S "${source_dir}" -B "${build_dir}" -G Ninja -D MATLAB_BUILD_TESTS=ON
+cmake --build "${build_dir}" --config Release
+ctest --test-dir "${build_dir}"
diff --git a/src/arrow/ci/scripts/msys2_setup.sh b/src/arrow/ci/scripts/msys2_setup.sh
new file mode 100755
index 000000000..6f6012c87
--- /dev/null
+++ b/src/arrow/ci/scripts/msys2_setup.sh
@@ -0,0 +1,79 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -eux
+
+target=$1
+# Collect the pacman packages required by the requested target (cpp, c_glib, ruby or cgo)
+packages=()
+case "${target}" in
+ cpp|c_glib|ruby)
+ packages+=(${MINGW_PACKAGE_PREFIX}-aws-sdk-cpp)
+ packages+=(${MINGW_PACKAGE_PREFIX}-boost)
+ packages+=(${MINGW_PACKAGE_PREFIX}-brotli)
+ packages+=(${MINGW_PACKAGE_PREFIX}-ccache)
+ packages+=(${MINGW_PACKAGE_PREFIX}-clang)
+ packages+=(${MINGW_PACKAGE_PREFIX}-cmake)
+ packages+=(${MINGW_PACKAGE_PREFIX}-gcc)
+ packages+=(${MINGW_PACKAGE_PREFIX}-gflags)
+ packages+=(${MINGW_PACKAGE_PREFIX}-grpc)
+ packages+=(${MINGW_PACKAGE_PREFIX}-gtest)
+ packages+=(${MINGW_PACKAGE_PREFIX}-libutf8proc)
+ packages+=(${MINGW_PACKAGE_PREFIX}-libxml2)
+ packages+=(${MINGW_PACKAGE_PREFIX}-llvm)
+ packages+=(${MINGW_PACKAGE_PREFIX}-lz4)
+ packages+=(${MINGW_PACKAGE_PREFIX}-make)
+ packages+=(${MINGW_PACKAGE_PREFIX}-mlir)
+ packages+=(${MINGW_PACKAGE_PREFIX}-ninja)
+ packages+=(${MINGW_PACKAGE_PREFIX}-polly)
+ packages+=(${MINGW_PACKAGE_PREFIX}-protobuf)
+ packages+=(${MINGW_PACKAGE_PREFIX}-python3-numpy)
+ packages+=(${MINGW_PACKAGE_PREFIX}-rapidjson)
+ packages+=(${MINGW_PACKAGE_PREFIX}-re2)
+ packages+=(${MINGW_PACKAGE_PREFIX}-snappy)
+ packages+=(${MINGW_PACKAGE_PREFIX}-thrift)
+ packages+=(${MINGW_PACKAGE_PREFIX}-zlib)
+ packages+=(${MINGW_PACKAGE_PREFIX}-zstd)
+ ;;
+esac
+# c_glib and ruby additionally get gobject-introspection, gtk-doc and meson
+case "${target}" in
+ c_glib|ruby)
+ packages+=(${MINGW_PACKAGE_PREFIX}-gobject-introspection)
+ packages+=(${MINGW_PACKAGE_PREFIX}-gtk-doc)
+ packages+=(${MINGW_PACKAGE_PREFIX}-meson)
+ ;;
+esac
+# cgo only installs the packaged arrow library and gcc
+case "${target}" in
+ cgo)
+ packages+=(${MINGW_PACKAGE_PREFIX}-arrow)
+ packages+=(${MINGW_PACKAGE_PREFIX}-gcc)
+ ;;
+esac
+
+pacman \
+ --needed \
+ --noconfirm \
+ --refresh \
+ --sync \
+ "${packages[@]}"
+# Run the sibling ccache_setup.sh, then append CCACHE_DIR (as an absolute Windows path) to the file named by GITHUB_ENV
+"$(dirname "$0")/ccache_setup.sh"
+echo "CCACHE_DIR=$(cygpath --absolute --windows ccache)" >> "$GITHUB_ENV"
diff --git a/src/arrow/ci/scripts/msys2_system_clean.sh b/src/arrow/ci/scripts/msys2_system_clean.sh
new file mode 100755
index 000000000..a356aee66
--- /dev/null
+++ b/src/arrow/ci/scripts/msys2_system_clean.sh
@@ -0,0 +1,33 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -eux
+# Uninstall the MinGW packages listed below without prompting; per pacman(8), --cascade also removes dependents, --recursive unneeded dependencies, --nosave skips config backups
+pacman \
+ --cascade \
+ --noconfirm \
+ --nosave \
+ --recursive \
+ --remove \
+ ${MINGW_PACKAGE_PREFIX}-clang-tools-extra \
+ ${MINGW_PACKAGE_PREFIX}-gcc-ada \
+ ${MINGW_PACKAGE_PREFIX}-gcc-fortran \
+ ${MINGW_PACKAGE_PREFIX}-gcc-libgfortran \
+ ${MINGW_PACKAGE_PREFIX}-gcc-objc \
+ ${MINGW_PACKAGE_PREFIX}-libgccjit
diff --git a/src/arrow/ci/scripts/msys2_system_upgrade.sh b/src/arrow/ci/scripts/msys2_system_upgrade.sh
new file mode 100755
index 000000000..646428fbb
--- /dev/null
+++ b/src/arrow/ci/scripts/msys2_system_upgrade.sh
@@ -0,0 +1,28 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -eux
+
+pacman \
+ --noconfirm \
+ --refresh \
+ --refresh \
+ --sync \
+ --sysupgrade \
+ --sysupgrade
diff --git a/src/arrow/ci/scripts/python_benchmark.sh b/src/arrow/ci/scripts/python_benchmark.sh
new file mode 100755
index 000000000..3a35298dc
--- /dev/null
+++ b/src/arrow/ci/scripts/python_benchmark.sh
@@ -0,0 +1,40 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Check the ASV benchmarking setup.
+# Unfortunately this won't ensure that all benchmarks succeed
+# (see https://github.com/airspeed-velocity/asv/issues/449)
+source deactivate
+conda create -y -q -n pyarrow_asv python=$PYTHON_VERSION
+conda activate pyarrow_asv
+pip install -q git+https://github.com/pitrou/asv.git@customize_commands
+
+export PYARROW_WITH_PARQUET=1
+export PYARROW_WITH_PLASMA=1
+export PYARROW_WITH_ORC=0
+export PYARROW_WITH_GANDIVA=0
+
+pushd $ARROW_PYTHON_DIR
+# Workaround for https://github.com/airspeed-velocity/asv/issues/631
+git fetch --depth=100 origin master:master
+# Generate machine information (mandatory)
+asv machine --yes
+# Run benchmarks on the changeset being tested
+asv run --no-pull --show-stderr --quick HEAD^!
+popd # $ARROW_PYTHON_DIR
diff --git a/src/arrow/ci/scripts/python_build.sh b/src/arrow/ci/scripts/python_build.sh
new file mode 100755
index 000000000..ec6d723b2
--- /dev/null
+++ b/src/arrow/ci/scripts/python_build.sh
@@ -0,0 +1,54 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -ex
+
+source_dir=${1}/python
+build_dir=${2}/python
+
+if [ ! -z "${CONDA_PREFIX}" ]; then
+ echo -e "===\n=== Conda environment for build\n==="
+ conda list
+fi
+
+export PYARROW_CMAKE_GENERATOR=${CMAKE_GENERATOR:-Ninja}
+export PYARROW_BUILD_TYPE=${CMAKE_BUILD_TYPE:-debug}
+export PYARROW_WITH_S3=${ARROW_S3:-OFF}
+export PYARROW_WITH_ORC=${ARROW_ORC:-OFF}
+export PYARROW_WITH_CUDA=${ARROW_CUDA:-OFF}
+export PYARROW_WITH_HDFS=${ARROW_HDFS:-OFF}
+export PYARROW_WITH_FLIGHT=${ARROW_FLIGHT:-OFF}
+export PYARROW_WITH_PLASMA=${ARROW_PLASMA:-OFF}
+export PYARROW_WITH_GANDIVA=${ARROW_GANDIVA:-OFF}
+export PYARROW_WITH_PARQUET=${ARROW_PARQUET:-OFF}
+export PYARROW_WITH_DATASET=${ARROW_DATASET:-OFF}
+
+export LD_LIBRARY_PATH=${ARROW_HOME}/lib:${LD_LIBRARY_PATH}
+
+pushd ${source_dir}
+
+relative_build_dir=$(realpath --relative-to=. $build_dir)
+
+# not nice, but prevents mutating the mounted source directory for docker
+${PYTHON:-python} \
+ setup.py build --build-base $build_dir \
+ install --single-version-externally-managed \
+ --record $relative_build_dir/record.txt
+
+popd
diff --git a/src/arrow/ci/scripts/python_sdist_build.sh b/src/arrow/ci/scripts/python_sdist_build.sh
new file mode 100755
index 000000000..f9e9359b6
--- /dev/null
+++ b/src/arrow/ci/scripts/python_sdist_build.sh
@@ -0,0 +1,27 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -eux
+
+source_dir=${1}/python
+
+pushd ${source_dir}
+export SETUPTOOLS_SCM_PRETEND_VERSION=${PYARROW_VERSION:-}
+${PYTHON:-python} setup.py sdist
+popd
diff --git a/src/arrow/ci/scripts/python_sdist_test.sh b/src/arrow/ci/scripts/python_sdist_test.sh
new file mode 100755
index 000000000..3dd7d7ddd
--- /dev/null
+++ b/src/arrow/ci/scripts/python_sdist_test.sh
@@ -0,0 +1,58 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -eux
+
+arrow_dir=${1}
+
+export ARROW_SOURCE_DIR=${arrow_dir}
+export ARROW_TEST_DATA=${arrow_dir}/testing/data
+export PARQUET_TEST_DATA=${arrow_dir}/cpp/submodules/parquet-testing/data
+
+export PYARROW_CMAKE_GENERATOR=${CMAKE_GENERATOR:-Ninja}
+export PYARROW_BUILD_TYPE=${CMAKE_BUILD_TYPE:-debug}
+export PYARROW_WITH_S3=${ARROW_S3:-OFF}
+export PYARROW_WITH_ORC=${ARROW_ORC:-OFF}
+export PYARROW_WITH_CUDA=${ARROW_CUDA:-OFF}
+export PYARROW_WITH_HDFS=${ARROW_HDFS:-OFF}
+export PYARROW_WITH_FLIGHT=${ARROW_FLIGHT:-OFF}
+export PYARROW_WITH_PLASMA=${ARROW_PLASMA:-OFF}
+export PYARROW_WITH_GANDIVA=${ARROW_GANDIVA:-OFF}
+export PYARROW_WITH_PARQUET=${ARROW_PARQUET:-OFF}
+export PYARROW_WITH_DATASET=${ARROW_DATASET:-OFF}
+
+# TODO: Users should not require ARROW_HOME and pkg-config to find Arrow C++.
+# Related: ARROW-9171
+# unset ARROW_HOME
+# apt purge -y pkg-config
+
+# ARROW-12619
+if command -v git &> /dev/null; then
+ echo "Git exists, remove it from PATH before executing this script."
+ exit 1
+fi
+
+if [ -n "${PYARROW_VERSION:-}" ]; then
+ sdist="${arrow_dir}/python/dist/pyarrow-${PYARROW_VERSION}.tar.gz"
+else
+ sdist=$(ls ${arrow_dir}/python/dist/pyarrow-*.tar.gz | sort -r | head -n1)
+fi
+${PYTHON:-python} -m pip install ${sdist}
+
+pytest -r s ${PYTEST_ARGS:-} --pyargs pyarrow
diff --git a/src/arrow/ci/scripts/python_test.sh b/src/arrow/ci/scripts/python_test.sh
new file mode 100755
index 000000000..6e05af89a
--- /dev/null
+++ b/src/arrow/ci/scripts/python_test.sh
@@ -0,0 +1,32 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -ex
+
+arrow_dir=${1}
+
+export ARROW_SOURCE_DIR=${arrow_dir}
+export ARROW_TEST_DATA=${arrow_dir}/testing/data
+export PARQUET_TEST_DATA=${arrow_dir}/cpp/submodules/parquet-testing/data
+export LD_LIBRARY_PATH=${ARROW_HOME}/lib:${LD_LIBRARY_PATH}
+
+# Enable some checks inside Python itself
+export PYTHONDEVMODE=1
+
+pytest -r s -v ${PYTEST_ARGS} --pyargs pyarrow
diff --git a/src/arrow/ci/scripts/python_wheel_macos_build.sh b/src/arrow/ci/scripts/python_wheel_macos_build.sh
new file mode 100755
index 000000000..1a52a2ad5
--- /dev/null
+++ b/src/arrow/ci/scripts/python_wheel_macos_build.sh
@@ -0,0 +1,166 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -ex
+
+arch=${1}
+source_dir=${2}
+build_dir=${3}
+
+echo "=== (${PYTHON_VERSION}) Clear output directories and leftovers ==="
+# Clear output directories and leftovers
+rm -rf ${build_dir}/install
+rm -rf ${source_dir}/python/dist
+rm -rf ${source_dir}/python/build
+rm -rf ${source_dir}/python/repaired_wheels
+rm -rf ${source_dir}/python/pyarrow/*.so
+rm -rf ${source_dir}/python/pyarrow/*.so.*
+
+echo "=== (${PYTHON_VERSION}) Set SDK, C++ and Wheel flags ==="
+export MACOSX_DEPLOYMENT_TARGET=${MACOSX_DEPLOYMENT_TARGET:-10.9} # default first: the platform tag below interpolates it
+export _PYTHON_HOST_PLATFORM="macosx-${MACOSX_DEPLOYMENT_TARGET}-${arch}"
+export SDKROOT=${SDKROOT:-$(xcrun --sdk macosx --show-sdk-path)}
+
+if [ $arch = "arm64" ]; then
+ export CMAKE_OSX_ARCHITECTURES="arm64"
+elif [ $arch = "x86_64" ]; then
+ export CMAKE_OSX_ARCHITECTURES="x86_64"
+elif [ $arch = "universal2" ]; then
+ export CMAKE_OSX_ARCHITECTURES="x86_64;arm64"
+else
+ echo "Unexpected architecture: $arch"
+ exit 1
+fi
+
+echo "=== (${PYTHON_VERSION}) Install Python build dependencies ==="
+export PIP_SITE_PACKAGES=$(python -c 'import site; print(site.getsitepackages()[0])')
+export PIP_TARGET_PLATFORM="macosx_${MACOSX_DEPLOYMENT_TARGET//./_}_${arch}"
+
+pip install \
+ --upgrade \
+ --only-binary=:all: \
+ --target $PIP_SITE_PACKAGES \
+ --platform $PIP_TARGET_PLATFORM \
+ -r ${source_dir}/python/requirements-wheel-build.txt
+pip install "delocate>=0.9"
+
+echo "=== (${PYTHON_VERSION}) Building Arrow C++ libraries ==="
+: ${ARROW_DATASET:=ON}
+: ${ARROW_FLIGHT:=ON}
+: ${ARROW_GANDIVA:=OFF}
+: ${ARROW_HDFS:=ON}
+: ${ARROW_JEMALLOC:=ON}
+: ${ARROW_MIMALLOC:=ON}
+: ${ARROW_ORC:=ON}
+: ${ARROW_PARQUET:=ON}
+: ${ARROW_PLASMA:=ON}
+: ${ARROW_S3:=ON}
+: ${ARROW_SIMD_LEVEL:="SSE4_2"}
+: ${ARROW_TENSORFLOW:=ON}
+: ${ARROW_WITH_BROTLI:=ON}
+: ${ARROW_WITH_BZ2:=ON}
+: ${ARROW_WITH_LZ4:=ON}
+: ${ARROW_WITH_SNAPPY:=ON}
+: ${ARROW_WITH_ZLIB:=ON}
+: ${ARROW_WITH_ZSTD:=ON}
+: ${CMAKE_BUILD_TYPE:=release}
+: ${CMAKE_GENERATOR:=Ninja}
+: ${CMAKE_UNITY_BUILD:=ON}
+: ${VCPKG_FEATURE_FLAGS:=-manifests}
+: ${VCPKG_TARGET_TRIPLET:=${VCPKG_DEFAULT_TRIPLET:-x64-osx-static-${CMAKE_BUILD_TYPE}}}
+
+mkdir -p ${build_dir}/build
+pushd ${build_dir}/build
+
+cmake \
+ -DARROW_BUILD_SHARED=ON \
+ -DCMAKE_APPLE_SILICON_PROCESSOR=arm64 \
+ -DCMAKE_OSX_ARCHITECTURES=${CMAKE_OSX_ARCHITECTURES} \
+ -DARROW_BUILD_STATIC=OFF \
+ -DARROW_BUILD_TESTS=OFF \
+ -DARROW_DATASET=${ARROW_DATASET} \
+ -DARROW_DEPENDENCY_SOURCE="VCPKG" \
+ -DARROW_DEPENDENCY_USE_SHARED=OFF \
+ -DARROW_FLIGHT=${ARROW_FLIGHT} \
+ -DARROW_GANDIVA=${ARROW_GANDIVA} \
+ -DARROW_HDFS=${ARROW_HDFS} \
+ -DARROW_JEMALLOC=${ARROW_JEMALLOC} \
+ -DARROW_MIMALLOC=${ARROW_MIMALLOC} \
+ -DARROW_ORC=${ARROW_ORC} \
+ -DARROW_PACKAGE_KIND="python-wheel-macos" \
+ -DARROW_PARQUET=${ARROW_PARQUET} \
+ -DARROW_PLASMA=${ARROW_PLASMA} \
+ -DARROW_PYTHON=ON \
+ -DARROW_RPATH_ORIGIN=ON \
+ -DARROW_S3=${ARROW_S3} \
+ -DARROW_SIMD_LEVEL=${ARROW_SIMD_LEVEL} \
+ -DARROW_TENSORFLOW=${ARROW_TENSORFLOW} \
+ -DARROW_USE_CCACHE=ON \
+ -DARROW_WITH_BROTLI=${ARROW_WITH_BROTLI} \
+ -DARROW_WITH_BZ2=${ARROW_WITH_BZ2} \
+ -DARROW_WITH_LZ4=${ARROW_WITH_LZ4} \
+ -DARROW_WITH_SNAPPY=${ARROW_WITH_SNAPPY} \
+ -DARROW_WITH_ZLIB=${ARROW_WITH_ZLIB} \
+ -DARROW_WITH_ZSTD=${ARROW_WITH_ZSTD} \
+ -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} \
+ -DCMAKE_INSTALL_LIBDIR=lib \
+ -DCMAKE_INSTALL_PREFIX=${build_dir}/install \
+ -DCMAKE_UNITY_BUILD=${CMAKE_UNITY_BUILD} \
+ -DOPENSSL_USE_STATIC_LIBS=ON \
+ -DVCPKG_MANIFEST_MODE=OFF \
+ -DVCPKG_TARGET_TRIPLET=${VCPKG_TARGET_TRIPLET} \
+ -G ${CMAKE_GENERATOR} \
+ ${source_dir}/cpp
+cmake --build . --target install
+popd
+
+echo "=== (${PYTHON_VERSION}) Building wheel ==="
+export PYARROW_BUILD_TYPE=${CMAKE_BUILD_TYPE}
+export PYARROW_BUNDLE_ARROW_CPP=1
+export PYARROW_CMAKE_GENERATOR=${CMAKE_GENERATOR}
+export PYARROW_INSTALL_TESTS=1
+export PYARROW_WITH_DATASET=${ARROW_DATASET}
+export PYARROW_WITH_FLIGHT=${ARROW_FLIGHT}
+export PYARROW_WITH_GANDIVA=${ARROW_GANDIVA}
+export PYARROW_WITH_HDFS=${ARROW_HDFS}
+export PYARROW_WITH_ORC=${ARROW_ORC}
+export PYARROW_WITH_PARQUET=${ARROW_PARQUET}
+export PYARROW_WITH_PLASMA=${ARROW_PLASMA}
+export PYARROW_WITH_S3=${ARROW_S3}
+export PYARROW_CMAKE_OPTIONS="-DCMAKE_OSX_ARCHITECTURES=${CMAKE_OSX_ARCHITECTURES} -DARROW_SIMD_LEVEL=${ARROW_SIMD_LEVEL}"
+# PyArrow build configuration
+export PKG_CONFIG_PATH=/usr/lib/pkgconfig:${build_dir}/install/lib/pkgconfig
+# Set PyArrow version explicitly
+export SETUPTOOLS_SCM_PRETEND_VERSION=${PYARROW_VERSION}
+
+pushd ${source_dir}/python
+python setup.py bdist_wheel
+popd
+
+echo "=== (${PYTHON_VERSION}) Show dynamic libraries the wheel depend on ==="
+deps=$(delocate-listdeps ${source_dir}/python/dist/*.whl)
+# Quote $deps so grep -v sees one dependency per line; any line it prints is a library not bundled under pyarrow/.
+if echo "$deps" | grep -v "^pyarrow/lib\(arrow\|gandiva\|parquet\|plasma\)"; then
+ echo "There are non-bundled shared library dependencies."
+ exit 1
+fi
+
+# Move the verified wheels
+mkdir -p ${source_dir}/python/repaired_wheels
+mv ${source_dir}/python/dist/*.whl ${source_dir}/python/repaired_wheels/
diff --git a/src/arrow/ci/scripts/python_wheel_manylinux_build.sh b/src/arrow/ci/scripts/python_wheel_manylinux_build.sh
new file mode 100755
index 000000000..434605cf2
--- /dev/null
+++ b/src/arrow/ci/scripts/python_wheel_manylinux_build.sh
@@ -0,0 +1,149 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -ex
+
+function check_arrow_visibility {
+ # Dump the dynamic symbol table of the installed libarrow.so.
+ nm --demangle --dynamic /tmp/arrow-dist/lib/libarrow.so > nm_arrow.log
+
+ # Keep the symbols nm marks ' T ' whose names do not contain 'arrow'.
+ # '_init' and '_fini' may or may not be present; both are ignored.
+ # The trailing 'cat -' hides grep's non-zero status when nothing matches.
+ grep ' T ' nm_arrow.log | grep -v -E '(arrow|\b_init\b|\b_fini\b)' | cat - > visible_symbols.log
+
+ if [[ ! -f visible_symbols.log || $(wc -l < visible_symbols.log) -ne 0 ]]; then
+ echo "== Unexpected symbols exported by libarrow.so =="
+ cat visible_symbols.log
+ echo "================================================"
+
+ exit 1
+ fi
+ return 0
+}
+
+echo "=== (${PYTHON_VERSION}) Clear output directories and leftovers ==="
+# Clear output directories and leftovers
+rm -rf /tmp/arrow-build
+rm -rf /arrow/python/dist
+rm -rf /arrow/python/build
+rm -rf /arrow/python/repaired_wheels
+rm -rf /arrow/python/pyarrow/*.so
+rm -rf /arrow/python/pyarrow/*.so.*
+
+echo "=== (${PYTHON_VERSION}) Building Arrow C++ libraries ==="
+: ${ARROW_DATASET:=ON}
+: ${ARROW_FLIGHT:=ON}
+: ${ARROW_GANDIVA:=OFF}
+: ${ARROW_HDFS:=ON}
+: ${ARROW_JEMALLOC:=ON}
+: ${ARROW_MIMALLOC:=ON}
+: ${ARROW_ORC:=ON}
+: ${ARROW_PARQUET:=ON}
+: ${ARROW_PLASMA:=ON}
+: ${ARROW_S3:=ON}
+: ${ARROW_TENSORFLOW:=ON}
+: ${ARROW_WITH_BROTLI:=ON}
+: ${ARROW_WITH_BZ2:=ON}
+: ${ARROW_WITH_LZ4:=ON}
+: ${ARROW_WITH_SNAPPY:=ON}
+: ${ARROW_WITH_ZLIB:=ON}
+: ${ARROW_WITH_ZSTD:=ON}
+: ${CMAKE_BUILD_TYPE:=release}
+: ${CMAKE_UNITY_BUILD:=ON}
+: ${CMAKE_GENERATOR:=Ninja}
+: ${VCPKG_FEATURE_FLAGS:=-manifests}
+: ${VCPKG_TARGET_TRIPLET:=${VCPKG_DEFAULT_TRIPLET:-x64-linux-static-${CMAKE_BUILD_TYPE}}}
+
+if [[ "$(uname -m)" == arm* ]] || [[ "$(uname -m)" == aarch* ]]; then
+ # Build jemalloc --with-lg-page=16 in order to make the wheel work on both
+ # 4k and 64k page arm64 systems. For more context see
+ # https://github.com/apache/arrow/issues/10929
+ export ARROW_EXTRA_CMAKE_FLAGS="-DARROW_JEMALLOC_LG_PAGE=16"
+fi
+
+mkdir /tmp/arrow-build
+pushd /tmp/arrow-build
+cmake \
+ -DARROW_BROTLI_USE_SHARED=OFF \
+ -DARROW_BUILD_SHARED=ON \
+ -DARROW_BUILD_STATIC=OFF \
+ -DARROW_BUILD_TESTS=OFF \
+ -DARROW_DATASET=${ARROW_DATASET} \
+ -DARROW_DEPENDENCY_SOURCE="VCPKG" \
+ -DARROW_DEPENDENCY_USE_SHARED=OFF \
+ -DARROW_FLIGHT=${ARROW_FLIGHT} \
+ -DARROW_GANDIVA=${ARROW_GANDIVA} \
+ -DARROW_HDFS=${ARROW_HDFS} \
+ -DARROW_JEMALLOC=${ARROW_JEMALLOC} \
+ -DARROW_MIMALLOC=${ARROW_MIMALLOC} \
+ -DARROW_ORC=${ARROW_ORC} \
+ -DARROW_PACKAGE_KIND="python-wheel-manylinux${MANYLINUX_VERSION}" \
+ -DARROW_PARQUET=${ARROW_PARQUET} \
+ -DARROW_PLASMA=${ARROW_PLASMA} \
+ -DARROW_PYTHON=ON \
+ -DARROW_RPATH_ORIGIN=ON \
+ -DARROW_S3=${ARROW_S3} \
+ -DARROW_TENSORFLOW=${ARROW_TENSORFLOW} \
+ -DARROW_USE_CCACHE=ON \
+ -DARROW_UTF8PROC_USE_SHARED=OFF \
+ -DARROW_WITH_BROTLI=${ARROW_WITH_BROTLI} \
+ -DARROW_WITH_BZ2=${ARROW_WITH_BZ2} \
+ -DARROW_WITH_LZ4=${ARROW_WITH_LZ4} \
+ -DARROW_WITH_SNAPPY=${ARROW_WITH_SNAPPY} \
+ -DARROW_WITH_ZLIB=${ARROW_WITH_ZLIB} \
+ -DARROW_WITH_ZSTD=${ARROW_WITH_ZSTD} \
+ -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} \
+ -DCMAKE_INSTALL_LIBDIR=lib \
+ -DCMAKE_INSTALL_PREFIX=/tmp/arrow-dist \
+ -DCMAKE_UNITY_BUILD=${CMAKE_UNITY_BUILD} \
+ -DOPENSSL_USE_STATIC_LIBS=ON \
+ -DVCPKG_MANIFEST_MODE=OFF \
+ -DVCPKG_TARGET_TRIPLET=${VCPKG_TARGET_TRIPLET} \
+ ${ARROW_EXTRA_CMAKE_FLAGS} \
+ -G ${CMAKE_GENERATOR} \
+ /arrow/cpp
+cmake --build . --target install
+popd
+
+# Check that we don't expose any unwanted symbols
+check_arrow_visibility
+
+echo "=== (${PYTHON_VERSION}) Building wheel ==="
+export PYARROW_BUILD_TYPE=${CMAKE_BUILD_TYPE}
+export PYARROW_BUNDLE_ARROW_CPP=1
+export PYARROW_CMAKE_GENERATOR=${CMAKE_GENERATOR}
+export PYARROW_INSTALL_TESTS=1
+export PYARROW_WITH_DATASET=${ARROW_DATASET}
+export PYARROW_WITH_FLIGHT=${ARROW_FLIGHT}
+export PYARROW_WITH_GANDIVA=${ARROW_GANDIVA}
+export PYARROW_WITH_HDFS=${ARROW_HDFS}
+export PYARROW_WITH_ORC=${ARROW_ORC}
+export PYARROW_WITH_PARQUET=${ARROW_PARQUET}
+export PYARROW_WITH_PLASMA=${ARROW_PLASMA}
+export PYARROW_WITH_S3=${ARROW_S3}
+# PyArrow build configuration
+export PKG_CONFIG_PATH=/usr/lib/pkgconfig:/tmp/arrow-dist/lib/pkgconfig
+
+pushd /arrow/python
+python setup.py bdist_wheel
+
+echo "=== (${PYTHON_VERSION}) Tag the wheel with manylinux${MANYLINUX_VERSION} ==="
+auditwheel repair -L . dist/pyarrow-*.whl -w repaired_wheels
+popd
diff --git a/src/arrow/ci/scripts/python_wheel_unix_test.sh b/src/arrow/ci/scripts/python_wheel_unix_test.sh
new file mode 100755
index 000000000..ec703abfc
--- /dev/null
+++ b/src/arrow/ci/scripts/python_wheel_unix_test.sh
@@ -0,0 +1,84 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -e
+set -x
+set -o pipefail
+
+if [ "$#" -ne 1 ]; then
+ echo "Usage: $0 <arrow-src-dir>"
+ exit 1
+fi
+
+source_dir=${1}
+
+: ${ARROW_FLIGHT:=ON}
+: ${ARROW_S3:=ON}
+: ${CHECK_IMPORTS:=ON}
+: ${CHECK_UNITTESTS:=ON}
+: ${INSTALL_PYARROW:=ON}
+
+export PYARROW_TEST_CYTHON=OFF
+export PYARROW_TEST_DATASET=ON
+export PYARROW_TEST_FLIGHT=${ARROW_FLIGHT}
+export PYARROW_TEST_GANDIVA=OFF
+export PYARROW_TEST_HDFS=ON
+export PYARROW_TEST_ORC=ON
+export PYARROW_TEST_PANDAS=ON
+export PYARROW_TEST_PARQUET=ON
+export PYARROW_TEST_PLASMA=ON
+export PYARROW_TEST_S3=${ARROW_S3}
+export PYARROW_TEST_TENSORFLOW=ON
+
+export ARROW_TEST_DATA=${source_dir}/testing/data
+export PARQUET_TEST_DATA=${source_dir}/cpp/submodules/parquet-testing/data
+
+if [ "${INSTALL_PYARROW}" == "ON" ]; then
+ # Install the built wheels
+ pip install --force-reinstall ${source_dir}/python/repaired_wheels/*.whl
+fi
+
+if [ "${CHECK_IMPORTS}" == "ON" ]; then
+ # Test that the modules are importable
+ python -c "
+import pyarrow
+import pyarrow._hdfs
+import pyarrow.csv
+import pyarrow.dataset
+import pyarrow.fs
+import pyarrow.json
+import pyarrow.orc
+import pyarrow.parquet
+import pyarrow.plasma
+"
+ if [ "${PYARROW_TEST_S3}" == "ON" ]; then
+ python -c "import pyarrow._s3fs"
+ fi
+ if [ "${PYARROW_TEST_FLIGHT}" == "ON" ]; then
+ python -c "import pyarrow.flight"
+ fi
+fi
+
+if [ "${CHECK_UNITTESTS}" == "ON" ]; then
+ # Install testing dependencies
+ pip install -U -r ${source_dir}/python/requirements-wheel-test.txt
+ # Execute unittest, test dependencies must be installed
+ python -c 'import pyarrow; pyarrow.create_library_symlinks()'
+ python -m pytest -r s --pyargs pyarrow
+fi
diff --git a/src/arrow/ci/scripts/python_wheel_windows_build.bat b/src/arrow/ci/scripts/python_wheel_windows_build.bat
new file mode 100644
index 000000000..23be7f512
--- /dev/null
+++ b/src/arrow/ci/scripts/python_wheel_windows_build.bat
@@ -0,0 +1,109 @@
+@rem Licensed to the Apache Software Foundation (ASF) under one
+@rem or more contributor license agreements. See the NOTICE file
+@rem distributed with this work for additional information
+@rem regarding copyright ownership. The ASF licenses this file
+@rem to you under the Apache License, Version 2.0 (the
+@rem "License"); you may not use this file except in compliance
+@rem with the License. You may obtain a copy of the License at
+@rem
+@rem http://www.apache.org/licenses/LICENSE-2.0
+@rem
+@rem Unless required by applicable law or agreed to in writing,
+@rem software distributed under the License is distributed on an
+@rem "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+@rem KIND, either express or implied. See the License for the
+@rem specific language governing permissions and limitations
+@rem under the License.
+
+@echo on
+
+echo "Building windows wheel..."
+
+call "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Auxiliary\Build\vcvars64.bat"
+
+echo "=== (%PYTHON_VERSION%) Clear output directories and leftovers ==="
+del /s /q C:\arrow-build
+del /s /q C:\arrow-dist
+del /s /q C:\arrow\python\dist
+del /s /q C:\arrow\python\build
+del /s /q C:\arrow\python\pyarrow\*.so
+del /s /q C:\arrow\python\pyarrow\*.so.*
+
+echo "=== (%PYTHON_VERSION%) Building Arrow C++ libraries ==="
+set ARROW_DATASET=ON
+set ARROW_FLIGHT=ON
+set ARROW_GANDIVA=OFF
+set ARROW_HDFS=ON
+set ARROW_ORC=OFF
+set ARROW_PARQUET=ON
+set ARROW_MIMALLOC=ON
+set ARROW_S3=ON
+set ARROW_TENSORFLOW=ON
+set ARROW_WITH_BROTLI=ON
+set ARROW_WITH_BZ2=ON
+set ARROW_WITH_LZ4=ON
+set ARROW_WITH_SNAPPY=ON
+set ARROW_WITH_ZLIB=ON
+set ARROW_WITH_ZSTD=ON
+set CMAKE_UNITY_BUILD=ON
+set CMAKE_GENERATOR=Visual Studio 15 2017 Win64
+set VCPKG_FEATURE_FLAGS=-manifests
+
+mkdir C:\arrow-build
+pushd C:\arrow-build
+cmake ^
+ -DARROW_BUILD_SHARED=ON ^
+ -DARROW_BUILD_STATIC=OFF ^
+ -DARROW_BUILD_TESTS=OFF ^
+ -DARROW_CXXFLAGS="/MP" ^
+ -DARROW_DATASET=%ARROW_DATASET% ^
+ -DARROW_DEPENDENCY_SOURCE=VCPKG ^
+ -DARROW_DEPENDENCY_USE_SHARED=OFF ^
+ -DARROW_FLIGHT=%ARROW_FLIGHT% ^
+ -DARROW_GANDIVA=%ARROW_GANDIVA% ^
+ -DARROW_HDFS=%ARROW_HDFS% ^
+ -DARROW_MIMALLOC=%ARROW_MIMALLOC% ^
+ -DARROW_ORC=%ARROW_ORC% ^
+ -DARROW_PACKAGE_KIND="python-wheel-windows" ^
+ -DARROW_PARQUET=%ARROW_PARQUET% ^
+ -DARROW_PYTHON=ON ^
+ -DARROW_S3=%ARROW_S3% ^
+ -DARROW_TENSORFLOW=%ARROW_TENSORFLOW% ^
+ -DARROW_WITH_BROTLI=%ARROW_WITH_BROTLI% ^
+ -DARROW_WITH_BZ2=%ARROW_WITH_BZ2% ^
+ -DARROW_WITH_LZ4=%ARROW_WITH_LZ4% ^
+ -DARROW_WITH_SNAPPY=%ARROW_WITH_SNAPPY% ^
+ -DARROW_WITH_ZLIB=%ARROW_WITH_ZLIB% ^
+ -DARROW_WITH_ZSTD=%ARROW_WITH_ZSTD% ^
+ -DCMAKE_BUILD_TYPE=%CMAKE_BUILD_TYPE% ^
+ -DCMAKE_CXX_COMPILER=clcache ^
+ -DCMAKE_INSTALL_PREFIX=C:\arrow-dist ^
+ -DCMAKE_UNITY_BUILD=%CMAKE_UNITY_BUILD% ^
+ -DMSVC_LINK_VERBOSE=ON ^
+ -DVCPKG_MANIFEST_MODE=OFF ^
+ -DVCPKG_TARGET_TRIPLET=x64-windows-static-md-%CMAKE_BUILD_TYPE% ^
+ -G "%CMAKE_GENERATOR%" ^
+ C:\arrow\cpp || exit /B
+cmake --build . --config %CMAKE_BUILD_TYPE% --target install || exit /B
+popd
+
+echo "=== (%PYTHON_VERSION%) Building wheel ==="
+set PYARROW_BUILD_TYPE=%CMAKE_BUILD_TYPE%
+set PYARROW_BUNDLE_ARROW_CPP=ON
+set PYARROW_BUNDLE_BOOST=OFF
+set PYARROW_CMAKE_GENERATOR=%CMAKE_GENERATOR%
+set PYARROW_INSTALL_TESTS=ON
+set PYARROW_WITH_DATASET=%ARROW_DATASET%
+set PYARROW_WITH_FLIGHT=%ARROW_FLIGHT%
+set PYARROW_WITH_GANDIVA=%ARROW_GANDIVA%
+set PYARROW_WITH_HDFS=%ARROW_HDFS%
+set PYARROW_WITH_ORC=%ARROW_ORC%
+set PYARROW_WITH_PARQUET=%ARROW_PARQUET%
+set PYARROW_WITH_S3=%ARROW_S3%
+set ARROW_HOME=C:\arrow-dist
+
+pushd C:\arrow\python
+@REM bundle the msvc runtime
+cp "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Redist\MSVC\14.16.27012\x64\Microsoft.VC141.CRT\msvcp140.dll" pyarrow\
+python setup.py bdist_wheel || exit /B
+popd
diff --git a/src/arrow/ci/scripts/python_wheel_windows_test.bat b/src/arrow/ci/scripts/python_wheel_windows_test.bat
new file mode 100755
index 000000000..1ea0f8acd
--- /dev/null
+++ b/src/arrow/ci/scripts/python_wheel_windows_test.bat
@@ -0,0 +1,55 @@
+@rem Licensed to the Apache Software Foundation (ASF) under one
+@rem or more contributor license agreements. See the NOTICE file
+@rem distributed with this work for additional information
+@rem regarding copyright ownership. The ASF licenses this file
+@rem to you under the Apache License, Version 2.0 (the
+@rem "License"); you may not use this file except in compliance
+@rem with the License. You may obtain a copy of the License at
+@rem
+@rem http://www.apache.org/licenses/LICENSE-2.0
+@rem
+@rem Unless required by applicable law or agreed to in writing,
+@rem software distributed under the License is distributed on an
+@rem "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+@rem KIND, either express or implied. See the License for the
+@rem specific language governing permissions and limitations
+@rem under the License.
+
+@echo on
+
+set PYARROW_TEST_CYTHON=OFF
+set PYARROW_TEST_DATASET=ON
+set PYARROW_TEST_FLIGHT=ON
+set PYARROW_TEST_GANDIVA=OFF
+set PYARROW_TEST_HDFS=ON
+set PYARROW_TEST_ORC=OFF
+set PYARROW_TEST_PARQUET=ON
+set PYARROW_TEST_PLASMA=OFF
+set PYARROW_TEST_S3=OFF
+set PYARROW_TEST_TENSORFLOW=ON
+
+@REM Enable again once https://github.com/scipy/oldest-supported-numpy/pull/27 gets merged
+@REM set PYARROW_TEST_PANDAS=ON
+
+set ARROW_TEST_DATA=C:\arrow\testing\data
+set PARQUET_TEST_DATA=C:\arrow\cpp\submodules\parquet-testing\data
+
+@REM Install testing dependencies
+pip install -r C:\arrow\python\requirements-wheel-test.txt || exit /B
+
+@REM Install the built wheels
+python -m pip install --no-index --find-links=C:\arrow\python\dist\ pyarrow || exit /B
+
+@REM Test that the modules are importable; abort on the first failed import
+python -c "import pyarrow" || exit /B
+python -c "import pyarrow._hdfs" || exit /B
+python -c "import pyarrow._s3fs" || exit /B
+python -c "import pyarrow.csv" || exit /B
+python -c "import pyarrow.dataset" || exit /B
+python -c "import pyarrow.flight" || exit /B
+python -c "import pyarrow.fs" || exit /B
+python -c "import pyarrow.json" || exit /B
+python -c "import pyarrow.parquet" || exit /B
+
+@REM Execute unittest
+pytest -r s --pyargs pyarrow || exit /B
diff --git a/src/arrow/ci/scripts/r_build.sh b/src/arrow/ci/scripts/r_build.sh
new file mode 100755
index 000000000..2a2b9d7d1
--- /dev/null
+++ b/src/arrow/ci/scripts/r_build.sh
@@ -0,0 +1,33 @@
+#!/usr/bin/env bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -ex
+
+: ${R_BIN:=R}
+source_dir=${1}/r
+with_docs=${2:-false}
+
+pushd ${source_dir}
+
+${R_BIN} CMD INSTALL ${INSTALL_ARGS} .
+
+if [ "${with_docs}" == "true" ]; then
+ ${R_BIN} -e "pkgdown::build_site(install = FALSE)"
+fi
+
+popd
\ No newline at end of file
diff --git a/src/arrow/ci/scripts/r_deps.sh b/src/arrow/ci/scripts/r_deps.sh
new file mode 100755
index 000000000..ad1b5ecc1
--- /dev/null
+++ b/src/arrow/ci/scripts/r_deps.sh
@@ -0,0 +1,45 @@
+#!/usr/bin/env bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -ex
+
+: "${R_BIN:=R}"
+
+source_dir=${1}/r  # $1: directory that contains the r/ package sources
+
+pushd "${source_dir}"
+
+if [ "${R_BIN}" = "RDsan" ]; then
+ # To prevent the build from timing out, let's prune some optional deps (and their possible version requirements)
+ ${R_BIN} -e 'd <- read.dcf("DESCRIPTION")
+ to_prune <- c("duckdb", "DBI", "dbplyr", "decor", "knitr", "rmarkdown", "pkgload", "reticulate")
+ pattern <- paste0("\\n?", to_prune, " (\\\\(.*\\\\))?,?", collapse = "|")
+ d[,"Suggests"] <- gsub(pattern, "", d[,"Suggests"])
+ write.dcf(d, "DESCRIPTION")'
+fi
+
+# Install R package dependencies
+# install.packages() emits warnings if packages fail to install,
+# but we want to error/fail the build.
+# options(warn=2) turns warnings into errors; INSTALL_ARGS expands inside the quotes so a multi-flag value stays one R string
+${R_BIN} -e "options(warn=2); install.packages('remotes'); remotes::install_cran(c('glue', 'rcmdcheck', 'sys')); remotes::install_deps(INSTALL_opts = '${INSTALL_ARGS}')"
+# Separately install the optional/test dependencies but don't error on them,
+# they're not available everywhere and that's ok
+${R_BIN} -e "remotes::install_deps(dependencies = TRUE, INSTALL_opts = '${INSTALL_ARGS}')"
+
+popd
diff --git a/src/arrow/ci/scripts/r_docker_configure.sh b/src/arrow/ci/scripts/r_docker_configure.sh
new file mode 100755
index 000000000..20c987085
--- /dev/null
+++ b/src/arrow/ci/scripts/r_docker_configure.sh
@@ -0,0 +1,81 @@
+#!/usr/bin/env bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -ex
+
+: "${R_BIN:=R}"
+
+# The Dockerfile should have put this file here
+if [ -f "/arrow/ci/etc/rprofile" ]; then
+ # Ensure parallel R package installation, set CRAN repo mirror,
+ # and use pre-built binaries where possible
+ cat /arrow/ci/etc/rprofile >> "$(${R_BIN} RHOME)/etc/Rprofile.site"
+fi
+
+# Ensure parallel compilation of C/C++ code; use R_BIN so the setting lands in the same R as Rprofile.site above
+echo "MAKEFLAGS=-j$(${R_BIN} -s -e 'cat(parallel::detectCores())')" >> "$(${R_BIN} RHOME)/etc/Renviron.site"
+
+# Special hacking to try to reproduce quirks on fedora-clang-devel on CRAN
+# which uses a bespoke clang compiled to use libc++
+# https://www.stats.ox.ac.uk/pub/bdr/Rconfig/r-devel-linux-x86_64-fedora-clang
+if [ "$RHUB_PLATFORM" = "linux-x86_64-fedora-clang" ]; then
+ dnf install -y libcxx-devel
+ sed -i.bak -E -e 's/(CXX1?1? =.*)/\1 -stdlib=libc++/g' "$(${R_BIN} RHOME)/etc/Makeconf"
+ rm -rf "$(${R_BIN} RHOME)/etc/Makeconf.bak"
+
+ sed -i.bak -E -e 's/(CXXFLAGS = )(.*)/\1 -g -O3 -Wall -pedantic -frtti -fPIC/' "$(${R_BIN} RHOME)/etc/Makeconf"
+ rm -rf "$(${R_BIN} RHOME)/etc/Makeconf.bak"
+fi
+
+# Special hacking to try to reproduce quirks on centos using non-default build
+# tooling.
+if [[ "$DEVTOOLSET_VERSION" -gt 0 ]]; then
+ if command -v dnf > /dev/null; then
+ dnf install -y centos-release-scl
+ dnf install -y "devtoolset-$DEVTOOLSET_VERSION"
+ else
+ yum install -y centos-release-scl
+ yum install -y "devtoolset-$DEVTOOLSET_VERSION"
+ fi
+fi
+
+# Install openssl for S3 support (package manager probed with the command -v builtin, so no external "which" is needed)
+if [ "$ARROW_S3" == "ON" ] || [ "$ARROW_R_DEV" == "TRUE" ]; then
+ if command -v dnf > /dev/null; then
+ dnf install -y libcurl-devel openssl-devel
+ elif command -v yum > /dev/null; then
+ yum install -y libcurl-devel openssl-devel
+ elif command -v zypper > /dev/null; then
+ zypper install -y libcurl-devel libopenssl-devel
+ else
+ apt-get update
+ apt-get install -y libcurl4-openssl-dev libssl-dev
+ fi
+
+ # The Dockerfile should have put this file here
+ if [ -f "/arrow/ci/scripts/install_minio.sh" ] && command -v wget > /dev/null; then
+ /arrow/ci/scripts/install_minio.sh amd64 linux latest /usr/local
+ fi
+
+ if [ -f "/arrow/ci/scripts/install_gcs_testbench.sh" ] && command -v pip > /dev/null; then
+ /arrow/ci/scripts/install_gcs_testbench.sh amd64 default
+ fi
+fi
+
+# Workaround for html help install failure; see https://github.com/r-lib/devtools/issues/2084#issuecomment-530912786
+Rscript -e 'x <- file.path(R.home("doc"), "html"); if (!file.exists(x)) {dir.create(x, recursive=TRUE); file.copy(system.file("html/R.css", package="stats"), x)}'
diff --git a/src/arrow/ci/scripts/r_pkgdown_check.sh b/src/arrow/ci/scripts/r_pkgdown_check.sh
new file mode 100755
index 000000000..327480a6b
--- /dev/null
+++ b/src/arrow/ci/scripts/r_pkgdown_check.sh
@@ -0,0 +1,41 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Use this script (from the repository root) to prevent errors in the pkgdown site being rendered due to missing YAML entries
+
+# all .Rd files in the repo, reduced to bare topic names (prefix and ".Rd" suffix stripped with anchored patterns)
+all_rd_files=$(find ./r/man -maxdepth 1 -name "*.Rd" | sed -e 's|^\./r/man/||' -e 's|\.Rd$||' | sort)
+
+# .Rd files to exclude from search (i.e. are internal); -F matches the literal \keyword{internal} tag
+exclusions=$(grep -rlF --include='*.Rd' '\keyword{internal}' ./r/man | sed -e 's|^\./r/man/||' -e 's|\.Rd$||' | sort)
+
+# .Rd files to check against pkgdown.yml: names that occur in only one of the two lists above
+rd_files=$(echo ${exclusions} ${all_rd_files} | tr ' ' '\n' | sort | uniq -u)
+
+# pkgdown sections
+pkgdown_sections=$(awk '/^[^ ]/{ f=/reference:/; next } f{ if (sub(/:$/,"")) pkg=$2; else print pkg, $2 }' ./r/_pkgdown.yml | grep -v "title:" | sort)
+
+# get things that appear in man files that don't appear in pkgdown sections (sections are listed twice so uniq -u drops them)
+pkgdown_missing=$(echo ${pkgdown_sections} ${pkgdown_sections} ${rd_files} | tr ' ' '\n' | sort | uniq -u)
+
+# if any sections are missing raise an error
+if [ -n "${pkgdown_missing}" ]; then
+ echo "Error! $pkgdown_missing missing from ./r/_pkgdown.yml"
+ exit 1
+fi
diff --git a/src/arrow/ci/scripts/r_revdepcheck.sh b/src/arrow/ci/scripts/r_revdepcheck.sh
new file mode 100755
index 000000000..b0a2bab64
--- /dev/null
+++ b/src/arrow/ci/scripts/r_revdepcheck.sh
@@ -0,0 +1,88 @@
+#!/usr/bin/env bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -ex
+
+: "${R_BIN:=R}"
+
+source_dir=${1}/r  # $1: directory that contains the r/ package sources
+
+# cpp building dependencies (apt-get rather than apt: apt warns that its CLI is not stable for scripts)
+apt-get install -y cmake
+
+# system dependencies needed for arrow's reverse dependencies
+apt-get install -y libxml2-dev \
+ libfontconfig1-dev \
+ libcairo2-dev \
+ libglpk-dev \
+ libmysqlclient-dev \
+ unixodbc-dev \
+ libpq-dev \
+ coinor-libsymphony-dev \
+ coinor-libcgl-dev \
+ coinor-symphony \
+ libzmq3-dev \
+ libudunits2-dev \
+ libgdal-dev \
+ libgeos-dev \
+ libproj-dev
+
+pushd "${source_dir}"
+
+printenv
+
+# By default, aws-sdk tries to contact a non-existing local ip host
+# to retrieve metadata. Disable this so that S3FileSystem tests run faster.
+export AWS_EC2_METADATA_DISABLED=TRUE
+
+# Set crancache dir so we can cache it
+export CRANCACHE_DIR="/arrow/.crancache"
+
+SCRIPT="
+ # We can't use RSPM binaries because we need source packages
+ options('repos' = c(CRAN = 'https://packagemanager.rstudio.com/all/latest'))
+ remotes::install_github('r-lib/revdepcheck')
+
+ # zoo is needed by RcisTarget tests, though only listed in enhances so not installed by revdepcheck
+ install.packages('zoo')
+
+ # actually run revdepcheck
+ revdepcheck::revdep_check(
+ quiet = FALSE,
+ timeout = as.difftime(120, units = 'mins'),
+ num_workers = 1,
+ env = c(
+ ARROW_R_DEV = '$ARROW_R_DEV',
+ LIBARROW_DOWNLOAD = TRUE,
+ LIBARROW_MINIMAL = FALSE,
+ revdepcheck::revdep_env_vars()
+ ))
+ revdepcheck::revdep_report(all = TRUE)
+
+ # Go through the summary and fail if any of the statuses include -
+ summary <- revdepcheck::revdep_summary()
+ failed <- lapply(summary, function(check) grepl('-', check[['status']]))
+
+ if (any(unlist(failed))) {
+ quit(status = 1)
+ }
+ "
+
+echo "$SCRIPT" | ${R_BIN} --no-save
+
+popd
diff --git a/src/arrow/ci/scripts/r_sanitize.sh b/src/arrow/ci/scripts/r_sanitize.sh
new file mode 100755
index 000000000..6c79c0851
--- /dev/null
+++ b/src/arrow/ci/scripts/r_sanitize.sh
@@ -0,0 +1,46 @@
+#!/usr/bin/env bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -ex
+
+: "${R_BIN:=RDsan}"
+
+source_dir=${1}/r  # $1: directory that contains the r/ package sources
+
+pushd "${source_dir}"
+
+# Unity builds were causing the CI job to run out of memory
+export CMAKE_UNITY_BUILD=OFF
+# Make installation verbose so that the CI job doesn't time out due to silence
+export ARROW_R_DEV=TRUE
+${R_BIN} CMD INSTALL ${INSTALL_ARGS} .
+# But unset the env var so that it doesn't cause us to run extra dev tests
+unset ARROW_R_DEV
+
+export UBSAN_OPTIONS="print_stacktrace=1,suppressions=/arrow/r/tools/ubsan.supp"
+
+pushd tests
+${R_BIN} < testthat.R > ../testthat.out 2>&1 || { cat ../testthat.out; exit 1; }
+popd
+${R_BIN} -e 'library(arrow); testthat::test_examples(".")' >> testthat.out 2>&1 || { cat testthat.out; exit 1; }
+# testthat.out (in source_dir) now holds the output of both runs, so the check below covers tests and examples
+cat testthat.out
+if grep -q "runtime error" testthat.out; then
+ exit 1
+fi
+popd
diff --git a/src/arrow/ci/scripts/r_test.sh b/src/arrow/ci/scripts/r_test.sh
new file mode 100755
index 000000000..62e423cf5
--- /dev/null
+++ b/src/arrow/ci/scripts/r_test.sh
@@ -0,0 +1,106 @@
+#!/usr/bin/env bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -ex
+
+: "${R_BIN:=R}"
+
+source_dir=${1}/r  # $1: directory that contains the r/ package sources
+
+pushd "${source_dir}"
+
+printenv
+
+if [ "$ARROW_USE_PKG_CONFIG" != "false" ]; then
+ export LD_LIBRARY_PATH="${ARROW_HOME}/lib${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"  # no trailing ":" (= current dir) when it was unset
+ export R_LD_LIBRARY_PATH="${LD_LIBRARY_PATH}"
+fi
+export _R_CHECK_COMPILATION_FLAGS_KNOWN_="${ARROW_R_CXXFLAGS}"
+if [ "$ARROW_R_DEV" = "TRUE" ]; then
+ # These are sometimes used in the Arrow C++ build and are not a problem
+ export _R_CHECK_COMPILATION_FLAGS_KNOWN_="${_R_CHECK_COMPILATION_FLAGS_KNOWN_} -Wno-attributes -msse4.2 -Wno-noexcept-type -Wno-subobject-linkage"
+ if [ "$NOT_CRAN" = "" ]; then
+ # Note that NOT_CRAN=true means (among other things) that optional dependencies are built
+ # You can set NOT_CRAN=false for the CRAN build and then
+ # ARROW_R_DEV=TRUE just adds verbosity
+ export NOT_CRAN=true
+ fi
+fi
+
+export _R_CHECK_CRAN_INCOMING_REMOTE_=FALSE
+if [ "$TEST_R_WITHOUT_LIBARROW" != "TRUE" ]; then
+ # --run-donttest was used in R < 4.0, this is used now
+ export _R_CHECK_DONTTEST_EXAMPLES_=TRUE
+fi
+# Not all Suggested packages are needed for checking, so in case they aren't installed don't fail
+export _R_CHECK_FORCE_SUGGESTS_=FALSE
+export _R_CHECK_LIMIT_CORES_=FALSE
+export _R_CHECK_TESTS_NLINES_=0
+
+# By default, aws-sdk tries to contact a non-existing local ip host
+# to retrieve metadata. Disable this so that S3FileSystem tests run faster.
+export AWS_EC2_METADATA_DISABLED=TRUE
+
+# Hack so that texlive2020 doesn't pollute the home dir
+export TEXMFCONFIG=/tmp/texmf-config
+export TEXMFVAR=/tmp/texmf-var
+
+if [[ "$DEVTOOLSET_VERSION" -gt 0 ]]; then
+ # enable the devtoolset version to use it
+ source "/opt/rh/devtoolset-${DEVTOOLSET_VERSION}/enable"
+fi
+
+# Make sure we aren't writing to the home dir (CRAN _hates_ this but there is no official check)
+BEFORE=$(ls -alh ~/)
+
+SCRIPT="as_cran <- !identical(tolower(Sys.getenv('NOT_CRAN')), 'true')
+ if (as_cran) {
+ args <- '--as-cran'
+ build_args <- character()
+ } else {
+ args <- c('--no-manual', '--ignore-vignettes')
+ build_args <- '--no-build-vignettes'
+
+ if (nzchar(Sys.which('minio'))) {
+ message('Running minio for S3 tests (if build supports them)')
+ minio_dir <- tempfile()
+ dir.create(minio_dir)
+ pid <- sys::exec_background('minio', c('server', minio_dir))
+ on.exit(tools::pskill(pid))
+ }
+ }
+
+ run_donttest <- identical(tolower(Sys.getenv('_R_CHECK_DONTTEST_EXAMPLES_', 'true')), 'true')
+ if (run_donttest) {
+ args <- c(args, '--run-donttest')
+ }
+
+ install_args <- Sys.getenv('INSTALL_ARGS')
+ if (nzchar(install_args)) {
+ args <- c(args, paste0('--install-args=\"', install_args, '\"'))
+ }
+
+ rcmdcheck::rcmdcheck(build_args = build_args, args = args, error_on = 'warning', check_dir = 'check', timeout = 3600)"
+echo "$SCRIPT" | ${R_BIN} --no-save
+
+AFTER=$(ls -alh ~/)
+if [ "$NOT_CRAN" != "true" ] && [ "$BEFORE" != "$AFTER" ]; then
+ ls -alh ~/.cmake/packages
+ exit 1
+fi
+popd
diff --git a/src/arrow/ci/scripts/r_valgrind.sh b/src/arrow/ci/scripts/r_valgrind.sh
new file mode 100755
index 000000000..772d8f44e
--- /dev/null
+++ b/src/arrow/ci/scripts/r_valgrind.sh
@@ -0,0 +1,46 @@
+#!/usr/bin/env bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -ex
+
+: "${R_BIN:=RDvalgrind}"
+
+source_dir=${1}/r  # $1: directory that contains the r/ package sources and ci/etc/valgrind-cran.supp
+
+export CMAKE_BUILD_TYPE=RelWithDebInfo
+
+${R_BIN} CMD INSTALL ${INSTALL_ARGS} "${source_dir}"
+pushd "${source_dir}/tests"
+
+# to generate suppression files run:
+# ${R_BIN} --vanilla -d "valgrind --tool=memcheck --leak-check=full --track-origins=yes --gen-suppressions=all --log-file=memcheck.log" -f testtthat.supp
+${R_BIN} --vanilla -d "valgrind --tool=memcheck --leak-check=full --track-origins=yes --suppressions=/${1}/ci/etc/valgrind-cran.supp" -f testthat.R |& tee testthat.out
+
+# valgrind --error-exitcode=1 should return an erroring exit code that we can catch,
+# but R eats that and returns 0, so we need to look at the output and make sure that
+# we have 0 errors instead.
+if [ "$(grep -c "ERROR SUMMARY: 0 errors" testthat.out)" != 1 ]; then
+ cat testthat.out
+ echo "Found Valgrind errors"
+ exit 1
+fi
+
+# We might also consider using the greps that LightGBM uses:
+# https://github.com/microsoft/LightGBM/blob/fa6d356555f9ef888acf5f5e259dca958ca24f6d/.ci/test_r_package_valgrind.sh#L20-L85
+
+popd
diff --git a/src/arrow/ci/scripts/r_windows_build.sh b/src/arrow/ci/scripts/r_windows_build.sh
new file mode 100755
index 000000000..5bb58c760
--- /dev/null
+++ b/src/arrow/ci/scripts/r_windows_build.sh
@@ -0,0 +1,116 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -ex
+
+: "${ARROW_HOME:=$(pwd)}"
+# Make sure it is absolute and exported
+export ARROW_HOME="$(cd "${ARROW_HOME}" && pwd)"
+
+if [ "$RTOOLS_VERSION" = "35" ]; then
+ # Use rtools-backports if building with rtools35
+ curl https://raw.githubusercontent.com/r-windows/rtools-backports/master/pacman.conf > /etc/pacman.conf
+ pacman --noconfirm -Syy
+ # lib-4.9.3 is for libraries compiled with gcc 4.9 (Rtools 3.5)
+ RWINLIB_LIB_DIR="lib-4.9.3"
+ # This is the default (will build for each arch) but we can set up CI to
+ # do these in parallel
+ : ${MINGW_ARCH:="mingw32 mingw64"}
+else
+ # Uncomment the next four lines if you're testing a new rtools dependency that hasn't yet sync'd to CRAN
+ # curl https://raw.githubusercontent.com/r-windows/rtools-packages/master/pacman.conf > /etc/pacman.conf
+ # curl -OSsl "http://repo.msys2.org/msys/x86_64/msys2-keyring-r21.b39fb11-1-any.pkg.tar.xz"
+ # pacman -U --noconfirm msys2-keyring-r21.b39fb11-1-any.pkg.tar.xz && rm msys2-keyring-r21.b39fb11-1-any.pkg.tar.xz
+ # pacman --noconfirm -Scc
+
+ pacman --noconfirm -Syy
+ RWINLIB_LIB_DIR="lib"
+ : ${MINGW_ARCH:="mingw32 mingw64 ucrt64"}
+fi
+
+export MINGW_ARCH
+
+cp "$ARROW_HOME/ci/scripts/PKGBUILD" .
+printenv
+makepkg-mingw --noconfirm --noprogressbar --skippgpcheck --nocheck --syncdeps --cleanbuild
+
+VERSION=$(grep '^Version:' "$ARROW_HOME/r/DESCRIPTION" | cut -d " " -f 2)  # anchored so only the Version field matches
+DST_DIR="arrow-$VERSION"
+
+# Collect the build artifacts and make the shape of zip file that rwinlib expects
+ls
+mkdir -p build
+mv mingw* build
+cd build
+
+# This may vary by system/CI provider
+MSYS_LIB_DIR="/c/rtools40"
+
+# Untar the builds we made
+for pkg in *.xz; do tar -xJf "$pkg"; done
+mkdir -p "$DST_DIR"
+# Grab the headers from one, either one is fine
+# (if we're building twice to combine old and new toolchains, this may already exist)
+if [ ! -d "$DST_DIR/include" ]; then
+ mv "${MINGW_ARCH%% *}/include" "$DST_DIR"  # first entry of the space-separated MINGW_ARCH list
+fi
+
+# mingw64 -> x64
+# mingw32 -> i386
+# ucrt64 -> x64-ucrt
+
+if [ -d mingw64/lib/ ]; then
+ ls "$MSYS_LIB_DIR/mingw64/lib/"
+ # Make the rest of the directory structure
+ # lib-4.9.3 is for libraries compiled with gcc 4.9 (Rtools 3.5)
+ mkdir -p "$DST_DIR/${RWINLIB_LIB_DIR}/x64"
+ # lib is for the new gcc 8 toolchain (Rtools 4.0)
+ mkdir -p "$DST_DIR/lib/x64"
+ # Move the 64-bit versions of libarrow into the expected location
+ mv mingw64/lib/*.a "$DST_DIR/${RWINLIB_LIB_DIR}/x64"
+ # These may be from https://dl.bintray.com/rtools/backports/
+ cp "$MSYS_LIB_DIR"/mingw64/lib/lib{thrift,snappy}.a "$DST_DIR/${RWINLIB_LIB_DIR}/x64"
+ # These are from https://dl.bintray.com/rtools/mingw{32,64}/
+ cp "$MSYS_LIB_DIR"/mingw64/lib/lib{zstd,lz4,crypto,utf8proc,re2,aws*}.a "$DST_DIR/lib/x64"
+fi
+
+# Same for the 32-bit versions
+if [ -d mingw32/lib/ ]; then
+ ls "$MSYS_LIB_DIR/mingw32/lib/"
+ mkdir -p "$DST_DIR/${RWINLIB_LIB_DIR}/i386"
+ mkdir -p "$DST_DIR/lib/i386"
+ mv mingw32/lib/*.a "$DST_DIR/${RWINLIB_LIB_DIR}/i386"
+ cp "$MSYS_LIB_DIR"/mingw32/lib/lib{thrift,snappy}.a "$DST_DIR/${RWINLIB_LIB_DIR}/i386"
+ cp "$MSYS_LIB_DIR"/mingw32/lib/lib{zstd,lz4,crypto,utf8proc,re2,aws*}.a "$DST_DIR/lib/i386"
+fi
+
+# Do the same also for ucrt64
+if [ -d ucrt64/lib/ ]; then
+ ls "$MSYS_LIB_DIR/ucrt64/lib/"
+ mkdir -p "$DST_DIR/lib/x64-ucrt"
+ mv ucrt64/lib/*.a "$DST_DIR/lib/x64-ucrt"
+ cp "$MSYS_LIB_DIR"/ucrt64/lib/lib{thrift,snappy,zstd,lz4,crypto,utf8proc,re2,aws*}.a "$DST_DIR/lib/x64-ucrt"
+fi
+
+# Create build artifact
+zip -r "${DST_DIR}.zip" "$DST_DIR"
+
+# Copy that to a file name/path that does not vary by version number so we
+# can easily find it in the R package tests on CI
+cp "${DST_DIR}.zip" ../libarrow.zip
diff --git a/src/arrow/ci/scripts/release_test.sh b/src/arrow/ci/scripts/release_test.sh
new file mode 100755
index 000000000..ae2ab3288
--- /dev/null
+++ b/src/arrow/ci/scripts/release_test.sh
@@ -0,0 +1,28 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -eux
+
+arrow_dir=${1}  # $1: directory that contains dev/release/run-test.rb
+
+pushd "${arrow_dir}"
+
+dev/release/run-test.rb
+
+popd
diff --git a/src/arrow/ci/scripts/ruby_test.sh b/src/arrow/ci/scripts/ruby_test.sh
new file mode 100755
index 000000000..03d20e198
--- /dev/null
+++ b/src/arrow/ci/scripts/ruby_test.sh
@@ -0,0 +1,29 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -ex
+
+source_dir=${1}/ruby  # $1: directory that contains ruby/Rakefile
+build_dir=${2}/ruby  # $2: parent of the ruby build directory passed to rake as BUILD_DIR
+
+export LD_LIBRARY_PATH="${ARROW_HOME}/lib${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"  # no trailing ":" (= current dir) when it was unset
+export PKG_CONFIG_PATH="${ARROW_HOME}/lib/pkgconfig"
+export GI_TYPELIB_PATH="${ARROW_HOME}/lib/girepository-1.0"
+
+rake -f "${source_dir}/Rakefile" BUILD_DIR="${build_dir}" USE_BUNDLER=yes
diff --git a/src/arrow/ci/scripts/rust_build.sh b/src/arrow/ci/scripts/rust_build.sh
new file mode 100755
index 000000000..3532ea3d5
--- /dev/null
+++ b/src/arrow/ci/scripts/rust_build.sh
@@ -0,0 +1,59 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -e
+
+arrow_dir=${1}
+source_dir=${1}/rust
+
+# This file is used to build the rust binaries needed for the archery
+# integration tests. Testing of the rust implementation in normal CI is handled
+# by github workflows in the arrow-rs repository.
+
+# Disable full debug symbol generation to speed up CI build / reduce memory required
+export RUSTFLAGS="-C debuginfo=1"
+
+export ARROW_TEST_DATA="${arrow_dir}/testing/data"
+export PARQUET_TEST_DATA="${arrow_dir}/cpp/submodules/parquet-testing/data"
+# An unset/empty ARCHERY_INTEGRATION_WITH_RUST still means "build" (as before), now without a test error message
+if [ "${ARCHERY_INTEGRATION_WITH_RUST:-1}" -eq 0 ]; then
+ echo "====================================================================="
+ echo "Not building the Rust implementation."
+ echo "====================================================================="
+ exit 0
+elif [ ! -d "${source_dir}" ]; then
+ echo "====================================================================="
+ echo "The Rust source is missing. Please clone the arrow-rs repository"
+ echo "to arrow/rust before running the integration tests:"
+ echo " git clone https://github.com/apache/arrow-rs.git path/to/arrow/rust"
+ echo "====================================================================="
+ exit 1
+fi
+
+set -x
+
+# show activated toolchain
+rustup show
+
+pushd "${source_dir}"
+
+# build only the integration testing binaries
+cargo build -p arrow-integration-testing
+
+popd
diff --git a/src/arrow/ci/scripts/util_checkout.sh b/src/arrow/ci/scripts/util_checkout.sh
new file mode 100755
index 000000000..25fe69aa1
--- /dev/null
+++ b/src/arrow/ci/scripts/util_checkout.sh
@@ -0,0 +1,28 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# this script is GitHub Actions specific: it checks out the submodules and fetches the tags
+
+# TODO(kszucs): remove it once the "submodules: recursive" feature is released
+auth_header="$(git config --local --get http.https://github.com/.extraheader)"
+git submodule sync --recursive
+git -c "http.extraheader=$auth_header" -c protocol.version=2 submodule update --init --force --recursive --depth=1
+
+# fetch all the tags (refspec quoted so the shell never glob-expands the asterisks)
+git fetch --depth=1 origin '+refs/tags/*:refs/tags/*'
diff --git a/src/arrow/ci/scripts/util_cleanup.sh b/src/arrow/ci/scripts/util_cleanup.sh
new file mode 100755
index 000000000..3a13a1a78
--- /dev/null
+++ b/src/arrow/ci/scripts/util_cleanup.sh
@@ -0,0 +1,54 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# This script is Github Actions-specific to free up disk space,
+# to avoid disk full errors on some builds
+
+if [ "${RUNNER_OS}" = "Linux" ]; then
+ df -h
+
+ # remove swap
+ sudo swapoff -a
+ sudo rm -f /swapfile
+
+ # clean apt cache
+ sudo apt clean
+
+ # remove haskell, consumes 8.6 GB
+ sudo rm -rf /opt/ghc
+
+ # 1 GB
+ sudo rm -rf /home/linuxbrew/.linuxbrew
+
+ # 1+ GB
+ sudo rm -rf /opt/hostedtoolcache/CodeQL
+
+ # 1+ GB
+ sudo rm -rf /usr/share/swift
+
+ # 12 GB, but takes a lot of time to delete
+ #sudo rm -rf /usr/local/lib/android
+
+ # remove cached docker images, around 13 GB (xargs -r skips docker rmi when there are none)
+ docker image ls -aq | xargs -r docker rmi
+
+ # NOTE: /usr/share/dotnet is 25 GB
+fi
+
+df -h
diff --git a/src/arrow/ci/scripts/util_download_apache.sh b/src/arrow/ci/scripts/util_download_apache.sh
new file mode 100755
index 000000000..d8e9b6ca7
--- /dev/null
+++ b/src/arrow/ci/scripts/util_download_apache.sh
@@ -0,0 +1,55 @@
+#!/usr/bin/env bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Download an Apache tarball and unpack it, trying each mirror in turn
+# until one of them works.
+#
+# Usage: <script> <apache tarball path> <target directory>
+#   <apache tarball path>  path appended to every mirror base URL
+#   <target directory>     extraction directory, created when missing
+# Exit status: 0 as soon as one mirror succeeds; 1 on wrong usage or when
+# every mirror fails.
+
+set -x
+# Without pipefail only tar's status would be seen by the check below;
+# with it a failed download also marks the attempt as failed.
+set -o pipefail
+
+if [ "$#" -ne 2 ]; then
+  echo "Usage: $0 <apache tarball path> <target directory>"
+  exit 1
+fi
+
+tarball_path=$1
+target_dir=$2
+
+APACHE_MIRRORS=(
+  "http://www.apache.org/dyn/closer.cgi?action=download&filename="
+  "https://downloads.apache.org"
+  "https://apache.claz.org"
+  "https://apache.cs.utah.edu"
+  "https://apache.mirrors.lucidnetworks.net"
+  "https://apache.osuosl.org"
+  "https://ftp.wayne.edu/apache"
+  "https://mirror.olnevhost.net/pub/apache"
+  "https://mirrors.gigenet.com/apache"
+  "https://mirrors.koehn.com/apache"
+  "https://mirrors.ocf.berkeley.edu/apache"
+  "https://mirrors.sonic.net/apache"
+  "https://us.mirrors.quenda.co/apache"
+)
+
+mkdir -p "${target_dir}"
+
+# The array is expanded quoted: the first entry contains `?`, which an
+# unquoted expansion would treat as a glob character.
+for mirror in "${APACHE_MIRRORS[@]}"; do
+  # -f makes curl fail on HTTP errors instead of streaming the error
+  # page into tar.
+  if curl -fSL "${mirror}/${tarball_path}" | tar -xzf - -C "${target_dir}"; then
+    exit 0
+  fi
+done
+
+# No mirror delivered a tarball that could be extracted.
+exit 1
diff --git a/src/arrow/ci/scripts/util_wait_for_it.sh b/src/arrow/ci/scripts/util_wait_for_it.sh
new file mode 100755
index 000000000..51ce816eb
--- /dev/null
+++ b/src/arrow/ci/scripts/util_wait_for_it.sh
@@ -0,0 +1,199 @@
+#!/usr/bin/env bash
+
+# The MIT License (MIT)
+# Copyright (c) 2016 Giles Hall
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy of
+# this software and associated documentation files (the "Software"), to deal in
+# the Software without restriction, including without limitation the rights to
+# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+# of the Software, and to permit persons to whom the Software is furnished to do
+# so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+#
+# Use this script to test if a given TCP host/port are available
+
+# Script name without its directory, used as a prefix in messages.
+# Parameter expansion replaces the unquoted `basename $0`, which broke
+# when the script path contained whitespace.
+cmdname=${0##*/}
+
+# Print all arguments to stderr, unless QUIET is 1.
+echoerr() {
+  [[ $QUIET -ne 1 ]] || return 0
+  echo "$@" 1>&2
+}
+
+# Write the command-line help to stderr and terminate with status 1.
+usage() {
+  cat 1>&2 << USAGE
+Usage:
+ $cmdname host:port [-s] [-t timeout] [-- command args]
+ -h HOST | --host=HOST Host or IP under test
+ -p PORT | --port=PORT TCP port under test
+ Alternatively, you specify the host and port as host:port
+ -s | --strict Only execute subcommand if the test succeeds
+ -q | --quiet Don't output any status messages
+ -t TIMEOUT | --timeout=TIMEOUT
+ Timeout in seconds, zero for no timeout
+ -- COMMAND ARGS Execute command with args after the test finishes
+USAGE
+  exit 1
+}
+
+# Probe HOST:PORT once per second until a TCP connection succeeds.
+#
+# Globals (read): TIMEOUT (only for the log message), HOST, PORT, ISBUSY,
+#                 cmdname
+# Returns: 0 once the port accepts a connection. The loop never gives up
+# by itself; any time limit has to be imposed by the caller.
+wait_for() {
+  # Bookkeeping stays local so it cannot clobber the caller's variables.
+  local start_ts end_ts result
+
+  if [[ $TIMEOUT -gt 0 ]]; then
+    echoerr "$cmdname: waiting $TIMEOUT seconds for $HOST:$PORT"
+  else
+    echoerr "$cmdname: waiting for $HOST:$PORT without a timeout"
+  fi
+  start_ts=$(date +%s)
+  while :; do
+    if [[ $ISBUSY -eq 1 ]]; then
+      # ISBUSY=1: probe with nc instead of bash's /dev/tcp.
+      # HOST and PORT are quoted so they are always passed as one word each.
+      nc -z "$HOST" "$PORT"
+      result=$?
+    else
+      # Opening /dev/tcp/<host>/<port> makes bash attempt a TCP connection;
+      # the subshell keeps the descriptor from staying open.
+      (echo > "/dev/tcp/$HOST/$PORT") >/dev/null 2>&1
+      result=$?
+    fi
+    if [[ $result -eq 0 ]]; then
+      end_ts=$(date +%s)
+      echoerr "$cmdname: $HOST:$PORT is available after $((end_ts - start_ts)) seconds"
+      break
+    fi
+    sleep 1
+  done
+  return "$result"
+}
+
+# Run the probe in a child copy of this script under `timeout`, so that the
+# wait is bounded by TIMEOUT seconds.
+#
+# Globals (read): TIMEOUT, HOST, PORT, QUIET, BUSYTIMEFLAG, cmdname
+# Returns: the exit status of the `timeout` job; any non-zero status is
+# reported as a timeout.
+wait_for_wrapper() {
+  local pid result
+  # Arguments for the child are built once; --quiet is prepended on demand
+  # instead of duplicating the whole command line.
+  local -a child_args=(--child "--host=$HOST" "--port=$PORT" "--timeout=$TIMEOUT")
+  if [[ $QUIET -eq 1 ]]; then
+    child_args=(--quiet "${child_args[@]}")
+  fi
+
+  # In order to support SIGINT during timeout: http://unix.stackexchange.com/a/57692
+  # BUSYTIMEFLAG is only passed when non-empty; "$0" is quoted so a script
+  # path containing whitespace still works.
+  timeout ${BUSYTIMEFLAG:+"$BUSYTIMEFLAG"} "$TIMEOUT" "$0" "${child_args[@]}" &
+  pid=$!
+  # Forward Ctrl-C to the background job's process group.
+  trap "kill -INT -$pid" INT
+  wait "$pid"
+  result=$?
+  if [[ $result -ne 0 ]]; then
+    echoerr "$cmdname: timeout occurred after waiting $TIMEOUT seconds for $HOST:$PORT"
+  fi
+  return "$result"
+}
+
+# Parse the command line into HOST, PORT, TIMEOUT, QUIET, STRICT, CHILD
+# and CLI (the command to run after the test, everything following `--`).
+while [[ $# -gt 0 ]]; do
+  case "$1" in
+    *:*)
+      # host:port shorthand. NOTE(review): this arm is tested first, so any
+      # argument containing a colon ends up here.
+      hostport=(${1//:/ })
+      HOST=${hostport[0]}
+      PORT=${hostport[1]}
+      shift
+      ;;
+    --child)
+      # Marks the copy of the script started by wait_for_wrapper.
+      CHILD=1
+      shift
+      ;;
+    -q | --quiet)
+      QUIET=1
+      shift
+      ;;
+    -s | --strict)
+      STRICT=1
+      shift
+      ;;
+    -h)
+      HOST="$2"
+      # A missing value stops the parsing.
+      [[ $HOST != "" ]] || break
+      shift 2
+      ;;
+    --host=*)
+      HOST="${1#*=}"
+      shift
+      ;;
+    -p)
+      PORT="$2"
+      [[ $PORT != "" ]] || break
+      shift 2
+      ;;
+    --port=*)
+      PORT="${1#*=}"
+      shift
+      ;;
+    -t)
+      TIMEOUT="$2"
+      [[ $TIMEOUT != "" ]] || break
+      shift 2
+      ;;
+    --timeout=*)
+      TIMEOUT="${1#*=}"
+      shift
+      ;;
+    --)
+      # Everything after `--` is the command to execute afterwards.
+      shift
+      CLI=("$@")
+      break
+      ;;
+    --help)
+      usage
+      ;;
+    *)
+      echoerr "Unknown argument: $1"
+      usage
+      ;;
+  esac
+done
+
+# Both a host and a port are mandatory; usage terminates the script.
+if [[ -z "$HOST" || -z "$PORT" ]]; then
+  echoerr "Error: you need to provide a host and port to test."
+  usage
+fi
+
+# Fall back to defaults for every option that is unset or empty.
+: "${TIMEOUT:=15}"
+: "${STRICT:=0}"
+: "${CHILD:=0}"
+: "${QUIET:=0}"
+
+# Check whether `timeout` is provided by busybox. If so, ISBUSY=1 and
+# BUSYTIMEFLAG holds the flag (if any) that must precede the duration.
+#
+# `type -P` replaces `which`, all expansions are quoted, and `readlink -f`
+# is the fallback when `realpath` is missing, so a missing tool no longer
+# produces an error here.
+TIMEOUT_PATH=$(type -P timeout)
+TIMEOUT_PATH=$(realpath "$TIMEOUT_PATH" 2>/dev/null || readlink -f "$TIMEOUT_PATH" 2>/dev/null)
+BUSYTIMEFLAG=""
+if [[ $TIMEOUT_PATH =~ "busybox" ]]; then
+  ISBUSY=1
+  # Only older busybox builds take the duration through -t; newer ones use
+  # the coreutils syntax. Look at the usage text instead of assuming.
+  if timeout 2>&1 | grep -q -e '-t '; then
+    BUSYTIMEFLAG="-t"
+  fi
+else
+  ISBUSY=0
+fi
+
+# Child mode (started by wait_for_wrapper): just probe and report the status.
+if [[ $CHILD -gt 0 ]]; then
+  wait_for
+  exit $?
+fi
+
+# Parent mode: bounded wait when a timeout is set, unbounded otherwise.
+if [[ $TIMEOUT -gt 0 ]]; then
+  wait_for_wrapper
+  RESULT=$?
+else
+  wait_for
+  RESULT=$?
+fi
+
+# Without a follow-up command the probe status is the exit status.
+if [[ $CLI == "" ]]; then
+  exit $RESULT
+fi
+
+# In strict mode the command is only started after a successful probe.
+if [[ $RESULT -ne 0 && $STRICT -eq 1 ]]; then
+  echoerr "$cmdname: strict mode, refusing to execute subprocess"
+  exit $RESULT
+fi
+exec "${CLI[@]}"