diff options
Diffstat (limited to 'src/arrow/ci/scripts')
70 files changed, 4280 insertions, 0 deletions
# ==> src/arrow/ci/scripts/PKGBUILD (new file, mode 100644, index 000000000..975d1514f) <==
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# makepkg recipe that builds a static Arrow C++ library for mingw-w64.
# Reads from the environment: MINGW_PACKAGE_PREFIX, MINGW_PREFIX, CARCH,
# ARROW_HOME, RTOOLS_VERSION and (in package()) pkgdir.

_realname=arrow
pkgbase=mingw-w64-${_realname}
pkgname="${MINGW_PACKAGE_PREFIX}-${_realname}"
pkgver=6.0.1
pkgrel=8000
pkgdesc="Apache Arrow is a cross-language development platform for in-memory data (mingw-w64)"
arch=("any")
url="https://arrow.apache.org/"
license=("Apache-2.0")
depends=("${MINGW_PACKAGE_PREFIX}-aws-sdk-cpp"
         "${MINGW_PACKAGE_PREFIX}-libutf8proc"
         "${MINGW_PACKAGE_PREFIX}-re2"
         "${MINGW_PACKAGE_PREFIX}-thrift"
         "${MINGW_PACKAGE_PREFIX}-snappy"
         "${MINGW_PACKAGE_PREFIX}-zlib"
         "${MINGW_PACKAGE_PREFIX}-lz4"
         "${MINGW_PACKAGE_PREFIX}-zstd")
makedepends=("${MINGW_PACKAGE_PREFIX}-ccache"
             "${MINGW_PACKAGE_PREFIX}-cmake"
             "${MINGW_PACKAGE_PREFIX}-gcc")
options=("staticlibs" "strip" "!buildflags")

# For installing from a local checkout, set source_dir to . and don't include
# a "source" param below
source_dir="$ARROW_HOME"
# else
# source_dir=apache-${_realname}-${pkgver}

# For released version:
#source=("https://archive.apache.org/dist/arrow/arrow-${pkgver}/apache-arrow-${pkgver}.tar.gz")
#sha256sums=("ac2a77dd9168e9892e432c474611e86ded0be6dfe15f689c948751d37f81391a")
# For github dev version:
# Append `#commit=54b1b2f688e5e84b4c664b1e12a95f93b94ab2f3` to the URL to select a revision
# source=("${source_dir}"::"git+https://github.com/apache/arrow")
# sha256sums=("SKIP")
# source_dir="${APPVEYOR_BUILD_FOLDER}/${source_dir}"

cpp_build_dir=build-${CARCH}-cpp

# Prints the second space-separated field of the "Version" line(s) found in
# ${source_dir}/r/DESCRIPTION.
pkgver() {
  # The only purpose of this here is to cause the job to error if the
  # version in pkgver is different from what is in r/DESCRIPTION
  grep Version "${source_dir}/r/DESCRIPTION" | cut -d " " -f 2
}

# Configures and compiles Arrow C++ in a fresh ${cpp_build_dir}.
build() {
  ARROW_CPP_DIR="${source_dir}/cpp"
  # Always start from an empty build directory.
  if [[ -d "${cpp_build_dir}" ]]; then
    rm -rf "${cpp_build_dir}"
  fi
  mkdir -p "${cpp_build_dir}"
  pushd "${cpp_build_dir}"

  # The Rtools libutf8proc is a static lib, but Findutf8proc.cmake doesn't
  # set the appropriate compiler definition.
  export CPPFLAGS="-DUTF8PROC_STATIC"

  # This is the difference between rtools-packages and rtools-backports
  # Remove this when submitting to rtools-packages
  if [ "$RTOOLS_VERSION" = "35" ]; then
    export CC="/C/Rtools${MINGW_PREFIX/mingw/mingw_}/bin/gcc"
    export CXX="/C/Rtools${MINGW_PREFIX/mingw/mingw_}/bin/g++"
    export PATH="/C/Rtools${MINGW_PREFIX/mingw/mingw_}/bin:$PATH"
    export CPPFLAGS="${CPPFLAGS} -I${MINGW_PREFIX}/include"
    export LIBS="-L${MINGW_PREFIX}/libs"
    export ARROW_S3=OFF
    export ARROW_WITH_RE2=OFF
    # Without this, some dataset functionality segfaults
    export CMAKE_UNITY_BUILD=ON
  else
    export ARROW_S3=ON
    export ARROW_WITH_RE2=ON
    # Without this, some compute functionality segfaults in tests
    export CMAKE_UNITY_BUILD=OFF
  fi

  MSYS2_ARG_CONV_EXCL="-DCMAKE_INSTALL_PREFIX=" \
    "${MINGW_PREFIX}/bin/cmake.exe" \
    "${ARROW_CPP_DIR}" \
    -G "MSYS Makefiles" \
    -DARROW_BUILD_SHARED=OFF \
    -DARROW_BUILD_STATIC=ON \
    -DARROW_BUILD_UTILITIES=OFF \
    -DARROW_COMPUTE=ON \
    -DARROW_CSV=ON \
    -DARROW_DATASET=ON \
    -DARROW_FILESYSTEM=ON \
    -DARROW_HDFS=OFF \
    -DARROW_JEMALLOC=OFF \
    -DARROW_JSON=ON \
    -DARROW_LZ4_USE_SHARED=OFF \
    -DARROW_MIMALLOC=ON \
    -DARROW_PACKAGE_PREFIX="${MINGW_PREFIX}" \
    -DARROW_PARQUET=ON \
    -DARROW_S3="${ARROW_S3}" \
    -DARROW_SNAPPY_USE_SHARED=OFF \
    -DARROW_USE_GLOG=OFF \
    -DARROW_WITH_LZ4=ON \
    -DARROW_WITH_RE2="${ARROW_WITH_RE2}" \
    -DARROW_WITH_SNAPPY=ON \
    -DARROW_WITH_ZLIB=ON \
    -DARROW_WITH_ZSTD=ON \
    -DARROW_ZSTD_USE_SHARED=OFF \
    -DARROW_CXXFLAGS="${CPPFLAGS}" \
    -DCMAKE_BUILD_TYPE="release" \
    -DCMAKE_INSTALL_PREFIX="${MINGW_PREFIX}" \
    -DCMAKE_UNITY_BUILD="${CMAKE_UNITY_BUILD}" \
    -DCMAKE_VERBOSE_MAKEFILE=ON

  make -j3
  popd
}

# Installs the build into ${pkgdir} and rewrites the installed pkg-config
# files so they refer to ${MINGW_PREFIX} instead of its `cygpath -am` form.
package() {
  make -C "${cpp_build_dir}" DESTDIR="${pkgdir}" install

  # Declare and assign separately so a cygpath failure is not hidden by the
  # exit status of `local`.
  local PREFIX_DEPS
  PREFIX_DEPS=$(cygpath -am "${MINGW_PREFIX}")
  local pc
  pushd "${pkgdir}${MINGW_PREFIX}/lib/pkgconfig"
  for pc in *.pc; do
    sed -s "s|${PREFIX_DEPS}|${MINGW_PREFIX}|g" -i "$pc"
  done
  popd
}

# ==> src/arrow/ci/scripts/c_glib_build.sh (new file, mode 100755, index 000000000..ce3cea18e) <==
#!/usr/bin/env bash
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Builds and installs the c_glib sources with Meson and Ninja.
#   $1 - directory that contains the c_glib sources (uses $1/c_glib)
#   $2 - build root (uses $2/c_glib)
# Reads ARROW_HOME; ARROW_GLIB_GTK_DOC and ARROW_GLIB_DEVELOPMENT_MODE
# default to "false".

set -ex

source_dir="${1}/c_glib"
build_dir="${2}/c_glib"
: "${ARROW_GLIB_GTK_DOC:=false}"
: "${ARROW_GLIB_DEVELOPMENT_MODE:=false}"

export PKG_CONFIG_PATH="${ARROW_HOME}/lib/pkgconfig"

export CFLAGS="-DARROW_NO_DEPRECATED_API"
export CXXFLAGS="-DARROW_NO_DEPRECATED_API"

mkdir -p "${build_dir}"

# Build with Meson
meson --prefix="${ARROW_HOME}" \
      --libdir=lib \
      -Ddevelopment_mode="${ARROW_GLIB_DEVELOPMENT_MODE}" \
      -Dgtk_doc="${ARROW_GLIB_GTK_DOC}" \
      "${build_dir}" \
      "${source_dir}"

pushd "${build_dir}"
ninja
ninja install
popd

# ==> src/arrow/ci/scripts/c_glib_test.sh (new file, mode 100755, index 000000000..25c54138e) <==
#!/usr/bin/env bash
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.
# See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Runs the c_glib Ruby tests, the Lua examples (Linux only) and the built
# extension-type example.
#   $1 - directory that contains the c_glib sources (uses $1/c_glib)
#   $2 - build root (uses $2/c_glib)
# Reads ARROW_HOME.

set -ex

source_dir="${1}/c_glib"
build_dir="${2}/c_glib"

# Append the previous value only when it is non-empty: a trailing ':' leaves
# an empty entry, which the dynamic loader treats as the current directory.
export LD_LIBRARY_PATH="${ARROW_HOME}/lib${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
export PKG_CONFIG_PATH="${ARROW_HOME}/lib/pkgconfig"
export GI_TYPELIB_PATH="${ARROW_HOME}/lib/girepository-1.0"

pushd "${source_dir}"

ruby test/run-test.rb

if [[ "$(uname -s)" == "Linux" ]]; then
  # TODO(kszucs): on osx it fails to load 'lgi.corelgilua51' despite that lgi
  # was installed by luarocks
  pushd example/lua
  lua write-batch.lua
  lua read-batch.lua
  lua write-stream.lua
  lua read-stream.lua
  popd
fi

popd

pushd "${build_dir}"
example/extension-type
popd

# ==> src/arrow/ci/scripts/ccache_setup.sh (new file, mode 100755, index 000000000..f77fbb373) <==
#!/usr/bin/env bash
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Appends the ccache-related settings below to the file named by GITHUB_ENV.

set -eux

# One quoted redirection for the whole group: the target path is expanded
# once and survives spaces in it.
{
  echo "ARROW_USE_CCACHE=ON"
  echo "CCACHE_COMPILERCHECK=content"
  echo "CCACHE_COMPRESS=1"
  echo "CCACHE_COMPRESSLEVEL=6"
  echo "CCACHE_MAXSIZE=500M"
} >> "${GITHUB_ENV}"

# ==> src/arrow/ci/scripts/cpp_build.sh (new file, mode 100755, index 000000000..a11dd23b7) <==
#!/usr/bin/env bash
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Configures, builds and installs Arrow C++ with CMake.
#   $1 - Arrow source checkout (uses $1/cpp)
#   $2 - build root (uses $2/cpp)
#   $3 - "true" to also run doxygen in $1/cpp/apidoc (default: false)
# Every -D option below takes its value from the environment variable of
# the same name, falling back to the default shown.

set -ex

source_dir="${1}/cpp"
build_dir="${2}/cpp"
with_docs="${3:-false}"

: "${ARROW_USE_CCACHE:=OFF}"

# TODO(kszucs): consider to move these to CMake
if [ -n "${CONDA_PREFIX}" ]; then
  echo -e "===\n=== Conda environment for build\n==="
  conda list

  export CMAKE_ARGS="${CMAKE_ARGS} -DCMAKE_AR=${AR} -DCMAKE_RANLIB=${RANLIB}"
  export ARROW_GANDIVA_PC_CXX_FLAGS=$(echo | ${CXX} -E -Wp,-v -xc++ - 2>&1 | grep '^ ' | awk '{print "-isystem;" substr($1, 1)}' | tr '\n' ';')
elif [ -x "$(command -v xcrun)" ]; then
  export ARROW_GANDIVA_PC_CXX_FLAGS="-isysroot;$(xcrun --show-sdk-path)"
fi

if [ "${ARROW_USE_CCACHE}" == "ON" ]; then
  echo -e "===\n=== ccache statistics before build\n==="
  ccache -s
fi

if [ "${ARROW_USE_TSAN}" == "ON" ] && [ ! -x "${ASAN_SYMBOLIZER_PATH}" ]; then
  echo -e "Invalid value for \$ASAN_SYMBOLIZER_PATH: ${ASAN_SYMBOLIZER_PATH}"
  exit 1
fi

mkdir -p "${build_dir}"
pushd "${build_dir}"

# Notes on the option list:
#  * ARROW_CXXFLAGS and ARROW_GANDIVA_PC_CXX_FLAGS are quoted so a value
#    containing spaces stays a single -D argument.
#  * ARROW_BUILD_BENCHMARKS_REFERENCE may now be set explicitly; when unset it
#    still follows ARROW_BUILD_BENCHMARKS as before.
#  * ${CMAKE_ARGS} is deliberately left unquoted: it carries several
#    space-separated arguments.
cmake -G "${CMAKE_GENERATOR:-Ninja}" \
      -DARROW_BOOST_USE_SHARED=${ARROW_BOOST_USE_SHARED:-ON} \
      -DARROW_BUILD_BENCHMARKS=${ARROW_BUILD_BENCHMARKS:-OFF} \
      -DARROW_BUILD_BENCHMARKS_REFERENCE=${ARROW_BUILD_BENCHMARKS_REFERENCE:-${ARROW_BUILD_BENCHMARKS:-OFF}} \
      -DARROW_BUILD_EXAMPLES=${ARROW_BUILD_EXAMPLES:-OFF} \
      -DARROW_BUILD_INTEGRATION=${ARROW_BUILD_INTEGRATION:-OFF} \
      -DARROW_BUILD_SHARED=${ARROW_BUILD_SHARED:-ON} \
      -DARROW_BUILD_STATIC=${ARROW_BUILD_STATIC:-ON} \
      -DARROW_BUILD_TESTS=${ARROW_BUILD_TESTS:-OFF} \
      -DARROW_BUILD_UTILITIES=${ARROW_BUILD_UTILITIES:-ON} \
      -DARROW_COMPUTE=${ARROW_COMPUTE:-ON} \
      -DARROW_CSV=${ARROW_CSV:-ON} \
      -DARROW_CUDA=${ARROW_CUDA:-OFF} \
      -DARROW_CXXFLAGS="${ARROW_CXXFLAGS:-}" \
      -DARROW_DATASET=${ARROW_DATASET:-ON} \
      -DARROW_ENGINE=${ARROW_ENGINE:-ON} \
      -DARROW_DEPENDENCY_SOURCE=${ARROW_DEPENDENCY_SOURCE:-AUTO} \
      -DARROW_EXTRA_ERROR_CONTEXT=${ARROW_EXTRA_ERROR_CONTEXT:-OFF} \
      -DARROW_ENABLE_TIMING_TESTS=${ARROW_ENABLE_TIMING_TESTS:-ON} \
      -DARROW_FILESYSTEM=${ARROW_FILESYSTEM:-ON} \
      -DARROW_FLIGHT=${ARROW_FLIGHT:-OFF} \
      -DARROW_FUZZING=${ARROW_FUZZING:-OFF} \
      -DARROW_GANDIVA_JAVA=${ARROW_GANDIVA_JAVA:-OFF} \
      -DARROW_GANDIVA_PC_CXX_FLAGS="${ARROW_GANDIVA_PC_CXX_FLAGS:-}" \
      -DARROW_GANDIVA=${ARROW_GANDIVA:-OFF} \
      -DARROW_GCS=${ARROW_GCS:-OFF} \
      -DARROW_HDFS=${ARROW_HDFS:-ON} \
      -DARROW_HIVESERVER2=${ARROW_HIVESERVER2:-OFF} \
      -DARROW_INSTALL_NAME_RPATH=${ARROW_INSTALL_NAME_RPATH:-ON} \
      -DARROW_JEMALLOC=${ARROW_JEMALLOC:-ON} \
      -DARROW_JNI=${ARROW_JNI:-OFF} \
      -DARROW_JSON=${ARROW_JSON:-ON} \
      -DARROW_LARGE_MEMORY_TESTS=${ARROW_LARGE_MEMORY_TESTS:-OFF} \
      -DARROW_MIMALLOC=${ARROW_MIMALLOC:-OFF} \
      -DARROW_NO_DEPRECATED_API=${ARROW_NO_DEPRECATED_API:-OFF} \
      -DARROW_ORC=${ARROW_ORC:-OFF} \
      -DARROW_PARQUET=${ARROW_PARQUET:-OFF} \
      -DARROW_PLASMA_JAVA_CLIENT=${ARROW_PLASMA_JAVA_CLIENT:-OFF} \
      -DARROW_PLASMA=${ARROW_PLASMA:-OFF} \
      -DARROW_PYTHON=${ARROW_PYTHON:-OFF} \
      -DARROW_RUNTIME_SIMD_LEVEL=${ARROW_RUNTIME_SIMD_LEVEL:-MAX} \
      -DARROW_S3=${ARROW_S3:-OFF} \
      -DARROW_TEST_LINKAGE=${ARROW_TEST_LINKAGE:-shared} \
      -DARROW_TEST_MEMCHECK=${ARROW_TEST_MEMCHECK:-OFF} \
      -DARROW_USE_ASAN=${ARROW_USE_ASAN:-OFF} \
      -DARROW_USE_CCACHE=${ARROW_USE_CCACHE:-ON} \
      -DARROW_USE_GLOG=${ARROW_USE_GLOG:-OFF} \
      -DARROW_USE_LD_GOLD=${ARROW_USE_LD_GOLD:-OFF} \
      -DARROW_USE_PRECOMPILED_HEADERS=${ARROW_USE_PRECOMPILED_HEADERS:-OFF} \
      -DARROW_USE_STATIC_CRT=${ARROW_USE_STATIC_CRT:-OFF} \
      -DARROW_USE_TSAN=${ARROW_USE_TSAN:-OFF} \
      -DARROW_USE_UBSAN=${ARROW_USE_UBSAN:-OFF} \
      -DARROW_VERBOSE_THIRDPARTY_BUILD=${ARROW_VERBOSE_THIRDPARTY_BUILD:-OFF} \
      -DARROW_WITH_BROTLI=${ARROW_WITH_BROTLI:-OFF} \
      -DARROW_WITH_BZ2=${ARROW_WITH_BZ2:-OFF} \
      -DARROW_WITH_LZ4=${ARROW_WITH_LZ4:-OFF} \
      -DARROW_WITH_SNAPPY=${ARROW_WITH_SNAPPY:-OFF} \
      -DARROW_WITH_UTF8PROC=${ARROW_WITH_UTF8PROC:-ON} \
      -DARROW_WITH_ZLIB=${ARROW_WITH_ZLIB:-OFF} \
      -DARROW_WITH_ZSTD=${ARROW_WITH_ZSTD:-OFF} \
      -DAWSSDK_SOURCE=${AWSSDK_SOURCE:-} \
      -Dbenchmark_SOURCE=${benchmark_SOURCE:-} \
      -DBOOST_SOURCE=${BOOST_SOURCE:-} \
      -DBrotli_SOURCE=${Brotli_SOURCE:-} \
      -DBUILD_WARNING_LEVEL=${BUILD_WARNING_LEVEL:-CHECKIN} \
      -Dc-ares_SOURCE=${cares_SOURCE:-} \
      -DCMAKE_BUILD_TYPE=${ARROW_BUILD_TYPE:-debug} \
      -DCMAKE_C_FLAGS="${CFLAGS:-}" \
      -DCMAKE_CXX_FLAGS="${CXXFLAGS:-}" \
      -DCMAKE_INSTALL_LIBDIR=${CMAKE_INSTALL_LIBDIR:-lib} \
      -DCMAKE_INSTALL_PREFIX="${CMAKE_INSTALL_PREFIX:-${ARROW_HOME}}" \
      -DCMAKE_UNITY_BUILD=${CMAKE_UNITY_BUILD:-OFF} \
      -Dgflags_SOURCE=${gflags_SOURCE:-} \
      -Dgoogle_cloud_cpp_storage_SOURCE=${google_cloud_cpp_storage_SOURCE:-} \
      -DgRPC_SOURCE=${gRPC_SOURCE:-} \
      -DGTest_SOURCE=${GTest_SOURCE:-} \
      -DLz4_SOURCE=${Lz4_SOURCE:-} \
      -DORC_SOURCE=${ORC_SOURCE:-} \
      -DPARQUET_BUILD_EXECUTABLES=${PARQUET_BUILD_EXECUTABLES:-OFF} \
      -DPARQUET_BUILD_EXAMPLES=${PARQUET_BUILD_EXAMPLES:-OFF} \
      -DPARQUET_REQUIRE_ENCRYPTION=${PARQUET_REQUIRE_ENCRYPTION:-ON} \
      -DProtobuf_SOURCE=${Protobuf_SOURCE:-} \
      -DRapidJSON_SOURCE=${RapidJSON_SOURCE:-} \
      -Dre2_SOURCE=${re2_SOURCE:-} \
      -DSnappy_SOURCE=${Snappy_SOURCE:-} \
      -DThrift_SOURCE=${Thrift_SOURCE:-} \
      -Dutf8proc_SOURCE=${utf8proc_SOURCE:-} \
      -Dzstd_SOURCE=${zstd_SOURCE:-} \
      ${CMAKE_ARGS} \
      "${source_dir}"

if [ -n "${CPP_MAKE_PARALLELISM}" ]; then
  time cmake --build . --target install -- -j"${CPP_MAKE_PARALLELISM}"
else
  time cmake --build . --target install
fi

popd

if [ -x "$(command -v ldconfig)" ]; then
  ldconfig
fi

if [ "${ARROW_USE_CCACHE}" == "ON" ]; then
  echo -e "===\n=== ccache statistics after build\n==="
  ccache -s
fi

if [ "${with_docs}" == "true" ]; then
  pushd "${source_dir}/apidoc"
  doxygen
  popd
fi

# ==> src/arrow/ci/scripts/cpp_test.sh (new file, mode 100755, index 000000000..822557f25) <==
#!/usr/bin/env bash
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.
# See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Runs the Arrow C++ unit tests with ctest, then (optionally) the built
# examples and the fuzzing regression inputs.
#   $1   - Arrow source checkout
#   $2   - build root (uses $2/cpp)
#   $3.. - extra arguments passed through to ctest unchanged

set -ex

# Numeric comparison: inside [[ ]] the '<' operator compares strings, so an
# argument count of 10 or more would sort before "2" and print the usage.
if (( $# < 2 )); then
  echo "Usage: $0 <Arrow dir> <build dir> [ctest args ...]"
  exit 1
fi

arrow_dir="${1}"; shift
build_dir="${1}/cpp"; shift
source_dir="${arrow_dir}/cpp"
binary_output_dir="${build_dir}/${ARROW_BUILD_TYPE:-debug}"

export ARROW_TEST_DATA="${arrow_dir}/testing/data"
export PARQUET_TEST_DATA="${source_dir}/submodules/parquet-testing/data"
# Append the previous value only when non-empty so no empty (current
# directory) entry ends up in the search path.
export LD_LIBRARY_PATH="${ARROW_HOME}/${CMAKE_INSTALL_LIBDIR:-lib}${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"

# By default, aws-sdk tries to contact a non-existing local ip host
# to retrieve metadata. Disable this so that S3FileSystem tests run faster.
export AWS_EC2_METADATA_DISABLED=TRUE

ctest_options=()
case "$(uname)" in
  Linux)
    n_jobs=$(nproc)
    ;;
  Darwin)
    n_jobs=$(sysctl -n hw.ncpu)
    ;;
  MINGW*)
    n_jobs=${NUMBER_OF_PROCESSORS:-1}
    # TODO: Enable these crashed tests.
    # https://issues.apache.org/jira/browse/ARROW-9072
    exclude_tests="gandiva-internals-test"
    exclude_tests="${exclude_tests}|gandiva-projector-test"
    exclude_tests="${exclude_tests}|gandiva-utf8-test"
    if [ "${MSYSTEM}" = "MINGW32" ]; then
      exclude_tests="${exclude_tests}|gandiva-projector-test"
      exclude_tests="${exclude_tests}|gandiva-binary-test"
      exclude_tests="${exclude_tests}|gandiva-boolean-expr-test"
      exclude_tests="${exclude_tests}|gandiva-date-time-test"
      exclude_tests="${exclude_tests}|gandiva-decimal-single-test"
      exclude_tests="${exclude_tests}|gandiva-decimal-test"
      exclude_tests="${exclude_tests}|gandiva-filter-project-test"
      exclude_tests="${exclude_tests}|gandiva-filter-test"
      exclude_tests="${exclude_tests}|gandiva-hash-test"
      exclude_tests="${exclude_tests}|gandiva-if-expr-test"
      exclude_tests="${exclude_tests}|gandiva-in-expr-test"
      exclude_tests="${exclude_tests}|gandiva-literal-test"
      exclude_tests="${exclude_tests}|gandiva-null-validity-test"
    fi
    ctest_options+=(--exclude-regex "${exclude_tests}")
    ;;
  *)
    n_jobs=${NPROC:-1}
    ;;
esac

pushd "${build_dir}"

# Point PYTHON at python3 when no plain `python` is on PATH.
if ! command -v python > /dev/null 2>&1; then
  export PYTHON=python3
fi
# "$@" keeps each caller-supplied ctest argument as one word, even when it
# contains spaces or glob characters.
ctest \
  --label-regex unittest \
  --output-on-failure \
  --parallel "${n_jobs}" \
  --timeout 300 \
  "${ctest_options[@]}" \
  "$@"

if [ "${ARROW_BUILD_EXAMPLES}" == "ON" ]; then
  examples=$(find "${binary_output_dir}" -executable -name "*example")
  if [ "${examples}" == "" ]; then
    echo "=================="
    echo "No examples found!"
    echo "=================="
    exit 1
  fi
  # ${examples} is intentionally unquoted: it holds one path per word.
  for ex in ${examples}
  do
    echo "=================="
    echo "Executing ${ex}"
    echo "=================="
    ${ex}
  done
fi

if [ "${ARROW_FUZZING}" == "ON" ]; then
  # Fuzzing regression tests
  # Only the directory part is quoted; the trailing patterns must still glob.
  "${binary_output_dir}/arrow-ipc-stream-fuzz" "${ARROW_TEST_DATA}"/arrow-ipc-stream/crash-*
  "${binary_output_dir}/arrow-ipc-stream-fuzz" "${ARROW_TEST_DATA}"/arrow-ipc-stream/*-testcase-*
  "${binary_output_dir}/arrow-ipc-file-fuzz" "${ARROW_TEST_DATA}"/arrow-ipc-file/*-testcase-*
  "${binary_output_dir}/arrow-ipc-tensor-stream-fuzz" "${ARROW_TEST_DATA}"/arrow-ipc-tensor-stream/*-testcase-*
  if [ "${ARROW_PARQUET}" == "ON" ]; then
    "${binary_output_dir}/parquet-arrow-fuzz" "${ARROW_TEST_DATA}"/parquet/fuzzing/*-testcase-*
  fi
fi

popd

# ==> src/arrow/ci/scripts/csharp_build.sh (new file, mode 100755, index 000000000..5a3976794) <==
#!/usr/bin/env bash
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Runs `dotnet build` inside $1/csharp.
#   $1 - Arrow source checkout

set -ex

source_dir="${1}/csharp"

# Quoted so a checkout path containing spaces is passed as one argument.
pushd "${source_dir}"
dotnet build
popd

# ==> src/arrow/ci/scripts/csharp_pack.sh (new file, mode 100755, index 000000000..e9dfc664e) <==
#!/usr/bin/env bash
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Runs `dotnet pack -c Release` inside $1/csharp.
#   $1 - Arrow source checkout

set -eux

source_dir="${1}/csharp"

pushd "${source_dir}"
dotnet pack -c Release
popd

# ==> src/arrow/ci/scripts/csharp_test.sh (new file, mode 100755, index 000000000..9e4e35dd4) <==
#!/usr/bin/env bash
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Runs `dotnet test` inside $1/csharp, then `sourcelink test` on every
# Apache.Arrow.pdb found under artifacts/Apache.Arrow/*/*/.
#   $1 - Arrow source checkout

set -ex

source_dir="${1}/csharp"

pushd "${source_dir}"
dotnet test
# If the glob matches nothing, the literal pattern is passed to sourcelink;
# that behaviour is kept from the original script.
for pdb in artifacts/Apache.Arrow/*/*/Apache.Arrow.pdb; do
  sourcelink test "${pdb}"
done
popd

# ==> src/arrow/ci/scripts/docs_build.sh (new file, mode 100755, index 000000000..e6ee768ee) <==
#!/usr/bin/env bash
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Builds the Sphinx documentation and copies the per-language API docs next
# to it.
#   $1 - Arrow source checkout
#   $2 - build root (output goes to $2/docs)
# Reads ARROW_HOME.

set -ex

arrow_dir="${1}"
build_dir="${2}/docs"

# Append previous values only when non-empty, so neither search path gains a
# trailing ':' (an empty entry) when the variable was unset.
export LD_LIBRARY_PATH="${ARROW_HOME}/lib${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
export PKG_CONFIG_PATH="${ARROW_HOME}/lib/pkgconfig${PKG_CONFIG_PATH:+:${PKG_CONFIG_PATH}}"
export GI_TYPELIB_PATH="${ARROW_HOME}/lib/girepository-1.0"
export CFLAGS="-DARROW_NO_DEPRECATED_API"
export CXXFLAGS="-DARROW_NO_DEPRECATED_API"

ncpus=$(python3 -c "import os; print(os.cpu_count())")

# Sphinx docs
sphinx-build -b html -j "${ncpus}" "${arrow_dir}/docs/source" "${build_dir}"

# C++ - original doxygen
# rsync -a ${arrow_dir}/cpp/apidoc/ ${build_dir}/cpp

# R
rsync -a "${arrow_dir}/r/docs/" "${build_dir}/r"

# C GLib
rsync -a "${ARROW_HOME}/share/gtk-doc/html/" "${build_dir}/c_glib"

# Java
rsync -a "${arrow_dir}/java/target/site/apidocs/" "${build_dir}/java/reference"

# Javascript
rsync -a "${arrow_dir}/js/doc/" "${build_dir}/js"

# ==> src/arrow/ci/scripts/go_build.sh (new file, mode 100755, index 000000000..267f78e59) <==
#!/usr/bin/env bash
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Fetches dependencies for and installs the Go modules in $1/go/arrow and
# $1/go/parquet.
#   $1 - Arrow source checkout
# When ARROW_GO_TESTCGO is non-empty the arrow module is installed with
# `-tags ccalloc`.

set -ex

source_dir="${1}/go"

pushd "${source_dir}/arrow"

# An array that always starts empty: no stray TAGS value is inherited from
# the caller's environment, and "-tags ccalloc" stays two separate words.
tags=()
if [[ -n "${ARROW_GO_TESTCGO}" ]]; then
  tags=(-tags ccalloc)
fi

go get -d -t -v ./...
go install "${tags[@]}" -v ./...

popd

pushd "${source_dir}/parquet"

go get -d -t -v ./...
go install -v ./...

popd

# ==> src/arrow/ci/scripts/go_cgo_python_test.sh (new file, mode 100755, index 000000000..5f2032fba) <==
#!/usr/bin/env bash
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Builds the cgo test library in $1/go/arrow/cdata/test as a C shared
# library; the remainder of the script runs the Python test against it.
#   $1 - Arrow source checkout

set -ex

source_dir="${1}/go"

pushd "${source_dir}/arrow/cdata/test"

case "$(uname)" in
  Linux)
    testlib="cgotest.so"
    ;;
  Darwin)
    testlib="cgotest.so"
    ;;
  MINGW*)
    testlib="cgotest.dll"
    ;;
  *)
    # Without a default, $testlib stayed empty on any other platform and
    # `-o` swallowed the package argument.
    # NOTE(review): ".so" is assumed for other Unix-like hosts - confirm.
    testlib="cgotest.so"
    ;;
esac

go build -tags cdata_test,assert -buildmode=c-shared -o "$testlib" .

python test_export_to_cgo.py

# Remove the shared library and the header `go build` generated next to it.
rm "$testlib"
rm "${testlib%.*}.h"

popd

# ==> src/arrow/ci/scripts/go_test.sh (new file, mode 100755, index 000000000..f7b2cd963) <==
#!/usr/bin/env bash
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Runs `go test` package by package for $1/go/arrow and $1/go/parquet.
#   $1 - Arrow source checkout
# When ARROW_GO_TESTCGO is non-empty the arrow tests also get the "ccalloc"
# build tag.

set -ex

source_dir="${1}/go"

# An array, so "no extra arguments" expands to nothing rather than to an
# empty word.
testargs=(-race)
case "$(uname)" in
  MINGW*)
    # -race doesn't work on windows currently
    testargs=()
    ;;
esac

if [[ "$(go env GOHOSTARCH)" = "s390x" ]]; then
  testargs=() # -race not supported on s390x
fi

pushd "${source_dir}/arrow"

TAGS="assert,test"
if [[ -n "${ARROW_GO_TESTCGO}" ]]; then
  TAGS="${TAGS},ccalloc"
fi


# the cgo implementation of the c data interface requires the "test"
# tag in order to run its tests so that the testing functions implemented
# in .c files don't get included in non-test builds.

# List packages in a plain assignment first: under `set -e` a failing
# `go list` now aborts the script. Inside the `for ... in $(...)` word list
# its exit status was ignored and the loop silently tested nothing.
packages=$(go list ./...)
for d in $(grep -v vendor <<<"${packages}"); do
  go test "${testargs[@]}" -tags "$TAGS" "$d"
done

popd

pushd "${source_dir}/parquet"

packages=$(go list ./...)
for d in $(grep -v vendor <<<"${packages}"); do
  go test "${testargs[@]}" -tags assert "$d"
done

popd

# ==> src/arrow/ci/scripts/install_conda.sh (new file, mode 100755, index 000000000..f4d313b63) <==
#!/usr/bin/env bash
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Downloads and installs Miniconda, then configures conda.
#   $1 - architecture key (amd64, arm32v7, ppc64le, i386)
#   $2 - platform key (windows, macos, linux)
#   $3 - Miniconda version
#   $4 - installation prefix

set -e

# Maps the architecture argument to the name used in the installer file name.
declare -A archs
archs=([amd64]=x86_64
       [arm32v7]=armv7l
       [ppc64le]=ppc64le
       [i386]=x86)

# Maps the platform argument to the name used in the installer file name.
declare -A platforms
platforms=([windows]=Windows
           [macos]=MacOSX
           [linux]=Linux)

if [ "$#" -ne 4 ]; then
  echo "Usage: $0 <architecture> <platform> <version> <prefix>"
  exit 1
elif [[ -z ${archs[$1]} ]]; then
  echo "Unexpected architecture: ${1}"
  exit 1
elif [[ -z ${platforms[$2]} ]]; then
  echo "Unexpected platform: ${2}"
  exit 1
fi

arch=${archs[$1]}
platform=${platforms[$2]}
version=$3
prefix=$4

echo "Downloading Miniconda installer..."
# Download to a mktemp-created file instead of the fixed /tmp/miniconda.sh,
# and remove it on any exit path.
installer=$(mktemp)
trap 'rm -f -- "${installer}"' EXIT
wget -nv "https://repo.continuum.io/miniconda/Miniconda3-${version}-${platform}-${arch}.sh" -O "${installer}"
bash "${installer}" -b -p "${prefix}"
rm -f -- "${installer}"

# Like "conda init", but for POSIX sh rather than bash
ln -s "${prefix}/etc/profile.d/conda.sh" /etc/profile.d/conda.sh

# Configure
source /etc/profile.d/conda.sh
conda config --add channels conda-forge
conda config --set channel_priority strict
conda config --set show_channel_urls True
conda config --set remote_connect_timeout_secs 12

# Update and clean
conda update --all -y
conda clean --all -y

# ==> src/arrow/ci/scripts/install_dask.sh (new file, mode 100755, index 000000000..954ce3249) <==
#!/usr/bin/env bash
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
+ +set -e + +if [ "$#" -ne 1 ]; then + echo "Usage: $0 <dask version>" + exit 1 +fi + +dask=$1 + +if [ "${dask}" = "master" ]; then + pip install https://github.com/dask/dask/archive/main.tar.gz#egg=dask[dataframe] +elif [ "${dask}" = "latest" ]; then + conda install -q dask +else + conda install -q dask=${dask} +fi +conda clean --all diff --git a/src/arrow/ci/scripts/install_gcs_testbench.sh b/src/arrow/ci/scripts/install_gcs_testbench.sh new file mode 100755 index 000000000..579a78944 --- /dev/null +++ b/src/arrow/ci/scripts/install_gcs_testbench.sh @@ -0,0 +1,38 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +set -e + +if [ "$#" -ne 2 ]; then + echo "Usage: $0 <arch> <storage-testbench version>" + exit 1 +fi + +arch=$1 +if [ "${arch}" != "amd64" ]; then + echo "GCS testbench won't install on non-x86 architecture" + exit 0 +fi + +version=$2 +if [[ "${version}" -eq "default" ]]; then + version="v0.7.0" +fi + +pip install "https://github.com/googleapis/storage-testbench/archive/${version}.tar.gz" diff --git a/src/arrow/ci/scripts/install_iwyu.sh b/src/arrow/ci/scripts/install_iwyu.sh new file mode 100755 index 000000000..3cd2cbc95 --- /dev/null +++ b/src/arrow/ci/scripts/install_iwyu.sh @@ -0,0 +1,48 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +set -eu + +# Positional arguments, all optional: checkout directory, install prefix, clang major version. +source_dir=${1:-/tmp/iwyu} +install_prefix=${2:-/usr/local} +clang_tools_version=${3:-8} + +# The branch to clone is clang_<version>; for versions below 10 it gets a ".0" suffix (e.g. clang_8.0). +iwyu_branch_name="clang_${clang_tools_version}" +if [ ${clang_tools_version} -lt 10 ]; then + iwyu_branch_name="${iwyu_branch_name}.0" +fi + +git clone --single-branch --branch ${iwyu_branch_name} \ + https://github.com/include-what-you-use/include-what-you-use.git ${source_dir} + +mkdir -p ${source_dir}/build +pushd ${source_dir}/build + +# Build IWYU for current Clang +export CC=clang-${clang_tools_version} +export CXX=clang++-${clang_tools_version} + +cmake -DCMAKE_PREFIX_PATH=/usr/lib/llvm-${clang_tools_version} \ + -DCMAKE_INSTALL_PREFIX=${install_prefix} \ + ${source_dir} +make -j4 +make install + +popd + +# The checkout (including the build directory) is removed once the install step has run. +rm -rf ${source_dir} diff --git a/src/arrow/ci/scripts/install_kartothek.sh b/src/arrow/ci/scripts/install_kartothek.sh new file mode 100755 index 000000000..4d88943b6 --- /dev/null +++ b/src/arrow/ci/scripts/install_kartothek.sh @@ -0,0 +1,41 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License.
+ +set -e + +if [ "$#" -ne 2 ]; then + echo "Usage: $0 <kartothek version> <target directory>" + exit 1 +fi + +karthothek=$1 +target=$2 + +git clone --recurse-submodules https://github.com/JDASoftwareGroup/kartothek "${target}" +if [ "${kartothek}" = "master" ]; then + git -C "${target}" checkout master; +elif [ "${kartothek}" = "latest" ]; then + git -C "${target}" checkout $(git describe --tags); +else + git -C "${target}" checkout ${kartothek}; +fi + +pushd "${target}" +pip install --no-deps . +popd diff --git a/src/arrow/ci/scripts/install_minio.sh b/src/arrow/ci/scripts/install_minio.sh new file mode 100755 index 000000000..5cda46e59 --- /dev/null +++ b/src/arrow/ci/scripts/install_minio.sh @@ -0,0 +1,62 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +set -e + +declare -A archs +archs=([amd64]=amd64 + [arm64v8]=arm64 + [arm32v7]=arm + [s390x]=s390x) + +declare -A platforms +platforms=([linux]=linux + [macos]=darwin) + +arch=${archs[$1]} +platform=${platforms[$2]} +version=$3 +prefix=$4 + +if [ "$#" -ne 4 ]; then + echo "Usage: $0 <architecture> <platform> <version> <prefix>" + exit 1 +elif [[ -z ${arch} ]]; then + echo "Unexpected architecture: ${1}" + exit 1 +elif [[ -z ${platform} ]]; then + echo "Unexpected platform: ${2}" + exit 1 +elif [[ ${version} != "latest" ]]; then + echo "Cannot fetch specific versions of minio, only latest is supported." + exit 1 +fi + +if [[ ! -x ${prefix}/bin/minio ]]; then + url="https://dl.min.io/server/minio/release/${platform}-${arch}/minio" + echo "Fetching ${url}..." + wget -nv -P ${prefix}/bin ${url} + chmod +x ${prefix}/bin/minio +fi +if [[ ! -x ${prefix}/bin/mc ]]; then + url="https://dl.min.io/client/mc/release/${platform}-${arch}/mc" + echo "Fetching ${url}..." + wget -nv -P ${prefix}/bin ${url} + chmod +x ${prefix}/bin/mc +fi diff --git a/src/arrow/ci/scripts/install_osx_sdk.sh b/src/arrow/ci/scripts/install_osx_sdk.sh new file mode 100755 index 000000000..896d084e0 --- /dev/null +++ b/src/arrow/ci/scripts/install_osx_sdk.sh @@ -0,0 +1,41 @@ +#!/usr/bin/env bash + +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. + +set -ex + +if [ ${using_homebrew} != "yes" ]; then + export MACOSX_DEPLOYMENT_TARGET="10.9" + export CONDA_BUILD_SYSROOT="$(xcode-select -p)/Platforms/MacOSX.platform/Developer/SDKs/MacOSX${MACOSX_DEPLOYMENT_TARGET}.sdk" + + if [[ ! -d ${CONDA_BUILD_SYSROOT} || "$OSX_FORCE_SDK_DOWNLOAD" == "1" ]]; then + echo "downloading ${macosx_deployment_target} sdk" + curl -L -O https://github.com/phracker/MacOSX-SDKs/releases/download/10.13/MacOSX${MACOSX_DEPLOYMENT_TARGET}.sdk.tar.xz + tar -xf MacOSX${MACOSX_DEPLOYMENT_TARGET}.sdk.tar.xz -C "$(dirname "$CONDA_BUILD_SYSROOT")" + # set minimum sdk version to our target + plutil -replace MinimumSDKVersion -string ${MACOSX_DEPLOYMENT_TARGET} $(xcode-select -p)/Platforms/MacOSX.platform/Info.plist + plutil -replace DTSDKName -string macosx${MACOSX_DEPLOYMENT_TARGET}internal $(xcode-select -p)/Platforms/MacOSX.platform/Info.plist + fi + + if [ -d "${CONDA_BUILD_SYSROOT}" ]; then + echo "Found CONDA_BUILD_SYSROOT: ${CONDA_BUILD_SYSROOT}" + else + echo "Missing CONDA_BUILD_SYSROOT: ${CONDA_BUILD_SYSROOT}" + exit 1 + fi +fi diff --git a/src/arrow/ci/scripts/install_pandas.sh b/src/arrow/ci/scripts/install_pandas.sh new file mode 100755 index 000000000..5aca65f82 --- /dev/null +++ b/src/arrow/ci/scripts/install_pandas.sh @@ -0,0 +1,46 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +set -e + +# Installs numpy first and then pandas with pip; the numpy version argument is optional. +if [ "$#" -lt 1 ]; then + echo "Usage: $0 <pandas version> <optional numpy version = latest>" + exit 1 +fi + +pandas=$1 +numpy=${2:-"latest"} + +# numpy: "nightly" (pre-release from the scipy-wheels-nightly index), "latest" (unpinned) or an exact version. +if [ "${numpy}" = "nightly" ]; then + pip install --extra-index-url https://pypi.anaconda.org/scipy-wheels-nightly/simple --pre numpy +elif [ "${numpy}" = "latest" ]; then + pip install numpy +else + pip install numpy==${numpy} +fi + +# pandas: "master" (from the git repository, without build isolation), "nightly", "latest" (unpinned) or an exact version. +if [ "${pandas}" = "master" ]; then + pip install git+https://github.com/pandas-dev/pandas.git --no-build-isolation +elif [ "${pandas}" = "nightly" ]; then + pip install --extra-index-url https://pypi.anaconda.org/scipy-wheels-nightly/simple --pre pandas +elif [ "${pandas}" = "latest" ]; then + pip install pandas +else + pip install pandas==${pandas} +fi diff --git a/src/arrow/ci/scripts/install_python.sh b/src/arrow/ci/scripts/install_python.sh new file mode 100755 index 000000000..babb2c1e8 --- /dev/null +++ b/src/arrow/ci/scripts/install_python.sh @@ -0,0 +1,68 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License.
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +set -eu + +declare -A platforms +platforms=([windows]=Windows + [macos]=MacOSX + [linux]=Linux) + +declare -A versions +versions=([3.6]=3.6.8 + [3.7]=3.7.9 + [3.8]=3.8.10 + [3.9]=3.9.6 + [3.10]=3.10.0) + +if [ "$#" -ne 2 ]; then + echo "Usage: $0 <platform> <version>" + exit 1 +elif [[ -z ${platforms[$1]} ]]; then + echo "Unexpected platform: ${1}" + exit 1 +fi + +platform=${platforms[$1]} +version=$2 +full_version=${versions[$2]} + +if [ $platform = "MacOSX" ]; then + echo "Downloading Python installer..." + + if [ "$(uname -m)" = "arm64" ] || [ "$version" = "3.10" ]; then + fname="python-${full_version}-macos11.pkg" + else + fname="python-${full_version}-macosx10.9.pkg" + fi + wget "https://www.python.org/ftp/python/${full_version}/${fname}" + + echo "Installing Python..." + installer -pkg $fname -target / + rm $fname + + echo "Installing Pip..." + python="/Library/Frameworks/Python.framework/Versions/${version}/bin/python${version}" + pip="${python} -m pip" + + $python -m ensurepip + $pip install -U pip setuptools virtualenv +else + echo "Unsupported platform: $platform" +fi diff --git a/src/arrow/ci/scripts/install_spark.sh b/src/arrow/ci/scripts/install_spark.sh new file mode 100755 index 000000000..936313fd8 --- /dev/null +++ b/src/arrow/ci/scripts/install_spark.sh @@ -0,0 +1,31 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +set -e + +# Clones apache/spark into <target directory> and checks out <spark version>; nothing is built here. +if [ "$#" -ne 2 ]; then + echo "Usage: $0 <spark version> <target directory>" + exit 1 +fi + +spark=$1 +target=$2 + +git clone https://github.com/apache/spark "${target}" +git -C "${target}" checkout "${spark}" diff --git a/src/arrow/ci/scripts/install_turbodbc.sh b/src/arrow/ci/scripts/install_turbodbc.sh new file mode 100755 index 000000000..3e644a3e2 --- /dev/null +++ b/src/arrow/ci/scripts/install_turbodbc.sh @@ -0,0 +1,43 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License.
+ +set -e + +if [ "$#" -ne 2 ]; then + echo "Usage: $0 <turbodbc version> <target directory>" + exit 1 +fi + +turbodbc=$1 +target=$2 + +git clone --recurse-submodules https://github.com/blue-yonder/turbodbc "${target}" +if [ "${turbodbc}" = "master" ]; then + git -C "${target}" checkout master; +elif [ "${turbodbc}" = "latest" ]; then + git -C "${target}" checkout $(git describe --tags); +else + git -C "${target}" checkout ${turbodbc}; +fi + +pushd ${target} +wget -q https://github.com/pybind/pybind11/archive/v2.6.2.tar.gz +tar xvf v2.6.2.tar.gz +mv pybind11-2.6.2 pybind11 +popd diff --git a/src/arrow/ci/scripts/install_vcpkg.sh b/src/arrow/ci/scripts/install_vcpkg.sh new file mode 100755 index 000000000..fe99a7fea --- /dev/null +++ b/src/arrow/ci/scripts/install_vcpkg.sh @@ -0,0 +1,39 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +set -e + +if [ "$#" -ne 2 ]; then + echo "Usage: $0 <vcpkg version> <target directory>" + exit 1 +fi + +vcpkg_version=$1 +vcpkg_destination=$2 +vcpkg_patch=$(realpath $(dirname "${0}")/../vcpkg/ports.patch) + +git clone --depth 1 --branch ${vcpkg_version} https://github.com/microsoft/vcpkg ${vcpkg_destination} + +pushd ${vcpkg_destination} + +./bootstrap-vcpkg.sh -useSystemBinaries -disableMetrics +git apply --ignore-whitespace ${vcpkg_patch} +echo "Patch successfully applied!" + +popd diff --git a/src/arrow/ci/scripts/integration_arrow.sh b/src/arrow/ci/scripts/integration_arrow.sh new file mode 100755 index 000000000..30cbb2d63 --- /dev/null +++ b/src/arrow/ci/scripts/integration_arrow.sh @@ -0,0 +1,40 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +set -ex + +# $1: Arrow source checkout; the gold files are read from its testing/ data directory. +arrow_dir=${1} +gold_dir="$arrow_dir/testing/data/arrow-ipc-stream/integration" + +pip install -e "$arrow_dir/dev/archery" + +# Rust can be enabled by exporting ARCHERY_INTEGRATION_WITH_RUST=1 +# The last argument carries no trailing backslash, so nothing appended after it can be swallowed into the command. +archery integration \ + --run-flight \ + --with-cpp=1 \ + --with-csharp=1 \ + --with-java=1 \ + --with-js=1 \ + --with-go=1 \ + --gold-dirs="$gold_dir/0.14.1" \ + --gold-dirs="$gold_dir/0.17.1" \ + --gold-dirs="$gold_dir/1.0.0-bigendian" \ + --gold-dirs="$gold_dir/1.0.0-littleendian" \ + --gold-dirs="$gold_dir/2.0.0-compression" \ + --gold-dirs="$gold_dir/4.0.0-shareddict" diff --git a/src/arrow/ci/scripts/integration_dask.sh b/src/arrow/ci/scripts/integration_dask.sh new file mode 100755 index 000000000..e67a02945 --- /dev/null +++ b/src/arrow/ci/scripts/integration_dask.sh @@ -0,0 +1,40 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License.
+ +set -e + +# check that optional pyarrow modules are available +# because pytest would just skip the dask tests +python -c "import pyarrow.orc" +python -c "import pyarrow.parquet" + +# check that dask.dataframe is correctly installed +python -c "import dask.dataframe" + +# TODO(kszucs): the following tests also use pyarrow +# pytest -sv --pyargs dask.bytes.tests.test_s3 +# pytest -sv --pyargs dask.bytes.tests.test_hdfs +# pytest -sv --pyargs dask.bytes.tests.test_local + +# skip failing pickle test, see https://github.com/dask/dask/issues/6374 +pytest -v --pyargs dask.dataframe.tests.test_dataframe -k "not test_dataframe_picklable and not test_describe_empty" +pytest -v --pyargs dask.dataframe.io.tests.test_orc +# skip failing parquet tests, see https://github.com/dask/dask/issues/6243 +pytest -v --pyargs dask.dataframe.io.tests.test_parquet \ + -k "not test_to_parquet_pyarrow_w_inconsistent_schema_by_partition_fails_by_default and not test_timeseries_nulls_in_schema" diff --git a/src/arrow/ci/scripts/integration_hdfs.sh b/src/arrow/ci/scripts/integration_hdfs.sh new file mode 100755 index 000000000..c95449379 --- /dev/null +++ b/src/arrow/ci/scripts/integration_hdfs.sh @@ -0,0 +1,69 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied.
See the License for the +# specific language governing permissions and limitations +# under the License. + +set -e + +# $1: Arrow source root, $2: build root; HADOOP_HOME and CONDA_PREFIX are read from the environment. +source_dir=${1}/cpp +build_dir=${2}/cpp + +export CLASSPATH=$($HADOOP_HOME/bin/hadoop classpath --glob) +export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop +export LIBHDFS3_CONF=$HADOOP_CONF_DIR/hdfs-site.xml +export ARROW_LIBHDFS3_DIR=$CONDA_PREFIX/lib + +# Saved so the helpers below can switch between the two settings and back. +libhdfs_dir=$HADOOP_HOME/lib/native +hadoop_home=$HADOOP_HOME + +# Restore HADOOP_HOME from the saved value and clear ARROW_LIBHDFS_DIR. +function use_hadoop_home() { + unset ARROW_LIBHDFS_DIR + export HADOOP_HOME=$hadoop_home +} + +# Clear HADOOP_HOME and point ARROW_LIBHDFS_DIR at the saved native library directory. +function use_libhdfs_dir() { + unset HADOOP_HOME + export ARROW_LIBHDFS_DIR=$libhdfs_dir +} + +# execute cpp tests +# Each test binary runs twice: first with HADOOP_HOME set, then with only ARROW_LIBHDFS_DIR set. +export ARROW_HDFS_TEST_LIBHDFS_REQUIRE=ON +pushd ${build_dir} + +debug/arrow-io-hdfs-test +debug/arrow-hdfs-test + +use_libhdfs_dir +debug/arrow-io-hdfs-test +debug/arrow-hdfs-test +use_hadoop_home + +popd + +# cannot use --pyargs with custom arguments like --hdfs or --only-hdfs, because +# pytest ignores them, see https://github.com/pytest-dev/pytest/issues/3517 +export PYARROW_TEST_HDFS=ON + +export PYARROW_HDFS_TEST_LIBHDFS_REQUIRE=ON + +# The Python tests follow the same two-pass pattern as the C++ tests above. +pytest -vs --pyargs pyarrow.tests.test_fs +pytest -vs --pyargs pyarrow.tests.test_hdfs + +use_libhdfs_dir +pytest -vs --pyargs pyarrow.tests.test_fs +pytest -vs --pyargs pyarrow.tests.test_hdfs +use_hadoop_home diff --git a/src/arrow/ci/scripts/integration_hiveserver2.sh b/src/arrow/ci/scripts/integration_hiveserver2.sh new file mode 100755 index 000000000..36fba5ca8 --- /dev/null +++ b/src/arrow/ci/scripts/integration_hiveserver2.sh @@ -0,0 +1,32 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License.
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +set -e + +# $1: Arrow source checkout, $2: build root (the test binary is run from ${2}/cpp). +arrow_dir=${1} +source_dir=${1}/cpp +build_dir=${2}/cpp + +# Wait for impala:21050 before running the test. +# NOTE(review): -t 300 is presumably a timeout in seconds; confirm against util_wait_for_it.sh. +${arrow_dir}/ci/scripts/util_wait_for_it.sh impala:21050 -t 300 -s -- echo "impala is up" + +pushd ${build_dir} + +# ninja hiveserver2-test +debug/hiveserver2-test + +popd diff --git a/src/arrow/ci/scripts/integration_kartothek.sh b/src/arrow/ci/scripts/integration_kartothek.sh new file mode 100755 index 000000000..379569b9c --- /dev/null +++ b/src/arrow/ci/scripts/integration_kartothek.sh @@ -0,0 +1,31 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License.
+ +set -e + +# check that optional pyarrow modules are available +# because pytest would just skip the pyarrow tests +python -c "import pyarrow.parquet" + +# check that kartothek is correctly installed +python -c "import kartothek" + +pushd /kartothek +# See ARROW-12314, test_load_dataframes_columns_raises_missing skipped because of changed error message +pytest -n0 --ignore tests/cli/test_query.py -k "not test_load_dataframes_columns_raises_missing" diff --git a/src/arrow/ci/scripts/integration_spark.sh b/src/arrow/ci/scripts/integration_spark.sh new file mode 100755 index 000000000..90ecbce39 --- /dev/null +++ b/src/arrow/ci/scripts/integration_spark.sh @@ -0,0 +1,87 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# exit on any error +set -eu + +source_dir=${1} +spark_dir=${2} + +# Test Spark with latest PyArrow only, don't build with latest Arrow Java +test_pyarrow_only=${3:-false} + +# Spark branch to checkout +spark_version=${SPARK_VERSION:-master} + +# Use old behavior that always dropped tiemzones. +export PYARROW_IGNORE_TIMEZONE=1 + +if [ "${SPARK_VERSION:0:2}" == "2." 
]; then + # https://github.com/apache/spark/blob/master/docs/sql-pyspark-pandas-with-arrow.md#compatibility-setting-for-pyarrow--0150-and-spark-23x-24x + export ARROW_PRE_0_15_IPC_FORMAT=1 +fi + +# Get Arrow Java version +pushd ${source_dir}/java + arrow_version=`mvn org.apache.maven.plugins:maven-help-plugin:2.1.1:evaluate -Dexpression=project.version | sed -n -e '/^\[.*\]/ !{ /^[0-9]/ { p; q } }'` +popd + +export MAVEN_OPTS="-Xss256m -Xmx2g -XX:ReservedCodeCacheSize=1g -Dorg.slf4j.simpleLogger.defaultLogLevel=warn" +export MAVEN_OPTS="${MAVEN_OPTS} -Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer.Slf4jMavenTransferListener=warn" + +pushd ${spark_dir} + + if [ "${test_pyarrow_only}" == "true" ]; then + echo "Building Spark ${SPARK_VERSION} to test pyarrow only" + + # Build Spark only + build/mvn -B -DskipTests package + + else + + # Update Spark pom with the Arrow version just installed and build Spark, need package phase for pyspark + echo "Building Spark ${SPARK_VERSION} with Arrow ${arrow_version}" + build/mvn versions:set-property -Dproperty=arrow.version -DnewVersion=${arrow_version} + + # Build Spark with new Arrow Java + build/mvn -B -DskipTests package + + spark_scala_tests=( + "org.apache.spark.sql.execution.arrow" + "org.apache.spark.sql.execution.vectorized.ColumnarBatchSuite" + "org.apache.spark.sql.execution.vectorized.ArrowColumnVectorSuite") + + (echo "Testing Spark:"; IFS=$'\n'; echo "${spark_scala_tests[*]}") + + # TODO: should be able to only build spark-sql tests with adding "-pl sql/core" but not currently working + build/mvn -B -Dtest=none -DwildcardSuites=$(IFS=,; echo "${spark_scala_tests[*]}") test + fi + + # Run pyarrow related Python tests only + spark_python_tests=( + "pyspark.sql.tests.test_arrow" + "pyspark.sql.tests.test_pandas_map" + "pyspark.sql.tests.test_pandas_cogrouped_map" + "pyspark.sql.tests.test_pandas_grouped_map" + "pyspark.sql.tests.test_pandas_udf" + "pyspark.sql.tests.test_pandas_udf_scalar" + 
"pyspark.sql.tests.test_pandas_udf_grouped_agg" + "pyspark.sql.tests.test_pandas_udf_window") + + (echo "Testing PySpark:"; IFS=$'\n'; echo "${spark_python_tests[*]}") + python/run-tests --testnames "$(IFS=,; echo "${spark_python_tests[*]}")" --python-executables python +popd diff --git a/src/arrow/ci/scripts/integration_turbodbc.sh b/src/arrow/ci/scripts/integration_turbodbc.sh new file mode 100755 index 000000000..f0fafd512 --- /dev/null +++ b/src/arrow/ci/scripts/integration_turbodbc.sh @@ -0,0 +1,47 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +set -ex + +# $1: turbodbc source checkout, $2: build root (the build happens in ${2}/turbodbc); ARROW_HOME and CXXFLAGS are read from the environment. +source_dir=${1} +build_dir=${2}/turbodbc + +# check that optional pyarrow modules are available +# because pytest would just skip the pyarrow tests +python -c "import pyarrow.orc" +python -c "import pyarrow.parquet" + +mkdir -p ${build_dir} +pushd ${build_dir} + +# Configure with Ninja, installing into ARROW_HOME and using the python found on PATH. +cmake -DCMAKE_INSTALL_PREFIX=${ARROW_HOME} \ + -DCMAKE_CXX_FLAGS="${CXXFLAGS}" \ + -DPYTHON_EXECUTABLE=$(which python) \ + -GNinja \ + ${source_dir} +ninja install + +# TODO(ARROW-5074) +export LD_LIBRARY_PATH="${ARROW_HOME}/lib:${LD_LIBRARY_PATH}" +export ODBCSYSINI="${source_dir}/earthly/odbc/" + +# PostgreSQL is started before ctest runs the test suite. +service postgresql start +ctest --output-on-failure + +popd diff --git a/src/arrow/ci/scripts/java_build.sh b/src/arrow/ci/scripts/java_build.sh new file mode 100755 index 000000000..1ba37606d --- /dev/null +++ b/src/arrow/ci/scripts/java_build.sh @@ -0,0 +1,107 @@ +#!/usr/bin/env bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License.
+
+set -ex
+
+arrow_dir=${1}
+source_dir=${1}/java
+cpp_build_dir=${2}/cpp/${ARROW_BUILD_TYPE:-debug}
+cdata_dist_dir=${2}/java/c
+with_docs=${3:-false}
+
+if [[ "$(uname -s)" == "Linux" ]] && [[ "$(uname -m)" == "s390x" ]]; then
+  # Since some files for s390_64 are not available at maven central,
+  # download pre-built files from Artifactory and install them explicitly
+  mvn_install="mvn install:install-file"
+  wget="wget"
+  artifactory_base_url="https://apache.jfrog.io/artifactory/arrow"
+
+  artifactory_dir="protoc-binary"
+  group="com.google.protobuf"
+  artifact="protoc"
+  ver="3.7.1"
+  classifier="linux-s390_64"
+  extension="exe"
+  target=${artifact}-${ver}-${classifier}.${extension}
+  ${wget} "${artifactory_base_url}/${artifactory_dir}/${ver}/${target}"
+  ${mvn_install} -DgroupId=${group} -DartifactId=${artifact} -Dversion=${ver} -Dclassifier=${classifier} -Dpackaging=${extension} "-Dfile=$(pwd)/${target}"
+  # protoc requires libprotoc.so.18 libprotobuf.so.18
+  ${wget} "${artifactory_base_url}/${artifactory_dir}/${ver}/libprotoc.so.18"
+  ${wget} "${artifactory_base_url}/${artifactory_dir}/${ver}/libprotobuf.so.18"
+  mkdir -p "${ARROW_HOME}/lib"
+  cp lib*.so.18 "${ARROW_HOME}/lib"
+  export LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:${ARROW_HOME}/lib"
+
+  artifactory_dir="protoc-gen-grpc-java-binary"
+  group="io.grpc"
+  artifact="protoc-gen-grpc-java"
+  ver="1.30.2"
+  classifier="linux-s390_64"
+  extension="exe"
+  target=${artifact}-${ver}-${classifier}.${extension}
+  ${wget} "${artifactory_base_url}/${artifactory_dir}/${ver}/${target}"
+  ${mvn_install} -DgroupId=${group} -DartifactId=${artifact} -Dversion=${ver} -Dclassifier=${classifier} -Dpackaging=${extension} "-Dfile=$(pwd)/${target}"
+
+  artifactory_dir="netty-binary"
+  group="io.netty"
+  artifact="netty-transport-native-unix-common"
+  ver="4.1.48.Final"
+  classifier="linux-s390_64"
+  extension="jar"
+  target=${artifact}-${ver}-${classifier}.${extension}
+  ${wget} "${artifactory_base_url}/${artifactory_dir}/${ver}/${target}"
+  ${mvn_install} -DgroupId=${group} -DartifactId=${artifact} -Dversion=${ver} -Dclassifier=${classifier} -Dpackaging=${extension} "-Dfile=$(pwd)/${target}"
+  artifact="netty-transport-native-epoll"
+  extension="jar"
+  target=${artifact}-${ver}-${classifier}.${extension}
+  ${wget} "${artifactory_base_url}/${artifactory_dir}/${ver}/${target}"
+  ${mvn_install} -DgroupId=${group} -DartifactId=${artifact} -Dversion=${ver} -Dclassifier=${classifier} -Dpackaging=${extension} "-Dfile=$(pwd)/${target}"
+fi
+
+mvn="mvn -B -DskipTests -Drat.skip=true -Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer.Slf4jMavenTransferListener=warn"
+# Use `2 * ncores` threads
+mvn="${mvn} -T 2C"
+
+pushd "${source_dir}"
+
+${mvn} install  # ${mvn} is expanded unquoted on purpose: it holds the command plus its flags
+
+if [ "${ARROW_JAVA_SHADE_FLATBUFFERS}" = "ON" ]; then
+  ${mvn} -Pshade-flatbuffers install
+fi
+
+if [ "${ARROW_JAVA_CDATA}" = "ON" ]; then
+  ${mvn} "-Darrow.c.jni.dist.dir=${cdata_dist_dir}" -Parrow-c-data install
+fi
+
+if [ "${ARROW_GANDIVA_JAVA}" = "ON" ]; then
+  ${mvn} "-Darrow.cpp.build.dir=${cpp_build_dir}" -Parrow-jni install
+fi
+
+if [ "${ARROW_PLASMA}" = "ON" ]; then
+  pushd "${source_dir}/plasma"
+  ${mvn} clean install
+  popd
+fi
+
+if [ "${with_docs}" = "true" ]; then
+  # HTTP pooling is turned off to avoid download issues https://issues.apache.org/jira/browse/ARROW-11633
+  ${mvn} -Dcheckstyle.skip=true -Dhttp.keepAlive=false -Dmaven.wagon.http.pool=false install site
+fi
+
+popd
diff --git a/src/arrow/ci/scripts/java_cdata_build.sh b/src/arrow/ci/scripts/java_cdata_build.sh
new file mode 100755
index 000000000..730c775d4
--- /dev/null
+++ b/src/arrow/ci/scripts/java_cdata_build.sh
@@ -0,0 +1,45 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.
The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -ex
+
+arrow_dir=${1}
+build_dir=${2}
+# The directory where the final binaries will be stored when scripts finish
+dist_dir=${3}
+
+echo "=== Clear output directories and leftovers ==="
+# Clear output directories and leftovers
+rm -rf "${build_dir:?}"  # ":?" aborts with a message when the build dir argument is empty
+
+echo "=== Building Arrow Java C Data Interface native library ==="
+mkdir -p "${build_dir}"
+pushd "${build_dir}"
+
+cmake \
+  -DCMAKE_BUILD_TYPE="${ARROW_BUILD_TYPE:-release}" \
+  -DCMAKE_INSTALL_LIBDIR=lib \
+  -DCMAKE_INSTALL_PREFIX="${build_dir}" \
+  "${arrow_dir}/java/c"
+cmake --build . --target install --config "${ARROW_BUILD_TYPE:-release}"
+popd
+
+echo "=== Copying libraries to the distribution folder ==="
+mkdir -p "${dist_dir}"
+cp -L "${build_dir}"/lib/*arrow_cdata_jni.* "${dist_dir}"
diff --git a/src/arrow/ci/scripts/java_full_build.sh b/src/arrow/ci/scripts/java_full_build.sh
new file mode 100755
index 000000000..e452b8098
--- /dev/null
+++ b/src/arrow/ci/scripts/java_full_build.sh
@@ -0,0 +1,42 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.
You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -e
+
+arrow_dir=${1}
+dist_dir=${2}
+
+export ARROW_TEST_DATA="${arrow_dir}/testing/data"
+
+pushd "${arrow_dir}/java"
+
+# build the entire project
+mvn clean install \
+  -Parrow-c-data \
+  -Parrow-jni \
+  "-Darrow.cpp.build.dir=${dist_dir}" \
+  "-Darrow.c.jni.dist.dir=${dist_dir}"
+
+# copy all jars and pom files to the distribution folder
+find ~/.m2/repository/org/apache/arrow \
+  "(" -name "*.jar" -o -name "*.pom" ")" \
+  -exec echo {} ";" \
+  -exec cp {} "${dist_dir}" ";"
+
+popd
diff --git a/src/arrow/ci/scripts/java_jni_macos_build.sh b/src/arrow/ci/scripts/java_jni_macos_build.sh
new file mode 100755
index 000000000..218d2d396
--- /dev/null
+++ b/src/arrow/ci/scripts/java_jni_macos_build.sh
@@ -0,0 +1,115 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -ex
+
+arrow_dir=${1}
+build_dir=${2}
+# The directory where the final binaries will be stored when scripts finish
+dist_dir=${3}
+
+echo "=== Clear output directories and leftovers ==="
+# Clear output directories and leftovers
+rm -rf "${build_dir:?}"  # ":?" aborts with a message when the build dir argument is empty
+
+echo "=== Building Arrow C++ libraries ==="
+: "${ARROW_BUILD_TESTS:=OFF}"  # each ": ${VAR:=default}" only fills in a value the caller left unset or empty
+: "${ARROW_DATASET:=ON}"
+: "${ARROW_FILESYSTEM:=ON}"
+: "${ARROW_GANDIVA_JAVA:=ON}"
+: "${ARROW_GANDIVA:=ON}"
+: "${ARROW_ORC:=ON}"
+: "${ARROW_PARQUET:=ON}"
+: "${ARROW_PLASMA_JAVA_CLIENT:=ON}"
+: "${ARROW_PLASMA:=ON}"
+: "${ARROW_PYTHON:=OFF}"
+: "${CMAKE_BUILD_TYPE:=Release}"
+: "${CMAKE_UNITY_BUILD:=ON}"
+
+export ARROW_TEST_DATA="${arrow_dir}/testing/data"
+export PARQUET_TEST_DATA="${arrow_dir}/cpp/submodules/parquet-testing/data"
+export AWS_EC2_METADATA_DISABLED=TRUE
+
+mkdir -p "${build_dir}"
+pushd "${build_dir}"
+
+cmake \
+  -DARROW_BOOST_USE_SHARED=OFF \
+  -DARROW_BROTLI_USE_SHARED=OFF \
+  -DARROW_BUILD_TESTS="${ARROW_BUILD_TESTS}" \
+  -DARROW_BUILD_UTILITIES=OFF \
+  -DARROW_BZ2_USE_SHARED=OFF \
+  -DARROW_DATASET="${ARROW_DATASET}" \
+  -DARROW_FILESYSTEM="${ARROW_FILESYSTEM}" \
+  -DARROW_GANDIVA_JAVA="${ARROW_GANDIVA_JAVA}" \
+  -DARROW_GANDIVA_STATIC_LIBSTDCPP=ON \
+  -DARROW_GANDIVA="${ARROW_GANDIVA}" \
+  -DARROW_GFLAGS_USE_SHARED=OFF \
+  -DARROW_GRPC_USE_SHARED=OFF \
+  -DARROW_JNI=ON \
+  -DARROW_LZ4_USE_SHARED=OFF \
+  -DARROW_OPENSSL_USE_SHARED=OFF \
+  -DARROW_ORC="${ARROW_ORC}" \
+  -DARROW_PARQUET="${ARROW_PARQUET}" \
+  -DARROW_PLASMA_JAVA_CLIENT="${ARROW_PLASMA_JAVA_CLIENT}" \
+  -DARROW_PLASMA="${ARROW_PLASMA}" \
+  -DARROW_PROTOBUF_USE_SHARED=OFF \
+  -DARROW_PYTHON="${ARROW_PYTHON}" \
+  -DARROW_SNAPPY_USE_SHARED=OFF \
+  -DARROW_THRIFT_USE_SHARED=OFF \
+  -DARROW_UTF8PROC_USE_SHARED=OFF \
+  -DARROW_ZSTD_USE_SHARED=OFF \
+  -DCMAKE_BUILD_TYPE="${CMAKE_BUILD_TYPE}" \
+  -DCMAKE_INSTALL_LIBDIR=lib \
+  -DCMAKE_INSTALL_PREFIX="${build_dir}" \
+  -DCMAKE_UNITY_BUILD="${CMAKE_UNITY_BUILD}" \
+  -DPARQUET_BUILD_EXAMPLES=OFF \
+  -DPARQUET_BUILD_EXECUTABLES=OFF \
+  -DPARQUET_REQUIRE_ENCRYPTION=OFF \
+  -Dre2_SOURCE=BUNDLED \
+  "${arrow_dir}/cpp"
+cmake --build . --target install
+
+if [ "${ARROW_BUILD_TESTS}" = "ON" ]; then
+  ctest
+fi
+
+popd
+
+echo "=== Copying libraries to the distribution folder ==="
+mkdir -p "${dist_dir}"
+cp -L "${build_dir}/lib/libgandiva_jni.dylib" "${dist_dir}"
+cp -L "${build_dir}/lib/libarrow_dataset_jni.dylib" "${dist_dir}"
+cp -L "${build_dir}/lib/libarrow_orc_jni.dylib" "${dist_dir}"
+
+echo "=== Checking shared dependencies for libraries ==="
+
+pushd "${dist_dir}"
+archery linking check-dependencies \
+  --allow libarrow_dataset_jni \
+  --allow libarrow_orc_jni \
+  --allow libc++ \
+  --allow libgandiva_jni \
+  --allow libncurses \
+  --allow libSystem \
+  --allow libz \
+  libgandiva_jni.dylib \
+  libarrow_dataset_jni.dylib \
+  libarrow_orc_jni.dylib
+popd
diff --git a/src/arrow/ci/scripts/java_jni_manylinux_build.sh b/src/arrow/ci/scripts/java_jni_manylinux_build.sh
new file mode 100755
index 000000000..396c8fc19
--- /dev/null
+++ b/src/arrow/ci/scripts/java_jni_manylinux_build.sh
@@ -0,0 +1,137 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -ex
+
+arrow_dir=${1}
+build_dir=${2}
+# The directory where the final binaries will be stored when scripts finish
+dist_dir=${3}
+
+echo "=== Clear output directories and leftovers ==="
+# Clear output directories and leftovers
+rm -rf "${build_dir:?}"  # ":?" aborts with a message when the build dir argument is empty
+
+echo "=== Building Arrow C++ libraries ==="
+devtoolset_version=$(rpm -qa "devtoolset-*-gcc" --queryformat '%{VERSION}' | \
+                       grep -o "^[0-9]*")
+devtoolset_include_cpp="/opt/rh/devtoolset-${devtoolset_version}/root/usr/include/c++/${devtoolset_version}"
+: "${ARROW_DATASET:=ON}"  # each ": ${VAR:=default}" only fills in a value the caller left unset or empty
+: "${ARROW_GANDIVA:=ON}"
+: "${ARROW_GANDIVA_JAVA:=ON}"
+: "${ARROW_FILESYSTEM:=ON}"
+: "${ARROW_JEMALLOC:=ON}"
+: "${ARROW_RPATH_ORIGIN:=ON}"
+: "${ARROW_ORC:=ON}"
+: "${ARROW_PARQUET:=ON}"
+: "${ARROW_PLASMA:=ON}"
+: "${ARROW_PLASMA_JAVA_CLIENT:=ON}"
+: "${ARROW_PYTHON:=OFF}"
+: "${ARROW_BUILD_TESTS:=OFF}"
+: "${CMAKE_BUILD_TYPE:=Release}"
+: "${CMAKE_UNITY_BUILD:=ON}"
+: "${VCPKG_FEATURE_FLAGS:=-manifests}"
+: "${VCPKG_TARGET_TRIPLET:=${VCPKG_DEFAULT_TRIPLET:-x64-linux-static-${CMAKE_BUILD_TYPE}}}"
+: "${GANDIVA_CXX_FLAGS:=-isystem;${devtoolset_include_cpp};-isystem;${devtoolset_include_cpp}/x86_64-redhat-linux;-isystem;-lpthread}"
+
+export ARROW_TEST_DATA="${arrow_dir}/testing/data"
+export PARQUET_TEST_DATA="${arrow_dir}/cpp/submodules/parquet-testing/data"
+export AWS_EC2_METADATA_DISABLED=TRUE
+
+mkdir -p "${build_dir}"
+pushd "${build_dir}"
+
+cmake \
+  -DARROW_BOOST_USE_SHARED=OFF \
+  -DARROW_BROTLI_USE_SHARED=OFF \
+  -DARROW_BUILD_SHARED=ON \
+  -DARROW_BUILD_TESTS="${ARROW_BUILD_TESTS}" \
+  -DARROW_BUILD_UTILITIES=OFF \
+  -DARROW_BZ2_USE_SHARED=OFF \
+  -DARROW_DATASET="${ARROW_DATASET}" \
+  -DARROW_DEPENDENCY_SOURCE="VCPKG" \
+  -DARROW_FILESYSTEM="${ARROW_FILESYSTEM}" \
+  -DARROW_GANDIVA_JAVA="${ARROW_GANDIVA_JAVA}" \
+  -DARROW_GANDIVA_PC_CXX_FLAGS="${GANDIVA_CXX_FLAGS}" \
+  -DARROW_GANDIVA="${ARROW_GANDIVA}" \
+  -DARROW_GRPC_USE_SHARED=OFF \
+  -DARROW_JEMALLOC="${ARROW_JEMALLOC}" \
+  -DARROW_JNI=ON \
+  -DARROW_LZ4_USE_SHARED=OFF \
+  -DARROW_OPENSSL_USE_SHARED=OFF \
+  -DARROW_ORC="${ARROW_ORC}" \
+  -DARROW_PARQUET="${ARROW_PARQUET}" \
+  -DARROW_PLASMA_JAVA_CLIENT="${ARROW_PLASMA_JAVA_CLIENT}" \
+  -DARROW_PLASMA="${ARROW_PLASMA}" \
+  -DARROW_PROTOBUF_USE_SHARED=OFF \
+  -DARROW_PYTHON="${ARROW_PYTHON}" \
+  -DARROW_RPATH_ORIGIN="${ARROW_RPATH_ORIGIN}" \
+  -DARROW_SNAPPY_USE_SHARED=OFF \
+  -DARROW_THRIFT_USE_SHARED=OFF \
+  -DARROW_UTF8PROC_USE_SHARED=OFF \
+  -DARROW_ZSTD_USE_SHARED=OFF \
+  -DCMAKE_BUILD_TYPE="${CMAKE_BUILD_TYPE}" \
+  -DCMAKE_INSTALL_LIBDIR=lib \
+  -DCMAKE_INSTALL_PREFIX="${build_dir}" \
+  -DCMAKE_UNITY_BUILD="${CMAKE_UNITY_BUILD}" \
+  -DPARQUET_BUILD_EXAMPLES=OFF \
+  -DPARQUET_BUILD_EXECUTABLES=OFF \
+  -DPARQUET_REQUIRE_ENCRYPTION=OFF \
+  -DPythonInterp_FIND_VERSION_MAJOR=3 \
+  -DPythonInterp_FIND_VERSION=ON \
+  -DVCPKG_MANIFEST_MODE=OFF \
+  -DVCPKG_TARGET_TRIPLET="${VCPKG_TARGET_TRIPLET}" \
+  -GNinja \
+  "${arrow_dir}/cpp"
+ninja install
+
+if [ "${ARROW_BUILD_TESTS}" = "ON" ]; then
+  ctest \
+    --label-regex unittest \
+    --output-on-failure \
+    --parallel "$(nproc)" \
+    --timeout 300
+fi
+
+popd
+
+echo "=== Copying libraries to the distribution folder ==="
+mkdir -p "${dist_dir}"
+cp -L "${build_dir}/lib/libgandiva_jni.so" "${dist_dir}"
+cp -L "${build_dir}/lib/libarrow_dataset_jni.so" "${dist_dir}"
+cp -L "${build_dir}/lib/libarrow_orc_jni.so" "${dist_dir}"
+
+echo "=== Checking shared dependencies for libraries ==="
+
+pushd "${dist_dir}"
+archery linking check-dependencies \
+  --allow ld-linux-x86-64 \
+  --allow libc \
+  --allow libdl \
+  --allow libgcc_s \
+  --allow libm \
+  --allow libpthread \
+  --allow librt \
+  --allow libstdc++ \
+  --allow libz \
+  --allow linux-vdso \
+  libgandiva_jni.so \
+  libarrow_dataset_jni.so \
+  libarrow_orc_jni.so
+popd
diff --git a/src/arrow/ci/scripts/java_test.sh b/src/arrow/ci/scripts/java_test.sh
new file mode 100755
index 000000000..0e755bcaf
--- /dev/null
+++ b/src/arrow/ci/scripts/java_test.sh
@@ -0,0 +1,54 @@
+#!/usr/bin/env bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more
contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -ex
+
+arrow_dir=${1}
+source_dir=${1}/java
+cpp_build_dir=${2}/cpp/${ARROW_BUILD_TYPE:-debug}
+cdata_dist_dir=${2}/java/c
+
+# For JNI and Plasma tests
+export LD_LIBRARY_PATH="${ARROW_HOME}/lib:${LD_LIBRARY_PATH}"
+export PLASMA_STORE="${ARROW_HOME}/bin/plasma-store-server"
+
+mvn="mvn -B -Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer.Slf4jMavenTransferListener=warn"
+# Use `2 * ncores` threads
+mvn="${mvn} -T 2C"
+
+pushd "${source_dir}"
+
+${mvn} test  # ${mvn} is expanded unquoted on purpose: it holds the command plus its flags
+
+if [ "${ARROW_JNI}" = "ON" ]; then
+  ${mvn} test -Parrow-jni -pl adapter/orc,gandiva,dataset "-Darrow.cpp.build.dir=${cpp_build_dir}"
+fi
+
+if [ "${ARROW_JAVA_CDATA}" = "ON" ]; then
+  ${mvn} test -Parrow-c-data -pl c "-Darrow.c.jni.dist.dir=${cdata_dist_dir}"
+fi
+
+if [ "${ARROW_PLASMA}" = "ON" ]; then
+  pushd "${source_dir}/plasma"
+  java -cp target/test-classes:target/classes \
+       "-Djava.library.path=${cpp_build_dir}" \
+       org.apache.arrow.plasma.PlasmaClientTest
+  popd
+fi
+
+popd
diff --git a/src/arrow/ci/scripts/js_build.sh b/src/arrow/ci/scripts/js_build.sh
new file mode 100755
index 000000000..10ceb41ee
--- /dev/null
+++ b/src/arrow/ci/scripts/js_build.sh
@@ -0,0 +1,36 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license
agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -ex
+
+source_dir=${1}/js
+with_docs=${2:-false}
+
+pushd "${source_dir}"
+
+yarn --frozen-lockfile
+# TODO(kszucs): linting should be moved to archery
+yarn lint:ci
+yarn build
+
+if [ "${with_docs}" = "true" ]; then
+  yarn doc
+fi
+
+popd
diff --git a/src/arrow/ci/scripts/js_test.sh b/src/arrow/ci/scripts/js_test.sh
new file mode 100755
index 000000000..345d6cb81
--- /dev/null
+++ b/src/arrow/ci/scripts/js_test.sh
@@ -0,0 +1,29 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -ex
+
+source_dir=${1}/js
+
+pushd "${source_dir}"
+
+yarn lint
+yarn test
+
+popd
diff --git a/src/arrow/ci/scripts/matlab_build.sh b/src/arrow/ci/scripts/matlab_build.sh
new file mode 100755
index 000000000..5e9bdd2a9
--- /dev/null
+++ b/src/arrow/ci/scripts/matlab_build.sh
@@ -0,0 +1,29 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Exit on error (-e) and print all commands (-x).
+set -ex
+
+base_dir=${1}
+source_dir=${base_dir}/matlab
+build_dir=${base_dir}/matlab/build
+
+cmake -S "${source_dir}" -B "${build_dir}" -G Ninja -D MATLAB_BUILD_TESTS=ON
+cmake --build "${build_dir}" --config Release
+ctest --test-dir "${build_dir}"
diff --git a/src/arrow/ci/scripts/msys2_setup.sh b/src/arrow/ci/scripts/msys2_setup.sh
new file mode 100755
index 000000000..6f6012c87
--- /dev/null
+++ b/src/arrow/ci/scripts/msys2_setup.sh
@@ -0,0 +1,79 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.
The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -eux
+
+target=$1
+
+packages=()
+case "${target}" in
+  cpp|c_glib|ruby)
+    packages+=("${MINGW_PACKAGE_PREFIX}-aws-sdk-cpp")
+    packages+=("${MINGW_PACKAGE_PREFIX}-boost")
+    packages+=("${MINGW_PACKAGE_PREFIX}-brotli")
+    packages+=("${MINGW_PACKAGE_PREFIX}-ccache")
+    packages+=("${MINGW_PACKAGE_PREFIX}-clang")
+    packages+=("${MINGW_PACKAGE_PREFIX}-cmake")
+    packages+=("${MINGW_PACKAGE_PREFIX}-gcc")
+    packages+=("${MINGW_PACKAGE_PREFIX}-gflags")
+    packages+=("${MINGW_PACKAGE_PREFIX}-grpc")
+    packages+=("${MINGW_PACKAGE_PREFIX}-gtest")
+    packages+=("${MINGW_PACKAGE_PREFIX}-libutf8proc")
+    packages+=("${MINGW_PACKAGE_PREFIX}-libxml2")
+    packages+=("${MINGW_PACKAGE_PREFIX}-llvm")
+    packages+=("${MINGW_PACKAGE_PREFIX}-lz4")
+    packages+=("${MINGW_PACKAGE_PREFIX}-make")
+    packages+=("${MINGW_PACKAGE_PREFIX}-mlir")
+    packages+=("${MINGW_PACKAGE_PREFIX}-ninja")
+    packages+=("${MINGW_PACKAGE_PREFIX}-polly")
+    packages+=("${MINGW_PACKAGE_PREFIX}-protobuf")
+    packages+=("${MINGW_PACKAGE_PREFIX}-python3-numpy")
+    packages+=("${MINGW_PACKAGE_PREFIX}-rapidjson")
+    packages+=("${MINGW_PACKAGE_PREFIX}-re2")
+    packages+=("${MINGW_PACKAGE_PREFIX}-snappy")
+    packages+=("${MINGW_PACKAGE_PREFIX}-thrift")
+    packages+=("${MINGW_PACKAGE_PREFIX}-zlib")
+    packages+=("${MINGW_PACKAGE_PREFIX}-zstd")
+    ;;
+esac
+
+case "${target}" in
+  c_glib|ruby)
+    packages+=("${MINGW_PACKAGE_PREFIX}-gobject-introspection")
+    packages+=("${MINGW_PACKAGE_PREFIX}-gtk-doc")
+    packages+=("${MINGW_PACKAGE_PREFIX}-meson")
+    ;;
+esac
+
+case "${target}" in
+  cgo)
+    packages+=("${MINGW_PACKAGE_PREFIX}-arrow")
+    packages+=("${MINGW_PACKAGE_PREFIX}-gcc")
+    ;;
+esac
+
+pacman \
+  --needed \
+  --noconfirm \
+  --refresh \
+  --sync \
+  "${packages[@]}"
+
+"$(dirname "$0")/ccache_setup.sh"
+echo "CCACHE_DIR=$(cygpath --absolute --windows ccache)" >> "$GITHUB_ENV"
diff --git a/src/arrow/ci/scripts/msys2_system_clean.sh b/src/arrow/ci/scripts/msys2_system_clean.sh
new file mode 100755
index 000000000..a356aee66
--- /dev/null
+++ b/src/arrow/ci/scripts/msys2_system_clean.sh
@@ -0,0 +1,33 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -eux
+
+pacman \
+  --cascade \
+  --noconfirm \
+  --nosave \
+  --recursive \
+  --remove \
+  "${MINGW_PACKAGE_PREFIX}-clang-tools-extra" \
+  "${MINGW_PACKAGE_PREFIX}-gcc-ada" \
+  "${MINGW_PACKAGE_PREFIX}-gcc-fortran" \
+  "${MINGW_PACKAGE_PREFIX}-gcc-libgfortran" \
+  "${MINGW_PACKAGE_PREFIX}-gcc-objc" \
+  "${MINGW_PACKAGE_PREFIX}-libgccjit"
diff --git a/src/arrow/ci/scripts/msys2_system_upgrade.sh b/src/arrow/ci/scripts/msys2_system_upgrade.sh
new file mode 100755
index 000000000..646428fbb
--- /dev/null
+++ b/src/arrow/ci/scripts/msys2_system_upgrade.sh
@@ -0,0 +1,28 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -eux
+
+pacman \
+  --noconfirm \
+  --refresh \
+  --refresh \
+  --sync \
+  --sysupgrade \
+  --sysupgrade
diff --git a/src/arrow/ci/scripts/python_benchmark.sh b/src/arrow/ci/scripts/python_benchmark.sh
new file mode 100755
index 000000000..3a35298dc
--- /dev/null
+++ b/src/arrow/ci/scripts/python_benchmark.sh
@@ -0,0 +1,40 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.
The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Check the ASV benchmarking setup.
+# Unfortunately this won't ensure that all benchmarks succeed
+# (see https://github.com/airspeed-velocity/asv/issues/449)
+source deactivate
+conda create -y -q -n pyarrow_asv "python=${PYTHON_VERSION}"
+conda activate pyarrow_asv
+pip install -q git+https://github.com/pitrou/asv.git@customize_commands
+
+export PYARROW_WITH_PARQUET=1
+export PYARROW_WITH_PLASMA=1
+export PYARROW_WITH_ORC=0
+export PYARROW_WITH_GANDIVA=0
+
+pushd "${ARROW_PYTHON_DIR}"
+# Workaround for https://github.com/airspeed-velocity/asv/issues/631
+git fetch --depth=100 origin master:master
+# Generate machine information (mandatory)
+asv machine --yes
+# Run benchmarks on the changeset being tested
+asv run --no-pull --show-stderr --quick HEAD^!
+popd # $ARROW_PYTHON_DIR
diff --git a/src/arrow/ci/scripts/python_build.sh b/src/arrow/ci/scripts/python_build.sh
new file mode 100755
index 000000000..ec6d723b2
--- /dev/null
+++ b/src/arrow/ci/scripts/python_build.sh
@@ -0,0 +1,54 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.
You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -ex
+
+source_dir=${1}/python
+build_dir=${2}/python
+
+if [ -n "${CONDA_PREFIX}" ]; then
+  echo -e "===\n=== Conda environment for build\n==="
+  conda list
+fi
+
+export PYARROW_CMAKE_GENERATOR="${CMAKE_GENERATOR:-Ninja}"
+export PYARROW_BUILD_TYPE="${CMAKE_BUILD_TYPE:-debug}"
+export PYARROW_WITH_S3="${ARROW_S3:-OFF}"
+export PYARROW_WITH_ORC="${ARROW_ORC:-OFF}"
+export PYARROW_WITH_CUDA="${ARROW_CUDA:-OFF}"
+export PYARROW_WITH_HDFS="${ARROW_HDFS:-OFF}"
+export PYARROW_WITH_FLIGHT="${ARROW_FLIGHT:-OFF}"
+export PYARROW_WITH_PLASMA="${ARROW_PLASMA:-OFF}"
+export PYARROW_WITH_GANDIVA="${ARROW_GANDIVA:-OFF}"
+export PYARROW_WITH_PARQUET="${ARROW_PARQUET:-OFF}"
+export PYARROW_WITH_DATASET="${ARROW_DATASET:-OFF}"
+
+export LD_LIBRARY_PATH="${ARROW_HOME}/lib:${LD_LIBRARY_PATH}"
+
+pushd "${source_dir}"
+
+relative_build_dir=$(realpath --relative-to=. "${build_dir}")
+
+# not nice, but prevents mutating the mounted source directory for docker
+${PYTHON:-python} \
+  setup.py build --build-base "${build_dir}" \
+           install --single-version-externally-managed \
+                   --record "${relative_build_dir}/record.txt"
+
+popd
diff --git a/src/arrow/ci/scripts/python_sdist_build.sh b/src/arrow/ci/scripts/python_sdist_build.sh
new file mode 100755
index 000000000..f9e9359b6
--- /dev/null
+++ b/src/arrow/ci/scripts/python_sdist_build.sh
@@ -0,0 +1,27 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.
The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -eux
+
+source_dir=${1}/python
+
+pushd "${source_dir}"
+export SETUPTOOLS_SCM_PRETEND_VERSION="${PYARROW_VERSION:-}"
+${PYTHON:-python} setup.py sdist
+popd
diff --git a/src/arrow/ci/scripts/python_sdist_test.sh b/src/arrow/ci/scripts/python_sdist_test.sh
new file mode 100755
index 000000000..3dd7d7ddd
--- /dev/null
+++ b/src/arrow/ci/scripts/python_sdist_test.sh
@@ -0,0 +1,58 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Install PyArrow from a previously built sdist and run its test suite.
+#
+# Arguments:
+#   $1 - Arrow checkout root; the sdist is looked up in python/dist below it
+#
+# Optional environment: PYARROW_VERSION (selects an exact sdist), PYTHON,
+# PYTEST_ARGS, CMAKE_GENERATOR, CMAKE_BUILD_TYPE and the ARROW_* component
+# switches mapped to PYARROW_WITH_* below.
+
+set -eux
+
+arrow_dir=${1}
+
+export ARROW_SOURCE_DIR=${arrow_dir}
+export ARROW_TEST_DATA=${arrow_dir}/testing/data
+export PARQUET_TEST_DATA=${arrow_dir}/cpp/submodules/parquet-testing/data
+
+export PYARROW_CMAKE_GENERATOR=${CMAKE_GENERATOR:-Ninja}
+export PYARROW_BUILD_TYPE=${CMAKE_BUILD_TYPE:-debug}
+export PYARROW_WITH_S3=${ARROW_S3:-OFF}
+export PYARROW_WITH_ORC=${ARROW_ORC:-OFF}
+export PYARROW_WITH_CUDA=${ARROW_CUDA:-OFF}
+export PYARROW_WITH_HDFS=${ARROW_HDFS:-OFF}
+export PYARROW_WITH_FLIGHT=${ARROW_FLIGHT:-OFF}
+export PYARROW_WITH_PLASMA=${ARROW_PLASMA:-OFF}
+export PYARROW_WITH_GANDIVA=${ARROW_GANDIVA:-OFF}
+export PYARROW_WITH_PARQUET=${ARROW_PARQUET:-OFF}
+export PYARROW_WITH_DATASET=${ARROW_DATASET:-OFF}
+
+# TODO: Users should not require ARROW_HOME and pkg-config to find Arrow C++.
+# Related: ARROW-9171
+# unset ARROW_HOME
+# apt purge -y pkg-config
+
+# ARROW-12619
+if command -v git &> /dev/null; then
+  echo "Git exists, remove it from PATH before executing this script."
+  exit 1
+fi
+
+if [ -n "${PYARROW_VERSION:-}" ]; then
+  sdist="${arrow_dir}/python/dist/pyarrow-${PYARROW_VERSION}.tar.gz"
+else
+  # Expand the glob directly instead of parsing `ls` output. An unmatched
+  # glob is left as the literal pattern, so check that the first entry exists
+  # and fail with a clear message rather than running `pip install` with no
+  # argument.
+  sdist_candidates=("${arrow_dir}"/python/dist/pyarrow-*.tar.gz)
+  if [ ! -e "${sdist_candidates[0]}" ]; then
+    echo "No pyarrow sdist found in ${arrow_dir}/python/dist" >&2
+    exit 1
+  fi
+  # Same selection rule as before: first entry in reverse sort order.
+  sdist=$(printf '%s\n' "${sdist_candidates[@]}" | sort -r | head -n1)
+fi
+${PYTHON:-python} -m pip install "${sdist}"
+
+pytest -r s ${PYTEST_ARGS:-} --pyargs pyarrow
diff --git a/src/arrow/ci/scripts/python_test.sh b/src/arrow/ci/scripts/python_test.sh
new file mode 100755
index 000000000..6e05af89a
--- /dev/null
+++ b/src/arrow/ci/scripts/python_test.sh
@@ -0,0 +1,32 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.
You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Run the PyArrow test suite against an already installed pyarrow.
+#
+# Arguments:
+#   $1 - Arrow checkout root, used to locate the test data directories
+#
+# Optional environment: ARROW_HOME (its lib/ directory is put on
+# LD_LIBRARY_PATH) and PYTEST_ARGS (extra, word-split pytest arguments).
+
+set -ex
+
+arrow_dir=${1}
+
+export ARROW_SOURCE_DIR=${arrow_dir}
+export ARROW_TEST_DATA=${arrow_dir}/testing/data
+export PARQUET_TEST_DATA=${arrow_dir}/cpp/submodules/parquet-testing/data
+# Only append the previous value when there is one: a trailing ':' is an empty
+# path entry, which the dynamic loader treats as the current directory.
+export LD_LIBRARY_PATH="${ARROW_HOME}/lib${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
+
+# Enable some checks inside Python itself
+export PYTHONDEVMODE=1
+
+# PYTEST_ARGS is intentionally unquoted so it can carry several arguments.
+pytest -r s -v ${PYTEST_ARGS} --pyargs pyarrow
diff --git a/src/arrow/ci/scripts/python_wheel_macos_build.sh b/src/arrow/ci/scripts/python_wheel_macos_build.sh
new file mode 100755
index 000000000..1a52a2ad5
--- /dev/null
+++ b/src/arrow/ci/scripts/python_wheel_macos_build.sh
@@ -0,0 +1,166 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Build the Arrow C++ libraries and a PyArrow wheel on macOS.
+#
+# Arguments:
+#   $1 - target architecture: arm64, x86_64 or universal2
+#   $2 - Arrow checkout root
+#   $3 - build root; C++ is configured in build/ and installed to install/
+#
+# Optional environment: MACOSX_DEPLOYMENT_TARGET (default 10.9), SDKROOT,
+# PYTHON_VERSION (log messages only), PYARROW_VERSION and every ARROW_* /
+# CMAKE_* / VCPKG_* variable defaulted below.
+
+set -ex
+
+# Refuse to run without arguments: with empty values the cleanup below would
+# act on /install and /python/... instead of the intended directories.
+arch=${1:?target architecture is required}
+source_dir=${2:?Arrow source directory is required}
+build_dir=${3:?build directory is required}
+
+echo "=== (${PYTHON_VERSION}) Clear output directories and leftovers ==="
+# Clear output directories and leftovers
+rm -rf "${build_dir}/install"
+rm -rf "${source_dir}/python/dist"
+rm -rf "${source_dir}/python/build"
+rm -rf "${source_dir}/python/repaired_wheels"
+rm -rf "${source_dir}"/python/pyarrow/*.so
+rm -rf "${source_dir}"/python/pyarrow/*.so.*
+
+echo "=== (${PYTHON_VERSION}) Set SDK, C++ and Wheel flags ==="
+# The deployment target must get its default before it is interpolated into
+# the host platform tag, otherwise an unset value yields "macosx--<arch>".
+export MACOSX_DEPLOYMENT_TARGET=${MACOSX_DEPLOYMENT_TARGET:-10.9}
+export _PYTHON_HOST_PLATFORM="macosx-${MACOSX_DEPLOYMENT_TARGET}-${arch}"
+export SDKROOT=${SDKROOT:-$(xcrun --sdk macosx --show-sdk-path)}
+
+case "${arch}" in
+  arm64)
+    export CMAKE_OSX_ARCHITECTURES="arm64"
+    ;;
+  x86_64)
+    export CMAKE_OSX_ARCHITECTURES="x86_64"
+    ;;
+  universal2)
+    export CMAKE_OSX_ARCHITECTURES="x86_64;arm64"
+    ;;
+  *)
+    echo "Unexpected architecture: $arch"
+    exit 1
+    ;;
+esac
+
+echo "=== (${PYTHON_VERSION}) Install Python build dependencies ==="
+export PIP_SITE_PACKAGES=$(python -c 'import site; print(site.getsitepackages()[0])')
+# Dots in the deployment target become underscores in the platform tag.
+export PIP_TARGET_PLATFORM="macosx_${MACOSX_DEPLOYMENT_TARGET//./_}_${arch}"
+
+pip install \
+  --upgrade \
+  --only-binary=:all: \
+  --target "${PIP_SITE_PACKAGES}" \
+  --platform "${PIP_TARGET_PLATFORM}" \
+  -r "${source_dir}/python/requirements-wheel-build.txt"
+pip install "delocate>=0.9"
+
+echo "=== (${PYTHON_VERSION}) Building Arrow C++ libraries ==="
+: ${ARROW_DATASET:=ON}
+: ${ARROW_FLIGHT:=ON}
+: ${ARROW_GANDIVA:=OFF}
+: ${ARROW_HDFS:=ON}
+: ${ARROW_JEMALLOC:=ON}
+: ${ARROW_MIMALLOC:=ON}
+: ${ARROW_ORC:=ON}
+: ${ARROW_PARQUET:=ON}
+: ${ARROW_PLASMA:=ON}
+: ${ARROW_S3:=ON}
+: ${ARROW_SIMD_LEVEL:="SSE4_2"}
+: ${ARROW_TENSORFLOW:=ON}
+: ${ARROW_WITH_BROTLI:=ON}
+: ${ARROW_WITH_BZ2:=ON}
+: ${ARROW_WITH_LZ4:=ON}
+: ${ARROW_WITH_SNAPPY:=ON}
+: ${ARROW_WITH_ZLIB:=ON}
+: ${ARROW_WITH_ZSTD:=ON}
+: ${CMAKE_BUILD_TYPE:=release}
+: ${CMAKE_GENERATOR:=Ninja}
+: ${CMAKE_UNITY_BUILD:=ON}
+: ${VCPKG_FEATURE_FLAGS:=-manifests}
+: ${VCPKG_TARGET_TRIPLET:=${VCPKG_DEFAULT_TRIPLET:-x64-osx-static-${CMAKE_BUILD_TYPE}}}
+
+mkdir -p "${build_dir}/build"
+pushd "${build_dir}/build"
+
+cmake \
+    -DARROW_BUILD_SHARED=ON \
+    -DCMAKE_APPLE_SILICON_PROCESSOR=arm64 \
+    -DCMAKE_OSX_ARCHITECTURES=${CMAKE_OSX_ARCHITECTURES} \
+    -DARROW_BUILD_STATIC=OFF \
+    -DARROW_BUILD_TESTS=OFF \
+    -DARROW_DATASET=${ARROW_DATASET} \
+    -DARROW_DEPENDENCY_SOURCE="VCPKG" \
+    -DARROW_DEPENDENCY_USE_SHARED=OFF \
+    -DARROW_FLIGHT=${ARROW_FLIGHT} \
+    -DARROW_GANDIVA=${ARROW_GANDIVA} \
+    -DARROW_HDFS=${ARROW_HDFS} \
+    -DARROW_JEMALLOC=${ARROW_JEMALLOC} \
+    -DARROW_MIMALLOC=${ARROW_MIMALLOC} \
+    -DARROW_ORC=${ARROW_ORC} \
+    -DARROW_PACKAGE_KIND="python-wheel-macos" \
+    -DARROW_PARQUET=${ARROW_PARQUET} \
+    -DARROW_PLASMA=${ARROW_PLASMA} \
+    -DARROW_PYTHON=ON \
+    -DARROW_RPATH_ORIGIN=ON \
+    -DARROW_S3=${ARROW_S3} \
+    -DARROW_SIMD_LEVEL=${ARROW_SIMD_LEVEL} \
+    -DARROW_TENSORFLOW=${ARROW_TENSORFLOW} \
+    -DARROW_USE_CCACHE=ON \
+    -DARROW_WITH_BROTLI=${ARROW_WITH_BROTLI} \
+    -DARROW_WITH_BZ2=${ARROW_WITH_BZ2} \
+    -DARROW_WITH_LZ4=${ARROW_WITH_LZ4} \
+    -DARROW_WITH_SNAPPY=${ARROW_WITH_SNAPPY} \
+    -DARROW_WITH_ZLIB=${ARROW_WITH_ZLIB} \
+    -DARROW_WITH_ZSTD=${ARROW_WITH_ZSTD} \
+    -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} \
+    -DCMAKE_INSTALL_LIBDIR=lib \
+    -DCMAKE_INSTALL_PREFIX="${build_dir}/install" \
+    -DCMAKE_UNITY_BUILD=${CMAKE_UNITY_BUILD} \
+    -DOPENSSL_USE_STATIC_LIBS=ON \
+    -DVCPKG_MANIFEST_MODE=OFF \
+    -DVCPKG_TARGET_TRIPLET=${VCPKG_TARGET_TRIPLET} \
+    -G "${CMAKE_GENERATOR}" \
+    "${source_dir}/cpp"
+cmake --build . --target install
+popd
+
+echo "=== (${PYTHON_VERSION}) Building wheel ==="
+export PYARROW_BUILD_TYPE=${CMAKE_BUILD_TYPE}
+export PYARROW_BUNDLE_ARROW_CPP=1
+export PYARROW_CMAKE_GENERATOR=${CMAKE_GENERATOR}
+export PYARROW_INSTALL_TESTS=1
+export PYARROW_WITH_DATASET=${ARROW_DATASET}
+export PYARROW_WITH_FLIGHT=${ARROW_FLIGHT}
+export PYARROW_WITH_GANDIVA=${ARROW_GANDIVA}
+export PYARROW_WITH_HDFS=${ARROW_HDFS}
+export PYARROW_WITH_ORC=${ARROW_ORC}
+export PYARROW_WITH_PARQUET=${ARROW_PARQUET}
+export PYARROW_WITH_PLASMA=${ARROW_PLASMA}
+export PYARROW_WITH_S3=${ARROW_S3}
+export PYARROW_CMAKE_OPTIONS="-DCMAKE_OSX_ARCHITECTURES=${CMAKE_OSX_ARCHITECTURES} -DARROW_SIMD_LEVEL=${ARROW_SIMD_LEVEL}"
+# PyArrow build configuration
+export PKG_CONFIG_PATH=/usr/lib/pkgconfig:${build_dir}/install/lib/pkgconfig
+# Set PyArrow version explicitly
+export SETUPTOOLS_SCM_PRETEND_VERSION=${PYARROW_VERSION}
+
+pushd "${source_dir}/python"
+python setup.py bdist_wheel
+popd
+
+echo "=== (${PYTHON_VERSION}) Show dynamic libraries the wheel depend on ==="
+deps=$(delocate-listdeps "${source_dir}"/python/dist/*.whl)
+
+# ${deps} must be quoted: unquoted, every dependency is joined onto a single
+# line and the anchored pattern only inspects the first one, so any other
+# non-bundled library goes unnoticed. grep -v succeeds (and prints the
+# offenders) when at least one line is not a bundled Arrow library.
+if echo "${deps}" | grep -v "^pyarrow/lib\(arrow\|gandiva\|parquet\|plasma\)"; then
+  echo "There are non-bundled shared library dependencies."
+  exit 1
+fi
+
+# Move the verified wheels
+mkdir -p "${source_dir}/python/repaired_wheels"
+mv "${source_dir}"/python/dist/*.whl "${source_dir}/python/repaired_wheels/"
diff --git a/src/arrow/ci/scripts/python_wheel_manylinux_build.sh b/src/arrow/ci/scripts/python_wheel_manylinux_build.sh
new file mode 100755
index 000000000..434605cf2
--- /dev/null
+++ b/src/arrow/ci/scripts/python_wheel_manylinux_build.sh
@@ -0,0 +1,149 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.
The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Build the Arrow C++ libraries and a manylinux PyArrow wheel.
+#
+# Takes no arguments. The Arrow checkout is read from /arrow, C++ is built in
+# /tmp/arrow-build and installed to /tmp/arrow-dist, and the repaired wheel is
+# written to /arrow/python/repaired_wheels.
+#
+# Optional environment: PYTHON_VERSION and MANYLINUX_VERSION (log messages and
+# package kind) plus every ARROW_* / CMAKE_* / VCPKG_* variable defaulted
+# below.
+
+set -ex
+
+# Fail the build if the installed libarrow.so exports text symbols whose
+# demangled names do not mention "arrow" (apart from _init / _fini).
+# Writes nm_arrow.log and visible_symbols.log to the current directory.
+function check_arrow_visibility {
+  nm --demangle --dynamic /tmp/arrow-dist/lib/libarrow.so > nm_arrow.log
+
+  # Filter out Arrow symbols and see if anything remains.
+  # '_init' and '_fini' symbols may or not be present, we don't care.
+  # (note we must ignore the grep exit status when no match is found;
+  # piping through cat makes the pipeline succeed in that case)
+  grep ' T ' nm_arrow.log | grep -v -E '(arrow|\b_init\b|\b_fini\b)' | cat - > visible_symbols.log
+
+  # An empty log means nothing unexpected is exported.
+  if [[ ! -s visible_symbols.log ]]; then
+    return 0
+  else
+    echo "== Unexpected symbols exported by libarrow.so =="
+    cat visible_symbols.log
+    echo "================================================"
+
+    exit 1
+  fi
+}
+
+echo "=== (${PYTHON_VERSION}) Clear output directories and leftovers ==="
+# Clear output directories and leftovers
+rm -rf /tmp/arrow-build
+rm -rf /arrow/python/dist
+rm -rf /arrow/python/build
+rm -rf /arrow/python/repaired_wheels
+rm -rf /arrow/python/pyarrow/*.so
+rm -rf /arrow/python/pyarrow/*.so.*
+
+echo "=== (${PYTHON_VERSION}) Building Arrow C++ libraries ==="
+: ${ARROW_DATASET:=ON}
+: ${ARROW_FLIGHT:=ON}
+: ${ARROW_GANDIVA:=OFF}
+: ${ARROW_HDFS:=ON}
+: ${ARROW_JEMALLOC:=ON}
+: ${ARROW_MIMALLOC:=ON}
+: ${ARROW_ORC:=ON}
+: ${ARROW_PARQUET:=ON}
+: ${ARROW_PLASMA:=ON}
+: ${ARROW_S3:=ON}
+: ${ARROW_TENSORFLOW:=ON}
+: ${ARROW_WITH_BROTLI:=ON}
+: ${ARROW_WITH_BZ2:=ON}
+: ${ARROW_WITH_LZ4:=ON}
+: ${ARROW_WITH_SNAPPY:=ON}
+: ${ARROW_WITH_ZLIB:=ON}
+: ${ARROW_WITH_ZSTD:=ON}
+: ${CMAKE_BUILD_TYPE:=release}
+: ${CMAKE_UNITY_BUILD:=ON}
+: ${CMAKE_GENERATOR:=Ninja}
+: ${VCPKG_FEATURE_FLAGS:=-manifests}
+: ${VCPKG_TARGET_TRIPLET:=${VCPKG_DEFAULT_TRIPLET:-x64-linux-static-${CMAKE_BUILD_TYPE}}}
+
+if [[ "$(uname -m)" == arm* ]] || [[ "$(uname -m)" == aarch* ]]; then
+  # Build jemalloc --with-lg-page=16 in order to make the wheel work on both
+  # 4k and 64k page arm64 systems. For more context see
+  # https://github.com/apache/arrow/issues/10929
+  export ARROW_EXTRA_CMAKE_FLAGS="-DARROW_JEMALLOC_LG_PAGE=16"
+fi
+
+mkdir /tmp/arrow-build
+pushd /tmp/arrow-build
+# ARROW_EXTRA_CMAKE_FLAGS is intentionally unquoted so that it expands to
+# nothing when unset and to several flags when it holds more than one.
+cmake \
+    -DARROW_BROTLI_USE_SHARED=OFF \
+    -DARROW_BUILD_SHARED=ON \
+    -DARROW_BUILD_STATIC=OFF \
+    -DARROW_BUILD_TESTS=OFF \
+    -DARROW_DATASET=${ARROW_DATASET} \
+    -DARROW_DEPENDENCY_SOURCE="VCPKG" \
+    -DARROW_DEPENDENCY_USE_SHARED=OFF \
+    -DARROW_FLIGHT=${ARROW_FLIGHT} \
+    -DARROW_GANDIVA=${ARROW_GANDIVA} \
+    -DARROW_HDFS=${ARROW_HDFS} \
+    -DARROW_JEMALLOC=${ARROW_JEMALLOC} \
+    -DARROW_MIMALLOC=${ARROW_MIMALLOC} \
+    -DARROW_ORC=${ARROW_ORC} \
+    -DARROW_PACKAGE_KIND="python-wheel-manylinux${MANYLINUX_VERSION}" \
+    -DARROW_PARQUET=${ARROW_PARQUET} \
+    -DARROW_PLASMA=${ARROW_PLASMA} \
+    -DARROW_PYTHON=ON \
+    -DARROW_RPATH_ORIGIN=ON \
+    -DARROW_S3=${ARROW_S3} \
+    -DARROW_TENSORFLOW=${ARROW_TENSORFLOW} \
+    -DARROW_USE_CCACHE=ON \
+    -DARROW_UTF8PROC_USE_SHARED=OFF \
+    -DARROW_WITH_BROTLI=${ARROW_WITH_BROTLI} \
+    -DARROW_WITH_BZ2=${ARROW_WITH_BZ2} \
+    -DARROW_WITH_LZ4=${ARROW_WITH_LZ4} \
+    -DARROW_WITH_SNAPPY=${ARROW_WITH_SNAPPY} \
+    -DARROW_WITH_ZLIB=${ARROW_WITH_ZLIB} \
+    -DARROW_WITH_ZSTD=${ARROW_WITH_ZSTD} \
+    -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} \
+    -DCMAKE_INSTALL_LIBDIR=lib \
+    -DCMAKE_INSTALL_PREFIX=/tmp/arrow-dist \
+    -DCMAKE_UNITY_BUILD=${CMAKE_UNITY_BUILD} \
+    -DOPENSSL_USE_STATIC_LIBS=ON \
+    -DVCPKG_MANIFEST_MODE=OFF \
+    -DVCPKG_TARGET_TRIPLET=${VCPKG_TARGET_TRIPLET} \
+    ${ARROW_EXTRA_CMAKE_FLAGS} \
+    -G "${CMAKE_GENERATOR}" \
+    /arrow/cpp
+cmake --build . --target install
+popd
+
+# Check that we don't expose any unwanted symbols
+check_arrow_visibility
+
+echo "=== (${PYTHON_VERSION}) Building wheel ==="
+export PYARROW_BUILD_TYPE=${CMAKE_BUILD_TYPE}
+export PYARROW_BUNDLE_ARROW_CPP=1
+export PYARROW_CMAKE_GENERATOR=${CMAKE_GENERATOR}
+export PYARROW_INSTALL_TESTS=1
+export PYARROW_WITH_DATASET=${ARROW_DATASET}
+export PYARROW_WITH_FLIGHT=${ARROW_FLIGHT}
+export PYARROW_WITH_GANDIVA=${ARROW_GANDIVA}
+export PYARROW_WITH_HDFS=${ARROW_HDFS}
+export PYARROW_WITH_ORC=${ARROW_ORC}
+export PYARROW_WITH_PARQUET=${ARROW_PARQUET}
+export PYARROW_WITH_PLASMA=${ARROW_PLASMA}
+export PYARROW_WITH_S3=${ARROW_S3}
+# PyArrow build configuration
+export PKG_CONFIG_PATH=/usr/lib/pkgconfig:/tmp/arrow-dist/lib/pkgconfig
+
+pushd /arrow/python
+python setup.py bdist_wheel
+
+echo "=== (${PYTHON_VERSION}) Tag the wheel with manylinux${MANYLINUX_VERSION} ==="
+auditwheel repair -L . dist/pyarrow-*.whl -w repaired_wheels
+popd
diff --git a/src/arrow/ci/scripts/python_wheel_unix_test.sh b/src/arrow/ci/scripts/python_wheel_unix_test.sh
new file mode 100755
index 000000000..ec703abfc
--- /dev/null
+++ b/src/arrow/ci/scripts/python_wheel_unix_test.sh
@@ -0,0 +1,84 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.
See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Install a built PyArrow wheel and verify it: import checks, then pytest.
+#
+# Arguments:
+#   $1 - Arrow checkout root; wheels are read from python/repaired_wheels
+#
+# Optional environment (all default to ON): ARROW_FLIGHT, ARROW_S3,
+# CHECK_IMPORTS, CHECK_UNITTESTS, INSTALL_PYARROW.
+
+set -e
+set -x
+set -o pipefail
+
+if [ "$#" -ne 1 ]; then
+  echo "Usage: $0 <arrow-src-dir>"
+  exit 1
+fi
+
+source_dir=${1}
+
+: ${ARROW_FLIGHT:=ON}
+: ${ARROW_S3:=ON}
+: ${CHECK_IMPORTS:=ON}
+: ${CHECK_UNITTESTS:=ON}
+: ${INSTALL_PYARROW:=ON}
+
+export PYARROW_TEST_CYTHON=OFF
+export PYARROW_TEST_DATASET=ON
+export PYARROW_TEST_FLIGHT=${ARROW_FLIGHT}
+export PYARROW_TEST_GANDIVA=OFF
+export PYARROW_TEST_HDFS=ON
+export PYARROW_TEST_ORC=ON
+export PYARROW_TEST_PANDAS=ON
+export PYARROW_TEST_PARQUET=ON
+export PYARROW_TEST_PLASMA=ON
+export PYARROW_TEST_S3=${ARROW_S3}
+export PYARROW_TEST_TENSORFLOW=ON
+
+export ARROW_TEST_DATA=${source_dir}/testing/data
+# The parquet-testing checkout lives below cpp/, the same location the other
+# Python test scripts in this directory export.
+export PARQUET_TEST_DATA=${source_dir}/cpp/submodules/parquet-testing/data
+
+if [ "${INSTALL_PYARROW}" == "ON" ]; then
+  # Install the built wheels
+  pip install --force-reinstall "${source_dir}"/python/repaired_wheels/*.whl
+fi
+
+if [ "${CHECK_IMPORTS}" == "ON" ]; then
+  # Test that the modules are importable
+  python -c "
+import pyarrow
+import pyarrow._hdfs
+import pyarrow.csv
+import pyarrow.dataset
+import pyarrow.fs
+import pyarrow.json
+import pyarrow.orc
+import pyarrow.parquet
+import pyarrow.plasma
+"
+  if [ "${PYARROW_TEST_S3}" == "ON" ]; then
+    python -c "import pyarrow._s3fs"
+  fi
+  if [ "${PYARROW_TEST_FLIGHT}" == "ON" ]; then
+    python -c "import pyarrow.flight"
+  fi
+fi
+
+if [ "${CHECK_UNITTESTS}" == "ON" ]; then
+  # Install testing dependencies
+  pip install -U -r "${source_dir}/python/requirements-wheel-test.txt"
+  # Execute unittest, test dependencies must be installed
+  python -c 'import pyarrow; pyarrow.create_library_symlinks()'
+  python -m pytest -r s --pyargs pyarrow
+fi
diff --git a/src/arrow/ci/scripts/python_wheel_windows_build.bat b/src/arrow/ci/scripts/python_wheel_windows_build.bat
new file mode 100644
index 000000000..23be7f512
--- /dev/null
+++
b/src/arrow/ci/scripts/python_wheel_windows_build.bat
@@ -0,0 +1,109 @@
+@rem Licensed to the Apache Software Foundation (ASF) under one
+@rem or more contributor license agreements. See the NOTICE file
+@rem distributed with this work for additional information
+@rem regarding copyright ownership. The ASF licenses this file
+@rem to you under the Apache License, Version 2.0 (the
+@rem "License"); you may not use this file except in compliance
+@rem with the License. You may obtain a copy of the License at
+@rem
+@rem http://www.apache.org/licenses/LICENSE-2.0
+@rem
+@rem Unless required by applicable law or agreed to in writing,
+@rem software distributed under the License is distributed on an
+@rem "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+@rem KIND, either express or implied. See the License for the
+@rem specific language governing permissions and limitations
+@rem under the License.
+
+@rem Builds the Arrow C++ libraries from C:\arrow\cpp into C:\arrow-dist and
+@rem then a PyArrow wheel from C:\arrow\python. Takes no arguments.
+@rem NOTE(review): CMAKE_BUILD_TYPE and PYTHON_VERSION are read below but never
+@rem set in this script; presumably they come from the caller's environment.
+
+@echo on
+
+echo "Building windows wheel..."
+
+@rem Load the 64-bit Visual Studio 2017 toolchain into this session.
+call "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Auxiliary\Build\vcvars64.bat"
+
+echo "=== (%PYTHON_VERSION%) Clear output directories and leftovers ==="
+@rem NOTE(review): "del /s /q" deletes files but is not expected to remove the
+@rem directories themselves - confirm that leftover directories are harmless.
+del /s /q C:\arrow-build
+del /s /q C:\arrow-dist
+del /s /q C:\arrow\python\dist
+del /s /q C:\arrow\python\build
+del /s /q C:\arrow\python\pyarrow\*.so
+del /s /q C:\arrow\python\pyarrow\*.so.*
+
+echo "=== (%PYTHON_VERSION%) Building Arrow C++ libraries ==="
+@rem Unlike the Unix wheel scripts these are fixed values, not overridable
+@rem defaults: "set" overwrites whatever the caller exported.
+set ARROW_DATASET=ON
+set ARROW_FLIGHT=ON
+set ARROW_GANDIVA=OFF
+set ARROW_HDFS=ON
+set ARROW_ORC=OFF
+set ARROW_PARQUET=ON
+set ARROW_MIMALLOC=ON
+set ARROW_S3=ON
+set ARROW_TENSORFLOW=ON
+set ARROW_WITH_BROTLI=ON
+set ARROW_WITH_BZ2=ON
+set ARROW_WITH_LZ4=ON
+set ARROW_WITH_SNAPPY=ON
+set ARROW_WITH_ZLIB=ON
+set ARROW_WITH_ZSTD=ON
+set CMAKE_UNITY_BUILD=ON
+set CMAKE_GENERATOR=Visual Studio 15 2017 Win64
+set VCPKG_FEATURE_FLAGS=-manifests
+
+mkdir C:\arrow-build
+pushd C:\arrow-build
+@rem Configure; "|| exit /B" aborts the script when cmake fails.
+cmake ^
+    -DARROW_BUILD_SHARED=ON ^
+    -DARROW_BUILD_STATIC=OFF ^
+    -DARROW_BUILD_TESTS=OFF ^
+    -DARROW_CXXFLAGS="/MP" ^
+    -DARROW_DATASET=%ARROW_DATASET% ^
+    -DARROW_DEPENDENCY_SOURCE=VCPKG ^
+    -DARROW_DEPENDENCY_USE_SHARED=OFF ^
+    -DARROW_FLIGHT=%ARROW_FLIGHT% ^
+    -DARROW_GANDIVA=%ARROW_GANDIVA% ^
+    -DARROW_HDFS=%ARROW_HDFS% ^
+    -DARROW_MIMALLOC=%ARROW_MIMALLOC% ^
+    -DARROW_ORC=%ARROW_ORC% ^
+    -DARROW_PACKAGE_KIND="python-wheel-windows" ^
+    -DARROW_PARQUET=%ARROW_PARQUET% ^
+    -DARROW_PYTHON=ON ^
+    -DARROW_S3=%ARROW_S3% ^
+    -DARROW_TENSORFLOW=%ARROW_TENSORFLOW% ^
+    -DARROW_WITH_BROTLI=%ARROW_WITH_BROTLI% ^
+    -DARROW_WITH_BZ2=%ARROW_WITH_BZ2% ^
+    -DARROW_WITH_LZ4=%ARROW_WITH_LZ4% ^
+    -DARROW_WITH_SNAPPY=%ARROW_WITH_SNAPPY% ^
+    -DARROW_WITH_ZLIB=%ARROW_WITH_ZLIB% ^
+    -DARROW_WITH_ZSTD=%ARROW_WITH_ZSTD% ^
+    -DCMAKE_BUILD_TYPE=%CMAKE_BUILD_TYPE% ^
+    -DCMAKE_CXX_COMPILER=clcache ^
+    -DCMAKE_INSTALL_PREFIX=C:\arrow-dist ^
+    -DCMAKE_UNITY_BUILD=%CMAKE_UNITY_BUILD% ^
+    -DMSVC_LINK_VERBOSE=ON ^
+    -DVCPKG_MANIFEST_MODE=OFF ^
+    -DVCPKG_TARGET_TRIPLET=x64-windows-static-md-%CMAKE_BUILD_TYPE% ^
+    -G "%CMAKE_GENERATOR%" ^
+    C:\arrow\cpp || exit /B
+cmake --build . --config %CMAKE_BUILD_TYPE% --target install || exit /B
+popd
+
+echo "=== (%PYTHON_VERSION%) Building wheel ==="
+@rem Mirror the C++ feature switches into the PyArrow build configuration.
+set PYARROW_BUILD_TYPE=%CMAKE_BUILD_TYPE%
+set PYARROW_BUNDLE_ARROW_CPP=ON
+set PYARROW_BUNDLE_BOOST=OFF
+set PYARROW_CMAKE_GENERATOR=%CMAKE_GENERATOR%
+set PYARROW_INSTALL_TESTS=ON
+set PYARROW_WITH_DATASET=%ARROW_DATASET%
+set PYARROW_WITH_FLIGHT=%ARROW_FLIGHT%
+set PYARROW_WITH_GANDIVA=%ARROW_GANDIVA%
+set PYARROW_WITH_HDFS=%ARROW_HDFS%
+set PYARROW_WITH_ORC=%ARROW_ORC%
+set PYARROW_WITH_PARQUET=%ARROW_PARQUET%
+set PYARROW_WITH_S3=%ARROW_S3%
+set ARROW_HOME=C:\arrow-dist
+
+pushd C:\arrow\python
+@REM bundle the msvc runtime
+@rem NOTE(review): "cp" is not a cmd.exe builtin; this assumes a Unix-style cp
+@rem is on PATH, and its exit status is not checked.
+cp "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Redist\MSVC\14.16.27012\x64\Microsoft.VC141.CRT\msvcp140.dll" pyarrow\
+python setup.py bdist_wheel || exit /B
+popd
diff --git a/src/arrow/ci/scripts/python_wheel_windows_test.bat b/src/arrow/ci/scripts/python_wheel_windows_test.bat
new file mode 100755
index 000000000..1ea0f8acd
--- /dev/null
+++ b/src/arrow/ci/scripts/python_wheel_windows_test.bat
@@ -0,0 +1,55 @@
+@rem Licensed to the Apache Software Foundation (ASF) under one
+@rem or more contributor license agreements. See the NOTICE file
+@rem distributed with this work for additional information
+@rem regarding copyright ownership. The ASF licenses this file
+@rem to you under the Apache License, Version 2.0 (the
+@rem "License"); you may not use this file except in compliance
+@rem with the License. You may obtain a copy of the License at
+@rem
+@rem http://www.apache.org/licenses/LICENSE-2.0
+@rem
+@rem Unless required by applicable law or agreed to in writing,
+@rem software distributed under the License is distributed on an
+@rem "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+@rem KIND, either express or implied. See the License for the
+@rem specific language governing permissions and limitations
+@rem under the License.
+
+@rem Installs the PyArrow wheel found in C:\arrow\python\dist, checks that its
+@rem modules import and runs the test suite. Takes no arguments.
+
+@echo on
+
+set PYARROW_TEST_CYTHON=OFF
+set PYARROW_TEST_DATASET=ON
+set PYARROW_TEST_FLIGHT=ON
+set PYARROW_TEST_GANDIVA=OFF
+set PYARROW_TEST_HDFS=ON
+set PYARROW_TEST_ORC=OFF
+set PYARROW_TEST_PARQUET=ON
+set PYARROW_TEST_PLASMA=OFF
+set PYARROW_TEST_S3=OFF
+set PYARROW_TEST_TENSORFLOW=ON
+
+@REM Enable again once https://github.com/scipy/oldest-supported-numpy/pull/27 gets merged
+@REM set PYARROW_TEST_PANDAS=ON
+
+set ARROW_TEST_DATA=C:\arrow\testing\data
+@rem The parquet-testing checkout lives below cpp\, the same location the Unix
+@rem Python test scripts export.
+set PARQUET_TEST_DATA=C:\arrow\cpp\submodules\parquet-testing\data
+
+@REM Install testing dependencies
+pip install -r C:\arrow\python\requirements-wheel-test.txt || exit /B
+
+@REM Install the built wheels
+python -m pip install --no-index --find-links=C:\arrow\python\dist\ pyarrow || exit /B
+
+@REM Test that the modules are importable
+@rem Each check aborts the script on failure; without "|| exit /B" a failed
+@rem import would be ignored and the script would carry on.
+python -c "import pyarrow" || exit /B
+python -c "import pyarrow._hdfs" || exit /B
+python -c "import pyarrow._s3fs" || exit /B
+python -c "import pyarrow.csv" || exit /B
+python -c "import pyarrow.dataset" || exit /B
+python -c "import pyarrow.flight" || exit /B
+python -c "import pyarrow.fs" || exit /B
+python -c "import pyarrow.json" || exit /B
+python -c "import pyarrow.parquet" || exit /B
+
+@REM Execute unittest
+pytest -r s --pyargs pyarrow || exit /B
diff --git a/src/arrow/ci/scripts/r_build.sh b/src/arrow/ci/scripts/r_build.sh
new file mode 100755
index 000000000..2a2b9d7d1
--- /dev/null
+++ b/src/arrow/ci/scripts/r_build.sh
@@ -0,0 +1,33 @@
+#!/usr/bin/env bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.
You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Install the R package located in the r/ subdirectory of $1.
+# When $2 is exactly "true", the pkgdown site is built afterwards.
+# R_BIN selects the R executable (default: R); INSTALL_ARGS is passed,
+# word-split, to "R CMD INSTALL".
+
+set -ex
+
+R_BIN=${R_BIN:-R}
+with_docs=${2:-false}
+source_dir=${1}/r
+
+pushd ${source_dir}
+
+${R_BIN} CMD INSTALL ${INSTALL_ARGS} .
+
+case "${with_docs}" in
+  true)
+    ${R_BIN} -e "pkgdown::build_site(install = FALSE)"
+    ;;
+esac
+
+popd
\ No newline at end of file
diff --git a/src/arrow/ci/scripts/r_deps.sh b/src/arrow/ci/scripts/r_deps.sh
new file mode 100755
index 000000000..ad1b5ecc1
--- /dev/null
+++ b/src/arrow/ci/scripts/r_deps.sh
@@ -0,0 +1,45 @@
+#!/usr/bin/env bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Install the dependencies of the R package located in the r/ subdirectory
+# of $1. R_BIN selects the R executable (default: R); INSTALL_ARGS is
+# forwarded to the installs as INSTALL_opts.
+
+set -ex
+
+: ${R_BIN:=R}
+
+source_dir=${1}/r
+
+pushd "${source_dir}"
+
+if [ "${R_BIN}" = "RDsan" ]; then
+  # To prevent the build from timing out, let's prune some optional deps (and their possible version requirements)
+  ${R_BIN} -e 'd <- read.dcf("DESCRIPTION")
+  to_prune <- c("duckdb", "DBI", "dbplyr", "decor", "knitr", "rmarkdown", "pkgload", "reticulate")
+  pattern <- paste0("\\n?", to_prune, " (\\\\(.*\\\\))?,?", collapse = "|")
+  d[,"Suggests"] <- gsub(pattern, "", d[,"Suggests"])
+  write.dcf(d, "DESCRIPTION")'
+fi
+
+# Install R package dependencies
+# install.packages() emits warnings if packages fail to install,
+# but we want to error/fail the build.
+# options(warn=2) turns warnings into errors
+#
+# INSTALL_ARGS stays inside the double-quoted string: stepping out of the
+# quotes around it would word-split a value holding several flags and break
+# the single -e argument apart.
+${R_BIN} -e "options(warn=2); install.packages('remotes'); remotes::install_cran(c('glue', 'rcmdcheck', 'sys')); remotes::install_deps(INSTALL_opts = '${INSTALL_ARGS}')"
+# Separately install the optional/test dependencies but don't error on them,
+# they're not available everywhere and that's ok
+${R_BIN} -e "remotes::install_deps(dependencies = TRUE, INSTALL_opts = '${INSTALL_ARGS}')"
+
+popd
diff --git a/src/arrow/ci/scripts/r_docker_configure.sh b/src/arrow/ci/scripts/r_docker_configure.sh
new file mode 100755
index 000000000..20c987085
--- /dev/null
+++ b/src/arrow/ci/scripts/r_docker_configure.sh
@@ -0,0 +1,81 @@
+#!/usr/bin/env bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Configure the R installation inside a docker image: site-wide R profile and
+# environment, optional compiler quirks, and system packages for S3 support.
+#
+# Takes no arguments. Reads R_BIN (default: R), RHUB_PLATFORM,
+# DEVTOOLSET_VERSION, ARROW_S3 and ARROW_R_DEV from the environment.
+
+set -ex
+
+: ${R_BIN:=R}
+
+# Resolve the R home once, always through the configured R binary, so that
+# every file below belongs to the same R installation.
+r_home=$(${R_BIN} RHOME)
+
+# The Dockerfile should have put this file here
+if [ -f "/arrow/ci/etc/rprofile" ]; then
+  # Ensure parallel R package installation, set CRAN repo mirror,
+  # and use pre-built binaries where possible
+  cat /arrow/ci/etc/rprofile >> "${r_home}/etc/Rprofile.site"
+fi
+
+# Ensure parallel compilation of C/C++ code
+echo "MAKEFLAGS=-j$(${R_BIN} -s -e 'cat(parallel::detectCores())')" >> "${r_home}/etc/Renviron.site"
+
+# Special hacking to try to reproduce quirks on fedora-clang-devel on CRAN
+# which uses a bespoke clang compiled to use libc++
+# https://www.stats.ox.ac.uk/pub/bdr/Rconfig/r-devel-linux-x86_64-fedora-clang
+if [ "$RHUB_PLATFORM" = "linux-x86_64-fedora-clang" ]; then
+  dnf install -y libcxx-devel
+  sed -i.bak -E -e 's/(CXX1?1? =.*)/\1 -stdlib=libc++/g' "${r_home}/etc/Makeconf"
+  rm -rf "${r_home}/etc/Makeconf.bak"
+
+  sed -i.bak -E -e 's/(CXXFLAGS = )(.*)/\1 -g -O3 -Wall -pedantic -frtti -fPIC/' "${r_home}/etc/Makeconf"
+  rm -rf "${r_home}/etc/Makeconf.bak"
+fi
+
+# Special hacking to try to reproduce quirks on centos using non-default build
+# tooling.
+if [[ "$DEVTOOLSET_VERSION" -gt 0 ]]; then
+  if command -v dnf > /dev/null 2>&1; then
+    dnf install -y centos-release-scl
+    dnf install -y "devtoolset-$DEVTOOLSET_VERSION"
+  else
+    yum install -y centos-release-scl
+    yum install -y "devtoolset-$DEVTOOLSET_VERSION"
+  fi
+fi
+
+# Install openssl for S3 support
+if [ "$ARROW_S3" == "ON" ] || [ "$ARROW_R_DEV" == "TRUE" ]; then
+  # Use whichever package manager is present; apt-get is the fallback.
+  if command -v dnf > /dev/null 2>&1; then
+    dnf install -y libcurl-devel openssl-devel
+  elif command -v yum > /dev/null 2>&1; then
+    yum install -y libcurl-devel openssl-devel
+  elif command -v zypper > /dev/null 2>&1; then
+    zypper install -y libcurl-devel libopenssl-devel
+  else
+    apt-get update
+    apt-get install -y libcurl4-openssl-dev libssl-dev
+  fi
+
+  # The Dockerfile should have put this file here
+  if [ -f "/arrow/ci/scripts/install_minio.sh" ] && command -v wget > /dev/null 2>&1; then
+    /arrow/ci/scripts/install_minio.sh amd64 linux latest /usr/local
+  fi
+
+  if [ -f "/arrow/ci/scripts/install_gcs_testbench.sh" ] && command -v pip > /dev/null 2>&1; then
+    /arrow/ci/scripts/install_gcs_testbench.sh amd64 default
+  fi
+fi
+
+# Workaround for html help install failure; see https://github.com/r-lib/devtools/issues/2084#issuecomment-530912786
+# NOTE(review): this calls Rscript directly rather than going through R_BIN.
+Rscript -e 'x <- file.path(R.home("doc"), "html"); if (!file.exists(x)) {dir.create(x, recursive=TRUE); file.copy(system.file("html/R.css", package="stats"), x)}'
diff --git a/src/arrow/ci/scripts/r_pkgdown_check.sh b/src/arrow/ci/scripts/r_pkgdown_check.sh
new file mode 100755
index 000000000..327480a6b
--- /dev/null
+++ b/src/arrow/ci/scripts/r_pkgdown_check.sh
@@ -0,0 +1,41 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.
You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Use this script to prevent errors in the pkgdown site being rendered due to missing YAML entries
+#
+# Takes no arguments; the ./r paths are relative to the current directory.
+# Exits 1 when a non-internal man page is not listed in ./r/_pkgdown.yml.
+#
+# The lists below are deliberately expanded unquoted: word splitting turns
+# the newline-separated names into single words that tr puts back one per
+# line before sort / uniq.
+
+# all .Rd files in the repo
+# (strip the leading directory and only a trailing ".Rd", so a name that
+# merely contains "Rd" is left intact)
+all_rd_files=$(find ./r/man -maxdepth 1 -name "*.Rd" | sed -e 's|^\./r/man/||' -e 's/\.Rd$//' | sort)
+
+# .Rd files to exclude from search (i.e. are internal)
+# (fixed-string match, so the backslash in the Rd markup is taken literally)
+exclusions=$(grep -F '\keyword{internal}' -rl ./r/man --include='*.Rd' | sed -e 's|^\./r/man/||' -e 's/\.Rd$//' | sort)
+
+# .Rd files to check against pkgdown.yml
+# (uniq -u keeps names that occur once, i.e. drops names in both lists)
+rd_files=$(echo ${exclusions} ${all_rd_files} | tr ' ' '\n' | sort | uniq -u)
+
+# pkgdown sections
+pkgdown_sections=$(awk '/^[^ ]/{ f=/reference:/; next } f{ if (sub(/:$/,"")) pkg=$2; else print pkg, $2 }' ./r/_pkgdown.yml | grep -v "title:" | sort)
+
+# get things that appear in man files that don't appear in pkgdown sections
+# (the sections are listed twice so that uniq -u can only keep man pages)
+pkgdown_missing=$(echo ${pkgdown_sections} ${pkgdown_sections} ${rd_files} | tr ' ' '\n' | sort | uniq -u)
+
+# if any sections are missing raise an error
+if [ -n "${pkgdown_missing}" ]; then
+  echo "Error! $pkgdown_missing missing from ./r/_pkgdown.yml"
+  exit 1
+fi
diff --git a/src/arrow/ci/scripts/r_revdepcheck.sh b/src/arrow/ci/scripts/r_revdepcheck.sh
new file mode 100755
index 000000000..b0a2bab64
--- /dev/null
+++ b/src/arrow/ci/scripts/r_revdepcheck.sh
@@ -0,0 +1,88 @@
+#!/usr/bin/env bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.
# Runs revdepcheck for the arrow R package.
# Arguments: $1 - root of the Arrow checkout (the R package is in $1/r)
# Environment: R_BIN (R executable, default "R"), ARROW_R_DEV (forwarded to
# the checks).

set -ex

: "${R_BIN:=R}"

source_dir="${1}/r"

# cpp building dependencies
apt install -y cmake

# system dependencies needed for arrow's reverse dependencies
apt install -y \
  libxml2-dev \
  libfontconfig1-dev \
  libcairo2-dev \
  libglpk-dev \
  libmysqlclient-dev \
  unixodbc-dev \
  libpq-dev \
  coinor-libsymphony-dev \
  coinor-libcgl-dev \
  coinor-symphony \
  libzmq3-dev \
  libudunits2-dev \
  libgdal-dev \
  libgeos-dev \
  libproj-dev

# Quoted so a checkout path containing whitespace is not split
pushd "${source_dir}"

printenv

# By default, aws-sdk tries to contact a non-existing local ip host
# to retrieve metadata. Disable this so that S3FileSystem tests run faster.
export AWS_EC2_METADATA_DISABLED=TRUE

# Set crancache dir so we can cache it
export CRANCACHE_DIR="/arrow/.crancache"

# R program fed to ${R_BIN} on stdin. It is a double-quoted shell string, so
# $ARROW_R_DEV is substituted by the shell before R sees it.
SCRIPT="
  # We can't use RSPM binaries because we need source packages
  options('repos' = c(CRAN = 'https://packagemanager.rstudio.com/all/latest'))
  remotes::install_github('r-lib/revdepcheck')

  # zoo is needed by RcisTarget tests, though only listed in enhances so not installed by revdepcheck
  install.packages('zoo')

  # actually run revdepcheck
  revdepcheck::revdep_check(
    quiet = FALSE,
    timeout = as.difftime(120, units = 'mins'),
    num_workers = 1,
    env = c(
      ARROW_R_DEV = '$ARROW_R_DEV',
      LIBARROW_DOWNLOAD = TRUE,
      LIBARROW_MINIMAL = FALSE,
      revdepcheck::revdep_env_vars()
    ))
  revdepcheck::revdep_report(all = TRUE)

  # Go through the summary and fail if any of the statuses include -
  summary <- revdepcheck::revdep_summary()
  failed <- lapply(summary, function(check) grepl('-', check[['status']]))

  if (any(unlist(failed))) {
    quit(status = 1)
  }
  "

# ${R_BIN} is left unquoted, as in the original, so it may carry extra flags
echo "$SCRIPT" | ${R_BIN} --no-save

popd
# Installs the arrow R package with a sanitizer-enabled R build and runs the
# test suite and the examples, failing if UBSAN reports a runtime error.
# Arguments: $1 - root of the Arrow checkout (the R package is in $1/r)
# Environment: R_BIN (default "RDsan"), INSTALL_ARGS (extra R CMD INSTALL args)

set -ex

: "${R_BIN:=RDsan}"

source_dir="${1}/r"

# Prints the given log and exits non-zero when it contains a sanitizer
# "runtime error" report. R itself may exit 0 in that case, hence the scan.
fail_on_runtime_error() {
  cat "$1"
  if grep -q "runtime error" "$1"; then
    exit 1
  fi
}

pushd "${source_dir}"

# Unity builds were causing the CI job to run out of memory
export CMAKE_UNITY_BUILD=OFF
# Make installation verbose so that the CI job doesn't time out due to silence
export ARROW_R_DEV=TRUE
# ${R_BIN} and ${INSTALL_ARGS} are deliberately unquoted: both may hold
# several words
${R_BIN} CMD INSTALL ${INSTALL_ARGS} .
# But unset the env var so that it doesn't cause us to run extra dev tests
unset ARROW_R_DEV

export UBSAN_OPTIONS="print_stacktrace=1,suppressions=/arrow/r/tools/ubsan.supp"

# Run the test suite. Its log lives in tests/testthat.out, so it has to be
# scanned here, before leaving the directory: the examples below write to a
# different file (./testthat.out) and scanning only that one would miss
# sanitizer errors raised by the tests.
pushd tests
${R_BIN} < testthat.R > testthat.out 2>&1 || { cat testthat.out; exit 1; }
fail_on_runtime_error testthat.out
popd

# Run the examples and scan their log as well
${R_BIN} -e 'library(arrow); testthat::test_examples(".")' > testthat.out 2>&1 || { cat testthat.out; exit 1; }
fail_on_runtime_error testthat.out

popd
# Runs R CMD check (through rcmdcheck) on the arrow R package.
# Arguments: $1 - root of the Arrow checkout (the R package is in $1/r)
# Environment read: R_BIN, ARROW_HOME, ARROW_USE_PKG_CONFIG, ARROW_R_CXXFLAGS,
# ARROW_R_DEV, NOT_CRAN, TEST_R_WITHOUT_LIBARROW, DEVTOOLSET_VERSION,
# INSTALL_ARGS.

set -ex

: "${R_BIN:=R}"

source_dir="${1}/r"

pushd "${source_dir}"

printenv

if [ "$ARROW_USE_PKG_CONFIG" != "false" ]; then
  export LD_LIBRARY_PATH=${ARROW_HOME}/lib:${LD_LIBRARY_PATH}
  export R_LD_LIBRARY_PATH=${LD_LIBRARY_PATH}
fi
export _R_CHECK_COMPILATION_FLAGS_KNOWN_=${ARROW_R_CXXFLAGS}
if [ "$ARROW_R_DEV" = "TRUE" ]; then
  # These are sometimes used in the Arrow C++ build and are not a problem
  export _R_CHECK_COMPILATION_FLAGS_KNOWN_="${_R_CHECK_COMPILATION_FLAGS_KNOWN_} -Wno-attributes -msse4.2 -Wno-noexcept-type -Wno-subobject-linkage"
  if [ "$NOT_CRAN" = "" ]; then
    # Note that NOT_CRAN=true means (among other things) that optional dependencies are built
    # You can set NOT_CRAN=false for the CRAN build and then
    # ARROW_R_DEV=TRUE just adds verbosity
    export NOT_CRAN=true
  fi
fi

export _R_CHECK_CRAN_INCOMING_REMOTE_=FALSE
if [ "$TEST_R_WITHOUT_LIBARROW" != "TRUE" ]; then
  # --run-donttest was used in R < 4.0, this is used now
  export _R_CHECK_DONTTEST_EXAMPLES_=TRUE
fi
# Not all Suggested packages are needed for checking, so in case they aren't installed don't fail
export _R_CHECK_FORCE_SUGGESTS_=FALSE
export _R_CHECK_LIMIT_CORES_=FALSE
export _R_CHECK_TESTS_NLINES_=0

# By default, aws-sdk tries to contact a non-existing local ip host
# to retrieve metadata. Disable this so that S3FileSystem tests run faster.
export AWS_EC2_METADATA_DISABLED=TRUE

# Hack so that texlive2020 doesn't pollute the home dir
export TEXMFCONFIG=/tmp/texmf-config
export TEXMFVAR=/tmp/texmf-var

if [[ "$DEVTOOLSET_VERSION" -gt 0 ]]; then
  # enable the devtoolset version to use it
  source "/opt/rh/devtoolset-${DEVTOOLSET_VERSION}/enable"
fi

# Make sure we aren't writing to the home dir (CRAN _hates_ this but there is no official check)
BEFORE=$(ls -alh ~/)

# R program fed to ${R_BIN} on stdin. It is a double-quoted shell string, so
# the \" sequences reach R as plain double quotes.
SCRIPT="as_cran <- !identical(tolower(Sys.getenv('NOT_CRAN')), 'true')
  if (as_cran) {
    args <- '--as-cran'
    build_args <- character()
  } else {
    args <- c('--no-manual', '--ignore-vignettes')
    build_args <- '--no-build-vignettes'

    if (nzchar(Sys.which('minio'))) {
      message('Running minio for S3 tests (if build supports them)')
      minio_dir <- tempfile()
      dir.create(minio_dir)
      pid <- sys::exec_background('minio', c('server', minio_dir))
      on.exit(tools::pskill(pid))
    }
  }

  run_donttest <- identical(tolower(Sys.getenv('_R_CHECK_DONTTEST_EXAMPLES_', 'true')), 'true')
  if (run_donttest) {
    args <- c(args, '--run-donttest')
  }

  install_args <- Sys.getenv('INSTALL_ARGS')
  if (nzchar(install_args)) {
    args <- c(args, paste0('--install-args=\"', install_args, '\"'))
  }

  rcmdcheck::rcmdcheck(build_args = build_args, args = args, error_on = 'warning', check_dir = 'check', timeout = 3600)"
echo "$SCRIPT" | ${R_BIN} --no-save

AFTER=$(ls -alh ~/)
if [ "$NOT_CRAN" != "true" ] && [ "$BEFORE" != "$AFTER" ]; then
  # Diagnostic only: under `set -e` a missing ~/.cmake/packages must not abort
  # the script before the explicit failure below.
  ls -alh ~/.cmake/packages || true
  exit 1
fi
popd
# Installs the arrow R package and runs its test suite under valgrind.
# Arguments: $1 - root of the Arrow checkout (the R package is in $1/r)
# Environment: R_BIN (default "RDvalgrind"), INSTALL_ARGS

set -ex

: "${R_BIN:=RDvalgrind}"

source_dir="${1}/r"

export CMAKE_BUILD_TYPE=RelWithDebInfo

# ${R_BIN} and ${INSTALL_ARGS} are deliberately unquoted: both may hold
# several words
${R_BIN} CMD INSTALL ${INSTALL_ARGS} "${source_dir}"
pushd "${source_dir}/tests"

# to generate suppression files run:
# ${R_BIN} --vanilla -d "valgrind --tool=memcheck --leak-check=full --track-origins=yes --gen-suppressions=all --log-file=memcheck.log" -f testtthat.supp
# NOTE(review): the suppressions path is "/" followed by $1, so $1 is
# presumably expected to be an absolute path - confirm.
${R_BIN} --vanilla -d "valgrind --tool=memcheck --leak-check=full --track-origins=yes --suppressions=/${1}/ci/etc/valgrind-cran.supp" -f testthat.R |& tee testthat.out

# valgrind --error-exitcode=1 should return an erroring exit code that we can catch,
# but R eats that and returns 0, so we need to look at the output and make sure that
# we have 0 errors instead.
if [ "$(grep -c "ERROR SUMMARY: 0 errors" testthat.out)" != 1 ]; then
  cat testthat.out
  echo "Found Valgrind errors"
  exit 1
fi

# We might also consider using the greps that LightGBM uses:
# https://github.com/microsoft/LightGBM/blob/fa6d356555f9ef888acf5f5e259dca958ca24f6d/.ci/test_r_package_valgrind.sh#L20-L85

popd
# Builds libarrow for Windows with makepkg-mingw and repackages the result in
# the zip layout that rwinlib expects.
# Environment: ARROW_HOME (checkout root, defaults to the current directory),
# RTOOLS_VERSION ("35" selects the rtools-backports toolchain), MINGW_ARCH
# (space-separated list of architectures to build).

set -ex

: "${ARROW_HOME:=$(pwd)}"
# Make sure it is absolute and exported. Assignment and export are separate
# statements so that a failing `cd` is not masked by `export`.
ARROW_HOME="$(cd "${ARROW_HOME}" && pwd)"
export ARROW_HOME

if [ "$RTOOLS_VERSION" = "35" ]; then
  # Use rtools-backports if building with rtools35
  curl https://raw.githubusercontent.com/r-windows/rtools-backports/master/pacman.conf > /etc/pacman.conf
  pacman --noconfirm -Syy
  # lib-4.9.3 is for libraries compiled with gcc 4.9 (Rtools 3.5)
  RWINLIB_LIB_DIR="lib-4.9.3"
  # This is the default (will build for each arch) but we can set up CI to
  # do these in parallel
  : "${MINGW_ARCH:=mingw32 mingw64}"
else
  # Uncomment the next four lines if you're testing a new rtools dependency that hasn't yet sync'd to CRAN
  # curl https://raw.githubusercontent.com/r-windows/rtools-packages/master/pacman.conf > /etc/pacman.conf
  # curl -OSsl "http://repo.msys2.org/msys/x86_64/msys2-keyring-r21.b39fb11-1-any.pkg.tar.xz"
  # pacman -U --noconfirm msys2-keyring-r21.b39fb11-1-any.pkg.tar.xz && rm msys2-keyring-r21.b39fb11-1-any.pkg.tar.xz
  # pacman --noconfirm -Scc

  pacman --noconfirm -Syy
  RWINLIB_LIB_DIR="lib"
  : "${MINGW_ARCH:=mingw32 mingw64 ucrt64}"
fi

export MINGW_ARCH

cp "$ARROW_HOME/ci/scripts/PKGBUILD" .
printenv
makepkg-mingw --noconfirm --noprogressbar --skippgpcheck --nocheck --syncdeps --cleanbuild

VERSION=$(grep Version "$ARROW_HOME/r/DESCRIPTION" | cut -d " " -f 2)
DST_DIR="arrow-$VERSION"

# Collect the build artifacts and make the shape of zip file that rwinlib expects
ls
mkdir -p build
mv mingw* build
cd build

# This may vary by system/CI provider
MSYS_LIB_DIR="/c/rtools40"

# Untar the builds we made (glob instead of parsing `ls` output)
for pkg in *.xz; do
  tar -xJf "$pkg"
done
mkdir -p "$DST_DIR"
# Grab the headers from one, either one is fine
# (if we're building twice to combine old and new toolchains, this may already exist)
if [ ! -d "$DST_DIR/include" ]; then
  # first entry of the space-separated MINGW_ARCH list
  mv "${MINGW_ARCH%% *}/include" "$DST_DIR"
fi

# mingw64 -> x64
# mingw32 -> i386
# ucrt64 -> x64-ucrt

if [ -d mingw64/lib/ ]; then
  ls "$MSYS_LIB_DIR/mingw64/lib/"
  # Make the rest of the directory structure
  # lib-4.9.3 is for libraries compiled with gcc 4.9 (Rtools 3.5)
  mkdir -p "$DST_DIR/${RWINLIB_LIB_DIR}/x64"
  # lib is for the new gcc 8 toolchain (Rtools 4.0)
  mkdir -p "$DST_DIR/lib/x64"
  # Move the 64-bit versions of libarrow into the expected location
  mv mingw64/lib/*.a "$DST_DIR/${RWINLIB_LIB_DIR}/x64"
  # These may be from https://dl.bintray.com/rtools/backports/
  cp "$MSYS_LIB_DIR"/mingw64/lib/lib{thrift,snappy}.a "$DST_DIR/${RWINLIB_LIB_DIR}/x64"
  # These are from https://dl.bintray.com/rtools/mingw{32,64}/
  cp "$MSYS_LIB_DIR"/mingw64/lib/lib{zstd,lz4,crypto,utf8proc,re2,aws*}.a "$DST_DIR/lib/x64"
fi

# Same for the 32-bit versions
if [ -d mingw32/lib/ ]; then
  ls "$MSYS_LIB_DIR/mingw32/lib/"
  mkdir -p "$DST_DIR/${RWINLIB_LIB_DIR}/i386"
  mkdir -p "$DST_DIR/lib/i386"
  mv mingw32/lib/*.a "$DST_DIR/${RWINLIB_LIB_DIR}/i386"
  cp "$MSYS_LIB_DIR"/mingw32/lib/lib{thrift,snappy}.a "$DST_DIR/${RWINLIB_LIB_DIR}/i386"
  cp "$MSYS_LIB_DIR"/mingw32/lib/lib{zstd,lz4,crypto,utf8proc,re2,aws*}.a "$DST_DIR/lib/i386"
fi

# Do the same also for ucrt64
if [ -d ucrt64/lib/ ]; then
  ls "$MSYS_LIB_DIR/ucrt64/lib/"
  mkdir -p "$DST_DIR/lib/x64-ucrt"
  mv ucrt64/lib/*.a "$DST_DIR/lib/x64-ucrt"
  cp "$MSYS_LIB_DIR"/ucrt64/lib/lib{thrift,snappy,zstd,lz4,crypto,utf8proc,re2,aws*}.a "$DST_DIR/lib/x64-ucrt"
fi

# Create build artifact
zip -r "${DST_DIR}.zip" "$DST_DIR"

# Copy that to a file name/path that does not vary by version number so we
# can easily find it in the R package tests on CI
cp "${DST_DIR}.zip" ../libarrow.zip
#!/usr/bin/env bash

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

# Runs the release tooling tests (dev/release/run-test.rb).
# Arguments: $1 - root of the Arrow checkout

set -eux

arrow_dir="${1}"

# Quoted so a checkout path containing whitespace is not split
pushd "${arrow_dir}"

dev/release/run-test.rb

popd
# Runs the Ruby bindings' test suite through rake.
# Arguments: $1 - root of the Arrow checkout (Ruby sources are in $1/ruby)
#            $2 - root of the build tree (Ruby build dir is $2/ruby)
# Environment: ARROW_HOME (install prefix of the Arrow libraries)

set -ex

source_dir="${1}/ruby"
build_dir="${2}/ruby"

# Let the loader, pkg-config and GObject Introspection find the installed
# Arrow libraries
export LD_LIBRARY_PATH="${ARROW_HOME}/lib:${LD_LIBRARY_PATH}"
export PKG_CONFIG_PATH="${ARROW_HOME}/lib/pkgconfig"
export GI_TYPELIB_PATH="${ARROW_HOME}/lib/girepository-1.0"

# Quoted so paths containing whitespace reach rake as single arguments
rake -f "${source_dir}/Rakefile" BUILD_DIR="${build_dir}" USE_BUNDLER=yes
# This file is used to build the rust binaries needed for the archery
# integration tests. Testing of the rust implementation in normal CI is handled
# by github workflows in the arrow-rs repository.
# Arguments: $1 - root of the Arrow checkout (Rust sources expected in $1/rust)
# Environment: ARCHERY_INTEGRATION_WITH_RUST - "0" skips the build

set -e

arrow_dir="${1}"
source_dir="${1}/rust"

# Disable full debug symbol generation to speed up CI build / reduce memory required
export RUSTFLAGS="-C debuginfo=1"

export ARROW_TEST_DATA="${arrow_dir}/testing/data"
export PARQUET_TEST_DATA="${arrow_dir}/cpp/submodules/parquet-testing/data"

# String comparison: an unset or empty variable simply means "build", without
# the "integer expression expected" error that `-eq` raises on an empty value.
if [ "${ARCHERY_INTEGRATION_WITH_RUST:-}" = "0" ]; then
  echo "====================================================================="
  echo "Not building the Rust implementation."
  echo "====================================================================="
  exit 0
elif [ ! -d "${source_dir}" ]; then
  echo "====================================================================="
  echo "The Rust source is missing. Please clone the arrow-rs repository"
  echo "to arrow/rust before running the integration tests:"
  echo "  git clone https://github.com/apache/arrow-rs.git path/to/arrow/rust"
  echo "====================================================================="
  exit 1
fi

set -x

# show activated toolchain
rustup show

pushd "${source_dir}"

# build only the integration testing binaries
cargo build -p arrow-integration-testing

popd
# this script is github actions specific to check out the submodules and tags

# TODO(kszucs): remove it once the "submodules: recursive" feature is released
# Reuse the extra HTTP header stored in the local git config for github.com
# when fetching the submodules.
auth_header="$(git config --local --get http.https://github.com/.extraheader)"
git submodule sync --recursive
git -c "http.extraheader=$auth_header" -c protocol.version=2 submodule update --init --force --recursive --depth=1

# fetch all the tags (refspec quoted so the shell never tries to glob it)
git fetch --depth=1 origin '+refs/tags/*:refs/tags/*'
# This script is Github Actions-specific to free up disk space,
# to avoid disk full errors on some builds
# Environment: RUNNER_OS - cleanup only happens when it is "Linux"

# Quoted with a default so an unset RUNNER_OS is a clean "no" instead of a
# `[` syntax error
if [[ "${RUNNER_OS:-}" == "Linux" ]]; then
  df -h

  # remove swap
  sudo swapoff -a
  sudo rm -f /swapfile

  # clean apt cache
  sudo apt clean

  # remove haskell, consumes 8.6 GB
  sudo rm -rf /opt/ghc

  # 1 GB
  sudo rm -rf /home/linuxbrew/.linuxbrew

  # 1+ GB
  sudo rm -rf /opt/hostedtoolcache/CodeQL

  # 1+ GB
  sudo rm -rf /usr/share/swift

  # 12 GB, but takes a lot of time to delete
  #sudo rm -rf /usr/local/lib/android

  # remove cached docker images, around 13 GB
  # (skipped when there are none: `docker rmi` rejects an empty argument list)
  mapfile -t image_ids < <(docker image ls -aq)
  if (( ${#image_ids[@]} > 0 )); then
    docker rmi "${image_ids[@]}"
  fi

  # NOTE: /usr/share/dotnet is 25 GB
fi

df -h
# Downloads an Apache tarball and extracts it, trying a list of mirrors in
# order until one succeeds.
# Arguments: $1 - tarball path relative to the mirror root
#            $2 - directory to extract into (created if missing)
# Exit status: 0 after the first successful download + extraction, 1 if every
# mirror failed or the arguments are wrong.

set -x
# A mirror only counts as successful when both curl and tar succeed
set -o pipefail

if [ "$#" -ne 2 ]; then
  echo "Usage: $0 <apache tarball path> <target directory>"
  exit 1
fi

tarball_path=$1
target_dir=$2

APACHE_MIRRORS=(
  "http://www.apache.org/dyn/closer.cgi?action=download&filename="
  "https://downloads.apache.org"
  "https://apache.claz.org"
  "https://apache.cs.utah.edu"
  "https://apache.mirrors.lucidnetworks.net"
  "https://apache.osuosl.org"
  "https://ftp.wayne.edu/apache"
  "https://mirror.olnevhost.net/pub/apache"
  "https://mirrors.gigenet.com/apache"
  "https://mirrors.koehn.com/apache"
  "https://mirrors.ocf.berkeley.edu/apache"
  "https://mirrors.sonic.net/apache"
  "https://us.mirrors.quenda.co/apache"
)

mkdir -p "${target_dir}"

# "${APACHE_MIRRORS[@]}" keeps every URL a single word (the first one
# contains "?" and "&").
for mirror in "${APACHE_MIRRORS[@]}"; do
  # --fail makes curl exit non-zero on HTTP errors instead of piping the
  # server's error page into tar
  if curl --fail -SL "${mirror}/${tarball_path}" | tar -xzf - -C "${target_dir}"; then
    exit 0
  fi
done

exit 1
# Use this script to test if a given TCP host/port are available

cmdname=$(basename "$0")

# Prints its arguments on stderr unless quiet mode (QUIET=1) is active.
echoerr() { if [[ $QUIET -ne 1 ]]; then echo "$@" 1>&2; fi }

# Prints the usage text on stderr and exits with status 1.
usage() {
  cat << USAGE >&2
Usage:
    $cmdname host:port [-s] [-t timeout] [-- command args]
    -h HOST | --host=HOST       Host or IP under test
    -p PORT | --port=PORT       TCP port under test
                                Alternatively, you specify the host and port as host:port
    -s | --strict               Only execute subcommand if the test succeeds
    -q | --quiet                Don't output any status messages
    -t TIMEOUT | --timeout=TIMEOUT
                                Timeout in seconds, zero for no timeout
    -- COMMAND ARGS             Execute command with args after the test finishes
USAGE
  exit 1
}

# Polls $HOST:$PORT once per second until a connection succeeds.
# Globals: HOST, PORT, TIMEOUT (only used for the message), ISBUSY (1 = probe
# with `nc -z`, otherwise with bash's /dev/tcp redirection).
# Returns: status of the last probe. The loop itself never gives up; the
# timeout is enforced from outside by wait_for_wrapper.
wait_for() {
  local start_ts end_ts result
  if [[ $TIMEOUT -gt 0 ]]; then
    echoerr "$cmdname: waiting $TIMEOUT seconds for $HOST:$PORT"
  else
    echoerr "$cmdname: waiting for $HOST:$PORT without a timeout"
  fi
  start_ts=$(date +%s)
  while :; do
    if [[ $ISBUSY -eq 1 ]]; then
      nc -z "$HOST" "$PORT"
      result=$?
    else
      (echo > "/dev/tcp/$HOST/$PORT") >/dev/null 2>&1
      result=$?
    fi
    if [[ $result -eq 0 ]]; then
      end_ts=$(date +%s)
      echoerr "$cmdname: $HOST:$PORT is available after $((end_ts - start_ts)) seconds"
      break
    fi
    sleep 1
  done
  return $result
}

# Re-runs this script as a --child under `timeout` so the wait is bounded by
# $TIMEOUT seconds.
# Globals: HOST, PORT, TIMEOUT, QUIET, BUSYTIMEFLAG (read); PID, RESULT (written)
# Returns: exit status of the timeout/child process.
wait_for_wrapper() {
  local child_args=(--child "--host=$HOST" "--port=$PORT" "--timeout=$TIMEOUT")
  if [[ $QUIET -eq 1 ]]; then
    child_args=(--quiet "${child_args[@]}")
  fi
  # In order to support SIGINT during timeout: http://unix.stackexchange.com/a/57692
  # $BUSYTIMEFLAG stays unquoted: when empty it must vanish, not become an
  # empty argument.
  timeout $BUSYTIMEFLAG "$TIMEOUT" "$0" "${child_args[@]}" &
  PID=$!
  trap "kill -INT -$PID" INT
  wait "$PID"
  RESULT=$?
  if [[ $RESULT -ne 0 ]]; then
    echoerr "$cmdname: timeout occurred after waiting $TIMEOUT seconds for $HOST:$PORT"
  fi
  return $RESULT
}

# process arguments
while [[ $# -gt 0 ]]; do
  case "$1" in
    *:*)
      # host:port shorthand; split on ":" without globbing the pieces
      IFS=: read -r -a hostport <<< "$1"
      HOST=${hostport[0]}
      PORT=${hostport[1]}
      shift 1
      ;;
    --child)
      CHILD=1
      shift 1
      ;;
    -q | --quiet)
      QUIET=1
      shift 1
      ;;
    -s | --strict)
      STRICT=1
      shift 1
      ;;
    -h)
      HOST="$2"
      if [[ $HOST == "" ]]; then break; fi
      shift 2
      ;;
    --host=*)
      HOST="${1#*=}"
      shift 1
      ;;
    -p)
      PORT="$2"
      if [[ $PORT == "" ]]; then break; fi
      shift 2
      ;;
    --port=*)
      PORT="${1#*=}"
      shift 1
      ;;
    -t)
      TIMEOUT="$2"
      if [[ $TIMEOUT == "" ]]; then break; fi
      shift 2
      ;;
    --timeout=*)
      TIMEOUT="${1#*=}"
      shift 1
      ;;
    --)
      shift
      CLI=("$@")
      break
      ;;
    --help)
      usage
      ;;
    *)
      echoerr "Unknown argument: $1"
      usage
      ;;
  esac
done

if [[ "$HOST" == "" || "$PORT" == "" ]]; then
  echoerr "Error: you need to provide a host and port to test."
  usage
fi

TIMEOUT=${TIMEOUT:-15}
STRICT=${STRICT:-0}
CHILD=${CHILD:-0}
QUIET=${QUIET:-0}

# check to see if timeout is from busybox?
TIMEOUT_PATH=$(realpath "$(command -v timeout)")
if [[ $TIMEOUT_PATH =~ "busybox" ]]; then
  ISBUSY=1
  BUSYTIMEFLAG="-t"
else
  ISBUSY=0
  BUSYTIMEFLAG=""
fi

if [[ $CHILD -gt 0 ]]; then
  # Child mode: just poll; the parent's `timeout` bounds the run time
  wait_for
  RESULT=$?
  exit $RESULT
else
  if [[ $TIMEOUT -gt 0 ]]; then
    wait_for_wrapper
    RESULT=$?
  else
    wait_for
    RESULT=$?
  fi
fi

# Run the command given after "--", if any (first word non-empty)
if [[ "${CLI[0]:-}" != "" ]]; then
  if [[ $RESULT -ne 0 && $STRICT -eq 1 ]]; then
    echoerr "$cmdname: strict mode, refusing to execute subprocess"
    exit $RESULT
  fi
  exec "${CLI[@]}"
else
  exit $RESULT
fi