diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000 |
commit | e6918187568dbd01842d8d1d2c808ce16a894239 (patch) | |
tree | 64f88b554b444a49f656b6c656111a145cbbaa28 /src/arrow/ci | |
parent | Initial commit. (diff) | |
download | ceph-e6918187568dbd01842d8d1d2c808ce16a894239.tar.xz ceph-e6918187568dbd01842d8d1d2c808ce16a894239.zip |
Adding upstream version 18.2.2.upstream/18.2.2
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
140 files changed, 8334 insertions, 0 deletions
diff --git a/src/arrow/ci/appveyor-cpp-build.bat b/src/arrow/ci/appveyor-cpp-build.bat new file mode 100644 index 000000000..534f73c2d --- /dev/null +++ b/src/arrow/ci/appveyor-cpp-build.bat @@ -0,0 +1,163 @@ +@rem Licensed to the Apache Software Foundation (ASF) under one +@rem or more contributor license agreements. See the NOTICE file +@rem distributed with this work for additional information +@rem regarding copyright ownership. The ASF licenses this file +@rem to you under the Apache License, Version 2.0 (the +@rem "License"); you may not use this file except in compliance +@rem with the License. You may obtain a copy of the License at +@rem +@rem http://www.apache.org/licenses/LICENSE-2.0 +@rem +@rem Unless required by applicable law or agreed to in writing, +@rem software distributed under the License is distributed on an +@rem "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +@rem KIND, either express or implied. See the License for the +@rem specific language governing permissions and limitations +@rem under the License. + +@echo on + +git config core.symlinks true +git reset --hard + +@rem Retrieve git submodules, configure env var for Parquet unit tests +git submodule update --init || exit /B + +set ARROW_TEST_DATA=%CD%\testing\data +set PARQUET_TEST_DATA=%CD%\cpp\submodules\parquet-testing\data + +@rem +@rem In the configurations below we disable building the Arrow static library +@rem to save some time. Unfortunately this will still build the Parquet static +@rem library because of PARQUET-1420 (Thrift-generated symbols not exported in DLL). 
+@rem +if "%JOB%" == "Build_Debug" ( + mkdir cpp\build-debug + pushd cpp\build-debug + + cmake -G "%GENERATOR%" ^ + -DARROW_BOOST_USE_SHARED=OFF ^ + -DARROW_BUILD_EXAMPLES=ON ^ + -DARROW_BUILD_STATIC=OFF ^ + -DARROW_BUILD_TESTS=ON ^ + -DARROW_CXXFLAGS="/MP" ^ + -DARROW_ENABLE_TIMING_TESTS=OFF ^ + -DARROW_USE_PRECOMPILED_HEADERS=OFF ^ + -DARROW_VERBOSE_THIRDPARTY_BUILD=OFF ^ + -DCMAKE_BUILD_TYPE="Debug" ^ + -DCMAKE_UNITY_BUILD=ON ^ + .. || exit /B + + cmake --build . --config Debug || exit /B + ctest --output-on-failure -j2 || exit /B + popd + + @rem Finish Debug build successfully + exit /B 0 +) + +call activate arrow + +@rem Use Boost from Anaconda +set BOOST_ROOT=%CONDA_PREFIX%\Library +set BOOST_LIBRARYDIR=%CONDA_PREFIX%\Library\lib + +@rem The "main" C++ build script for Windows CI +@rem (i.e. for usual configurations) + +if "%JOB%" == "Toolchain" ( + set CMAKE_ARGS=-DARROW_DEPENDENCY_SOURCE=CONDA -DARROW_WITH_BZ2=ON +) else ( + @rem We're in a conda environment but don't want to use it for the dependencies + set CMAKE_ARGS=-DARROW_DEPENDENCY_SOURCE=AUTO +) + +@rem Enable warnings-as-errors +set ARROW_CXXFLAGS=/WX /MP + +@rem +@rem Build and test Arrow C++ libraries (including Parquet) +@rem + +mkdir cpp\build +pushd cpp\build + +@rem XXX Without forcing CMAKE_CXX_COMPILER, CMake can re-run itself and +@rem unfortunately switch from Release to Debug mode... +@rem +@rem In release mode, disable optimizations (/Od) for faster compiling +@rem and enable runtime assertions. 
+ +cmake -G "%GENERATOR%" %CMAKE_ARGS% ^ + -DARROW_BOOST_USE_SHARED=ON ^ + -DARROW_BUILD_EXAMPLES=ON ^ + -DARROW_BUILD_STATIC=OFF ^ + -DARROW_BUILD_TESTS=ON ^ + -DARROW_CSV=ON ^ + -DARROW_CXXFLAGS="%ARROW_CXXFLAGS%" ^ + -DARROW_DATASET=ON ^ + -DARROW_ENABLE_TIMING_TESTS=OFF ^ + -DARROW_ENGINE=ON ^ + -DARROW_FLIGHT=%ARROW_BUILD_FLIGHT% ^ + -DARROW_GANDIVA=%ARROW_BUILD_GANDIVA% ^ + -DARROW_MIMALLOC=ON ^ + -DARROW_PARQUET=ON ^ + -DARROW_PYTHON=ON ^ + -DARROW_S3=%ARROW_S3% ^ + -DARROW_VERBOSE_THIRDPARTY_BUILD=OFF ^ + -DARROW_WITH_BROTLI=ON ^ + -DARROW_WITH_LZ4=ON ^ + -DARROW_WITH_SNAPPY=ON ^ + -DARROW_WITH_ZLIB=ON ^ + -DARROW_WITH_ZSTD=ON ^ + -DCMAKE_BUILD_TYPE="Release" ^ + -DCMAKE_CXX_COMPILER=clcache ^ + -DCMAKE_CXX_FLAGS_RELEASE="/MD /Od /UNDEBUG" ^ + -DCMAKE_INSTALL_PREFIX=%CONDA_PREFIX%\Library ^ + -DCMAKE_UNITY_BUILD=ON ^ + -DCMAKE_VERBOSE_MAKEFILE=OFF ^ + -DPARQUET_BUILD_EXECUTABLES=ON ^ + -DPARQUET_REQUIRE_ENCRYPTION=ON ^ + .. || exit /B +cmake --build . --target install --config %CONFIGURATION% || exit /B + +@rem Needed so arrow-python-test.exe works +set OLD_PYTHONHOME=%PYTHONHOME% +set PYTHONHOME=%CONDA_PREFIX% + +ctest --output-on-failure -j2 || exit /B + +set PYTHONHOME=%OLD_PYTHONHOME% +popd + +@rem +@rem Build and install pyarrow +@rem + +pushd python + +set PYARROW_BUNDLE_BOOST=OFF +set PYARROW_CMAKE_GENERATOR=%GENERATOR% +set PYARROW_CXXFLAGS=%ARROW_CXXFLAGS% +set PYARROW_PARALLEL=2 +set PYARROW_WITH_DATASET=ON +set PYARROW_WITH_FLIGHT=%ARROW_BUILD_FLIGHT% +set PYARROW_WITH_GANDIVA=%ARROW_BUILD_GANDIVA% +set PYARROW_WITH_PARQUET=ON +set PYARROW_WITH_S3=%ARROW_S3% +set PYARROW_WITH_STATIC_BOOST=ON + +set ARROW_HOME=%CONDA_PREFIX%\Library +@rem ARROW-3075; pkgconfig is broken for Parquet for now +set PARQUET_HOME=%CONDA_PREFIX%\Library + +python setup.py develop -q || exit /B + +set PYTHONDEVMODE=1 + +py.test -r sxX --durations=15 --pyargs pyarrow.tests || exit /B + +@rem +@rem Wheels are built and tested separately (see ARROW-5142). 
+@rem + diff --git a/src/arrow/ci/appveyor-cpp-setup.bat b/src/arrow/ci/appveyor-cpp-setup.bat new file mode 100644 index 000000000..cee9bc28e --- /dev/null +++ b/src/arrow/ci/appveyor-cpp-setup.bat @@ -0,0 +1,108 @@ +@rem Licensed to the Apache Software Foundation (ASF) under one +@rem or more contributor license agreements. See the NOTICE file +@rem distributed with this work for additional information +@rem regarding copyright ownership. The ASF licenses this file +@rem to you under the Apache License, Version 2.0 (the +@rem "License"); you may not use this file except in compliance +@rem with the License. You may obtain a copy of the License at +@rem +@rem http://www.apache.org/licenses/LICENSE-2.0 +@rem +@rem Unless required by applicable law or agreed to in writing, +@rem software distributed under the License is distributed on an +@rem "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +@rem KIND, either express or implied. See the License for the +@rem specific language governing permissions and limitations +@rem under the License. + +@echo on + +set "PATH=C:\Miniconda37-x64;C:\Miniconda37-x64\Scripts;C:\Miniconda37-x64\Library\bin;%PATH%" +set BOOST_ROOT=C:\Libraries\boost_1_67_0 +set BOOST_LIBRARYDIR=C:\Libraries\boost_1_67_0\lib64-msvc-14.0 + +@rem +@rem Avoid picking up AppVeyor-installed OpenSSL (linker errors with gRPC) +@rem XXX Perhaps there is a smarter way of solving this issue? 
+@rem +rd /s /q C:\OpenSSL-Win32 +rd /s /q C:\OpenSSL-Win64 +rd /s /q C:\OpenSSL-v11-Win32 +rd /s /q C:\OpenSSL-v11-Win64 +rd /s /q C:\OpenSSL-v111-Win32 +rd /s /q C:\OpenSSL-v111-Win64 + +@rem +@rem Configure miniconda +@rem +conda config --set auto_update_conda false +conda config --set show_channel_urls True +@rem Help with SSL timeouts to S3 +conda config --set remote_connect_timeout_secs 12 +@rem Workaround for ARROW-13636 +conda config --append disallowed_packages pypy3 +conda info -a + +@rem +@rem Create conda environment for Build and Toolchain jobs +@rem +@rem Avoid Boost 1.70 because of https://github.com/boostorg/process/issues/85 + +set CONDA_PACKAGES= + +if "%ARROW_BUILD_GANDIVA%" == "ON" ( + @rem Install llvmdev in the toolchain if building gandiva.dll + set CONDA_PACKAGES=%CONDA_PACKAGES% --file=ci\conda_env_gandiva_win.txt +) +if "%JOB%" == "Toolchain" ( + @rem Install pre-built "toolchain" packages for faster builds + set CONDA_PACKAGES=%CONDA_PACKAGES% --file=ci\conda_env_cpp.txt +) +if "%JOB%" NEQ "Build_Debug" ( + @rem Arrow conda environment is only required for the Build and Toolchain jobs + conda create -n arrow -q -y -c conda-forge ^ + --file=ci\conda_env_python.txt ^ + %CONDA_PACKAGES% ^ + "cmake=3.17" ^ + "ninja" ^ + "nomkl" ^ + "pandas" ^ + "fsspec" ^ + "python=%PYTHON%" ^ + || exit /B + + @rem On Windows, GTest is always bundled from source instead of using + @rem conda binaries, avoid any interference between the two versions. 
+ if "%JOB%" == "Toolchain" ( + conda uninstall -n arrow -q -y -c conda-forge gtest + ) +) + +@rem +@rem Configure compiler +@rem +if "%GENERATOR%"=="Ninja" set need_vcvarsall=1 +if defined need_vcvarsall ( + if "%APPVEYOR_BUILD_WORKER_IMAGE%" NEQ "Visual Studio 2017" ( + @rem ARROW-14070 Visual Studio 2015 no longer supported + exit /B + ) + call "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Auxiliary\Build\vcvarsall.bat" amd64 +) + +@rem +@rem Use clcache for faster builds +@rem +pip install -q clcache-alt || exit /B +@rem Limit cache size to 500 MB +clcache -M 500000000 +clcache -c +clcache -s +powershell.exe -Command "Start-Process clcache-server" || exit /B + +@rem +@rem Download Minio somewhere on PATH, for unit tests +@rem +if "%ARROW_S3%" == "ON" ( + appveyor DownloadFile https://dl.min.io/server/minio/release/windows-amd64/minio.exe -FileName C:\Windows\Minio.exe || exit /B +) diff --git a/src/arrow/ci/conda_env_archery.txt b/src/arrow/ci/conda_env_archery.txt new file mode 100644 index 000000000..ace7a42ac --- /dev/null +++ b/src/arrow/ci/conda_env_archery.txt @@ -0,0 +1,42 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +# cli +click + +# bot, crossbow +github3.py +jinja2 +jira +pygit2 +pygithub +ruamel.yaml +setuptools_scm +toolz + +# benchmark +pandas + +# docker +python-dotenv +#ruamel.yaml + +# release +gitpython +#jinja2 +#jira +semver diff --git a/src/arrow/ci/conda_env_cpp.txt b/src/arrow/ci/conda_env_cpp.txt new file mode 100644 index 000000000..d2ccb66a2 --- /dev/null +++ b/src/arrow/ci/conda_env_cpp.txt @@ -0,0 +1,42 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +aws-sdk-cpp +benchmark>=1.5.4 +boost-cpp>=1.68.0 +brotli +bzip2 +c-ares +cmake +gflags +glog +gmock>=1.10.0 +grpc-cpp>=1.27.3 +gtest=1.10.0 +libprotobuf +libutf8proc +lz4-c +make +ninja +pkg-config +python +rapidjson +re2 +snappy +thrift-cpp>=0.11.0 +zlib +zstd diff --git a/src/arrow/ci/conda_env_crossbow.txt b/src/arrow/ci/conda_env_crossbow.txt new file mode 100644 index 000000000..347294650 --- /dev/null +++ b/src/arrow/ci/conda_env_crossbow.txt @@ -0,0 +1,25 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +click +github3.py +jinja2 +jira +pygit2 +ruamel.yaml +setuptools_scm +toolz diff --git a/src/arrow/ci/conda_env_gandiva.txt b/src/arrow/ci/conda_env_gandiva.txt new file mode 100644 index 000000000..024b9fe74 --- /dev/null +++ b/src/arrow/ci/conda_env_gandiva.txt @@ -0,0 +1,19 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +clang=11 +llvmdev=11 diff --git a/src/arrow/ci/conda_env_gandiva_win.txt b/src/arrow/ci/conda_env_gandiva_win.txt new file mode 100644 index 000000000..9098b53d1 --- /dev/null +++ b/src/arrow/ci/conda_env_gandiva_win.txt @@ -0,0 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# llvmdev=9 or later require Visual Studio 2017 +clangdev=8 +llvmdev=8 diff --git a/src/arrow/ci/conda_env_python.txt b/src/arrow/ci/conda_env_python.txt new file mode 100644 index 000000000..5f4d4c815 --- /dev/null +++ b/src/arrow/ci/conda_env_python.txt @@ -0,0 +1,32 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# don't add pandas here, because it is not a mandatory test dependency +boto3 # not a direct dependency of s3fs, but needed for our s3fs fixture +cffi +cython +cloudpickle +fsspec +hypothesis +numpy>=1.16.6 +pytest +pytest-faulthandler +pytest-lazy-fixture +pytz +s3fs>=2021.8.0 +setuptools +setuptools_scm diff --git a/src/arrow/ci/conda_env_r.txt b/src/arrow/ci/conda_env_r.txt new file mode 100644 index 000000000..151aefb60 --- /dev/null +++ b/src/arrow/ci/conda_env_r.txt @@ -0,0 +1,38 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +r-assertthat +r-base +r-bit64 +r-dplyr +r-purrr +r-r6 +# We are currently vendoring cpp11; restore in ARROW-13610 +# r-cpp11 +r-rlang +r-tidyselect +r-vctrs +# Test/"Suggests" dependencies +pandoc +r-covr +r-hms +r-lubridate +r-rcmdcheck +r-reticulate +r-rmarkdown +r-testthat +r-tibble diff --git a/src/arrow/ci/conda_env_sphinx.txt b/src/arrow/ci/conda_env_sphinx.txt new file mode 100644 index 000000000..64e1c16a5 --- /dev/null +++ b/src/arrow/ci/conda_env_sphinx.txt @@ -0,0 +1,23 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# Requirements for building the documentation +breathe +doxygen +ipython +sphinx=4.2 +pydata-sphinx-theme diff --git a/src/arrow/ci/conda_env_unix.txt b/src/arrow/ci/conda_env_unix.txt new file mode 100644 index 000000000..1973238ad --- /dev/null +++ b/src/arrow/ci/conda_env_unix.txt @@ -0,0 +1,23 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# conda package dependencies specific to Unix-like environments (Linux and macOS) + +autoconf +ccache +orc +pkg-config diff --git a/src/arrow/ci/detect-changes.py b/src/arrow/ci/detect-changes.py new file mode 100644 index 000000000..14e71ed48 --- /dev/null +++ b/src/arrow/ci/detect-changes.py @@ -0,0 +1,362 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +from __future__ import print_function + +import functools +import os +import pprint +import re +import sys +import subprocess + + +perr = functools.partial(print, file=sys.stderr) + + +def dump_env_vars(prefix, pattern=None): + if pattern is not None: + match = lambda s: re.search(pattern, s) + else: + match = lambda s: True + for name in sorted(os.environ): + if name.startswith(prefix) and match(name): + perr("- {0}: {1!r}".format(name, os.environ[name])) + + +def run_cmd(cmdline): + proc = subprocess.Popen(cmdline, + stdout=subprocess.PIPE, stderr=subprocess.PIPE) + out, err = proc.communicate() + if proc.returncode != 0: + raise RuntimeError("Command {cmdline} failed with code {returncode}, " + "stderr was:\n{stderr}\n" + .format(cmdline=cmdline, returncode=proc.returncode, + stderr=err.decode())) + return out + + +def get_commit_description(commit): + """ + Return the textual description (title + body) of the given git commit. + """ + out = run_cmd(["git", "show", "--no-patch", "--pretty=format:%B", + commit]) + return out.decode('utf-8', 'ignore') + + +def list_affected_files(commit_range): + """ + Return a list of files changed by the given git commit range. + """ + perr("Getting affected files from", repr(commit_range)) + out = run_cmd(["git", "diff", "--name-only", commit_range]) + return list(filter(None, (s.strip() for s in out.decode().splitlines()))) + + +def get_travis_head_commit(): + return os.environ['TRAVIS_COMMIT'] + + +def get_travis_commit_range(): + if os.environ['TRAVIS_EVENT_TYPE'] == 'pull_request': + # TRAVIS_COMMIT_RANGE is too pessimistic for PRs, as it may contain + # unrelated changes. Instead, use the same strategy as on AppVeyor + # below. 
+ run_cmd(["git", "fetch", "-q", "origin", + "+refs/heads/{0}".format(os.environ['TRAVIS_BRANCH'])]) + merge_base = run_cmd(["git", "merge-base", + "HEAD", "FETCH_HEAD"]).decode().strip() + return "{0}..HEAD".format(merge_base) + else: + cr = os.environ['TRAVIS_COMMIT_RANGE'] + # See + # https://github.com/travis-ci/travis-ci/issues/4596#issuecomment-139811122 + return cr.replace('...', '..') + + +def get_travis_commit_description(): + # Prefer this to get_commit_description(get_travis_head_commit()), + # as rebasing or other repository events may make TRAVIS_COMMIT invalid + # at the time we inspect it + return os.environ['TRAVIS_COMMIT_MESSAGE'] + + +def list_travis_affected_files(): + """ + Return a list of files affected in the current Travis build. + """ + commit_range = get_travis_commit_range() + try: + return list_affected_files(commit_range) + except RuntimeError: + # TRAVIS_COMMIT_RANGE can contain invalid revisions when + # building a branch (not a PR) after rebasing: + # https://github.com/travis-ci/travis-ci/issues/2668 + if os.environ['TRAVIS_EVENT_TYPE'] == 'pull_request': + raise + # If it's a rebase, it's probably enough to use the last commit only + commit_range = '{0}^..'.format(get_travis_head_commit()) + return list_affected_files(commit_range) + + +def list_appveyor_affected_files(): + """ + Return a list of files affected in the current AppVeyor build. + This only works for PR builds. + """ + # Re-fetch PR base branch (e.g. 
origin/master), pointing FETCH_HEAD to it + run_cmd(["git", "fetch", "-q", "origin", + "+refs/heads/{0}".format(os.environ['APPVEYOR_REPO_BRANCH'])]) + # Compute base changeset between FETCH_HEAD (PR base) and HEAD (PR head) + merge_base = run_cmd(["git", "merge-base", + "HEAD", "FETCH_HEAD"]).decode().strip() + # Compute changes files between base changeset and HEAD + return list_affected_files("{0}..HEAD".format(merge_base)) + + +def list_github_actions_affected_files(): + """ + Return a list of files affected in the current GitHub Actions build. + """ + # GitHub Actions checkout `refs/remotes/pull/$PR/merge` where `HEAD` points + # to the merge commit while `HEAD^` points to the commit before. Hence, + # `..HEAD^` points to all commit between master and the PR. + return list_affected_files("HEAD^..") + + +LANGUAGE_TOPICS = ['c_glib', 'cpp', 'docs', 'go', 'java', 'js', 'python', + 'r', 'ruby', 'csharp'] + +ALL_TOPICS = LANGUAGE_TOPICS + ['integration', 'dev'] + + +AFFECTED_DEPENDENCIES = { + 'java': ['integration', 'python'], + 'js': ['integration'], + 'ci': ALL_TOPICS, + 'cpp': ['python', 'c_glib', 'r', 'ruby', 'integration'], + 'format': LANGUAGE_TOPICS, + 'go': ['integration'], + '.travis.yml': ALL_TOPICS, + 'appveyor.yml': ALL_TOPICS, + # In theory, it should ignore CONTRIBUTING.md and ISSUE_TEMPLATE.md, but in + # practice it's going to be CI + '.github': ALL_TOPICS, + 'c_glib': ['ruby'] +} + +COMPONENTS = {'cpp', 'java', 'c_glib', 'r', 'ruby', 'integration', 'js', + 'csharp', 'go', 'docs', 'python', 'dev'} + + +def get_affected_topics(affected_files): + """ + Return a dict of topics affected by the given files. + Each dict value is True if affected, False otherwise. 
+ """ + affected = dict.fromkeys(ALL_TOPICS, False) + + for path in affected_files: + parts = [] + head = path + while head: + head, tail = os.path.split(head) + parts.append(tail) + parts.reverse() + assert parts + p = parts[0] + fn = parts[-1] + if fn.startswith('README'): + continue + + if p in COMPONENTS: + affected[p] = True + + _path_already_affected = {} + + def _affect_dependencies(component): + if component in _path_already_affected: + # For circular dependencies, terminate + return + for topic in AFFECTED_DEPENDENCIES.get(component, ()): + affected[topic] = True + _affect_dependencies(topic) + _path_already_affected[topic] = True + + _affect_dependencies(p) + + return affected + + +def make_env_for_topics(affected): + return {'ARROW_CI_{0}_AFFECTED'.format(k.upper()): '1' if v else '0' + for k, v in affected.items()} + + +def get_unix_shell_eval(env): + """ + Return a shell-evalable string to setup some environment variables. + """ + return "; ".join(("export {0}='{1}'".format(k, v) + for k, v in env.items())) + + +def get_windows_shell_eval(env): + """ + Return a shell-evalable string to setup some environment variables. 
+ """ + return "\n".join(('set "{0}={1}"'.format(k, v) + for k, v in env.items())) + + +def run_from_travis(): + perr("Environment variables (excerpt):") + dump_env_vars('TRAVIS_', '(BRANCH|COMMIT|PULL)') + if (os.environ['TRAVIS_REPO_SLUG'] == 'apache/arrow' and + os.environ['TRAVIS_BRANCH'] == 'master' and + os.environ['TRAVIS_EVENT_TYPE'] != 'pull_request'): + # Never skip anything on master builds in the official repository + affected = dict.fromkeys(ALL_TOPICS, True) + else: + desc = get_travis_commit_description() + if '[skip travis]' in desc: + # Skip everything + affected = dict.fromkeys(ALL_TOPICS, False) + elif '[force ci]' in desc or '[force travis]' in desc: + # Test everything + affected = dict.fromkeys(ALL_TOPICS, True) + else: + # Test affected topics + affected_files = list_travis_affected_files() + perr("Affected files:", affected_files) + affected = get_affected_topics(affected_files) + assert set(affected) <= set(ALL_TOPICS), affected + + perr("Affected topics:") + perr(pprint.pformat(affected)) + return get_unix_shell_eval(make_env_for_topics(affected)) + + +def run_from_appveyor(): + perr("Environment variables (excerpt):") + dump_env_vars('APPVEYOR_', '(PULL|REPO)') + if not os.environ.get('APPVEYOR_PULL_REQUEST_HEAD_COMMIT'): + # Not a PR build, test everything + affected = dict.fromkeys(ALL_TOPICS, True) + else: + affected_files = list_appveyor_affected_files() + perr("Affected files:", affected_files) + affected = get_affected_topics(affected_files) + assert set(affected) <= set(ALL_TOPICS), affected + + perr("Affected topics:") + perr(pprint.pformat(affected)) + return get_windows_shell_eval(make_env_for_topics(affected)) + + +def run_from_github(): + perr("Environment variables (excerpt):") + dump_env_vars('GITHUB_', '(REPOSITORY|ACTOR|SHA|REF|HEAD_REF|BASE_REF|EVENT_NAME)') + if os.environ['GITHUB_EVENT_NAME'] != 'pull_request': + # Not a PR build, test everything + affected = dict.fromkeys(ALL_TOPICS, True) + else: + affected_files = 
list_github_actions_affected_files() + perr("Affected files:", affected_files) + affected = get_affected_topics(affected_files) + assert set(affected) <= set(ALL_TOPICS), affected + + perr("Affected topics:") + perr(pprint.pformat(affected)) + return get_unix_shell_eval(make_env_for_topics(affected)) + + +def test_get_affected_topics(): + affected_topics = get_affected_topics(['cpp/CMakeLists.txt']) + assert affected_topics == { + 'c_glib': True, + 'cpp': True, + 'docs': False, + 'go': False, + 'java': False, + 'js': False, + 'python': True, + 'r': True, + 'ruby': True, + 'csharp': False, + 'integration': True, + 'dev': False + } + + affected_topics = get_affected_topics(['format/Schema.fbs']) + assert affected_topics == { + 'c_glib': True, + 'cpp': True, + 'docs': True, + 'go': True, + 'java': True, + 'js': True, + 'python': True, + 'r': True, + 'ruby': True, + 'csharp': True, + 'integration': True, + 'dev': False + } + + affected_topics = get_affected_topics(['.github/workflows']) + assert affected_topics == { + 'c_glib': True, + 'cpp': True, + 'docs': True, + 'go': True, + 'java': True, + 'js': True, + 'python': True, + 'r': True, + 'ruby': True, + 'csharp': True, + 'integration': True, + 'dev': True, + } + + +if __name__ == "__main__": + # This script should have its output evaluated by a shell, + # e.g. 
"eval `python ci/detect-changes.py`" + if os.environ.get('TRAVIS'): + try: + print(run_from_travis()) + except Exception: + # Make sure the enclosing eval will return an error + print("exit 1") + raise + elif os.environ.get('APPVEYOR'): + try: + print(run_from_appveyor()) + except Exception: + print("exit 1") + raise + elif os.environ.get('GITHUB_WORKFLOW'): + try: + print(run_from_github()) + except Exception: + print("exit 1") + raise + else: + sys.exit("Script must be run under Travis-CI, AppVeyor or GitHub Actions") diff --git a/src/arrow/ci/docker/conda-cpp.dockerfile b/src/arrow/ci/docker/conda-cpp.dockerfile new file mode 100644 index 000000000..ff31930c0 --- /dev/null +++ b/src/arrow/ci/docker/conda-cpp.dockerfile @@ -0,0 +1,53 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +ARG repo +ARG arch +FROM ${repo}:${arch}-conda + +# install the required conda packages into the test environment +COPY ci/conda_env_cpp.txt \ + ci/conda_env_gandiva.txt \ + /arrow/ci/ +RUN conda install \ + --file arrow/ci/conda_env_cpp.txt \ + --file arrow/ci/conda_env_gandiva.txt \ + compilers \ + doxygen \ + valgrind && \ + conda clean --all + +ENV ARROW_BUILD_TESTS=ON \ + ARROW_DATASET=ON \ + ARROW_DEPENDENCY_SOURCE=CONDA \ + ARROW_FLIGHT=ON \ + ARROW_GANDIVA=ON \ + ARROW_HOME=$CONDA_PREFIX \ + ARROW_ORC=ON \ + ARROW_PARQUET=ON \ + ARROW_PLASMA=ON \ + ARROW_S3=ON \ + ARROW_USE_CCACHE=ON \ + ARROW_WITH_BROTLI=ON \ + ARROW_WITH_BZ2=ON \ + ARROW_WITH_LZ4=ON \ + ARROW_WITH_SNAPPY=ON \ + ARROW_WITH_ZLIB=ON \ + ARROW_WITH_ZSTD=ON \ + PARQUET_BUILD_EXAMPLES=ON \ + PARQUET_BUILD_EXECUTABLES=ON \ + PARQUET_HOME=$CONDA_PREFIX diff --git a/src/arrow/ci/docker/conda-integration.dockerfile b/src/arrow/ci/docker/conda-integration.dockerfile new file mode 100644 index 000000000..43d8d943b --- /dev/null +++ b/src/arrow/ci/docker/conda-integration.dockerfile @@ -0,0 +1,73 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +ARG repo +ARG arch=amd64 +FROM ${repo}:${arch}-conda-cpp + +ARG arch=amd64 +ARG maven=3.5 +ARG node=14 +ARG jdk=8 +ARG go=1.15 + +# Install Archery and integration dependencies +COPY ci/conda_env_archery.txt /arrow/ci/ +RUN conda install -q \ + --file arrow/ci/conda_env_archery.txt \ + "python>=3.7" \ + numpy \ + compilers \ + maven=${maven} \ + nodejs=${node} \ + yarn \ + openjdk=${jdk} && \ + conda clean --all --force-pkgs-dirs + +# Install Rust with only the needed components +# (rustfmt is needed for tonic-build to compile the protobuf definitions) +RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- --profile=minimal -y && \ + $HOME/.cargo/bin/rustup component add rustfmt + +ENV GOROOT=/opt/go \ + GOBIN=/opt/go/bin \ + GOPATH=/go \ + PATH=/opt/go/bin:$PATH +RUN wget -nv -O - https://dl.google.com/go/go${go}.linux-${arch}.tar.gz | tar -xzf - -C /opt + +ENV DOTNET_ROOT=/opt/dotnet \ + PATH=/opt/dotnet:$PATH +RUN curl -sSL https://dot.net/v1/dotnet-install.sh | bash /dev/stdin -Channel 3.1 -InstallDir /opt/dotnet + +ENV ARROW_BUILD_INTEGRATION=ON \ + ARROW_BUILD_STATIC=OFF \ + ARROW_BUILD_TESTS=OFF \ + ARROW_COMPUTE=OFF \ + ARROW_CSV=OFF \ + ARROW_DATASET=OFF \ + ARROW_FILESYSTEM=OFF \ + ARROW_FLIGHT=ON \ + ARROW_GANDIVA=OFF \ + ARROW_HDFS=OFF \ + ARROW_JEMALLOC=OFF \ + ARROW_JSON=OFF \ + ARROW_ORC=OFF \ + ARROW_PARQUET=OFF \ + ARROW_PLASMA=OFF \ + ARROW_S3=OFF \ + ARROW_USE_GLOG=OFF \ + CMAKE_UNITY_BUILD=ON diff --git a/src/arrow/ci/docker/conda-python-dask.dockerfile b/src/arrow/ci/docker/conda-python-dask.dockerfile new file mode 100644 index 000000000..cd59a5538 --- /dev/null +++ b/src/arrow/ci/docker/conda-python-dask.dockerfile @@ -0,0 +1,25 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +ARG repo +ARG arch=amd64 +ARG python=3.6 +FROM ${repo}:${arch}-conda-python-${python} + +ARG dask=latest +COPY ci/scripts/install_dask.sh /arrow/ci/scripts/ +RUN /arrow/ci/scripts/install_dask.sh ${dask}
\ No newline at end of file diff --git a/src/arrow/ci/docker/conda-python-hdfs.dockerfile b/src/arrow/ci/docker/conda-python-hdfs.dockerfile new file mode 100644 index 000000000..f6ffc71ce --- /dev/null +++ b/src/arrow/ci/docker/conda-python-hdfs.dockerfile @@ -0,0 +1,52 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +ARG repo +ARG arch=amd64 +ARG python=3.6 +FROM ${repo}:${arch}-conda-python-${python} + +ARG jdk=8 +ARG maven=3.5 +RUN conda install -q \ + maven=${maven} \ + openjdk=${jdk} \ + pandas && \ + conda clean --all + +# installing libhdfs (JNI) +ARG hdfs=3.2.1 +ENV HADOOP_HOME=/opt/hadoop-${hdfs} \ + HADOOP_OPTS=-Djava.library.path=/opt/hadoop-${hdfs}/lib/native \ + PATH=$PATH:/opt/hadoop-${hdfs}/bin:/opt/hadoop-${hdfs}/sbin +COPY ci/scripts/util_download_apache.sh /arrow/ci/scripts/ +RUN /arrow/ci/scripts/util_download_apache.sh \ + "hadoop/common/hadoop-${hdfs}/hadoop-${hdfs}.tar.gz" /opt + +COPY ci/etc/hdfs-site.xml $HADOOP_HOME/etc/hadoop/ + +# build cpp with tests +ENV CC=gcc \ + CXX=g++ \ + ARROW_FLIGHT=OFF \ + ARROW_GANDIVA=OFF \ + ARROW_PLASMA=OFF \ + ARROW_PARQUET=ON \ + ARROW_ORC=OFF \ + ARROW_HDFS=ON \ + ARROW_PYTHON=ON \ + ARROW_BUILD_TESTS=ON diff --git a/src/arrow/ci/docker/conda-python-jpype.dockerfile b/src/arrow/ci/docker/conda-python-jpype.dockerfile new file mode 100644 index 000000000..f77ef9bf6 --- /dev/null +++ b/src/arrow/ci/docker/conda-python-jpype.dockerfile @@ -0,0 +1,29 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +ARG repo +ARG arch=amd64 +ARG python=3.6 +FROM ${repo}:${arch}-conda-python-${python} + +ARG jdk=11 +ARG maven=3.6 +RUN conda install -q \ + maven=${maven} \ + openjdk=${jdk} \ + jpype1 && \ + conda clean --all diff --git a/src/arrow/ci/docker/conda-python-kartothek.dockerfile b/src/arrow/ci/docker/conda-python-kartothek.dockerfile new file mode 100644 index 000000000..d52316182 --- /dev/null +++ b/src/arrow/ci/docker/conda-python-kartothek.dockerfile @@ -0,0 +1,46 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +ARG repo +ARG arch=amd64 +ARG python=3.6 +FROM ${repo}:${arch}-conda-python-${python} + +# install kartothek dependencies from conda-forge +RUN conda install -c conda-forge -q \ + attrs \ + click \ + cloudpickle \ + dask \ + decorator \ + freezegun \ + msgpack-python \ + prompt-toolkit \ + pytest-mock \ + pytest-xdist \ + pyyaml \ + simplejson \ + simplekv \ + storefact \ + toolz \ + urlquote \ + zstandard && \ + conda clean --all + +ARG kartothek=latest +COPY ci/scripts/install_kartothek.sh /arrow/ci/scripts/ +RUN /arrow/ci/scripts/install_kartothek.sh ${kartothek} /kartothek diff --git a/src/arrow/ci/docker/conda-python-pandas.dockerfile b/src/arrow/ci/docker/conda-python-pandas.dockerfile new file mode 100644 index 000000000..303cc80e4 --- /dev/null +++ b/src/arrow/ci/docker/conda-python-pandas.dockerfile @@ -0,0 +1,27 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +ARG repo +ARG arch=amd64 +ARG python=3.6 +FROM ${repo}:${arch}-conda-python-${python} + +ARG pandas=latest +ARG numpy=latest +COPY ci/scripts/install_pandas.sh /arrow/ci/scripts/ +RUN conda uninstall -q -y numpy && \ + /arrow/ci/scripts/install_pandas.sh ${pandas} ${numpy} diff --git a/src/arrow/ci/docker/conda-python-spark.dockerfile b/src/arrow/ci/docker/conda-python-spark.dockerfile new file mode 100644 index 000000000..a2af2ac13 --- /dev/null +++ b/src/arrow/ci/docker/conda-python-spark.dockerfile @@ -0,0 +1,43 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +ARG repo +ARG arch=amd64 +ARG python=3.6 +FROM ${repo}:${arch}-conda-python-${python} + +ARG jdk=8 +ARG maven=3.5 + +RUN conda install -q \ + openjdk=${jdk} \ + maven=${maven} \ + pandas && \ + conda clean --all + +# installing specific version of spark +ARG spark=master +COPY ci/scripts/install_spark.sh /arrow/ci/scripts/ +RUN /arrow/ci/scripts/install_spark.sh ${spark} /spark + +# build cpp with tests +ENV CC=gcc \ + CXX=g++ \ + ARROW_PYTHON=ON \ + ARROW_HDFS=ON \ + ARROW_BUILD_TESTS=OFF \ + SPARK_VERSION=${spark} diff --git a/src/arrow/ci/docker/conda-python-turbodbc.dockerfile b/src/arrow/ci/docker/conda-python-turbodbc.dockerfile new file mode 100644 index 000000000..e748604de --- /dev/null +++ b/src/arrow/ci/docker/conda-python-turbodbc.dockerfile @@ -0,0 +1,50 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +ARG repo +ARG arch=amd64 +ARG python=3.6 +FROM ${repo}:${arch}-conda-python-${python} + +RUN export DEBIAN_FRONTEND=noninteractive && \ + apt-get update -y -q && \ + apt-get install -y -q --no-install-recommends \ + odbc-postgresql \ + postgresql \ + sudo && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +# install turbodbc dependencies from conda-forge +RUN conda install -c conda-forge -q \ + pybind11 \ + pytest-cov \ + mock \ + unixodbc && \ + conda clean --all + +RUN service postgresql start && \ + sudo -u postgres psql -U postgres -c \ + "CREATE DATABASE test_db;" && \ + sudo -u postgres psql -U postgres -c \ + "ALTER USER postgres WITH PASSWORD 'password';" + +ARG turbodbc=latest +COPY ci/scripts/install_turbodbc.sh /arrow/ci/scripts/ +RUN /arrow/ci/scripts/install_turbodbc.sh ${turbodbc} /turbodbc + +ENV TURBODBC_TEST_CONFIGURATION_FILES "query_fixtures_postgresql.json" diff --git a/src/arrow/ci/docker/conda-python.dockerfile b/src/arrow/ci/docker/conda-python.dockerfile new file mode 100644 index 000000000..ab3f77be1 --- /dev/null +++ b/src/arrow/ci/docker/conda-python.dockerfile @@ -0,0 +1,37 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +ARG repo +ARG arch +FROM ${repo}:${arch}-conda-cpp + +# install python specific packages +ARG python=3.6 +COPY ci/conda_env_python.txt /arrow/ci/ +RUN conda install -q \ + --file arrow/ci/conda_env_python.txt \ + $([ "$python" == "3.6" -o "$python" == "3.7" ] && echo "pickle5") \ + python=${python} \ + nomkl && \ + conda clean --all + +ENV ARROW_PYTHON=ON \ + ARROW_BUILD_STATIC=OFF \ + ARROW_BUILD_TESTS=OFF \ + ARROW_BUILD_UTILITIES=OFF \ + ARROW_TENSORFLOW=ON \ + ARROW_USE_GLOG=OFF diff --git a/src/arrow/ci/docker/conda.dockerfile b/src/arrow/ci/docker/conda.dockerfile new file mode 100644 index 000000000..adb64f9fa --- /dev/null +++ b/src/arrow/ci/docker/conda.dockerfile @@ -0,0 +1,55 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +ARG arch=amd64 +FROM ${arch}/ubuntu:18.04 + +# arch is unset after the FROM statement, so need to define it again +ARG arch=amd64 +ARG prefix=/opt/conda + +# install build essentials +RUN export DEBIAN_FRONTEND=noninteractive && \ + apt-get update -y -q && \ + apt-get install -y -q wget tzdata libc6-dbg gdb \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + +ENV PATH=${prefix}/bin:$PATH +# install conda and minio +COPY ci/scripts/install_conda.sh \ + ci/scripts/install_minio.sh \ + /arrow/ci/scripts/ +RUN /arrow/ci/scripts/install_conda.sh ${arch} linux latest ${prefix} +RUN /arrow/ci/scripts/install_minio.sh ${arch} linux latest ${prefix} +COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts +RUN /arrow/ci/scripts/install_gcs_testbench.sh ${arch} default + +# create a conda environment +ADD ci/conda_env_unix.txt /arrow/ci/ +RUN conda create -n arrow --file arrow/ci/conda_env_unix.txt git && \ + conda clean --all + +# activate the created environment by default +RUN echo "conda activate arrow" >> ~/.profile +ENV CONDA_PREFIX=${prefix}/envs/arrow + +# use login shell to activate arrow environment in the RUN commands +SHELL [ "/bin/bash", "-c", "-l" ] + +# use login shell when running the container +ENTRYPOINT [ "/bin/bash", "-c", "-l" ] diff --git a/src/arrow/ci/docker/debian-10-cpp.dockerfile b/src/arrow/ci/docker/debian-10-cpp.dockerfile new file mode 100644 index 000000000..16e867fc3 --- /dev/null +++ b/src/arrow/ci/docker/debian-10-cpp.dockerfile @@ -0,0 +1,106 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +ARG arch=amd64 +FROM ${arch}/debian:10 +ARG arch + +ENV DEBIAN_FRONTEND noninteractive + +RUN \ + echo "deb http://deb.debian.org/debian buster-backports main" > \ + /etc/apt/sources.list.d/backports.list + +ARG llvm +RUN apt-get update -y -q && \ + apt-get install -y -q --no-install-recommends \ + apt-transport-https \ + ca-certificates \ + gnupg \ + wget && \ + wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add - && \ + echo "deb https://apt.llvm.org/buster/ llvm-toolchain-buster-${llvm} main" > \ + /etc/apt/sources.list.d/llvm.list && \ + apt-get update -y -q && \ + apt-get install -y -q --no-install-recommends \ + autoconf \ + ccache \ + clang-${llvm} \ + cmake \ + g++ \ + gcc \ + gdb \ + git \ + libbenchmark-dev \ + libboost-all-dev \ + libbrotli-dev \ + libbz2-dev \ + libc-ares-dev \ + libcurl4-openssl-dev \ + libgflags-dev \ + libgmock-dev \ + libgoogle-glog-dev \ + liblz4-dev \ + libre2-dev \ + libsnappy-dev \ + libssl-dev \ + libthrift-dev \ + libutf8proc-dev \ + llvm-${llvm}-dev \ + make \ + ninja-build \ + pkg-config \ + protobuf-compiler \ + python3-pip \ + rapidjson-dev \ + tzdata \ + zlib1g-dev && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +COPY ci/scripts/install_minio.sh /arrow/ci/scripts/ +RUN /arrow/ci/scripts/install_minio.sh ${arch} linux latest /usr/local +COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts/ +RUN /arrow/ci/scripts/install_gcs_testbench.sh ${arch} default + +ENV ARROW_BUILD_TESTS=ON \ + ARROW_DATASET=ON \ + ARROW_DEPENDENCY_SOURCE=SYSTEM \ + ARROW_FLIGHT=ON \ + ARROW_GANDIVA=ON \ + 
ARROW_HOME=/usr/local \ + ARROW_ORC=ON \ + ARROW_PARQUET=ON \ + ARROW_PLASMA=ON \ + ARROW_S3=ON \ + ARROW_USE_CCACHE=ON \ + ARROW_WITH_BROTLI=ON \ + ARROW_WITH_BZ2=ON \ + ARROW_WITH_LZ4=ON \ + ARROW_WITH_SNAPPY=ON \ + ARROW_WITH_ZLIB=ON \ + ARROW_WITH_ZSTD=ON \ + AWSSDK_SOURCE=BUNDLED \ + cares_SOURCE=BUNDLED \ + CC=gcc \ + CXX=g++ \ + gRPC_SOURCE=BUNDLED \ + GTest_SOURCE=BUNDLED \ + ORC_SOURCE=BUNDLED \ + PATH=/usr/lib/ccache/:$PATH \ + Protobuf_SOURCE=BUNDLED \ + zstd_SOURCE=BUNDLED diff --git a/src/arrow/ci/docker/debian-10-go-cgo-python.dockerfile b/src/arrow/ci/docker/debian-10-go-cgo-python.dockerfile new file mode 100644 index 000000000..46455a42b --- /dev/null +++ b/src/arrow/ci/docker/debian-10-go-cgo-python.dockerfile @@ -0,0 +1,36 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +ARG base +FROM ${base} + +ENV DEBIAN_FRONTEND noninteractive + +# Install python3 and pip so we can install pyarrow to test the C data interface. 
+RUN apt-get update -y -q && \ + apt-get install -y -q --no-install-recommends \ + python3 \ + python3-pip && \ + apt-get clean + +RUN ln -s /usr/bin/python3 /usr/local/bin/python && \ + ln -s /usr/bin/pip3 /usr/local/bin/pip + +# Need a newer pip than Debian's to install manylinux201x wheels +RUN pip install -U pip + +RUN pip install pyarrow cffi --only-binary pyarrow diff --git a/src/arrow/ci/docker/debian-10-go.dockerfile b/src/arrow/ci/docker/debian-10-go.dockerfile new file mode 100644 index 000000000..3a24b8afe --- /dev/null +++ b/src/arrow/ci/docker/debian-10-go.dockerfile @@ -0,0 +1,26 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +ARG arch=amd64 +ARG go=1.15 +FROM ${arch}/golang:${go}-buster + + +# TODO(kszucs): +# 1. add the files required to install the dependencies to .dockerignore +# 2. copy these files to their appropriate path +# 3. download and compile the dependencies diff --git a/src/arrow/ci/docker/debian-10-js.dockerfile b/src/arrow/ci/docker/debian-10-js.dockerfile new file mode 100644 index 000000000..5bb31f2e3 --- /dev/null +++ b/src/arrow/ci/docker/debian-10-js.dockerfile @@ -0,0 +1,27 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +ARG arch=amd64 +ARG node=14 +FROM ${arch}/node:${node} + +ENV NODE_NO_WARNINGS=1 + +# TODO(kszucs): +# 1. add the files required to install the dependencies to .dockerignore +# 2. copy these files to their appropriate path +# 3. download and compile the dependencies diff --git a/src/arrow/ci/docker/debian-11-cpp.dockerfile b/src/arrow/ci/docker/debian-11-cpp.dockerfile new file mode 100644 index 000000000..659881b0c --- /dev/null +++ b/src/arrow/ci/docker/debian-11-cpp.dockerfile @@ -0,0 +1,100 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +ARG arch=amd64 +FROM ${arch}/debian:11 +ARG arch + +ENV DEBIAN_FRONTEND noninteractive + +ARG llvm +RUN apt-get update -y -q && \ + apt-get install -y -q --no-install-recommends \ + apt-transport-https \ + ca-certificates \ + gnupg \ + wget && \ + wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add - && \ + echo "deb https://apt.llvm.org/bullseye/ llvm-toolchain-bullseye-${llvm} main" > \ + /etc/apt/sources.list.d/llvm.list && \ + apt-get update -y -q && \ + apt-get install -y -q --no-install-recommends \ + autoconf \ + ccache \ + clang-${llvm} \ + cmake \ + g++ \ + gcc \ + gdb \ + git \ + libbenchmark-dev \ + libboost-all-dev \ + libbrotli-dev \ + libbz2-dev \ + libc-ares-dev \ + libcurl4-openssl-dev \ + libgflags-dev \ + libgmock-dev \ + libgoogle-glog-dev \ + libgrpc++-dev \ + liblz4-dev \ + libre2-dev \ + libsnappy-dev \ + libssl-dev \ + libthrift-dev \ + libutf8proc-dev \ + libzstd-dev \ + llvm-${llvm}-dev \ + make \ + ninja-build \ + pkg-config \ + protobuf-compiler-grpc \ + python3-pip \ + rapidjson-dev \ + tzdata \ + zlib1g-dev && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +COPY ci/scripts/install_minio.sh /arrow/ci/scripts/ +RUN /arrow/ci/scripts/install_minio.sh ${arch} linux latest /usr/local +COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts/ +RUN /arrow/ci/scripts/install_gcs_testbench.sh ${arch} default + +ENV ARROW_BUILD_TESTS=ON \ + ARROW_DATASET=ON \ + ARROW_DEPENDENCY_SOURCE=SYSTEM \ + ARROW_FLIGHT=ON \ + ARROW_GANDIVA=ON \ + ARROW_HOME=/usr/local \ + ARROW_ORC=ON \ + ARROW_PARQUET=ON \ + ARROW_PLASMA=ON \ + ARROW_S3=ON \ + ARROW_USE_CCACHE=ON \ + ARROW_WITH_BROTLI=ON \ + ARROW_WITH_BZ2=ON \ + ARROW_WITH_LZ4=ON \ + ARROW_WITH_SNAPPY=ON \ + ARROW_WITH_ZLIB=ON \ + ARROW_WITH_ZSTD=ON \ + AWSSDK_SOURCE=BUNDLED \ + CC=gcc \ + CXX=g++ \ + ORC_SOURCE=BUNDLED \ + PATH=/usr/lib/ccache/:$PATH \ + Protobuf_SOURCE=BUNDLED diff --git a/src/arrow/ci/docker/debian-11-go-cgo-python.dockerfile 
b/src/arrow/ci/docker/debian-11-go-cgo-python.dockerfile new file mode 100644 index 000000000..46455a42b --- /dev/null +++ b/src/arrow/ci/docker/debian-11-go-cgo-python.dockerfile @@ -0,0 +1,36 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +ARG base +FROM ${base} + +ENV DEBIAN_FRONTEND noninteractive + +# Install python3 and pip so we can install pyarrow to test the C data interface. +RUN apt-get update -y -q && \ + apt-get install -y -q --no-install-recommends \ + python3 \ + python3-pip && \ + apt-get clean + +RUN ln -s /usr/bin/python3 /usr/local/bin/python && \ + ln -s /usr/bin/pip3 /usr/local/bin/pip + +# Need a newer pip than Debian's to install manylinux201x wheels +RUN pip install -U pip + +RUN pip install pyarrow cffi --only-binary pyarrow diff --git a/src/arrow/ci/docker/debian-11-go.dockerfile b/src/arrow/ci/docker/debian-11-go.dockerfile new file mode 100644 index 000000000..3a24b8afe --- /dev/null +++ b/src/arrow/ci/docker/debian-11-go.dockerfile @@ -0,0 +1,26 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +ARG arch=amd64 +ARG go=1.15 +FROM ${arch}/golang:${go}-buster + + +# TODO(kszucs): +# 1. add the files required to install the dependencies to .dockerignore +# 2. copy these files to their appropriate path +# 3. download and compile the dependencies diff --git a/src/arrow/ci/docker/debian-11-js.dockerfile b/src/arrow/ci/docker/debian-11-js.dockerfile new file mode 100644 index 000000000..5bb31f2e3 --- /dev/null +++ b/src/arrow/ci/docker/debian-11-js.dockerfile @@ -0,0 +1,27 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +ARG arch=amd64 +ARG node=14 +FROM ${arch}/node:${node} + +ENV NODE_NO_WARNINGS=1 + +# TODO(kszucs): +# 1. 
add the files required to install the dependencies to .dockerignore +# 2. copy these files to their appropriate path +# 3. download and compile the dependencies diff --git a/src/arrow/ci/docker/debian-9-java.dockerfile b/src/arrow/ci/docker/debian-9-java.dockerfile new file mode 100644 index 000000000..2cc36e3ea --- /dev/null +++ b/src/arrow/ci/docker/debian-9-java.dockerfile @@ -0,0 +1,28 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +ARG arch=amd64 +ARG jdk=8 +ARG maven=3.5.4 +FROM ${arch}/maven:${maven}-jdk-${jdk} + +ENV ARROW_JAVA_SHADE_FLATBUFS=ON + +# TODO(kszucs): +# 1. add the files required to install the dependencies to .dockerignore +# 2. copy these files to their appropriate path +# 3. download and compile the dependencies diff --git a/src/arrow/ci/docker/debian-go-cgo.dockerfile b/src/arrow/ci/docker/debian-go-cgo.dockerfile new file mode 100644 index 000000000..a494d1e15 --- /dev/null +++ b/src/arrow/ci/docker/debian-go-cgo.dockerfile @@ -0,0 +1,32 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +ARG base +FROM ${base} + +ENV DEBIAN_FRONTEND noninteractive + +# install libarrow-dev to link against with CGO +RUN apt-get update -y -q && \ + apt-get install -y -q --no-install-recommends ca-certificates lsb-release wget && \ + wget https://apache.jfrog.io/artifactory/arrow/$(lsb_release --id --short | tr 'A-Z' 'a-z')/apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb && \ + apt-get install -y -q --no-install-recommends ./apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb && \ + apt-get update -y -q && \ + apt-get install -y -q --no-install-recommends \ + cmake \ + libarrow-dev && \ + apt-get clean diff --git a/src/arrow/ci/docker/fedora-33-cpp.dockerfile b/src/arrow/ci/docker/fedora-33-cpp.dockerfile new file mode 100644 index 000000000..61964a476 --- /dev/null +++ b/src/arrow/ci/docker/fedora-33-cpp.dockerfile @@ -0,0 +1,94 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +ARG arch +FROM ${arch}/fedora:33 +ARG arch + +# install dependencies +RUN dnf update -y && \ + dnf install -y \ + autoconf \ + boost-devel \ + brotli-devel \ + bzip2-devel \ + c-ares-devel \ + ccache \ + clang-devel \ + cmake \ + curl-devel \ + flatbuffers-devel \ + gcc \ + gcc-c++ \ + gflags-devel \ + git \ + glog-devel \ + gmock-devel \ + google-benchmark-devel \ + grpc-devel \ + grpc-plugins \ + gtest-devel \ + java-latest-openjdk-devel \ + java-latest-openjdk-headless \ + libzstd-devel \ + llvm-devel \ + llvm-static \ + lz4-devel \ + make \ + ninja-build \ + openssl-devel \ + protobuf-devel \ + python \ + python-pip \ + rapidjson-devel \ + re2-devel \ + snappy-devel \ + thrift-devel \ + utf8proc-devel \ + wget \ + which \ + zlib-devel + +COPY ci/scripts/install_minio.sh /arrow/ci/scripts/ +RUN /arrow/ci/scripts/install_minio.sh ${arch} linux latest /usr/local +COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts/ +RUN /arrow/ci/scripts/install_gcs_testbench.sh ${arch} default + +ENV ARROW_BUILD_TESTS=ON \ + ARROW_DEPENDENCY_SOURCE=SYSTEM \ + ARROW_DATASET=ON \ + ARROW_FLIGHT=ON \ + ARROW_GANDIVA_JAVA=ON \ + ARROW_GANDIVA=ON \ + ARROW_HOME=/usr/local \ + ARROW_ORC=ON \ + ARROW_PARQUET=ON \ + ARROW_S3=ON \ + ARROW_USE_CCACHE=ON \ + ARROW_WITH_BROTLI=ON \ + ARROW_WITH_BZ2=ON \ + ARROW_WITH_LZ4=ON \ + ARROW_WITH_SNAPPY=ON \ + ARROW_WITH_ZLIB=ON \ + ARROW_WITH_ZSTD=ON \ + AWSSDK_SOURCE=BUNDLED \ + CC=gcc \ + CXX=g++ \ + ORC_SOURCE=BUNDLED \ + PARQUET_BUILD_EXECUTABLES=ON \ + PARQUET_BUILD_EXAMPLES=ON \ + PATH=/usr/lib/ccache/:$PATH diff --git 
a/src/arrow/ci/docker/java-jni-manylinux-201x.dockerfile b/src/arrow/ci/docker/java-jni-manylinux-201x.dockerfile new file mode 100644 index 000000000..021dab686 --- /dev/null +++ b/src/arrow/ci/docker/java-jni-manylinux-201x.dockerfile @@ -0,0 +1,37 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +ARG base +FROM ${base} + +# Install the libraries required by Gandiva to run +RUN vcpkg install --clean-after-build \ + llvm \ + boost-system \ + boost-date-time \ + boost-regex \ + boost-predef \ + boost-algorithm \ + boost-locale \ + boost-format \ + boost-variant \ + boost-multiprecision + +# Install Java +ARG java=1.8.0 +RUN yum install -y java-$java-openjdk-devel && yum clean all +ENV JAVA_HOME=/usr/lib/jvm/java-$java-openjdk/ diff --git a/src/arrow/ci/docker/linux-apt-c-glib.dockerfile b/src/arrow/ci/docker/linux-apt-c-glib.dockerfile new file mode 100644 index 000000000..12c6e23a0 --- /dev/null +++ b/src/arrow/ci/docker/linux-apt-c-glib.dockerfile @@ -0,0 +1,65 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +ARG base +FROM ${base} + +RUN apt-get update -y -q && \ + apt-get install -y -q \ + python3 \ + python3-pip \ + gtk-doc-tools \ + libgirepository1.0-dev \ + libglib2.0-doc \ + lsb-release \ + luarocks \ + pkg-config \ + ruby-dev && \ + if [ "$(lsb_release --codename --short)" = "xenial" ]; then \ + apt-get install -y -q --no-install-recommends -t xenial-backports \ + ninja-build; \ + fi && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +RUN luarocks install lgi + +# pip on Ubuntu 20.04 may be buggy: +# +# Collecting meson +# Downloading meson-0.53.2.tar.gz (1.6 MB) +# Installing build dependencies: started +# Installing build dependencies: finished with status 'done' +# Getting requirements to build wheel: started +# Getting requirements to build wheel: finished with status 'error' +# ERROR: Command errored out with exit status 1: +# command: /usr/bin/python3 /usr/share/python-wheels/pep517-0.7.0-py2.py3-none-any.whl/pep517/_in_process.py get_requires_for_build_wheel /tmp/tmpsk4jveay +# cwd: /tmp/pip-install-jn79a_kh/meson +# Complete output (1 lines): +# /usr/bin/python3: can't find '__main__' module in '/usr/share/python-wheels/pep517-0.7.0-py2.py3-none-any.whl/pep517/_in_process.py' +# ---------------------------------------- +# ERROR: Command errored out with exit status 1: /usr/bin/python3 /usr/share/python-wheels/pep517-0.7.0-py2.py3-none-any.whl/pep517/_in_process.py 
get_requires_for_build_wheel /tmp/tmpsk4jveay Check the logs for full command output. +RUN (python3 -m pip install meson || \ + python3 -m pip install --no-use-pep517 meson) && \ + gem install --no-document bundler + +COPY c_glib/Gemfile /arrow/c_glib/ +RUN bundle install --gemfile /arrow/c_glib/Gemfile + +ENV ARROW_BUILD_TESTS=OFF \ + ARROW_BUILD_UTILITIES=OFF \ + ARROW_INSTALL_NAME_RPATH=OFF diff --git a/src/arrow/ci/docker/linux-apt-docs.dockerfile b/src/arrow/ci/docker/linux-apt-docs.dockerfile new file mode 100644 index 000000000..12c797f96 --- /dev/null +++ b/src/arrow/ci/docker/linux-apt-docs.dockerfile @@ -0,0 +1,110 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +ARG base +FROM ${base} + +ARG r=4.1 +ARG jdk=8 + +# See R install instructions at https://cloud.r-project.org/bin/linux/ubuntu/ +RUN apt-get update -y && \ + apt-get install -y \ + dirmngr \ + apt-transport-https \ + software-properties-common && \ + apt-key adv \ + --keyserver keyserver.ubuntu.com \ + --recv-keys E298A3A825C0D65DFD57CBB651716619E084DAB9 && \ + add-apt-repository 'deb https://cloud.r-project.org/bin/linux/ubuntu '$(lsb_release -cs)'-cran40/' && \ + apt-get install -y --no-install-recommends \ + autoconf-archive \ + automake \ + curl \ + doxygen \ + gobject-introspection \ + gtk-doc-tools \ + libcurl4-openssl-dev \ + libfontconfig1-dev \ + libfribidi-dev \ + libgirepository1.0-dev \ + libglib2.0-doc \ + libharfbuzz-dev \ + libtiff-dev \ + libtool \ + libxml2-dev \ + ninja-build \ + nvidia-cuda-toolkit \ + openjdk-${jdk}-jdk-headless \ + pandoc \ + r-recommended=${r}* \ + r-base=${r}* \ + rsync \ + ruby-dev \ + wget && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +ENV JAVA_HOME=/usr/lib/jvm/java-${jdk}-openjdk-amd64 + +ARG maven=3.5.4 +COPY ci/scripts/util_download_apache.sh /arrow/ci/scripts/ +RUN /arrow/ci/scripts/util_download_apache.sh \ + "maven/maven-3/${maven}/binaries/apache-maven-${maven}-bin.tar.gz" /opt +ENV PATH=/opt/apache-maven-${maven}/bin:$PATH +RUN mvn -version + +ARG node=14 +RUN wget -q -O - https://deb.nodesource.com/setup_${node}.x | bash - && \ + apt-get install -y nodejs && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* && \ + npm install -g yarn + +# ARROW-13353: breathe >= 4.29.1 tries to parse template arguments, +# but Sphinx can't parse constructs like `typename...`. 
+RUN pip install \ + meson \ + breathe==4.29.0 \ + ipython \ + sphinx \ + pydata-sphinx-theme + +COPY c_glib/Gemfile /arrow/c_glib/ +RUN gem install --no-document bundler && \ + bundle install --gemfile /arrow/c_glib/Gemfile + +# Ensure parallel R package installation, set CRAN repo mirror, +# and use pre-built binaries where possible +COPY ci/etc/rprofile /arrow/ci/etc/ +RUN cat /arrow/ci/etc/rprofile >> $(R RHOME)/etc/Rprofile.site +# Also ensure parallel compilation of C/C++ code +RUN echo "MAKEFLAGS=-j$(R -s -e 'cat(parallel::detectCores())')" >> $(R RHOME)/etc/Renviron.site + +COPY ci/scripts/r_deps.sh /arrow/ci/scripts/ +COPY r/DESCRIPTION /arrow/r/ +RUN /arrow/ci/scripts/r_deps.sh /arrow && \ + R -e "install.packages('pkgdown')" + +ENV ARROW_FLIGHT=ON \ + ARROW_PYTHON=ON \ + ARROW_S3=ON \ + ARROW_BUILD_STATIC=OFF \ + ARROW_BUILD_TESTS=OFF \ + ARROW_BUILD_UTILITIES=OFF \ + ARROW_USE_GLOG=OFF \ + CMAKE_UNITY_BUILD=ON \ diff --git a/src/arrow/ci/docker/linux-apt-jni.dockerfile b/src/arrow/ci/docker/linux-apt-jni.dockerfile new file mode 100644 index 000000000..ddfa72e17 --- /dev/null +++ b/src/arrow/ci/docker/linux-apt-jni.dockerfile @@ -0,0 +1,87 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +ARG base +FROM ${base} + +# pipefail is enabled for proper error detection in the `wget | apt-key add` +# step +SHELL ["/bin/bash", "-o", "pipefail", "-c"] + +ENV DEBIAN_FRONTEND noninteractive + +ARG llvm +RUN apt-get update -y -q && \ + apt-get install -y -q --no-install-recommends \ + apt-transport-https \ + lsb-release \ + software-properties-common \ + wget && \ + code_name=$(lsb_release --codename --short) && \ + wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add - && \ + apt-add-repository -y \ + "deb https://apt.llvm.org/${code_name}/ llvm-toolchain-${code_name}-${llvm} main" && \ + apt-get update -y -q && \ + apt-get install -y -q --no-install-recommends \ + ca-certificates \ + ccache \ + clang-${llvm} \ + cmake \ + git \ + g++ \ + gcc \ + libboost-all-dev \ + libgflags-dev \ + libgoogle-glog-dev \ + libgtest-dev \ + liblz4-dev \ + libre2-dev \ + libsnappy-dev \ + libssl-dev \ + llvm-${llvm}-dev \ + make \ + ninja-build \ + pkg-config \ + protobuf-compiler \ + rapidjson-dev \ + tzdata \ + zlib1g-dev && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +ARG cmake=3.11.4 +RUN wget -nv -O - https://github.com/Kitware/CMake/releases/download/v${cmake}/cmake-${cmake}-Linux-x86_64.tar.gz | tar -xzf - -C /opt +ENV PATH=/opt/cmake-${cmake}-Linux-x86_64/bin:$PATH + +ENV ARROW_BUILD_TESTS=OFF \ + ARROW_DATASET=ON \ + ARROW_FLIGHT=OFF \ + ARROW_GANDIVA_JAVA=ON \ + ARROW_GANDIVA=ON \ + ARROW_HOME=/usr/local \ + ARROW_JAVA_CDATA=ON \ + ARROW_JNI=ON \ + ARROW_ORC=ON \ + ARROW_PARQUET=ON \ + ARROW_PLASMA_JAVA_CLIENT=ON \ + ARROW_PLASMA=ON \ + ARROW_USE_CCACHE=ON \ + CC=gcc \ + CXX=g++ \ + ORC_SOURCE=BUNDLED \ + PATH=/usr/lib/ccache/:$PATH \ + Protobuf_SOURCE=BUNDLED diff --git a/src/arrow/ci/docker/linux-apt-lint.dockerfile b/src/arrow/ci/docker/linux-apt-lint.dockerfile new file mode 100644 index 000000000..84de6b05f --- /dev/null +++ b/src/arrow/ci/docker/linux-apt-lint.dockerfile @@ -0,0 +1,95 @@ +# Licensed to the Apache Software Foundation 
(ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +ARG base +FROM hadolint/hadolint:v1.17.2 AS hadolint +FROM ${base} + +ARG clang_tools +RUN apt-get update && \ + apt-get install -y -q \ + clang-${clang_tools} \ + clang-format-${clang_tools} \ + clang-tidy-${clang_tools} \ + clang-tools-${clang_tools} \ + cmake \ + curl \ + libclang-${clang_tools}-dev \ + llvm-${clang_tools}-dev \ + openjdk-11-jdk-headless \ + python3 \ + python3-dev \ + python3-pip \ + ruby \ + apt-transport-https \ + software-properties-common \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + +ARG r=4.1 +RUN apt-key adv \ + --keyserver keyserver.ubuntu.com \ + --recv-keys E298A3A825C0D65DFD57CBB651716619E084DAB9 && \ + # NOTE: R 3.5 and 3.6 are available in the repos with -cran35 suffix + # for trusty, xenial, bionic, and eoan (as of May 2020) + # -cran40 has 4.0 versions for bionic and focal + # R 3.2, 3.3, 3.4 are available without the suffix but only for trusty and xenial + # TODO: make sure OS version and R version are valid together and conditionally set repo suffix + # This is a hack to turn 3.6 into 35, and 4.0/4.1 into 40: + add-apt-repository 'deb https://cloud.r-project.org/bin/linux/ubuntu '$(lsb_release -cs)'-cran'$(echo "${r}" | tr -d . 
| tr 6 5 | tr 1 0)'/' && \ + apt-get install -y \ + r-base=${r}* \ + r-recommended=${r}* \ + libxml2-dev + +# Ensure parallel R package installation, set CRAN repo mirror, +# and use pre-built binaries where possible +COPY ci/etc/rprofile /arrow/ci/etc/ +RUN cat /arrow/ci/etc/rprofile >> $(R RHOME)/etc/Rprofile.site +# Also ensure parallel compilation of C/C++ code +RUN echo "MAKEFLAGS=-j$(R -s -e 'cat(parallel::detectCores())')" >> $(R RHOME)/etc/Renviron.site + + +COPY ci/scripts/r_deps.sh /arrow/ci/scripts/ +COPY r/DESCRIPTION /arrow/r/ +# We need to install Arrow's dependencies in order for lintr's namespace searching to work. +# This could be removed if lintr no longer loads the dependency namespaces (see issues/PRs below) +RUN /arrow/ci/scripts/r_deps.sh /arrow +# This fork has a number of changes that have PRs and Issues to resolve upstream: +# https://github.com/jimhester/lintr/pull/843 +# https://github.com/jimhester/lintr/pull/841 +# https://github.com/jimhester/lintr/pull/845 +# https://github.com/jimhester/lintr/issues/842 +# https://github.com/jimhester/lintr/issues/846 +RUN R -e "remotes::install_github('jonkeane/lintr@arrow-branch')" + +# Docker linter +COPY --from=hadolint /bin/hadolint /usr/bin/hadolint + +# IWYU +COPY ci/scripts/install_iwyu.sh /arrow/ci/scripts/ +RUN arrow/ci/scripts/install_iwyu.sh /tmp/iwyu /usr/local ${clang_tools} + +# Use python3 by default in scripts +RUN ln -s /usr/bin/python3 /usr/local/bin/python && \ + ln -s /usr/bin/pip3 /usr/local/bin/pip + +COPY dev/archery/setup.py /arrow/dev/archery/ +RUN pip install -e arrow/dev/archery[lint] + +ENV LC_ALL=C.UTF-8 \ + LANG=C.UTF-8 diff --git a/src/arrow/ci/docker/linux-apt-python-3.dockerfile b/src/arrow/ci/docker/linux-apt-python-3.dockerfile new file mode 100644 index 000000000..753ba0d3a --- /dev/null +++ b/src/arrow/ci/docker/linux-apt-python-3.dockerfile @@ -0,0 +1,46 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +ARG base +FROM ${base} + +RUN apt-get update -y -q && \ + apt-get install -y -q \ + python3 \ + python3-pip \ + python3-dev && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +RUN ln -s /usr/bin/python3 /usr/local/bin/python && \ + ln -s /usr/bin/pip3 /usr/local/bin/pip + +RUN pip install -U pip setuptools + +COPY python/requirements-build.txt \ + python/requirements-test.txt \ + /arrow/python/ + +RUN pip install \ + -r arrow/python/requirements-build.txt \ + -r arrow/python/requirements-test.txt + +ENV ARROW_PYTHON=ON \ + ARROW_BUILD_STATIC=OFF \ + ARROW_BUILD_TESTS=OFF \ + ARROW_BUILD_UTILITIES=OFF \ + ARROW_USE_GLOG=OFF \ diff --git a/src/arrow/ci/docker/linux-apt-r.dockerfile b/src/arrow/ci/docker/linux-apt-r.dockerfile new file mode 100644 index 000000000..6d33d1800 --- /dev/null +++ b/src/arrow/ci/docker/linux-apt-r.dockerfile @@ -0,0 +1,114 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +ARG base +FROM ${base} +ARG arch + +ARG tz="UTC" +ENV TZ=${tz} + +# Build R +# [1] https://www.digitalocean.com/community/tutorials/how-to-install-r-on-ubuntu-18-04 +# [2] https://linuxize.com/post/how-to-install-r-on-ubuntu-18-04/#installing-r-packages-from-cran +ARG r=3.6 +RUN apt-get update -y && \ + apt-get install -y \ + dirmngr \ + apt-transport-https \ + software-properties-common && \ + apt-key adv \ + --keyserver keyserver.ubuntu.com \ + --recv-keys E298A3A825C0D65DFD57CBB651716619E084DAB9 && \ + # NOTE: R 3.5 and 3.6 are available in the repos with -cran35 suffix + # for trusty, xenial, bionic, and eoan (as of May 2020) + # -cran40 has 4.0 versions for bionic and focal + # R 3.2, 3.3, 3.4 are available without the suffix but only for trusty and xenial + # TODO: make sure OS version and R version are valid together and conditionally set repo suffix + # This is a hack to turn 3.6 into 35, and 4.0/4.1 into 40: + add-apt-repository 'deb https://cloud.r-project.org/bin/linux/ubuntu '$(lsb_release -cs)'-cran'$(echo "${r}" | tr -d . 
| tr 6 5 | tr 1 0)'/' && \ + apt-get install -y \ + r-base=${r}* \ + r-recommended=${r}* \ + # system libs needed by core R packages + libxml2-dev \ + libgit2-dev \ + libssl-dev \ + # install clang to mirror what was done on Travis + clang \ + clang-format \ + clang-tidy \ + # R CMD CHECK --as-cran needs pdflatex to build the package manual + texlive-latex-base \ + # Need locales so we can set UTF-8 + locales \ + # Need Python to check py-to-r bridge + python3 \ + python3-pip \ + python3-dev && \ + locale-gen en_US.UTF-8 && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +ARG gcc_version="" +RUN if [ "${gcc_version}" != "" ]; then \ + update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-${gcc_version} 100 && \ + update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-${gcc_version} 100 && \ + update-alternatives --install /usr/bin/cc cc /usr/bin/gcc 30 && \ + update-alternatives --set cc /usr/bin/gcc && \ + update-alternatives --install /usr/bin/c++ c++ /usr/bin/g++ 30 && \ + update-alternatives --set c++ /usr/bin/g++; \ + fi + +# Ensure parallel R package installation, set CRAN repo mirror, +# and use pre-built binaries where possible +COPY ci/etc/rprofile /arrow/ci/etc/ +RUN cat /arrow/ci/etc/rprofile >> $(R RHOME)/etc/Rprofile.site +# Also ensure parallel compilation of C/C++ code +RUN echo "MAKEFLAGS=-j$(R -s -e 'cat(parallel::detectCores())')" >> $(R RHOME)/etc/Renviron.site + +COPY ci/scripts/r_deps.sh /arrow/ci/scripts/ +COPY r/DESCRIPTION /arrow/r/ +RUN /arrow/ci/scripts/r_deps.sh /arrow + +COPY ci/scripts/install_minio.sh /arrow/ci/scripts/ +RUN /arrow/ci/scripts/install_minio.sh ${arch} linux latest /usr/local +COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts/ +RUN /arrow/ci/scripts/install_gcs_testbench.sh ${arch} default + +# Set up Python 3 and its dependencies +RUN ln -s /usr/bin/python3 /usr/local/bin/python && \ + ln -s /usr/bin/pip3 /usr/local/bin/pip + +COPY python/requirements-build.txt /arrow/python/ +RUN pip install 
-r arrow/python/requirements-build.txt + +ENV \ + ARROW_BUILD_STATIC=OFF \ + ARROW_BUILD_TESTS=OFF \ + ARROW_BUILD_UTILITIES=OFF \ + ARROW_FLIGHT=OFF \ + ARROW_GANDIVA=OFF \ + ARROW_NO_DEPRECATED_API=ON \ + ARROW_ORC=OFF \ + ARROW_PARQUET=ON \ + ARROW_PLASMA=OFF \ + ARROW_PYTHON=ON \ + ARROW_S3=ON \ + ARROW_USE_CCACHE=ON \ + ARROW_USE_GLOG=OFF \ + LC_ALL=en_US.UTF-8 diff --git a/src/arrow/ci/docker/linux-apt-ruby.dockerfile b/src/arrow/ci/docker/linux-apt-ruby.dockerfile new file mode 100644 index 000000000..58fd65bd5 --- /dev/null +++ b/src/arrow/ci/docker/linux-apt-ruby.dockerfile @@ -0,0 +1,27 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# depends on a C GLib image +ARG base +FROM ${base} + +COPY ruby/ /arrow/ruby/ +RUN bundle install --gemfile /arrow/ruby/Gemfile +RUN \ + for package in /arrow/ruby/*; do \ + bundle install --gemfile ${package}/Gemfile; \ + done diff --git a/src/arrow/ci/docker/linux-dnf-python-3.dockerfile b/src/arrow/ci/docker/linux-dnf-python-3.dockerfile new file mode 100644 index 000000000..8c3c5c701 --- /dev/null +++ b/src/arrow/ci/docker/linux-dnf-python-3.dockerfile @@ -0,0 +1,41 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +ARG base +FROM ${base} + +RUN dnf install -y \ + python3 \ + python3-pip \ + python3-devel + +RUN ln -s /usr/bin/python3 /usr/local/bin/python && \ + ln -s /usr/bin/pip3 /usr/local/bin/pip + +COPY python/requirements-build.txt \ + python/requirements-test.txt \ + /arrow/python/ + +RUN pip install \ + -r arrow/python/requirements-build.txt \ + -r arrow/python/requirements-test.txt + +ENV ARROW_PYTHON=ON \ + ARROW_BUILD_STATIC=OFF \ + ARROW_BUILD_TESTS=OFF \ + ARROW_BUILD_UTILITIES=OFF \ + ARROW_USE_GLOG=OFF \ diff --git a/src/arrow/ci/docker/linux-r.dockerfile b/src/arrow/ci/docker/linux-r.dockerfile new file mode 100644 index 000000000..568b90c22 --- /dev/null +++ b/src/arrow/ci/docker/linux-r.dockerfile @@ -0,0 +1,48 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# General purpose Dockerfile to take a Docker image containing R +# and install Arrow R package dependencies + +ARG base +FROM ${base} + +ARG r_bin=R +ENV R_BIN=${r_bin} + +ARG r_dev=FALSE +ENV ARROW_R_DEV=${r_dev} + +ARG devtoolset_version=-1 +ENV DEVTOOLSET_VERSION=${devtoolset_version} + +ARG tz="UTC" +ENV TZ=${tz} + +# Make sure R is on the path for the R-hub devel versions (where RPREFIX is set in its dockerfile) +ENV PATH "${RPREFIX}/bin:${PATH}" + +# Patch up some of the docker images +COPY ci/scripts/r_docker_configure.sh /arrow/ci/scripts/ +COPY ci/etc/rprofile /arrow/ci/etc/ +COPY ci/scripts/install_minio.sh /arrow/ci/scripts/ +COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts/ +RUN /arrow/ci/scripts/r_docker_configure.sh + +COPY ci/scripts/r_deps.sh /arrow/ci/scripts/ +COPY r/DESCRIPTION /arrow/r/ +RUN /arrow/ci/scripts/r_deps.sh /arrow diff --git a/src/arrow/ci/docker/python-sdist.dockerfile b/src/arrow/ci/docker/python-sdist.dockerfile new file mode 100644 index 000000000..853b532ab --- /dev/null +++ b/src/arrow/ci/docker/python-sdist.dockerfile @@ -0,0 +1,36 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +FROM amd64/ubuntu:20.04 + +SHELL ["/bin/bash", "-o", "pipefail", "-c"] + +RUN echo "debconf debconf/frontend select Noninteractive" | \ + debconf-set-selections + +RUN apt-get update -y -q && \ + apt-get install -y -q --no-install-recommends \ + git \ + python3-pip && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists* + +COPY python/requirements-build.txt \ + /arrow/python/requirements-build.txt +RUN pip3 install --requirement /arrow/python/requirements-build.txt + +ENV PYTHON=/usr/bin/python3 diff --git a/src/arrow/ci/docker/python-wheel-manylinux-201x.dockerfile b/src/arrow/ci/docker/python-wheel-manylinux-201x.dockerfile new file mode 100644 index 000000000..ae1b0a776 --- /dev/null +++ b/src/arrow/ci/docker/python-wheel-manylinux-201x.dockerfile @@ -0,0 +1,112 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +ARG base +FROM ${base} + +ARG arch_alias +ARG arch_short_alias + +RUN yum install -y git flex curl autoconf zip wget + +# Install CMake +ARG cmake=3.19.3 +RUN wget -q https://github.com/Kitware/CMake/releases/download/v${cmake}/cmake-${cmake}-Linux-${arch_alias}.tar.gz -O - | \ + tar -xzf - --directory /usr/local --strip-components=1 + +# Install Ninja +ARG ninja=1.10.2 +RUN mkdir /tmp/ninja && \ + wget -q https://github.com/ninja-build/ninja/archive/v${ninja}.tar.gz -O - | \ + tar -xzf - --directory /tmp/ninja --strip-components=1 && \ + cd /tmp/ninja && \ + ./configure.py --bootstrap && \ + mv ninja /usr/local/bin && \ + rm -rf /tmp/ninja + +# Install ccache +ARG ccache=4.1 +RUN mkdir /tmp/ccache && \ + wget -q https://github.com/ccache/ccache/archive/v${ccache}.tar.gz -O - | \ + tar -xzf - --directory /tmp/ccache --strip-components=1 && \ + cd /tmp/ccache && \ + mkdir build && \ + cd build && \ + cmake -GNinja -DCMAKE_BUILD_TYPE=Release -DZSTD_FROM_INTERNET=ON .. && \ + ninja install && \ + rm -rf /tmp/ccache + +# Install vcpkg +ARG vcpkg +RUN git clone https://github.com/microsoft/vcpkg /opt/vcpkg && \ + git -C /opt/vcpkg checkout ${vcpkg} && \ + /opt/vcpkg/bootstrap-vcpkg.sh -useSystemBinaries -disableMetrics && \ + ln -s /opt/vcpkg/vcpkg /usr/bin/vcpkg + +# Patch ports files as needed +COPY ci/vcpkg/*.patch \ + ci/vcpkg/*linux*.cmake \ + arrow/ci/vcpkg/ +RUN cd /opt/vcpkg && git apply --ignore-whitespace /arrow/ci/vcpkg/ports.patch + +ARG build_type=release +ENV CMAKE_BUILD_TYPE=${build_type} \ + VCPKG_FORCE_SYSTEM_BINARIES=1 \ + VCPKG_OVERLAY_TRIPLETS=/arrow/ci/vcpkg \ + VCPKG_DEFAULT_TRIPLET=${arch_short_alias}-linux-static-${build_type} \ + VCPKG_FEATURE_FLAGS=-manifests + +# Need to install the boost-build prior to installing the boost packages, otherwise +# vcpkg will raise an error. 
+# TODO(kszucs): factor out the package enumeration to a text file and reuse it +# from the windows image and potentially in a future macos wheel build +RUN vcpkg install --clean-after-build \ + boost-build:${arch_short_alias}-linux && \ + vcpkg install --clean-after-build \ + abseil \ + aws-sdk-cpp[config,cognito-identity,core,identity-management,s3,sts,transfer] \ + boost-filesystem \ + brotli \ + bzip2 \ + c-ares \ + curl \ + flatbuffers \ + gflags \ + glog \ + grpc \ + lz4 \ + openssl \ + orc \ + protobuf \ + rapidjson \ + re2 \ + snappy \ + thrift \ + utf8proc \ + zlib \ + zstd + +ARG python=3.6 +ENV PYTHON_VERSION=${python} +RUN PYTHON_ROOT=$(find /opt/python -name cp${PYTHON_VERSION/./}-*) && \ + echo "export PATH=$PYTHON_ROOT/bin:\$PATH" >> /etc/profile.d/python.sh + +SHELL ["/bin/bash", "-i", "-c"] +ENTRYPOINT ["/bin/bash", "-i", "-c"] + +COPY python/requirements-wheel-build.txt /arrow/python/ +RUN pip install -r /arrow/python/requirements-wheel-build.txt diff --git a/src/arrow/ci/docker/python-wheel-manylinux-test.dockerfile b/src/arrow/ci/docker/python-wheel-manylinux-test.dockerfile new file mode 100644 index 000000000..55c27d1d7 --- /dev/null +++ b/src/arrow/ci/docker/python-wheel-manylinux-test.dockerfile @@ -0,0 +1,27 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. + +ARG arch +ARG python +FROM ${arch}/python:${python} + +# RUN pip install --upgrade pip + +# pandas doesn't provide wheel for aarch64 yet, so cache the compiled +# test dependencies in a docker image +COPY python/requirements-wheel-test.txt /arrow/python/ +RUN pip install -r /arrow/python/requirements-wheel-test.txt diff --git a/src/arrow/ci/docker/python-wheel-windows-vs2017.dockerfile b/src/arrow/ci/docker/python-wheel-windows-vs2017.dockerfile new file mode 100644 index 000000000..9a2afb781 --- /dev/null +++ b/src/arrow/ci/docker/python-wheel-windows-vs2017.dockerfile @@ -0,0 +1,105 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +# based on mcr.microsoft.com/windows/servercore:ltsc2019 +# contains choco and vs2017 preinstalled +FROM abrarov/msvc-2017:2.10.0 + +# Install CMake and Ninja +RUN choco install --no-progress -r -y cmake --installargs 'ADD_CMAKE_TO_PATH=System' && \ + choco install --no-progress -r -y gzip wget ninja + +# Add unix tools to path +RUN setx path "%path%;C:\Program Files\Git\usr\bin" + +# Install vcpkg +# +# Compiling vcpkg itself from a git tag doesn't work anymore since vcpkg has +# started to ship precompiled binaries for the vcpkg-tool. +ARG vcpkg +RUN git clone https://github.com/Microsoft/vcpkg && \ + vcpkg\bootstrap-vcpkg.bat -disableMetrics && \ + setx PATH "%PATH%;C:\vcpkg" && \ + git -C vcpkg checkout %vcpkg% + +# Patch ports files as needed +COPY ci/vcpkg/*.patch \ + ci/vcpkg/*windows*.cmake \ + arrow/ci/vcpkg/ +RUN cd vcpkg && git apply --ignore-whitespace C:/arrow/ci/vcpkg/ports.patch + +# Configure vcpkg and install dependencies +# NOTE: use windows batch environment notation for build arguments in RUN +# statements but bash notation in ENV statements +# VCPKG_FORCE_SYSTEM_BINARIES=1 spare around ~750MB of image size if the system +# cmake's and ninja's versions are recent enough +ARG build_type=release +ENV CMAKE_BUILD_TYPE=${build_type} \ + VCPKG_OVERLAY_TRIPLETS=C:\\arrow\\ci\\vcpkg \ + VCPKG_DEFAULT_TRIPLET=x64-windows-static-md-${build_type} \ + VCPKG_FEATURE_FLAGS=-manifests + +RUN vcpkg install --clean-after-build \ + abseil \ + aws-sdk-cpp[config,cognito-identity,core,identity-management,s3,sts,transfer] \ + boost-filesystem \ + boost-multiprecision \ + boost-system \ + brotli \ + bzip2 \ + c-ares \ + curl \ + flatbuffers \ + gflags \ + glog \ + grpc \ + lz4 \ + openssl \ + orc \ + protobuf \ + rapidjson \ + re2 \ + snappy \ + thrift \ + utf8proc \ + zlib \ + zstd + +# Remove previous installations of python from the base image +RUN wmic product where "name like 'python%%'" call uninstall /nointeractive && \ + rm -rf Python* + +# Define the 
full version number otherwise choco falls back to patch number 0 (3.7 => 3.7.0) +ARG python=3.6 +RUN (if "%python%"=="3.6" setx PYTHON_VERSION 3.6.8) & \ + (if "%python%"=="3.7" setx PYTHON_VERSION 3.7.4) & \ + (if "%python%"=="3.8" setx PYTHON_VERSION 3.8.6) & \ + (if "%python%"=="3.9" setx PYTHON_VERSION 3.9.1) & \ + (if "%python%"=="3.10" setx PYTHON_VERSION 3.10.0) +RUN choco install -r -y --no-progress python --version=%PYTHON_VERSION% +RUN python -m pip install -U pip + +COPY python/requirements-wheel-build.txt arrow/python/ +RUN pip install -r arrow/python/requirements-wheel-build.txt + +# TODO(kszucs): set clcache as the compiler +ENV CLCACHE_DIR="C:\clcache" +RUN if "%python%" NEQ "3.10" pip install clcache + +# For debugging purposes +# RUN wget --no-check-certificate https://github.com/lucasg/Dependencies/releases/download/v1.10/Dependencies_x64_Release.zip +# RUN unzip Dependencies_x64_Release.zip -d Dependencies && setx path "%path%;C:\Dependencies" diff --git a/src/arrow/ci/docker/ubuntu-18.04-cpp.dockerfile b/src/arrow/ci/docker/ubuntu-18.04-cpp.dockerfile new file mode 100644 index 000000000..0c05ac4ee --- /dev/null +++ b/src/arrow/ci/docker/ubuntu-18.04-cpp.dockerfile @@ -0,0 +1,130 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied.
See the License for the +# specific language governing permissions and limitations +# under the License. + +ARG base=amd64/ubuntu:18.04 +FROM ${base} + +# pipefail is enabled for proper error detection in the `wget | apt-key add` +# step +SHELL ["/bin/bash", "-o", "pipefail", "-c"] + +ENV DEBIAN_FRONTEND=noninteractive + +# Installs LLVM toolchain, for Gandiva and testing other compilers +# +# Note that this is installed before the base packages to improve iteration +# while debugging package list with docker build. +ARG clang_tools +ARG llvm +RUN apt-get update -y -q && \ + apt-get install -y -q --no-install-recommends \ + apt-transport-https \ + ca-certificates \ + gnupg \ + wget && \ + wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add - && \ + echo "deb https://apt.llvm.org/bionic/ llvm-toolchain-bionic-${llvm} main" > \ + /etc/apt/sources.list.d/llvm.list && \ + if [ "${clang_tools}" != "${llvm}" -a "${clang_tools}" -ge 10 ]; then \ + echo "deb https://apt.llvm.org/bionic/ llvm-toolchain-bionic-${clang_tools} main" > \ + /etc/apt/sources.list.d/clang-tools.list; \ + fi && \ + apt-get update -y -q && \ + apt-get install -y -q --no-install-recommends \ + clang-${clang_tools} \ + clang-${llvm} \ + clang-format-${clang_tools} \ + clang-tidy-${clang_tools} \ + llvm-${llvm}-dev && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists* + +# Installs C++ toolchain and dependencies +RUN apt-get update -y -q && \ + apt-get install -y -q --no-install-recommends \ + autoconf \ + ca-certificates \ + ccache \ + cmake \ + g++ \ + gcc \ + gdb \ + git \ + libbenchmark-dev \ + libboost-filesystem-dev \ + libboost-system-dev \ + libbrotli-dev \ + libbz2-dev \ + libc-ares-dev \ + libcurl4-openssl-dev \ + libgflags-dev \ + libgoogle-glog-dev \ + liblz4-dev \ + libprotobuf-dev \ + libprotoc-dev \ + libre2-dev \ + libsnappy-dev \ + libssl-dev \ + ninja-build \ + pkg-config \ + protobuf-compiler \ + rapidjson-dev \ + tzdata && \ + apt-get clean && \ + rm -rf 
/var/lib/apt/lists* + +# Prioritize system packages and local installation +# The following dependencies will be downloaded due to missing/invalid packages +# provided by the distribution: +# - libc-ares-dev does not install CMake config files +# - flatbuffer is not packaged +# - libgtest-dev only provide sources +# - libprotobuf-dev only provide sources +# - thrift is too old +# - utf8proc is too old(v2.1.0) +# - s3 tests would require boost-asio that is included since Boost 1.66.0 +ENV ARROW_BUILD_TESTS=ON \ + ARROW_DATASET=ON \ + ARROW_DEPENDENCY_SOURCE=SYSTEM \ + ARROW_FLIGHT=OFF \ + ARROW_GANDIVA=ON \ + ARROW_HDFS=ON \ + ARROW_HOME=/usr/local \ + ARROW_INSTALL_NAME_RPATH=OFF \ + ARROW_NO_DEPRECATED_API=ON \ + ARROW_ORC=ON \ + ARROW_PARQUET=ON \ + ARROW_PLASMA=ON \ + ARROW_USE_ASAN=OFF \ + ARROW_USE_CCACHE=ON \ + ARROW_USE_TSAN=OFF \ + ARROW_USE_UBSAN=OFF \ + ARROW_WITH_BROTLI=ON \ + ARROW_WITH_BZ2=ON \ + ARROW_WITH_LZ4=ON \ + ARROW_WITH_SNAPPY=ON \ + ARROW_WITH_ZLIB=ON \ + ARROW_WITH_ZSTD=ON \ + AWSSDK_SOURCE=BUNDLED \ + GTest_SOURCE=BUNDLED \ + ORC_SOURCE=BUNDLED \ + PARQUET_BUILD_EXAMPLES=ON \ + PARQUET_BUILD_EXECUTABLES=ON \ + PATH=/usr/lib/ccache/:$PATH \ + Thrift_SOURCE=BUNDLED \ + utf8proc_SOURCE=BUNDLED \ + zstd_SOURCE=BUNDLED diff --git a/src/arrow/ci/docker/ubuntu-18.04-csharp.dockerfile b/src/arrow/ci/docker/ubuntu-18.04-csharp.dockerfile new file mode 100644 index 000000000..624ce259d --- /dev/null +++ b/src/arrow/ci/docker/ubuntu-18.04-csharp.dockerfile @@ -0,0 +1,22 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +ARG platform=bionic +ARG dotnet=3.1 +FROM mcr.microsoft.com/dotnet/core/sdk:${dotnet}-${platform} + +RUN dotnet tool install --tool-path /usr/local/bin sourcelink diff --git a/src/arrow/ci/docker/ubuntu-20.04-cpp-minimal.dockerfile b/src/arrow/ci/docker/ubuntu-20.04-cpp-minimal.dockerfile new file mode 100644 index 000000000..d7076b45b --- /dev/null +++ b/src/arrow/ci/docker/ubuntu-20.04-cpp-minimal.dockerfile @@ -0,0 +1,69 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +ARG base=amd64/ubuntu:20.04 +FROM ${base} +ARG arch + +SHELL ["/bin/bash", "-o", "pipefail", "-c"] + +RUN echo "debconf debconf/frontend select Noninteractive" | \ + debconf-set-selections + +RUN apt-get update -y -q && \ + apt-get install -y -q \ + build-essential \ + ccache \ + cmake \ + git \ + libssl-dev \ + libcurl4-openssl-dev \ + python3-pip \ + wget && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists* + +COPY ci/scripts/install_minio.sh /arrow/ci/scripts/ +RUN /arrow/ci/scripts/install_minio.sh ${arch} linux latest /usr/local +COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts/ +RUN /arrow/ci/scripts/install_gcs_testbench.sh ${arch} default + +ENV ARROW_BUILD_TESTS=ON \ + ARROW_DATASET=ON \ + ARROW_FLIGHT=ON \ + ARROW_GANDIVA=OFF \ + ARROW_GCS=ON \ + ARROW_HDFS=ON \ + ARROW_HOME=/usr/local \ + ARROW_INSTALL_NAME_RPATH=OFF \ + ARROW_NO_DEPRECATED_API=ON \ + ARROW_ORC=ON \ + ARROW_PARQUET=ON \ + ARROW_PLASMA=ON \ + ARROW_S3=ON \ + ARROW_USE_CCACHE=ON \ + ARROW_WITH_BROTLI=ON \ + ARROW_WITH_BZ2=ON \ + ARROW_WITH_LZ4=ON \ + ARROW_WITH_SNAPPY=ON \ + ARROW_WITH_ZLIB=ON \ + ARROW_WITH_ZSTD=ON \ + CMAKE_GENERATOR="Unix Makefiles" \ + PARQUET_BUILD_EXAMPLES=ON \ + PARQUET_BUILD_EXECUTABLES=ON \ + PATH=/usr/lib/ccache/:$PATH \ + PYTHON=python3 diff --git a/src/arrow/ci/docker/ubuntu-20.04-cpp.dockerfile b/src/arrow/ci/docker/ubuntu-20.04-cpp.dockerfile new file mode 100644 index 000000000..de872da9a --- /dev/null +++ b/src/arrow/ci/docker/ubuntu-20.04-cpp.dockerfile @@ -0,0 +1,143 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +ARG base=amd64/ubuntu:20.04 +FROM ${base} +ARG arch + +SHELL ["/bin/bash", "-o", "pipefail", "-c"] + +RUN echo "debconf debconf/frontend select Noninteractive" | \ + debconf-set-selections + +# Installs LLVM toolchain, for Gandiva and testing other compilers +# +# Note that this is installed before the base packages to improve iteration +# while debugging package list with docker build. +ARG clang_tools +ARG llvm +RUN if [ "${llvm}" -gt "10" ]; then \ + apt-get update -y -q && \ + apt-get install -y -q --no-install-recommends \ + apt-transport-https \ + ca-certificates \ + gnupg \ + wget && \ + wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add - && \ + echo "deb https://apt.llvm.org/focal/ llvm-toolchain-focal-${llvm} main" > \ + /etc/apt/sources.list.d/llvm.list && \ + if [ "${clang_tools}" != "${llvm}" -a "${clang_tools}" -gt 10 ]; then \ + echo "deb https://apt.llvm.org/focal/ llvm-toolchain-focal-${clang_tools} main" > \ + /etc/apt/sources.list.d/clang-tools.list; \ + fi \ + fi && \ + apt-get update -y -q && \ + apt-get install -y -q --no-install-recommends \ + clang-${clang_tools} \ + clang-${llvm} \ + clang-format-${clang_tools} \ + clang-tidy-${clang_tools} \ + llvm-${llvm}-dev && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists* + +# Installs C++ toolchain and dependencies +RUN apt-get update -y -q && \ + apt-get install -y -q --no-install-recommends \ + autoconf \ + ca-certificates \ + ccache \ + cmake \ + g++ \ + gcc \ + gdb \ + git \ + libbenchmark-dev \ + libboost-filesystem-dev \ + libboost-system-dev \ + libbrotli-dev \ + 
libbz2-dev \ + libc-ares-dev \ + libcurl4-openssl-dev \ + libgflags-dev \ + libgoogle-glog-dev \ + liblz4-dev \ + libprotobuf-dev \ + libprotoc-dev \ + libre2-dev \ + libsnappy-dev \ + libssl-dev \ + libthrift-dev \ + libutf8proc-dev \ + libzstd-dev \ + make \ + ninja-build \ + pkg-config \ + protobuf-compiler \ + python3-pip \ + rapidjson-dev \ + tzdata \ + wget && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists* + +COPY ci/scripts/install_minio.sh /arrow/ci/scripts/ +RUN /arrow/ci/scripts/install_minio.sh ${arch} linux latest /usr/local +COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts/ +RUN /arrow/ci/scripts/install_gcs_testbench.sh ${arch} default + +# Prioritize system packages and local installation +# The following dependencies will be downloaded due to missing/invalid packages +# provided by the distribution: +# - libc-ares-dev does not install CMake config files +# - flatbuffer is not packaged +# - libgtest-dev only provide sources +# - libprotobuf-dev only provide sources +ENV ARROW_BUILD_TESTS=ON \ + ARROW_DEPENDENCY_SOURCE=SYSTEM \ + ARROW_DATASET=ON \ + ARROW_FLIGHT=OFF \ + ARROW_GANDIVA=ON \ + ARROW_GCS=ON \ + ARROW_HDFS=ON \ + ARROW_HOME=/usr/local \ + ARROW_INSTALL_NAME_RPATH=OFF \ + ARROW_NO_DEPRECATED_API=ON \ + ARROW_ORC=ON \ + ARROW_PARQUET=ON \ + ARROW_PLASMA=ON \ + ARROW_S3=ON \ + ARROW_USE_ASAN=OFF \ + ARROW_USE_CCACHE=ON \ + ARROW_USE_UBSAN=OFF \ + ARROW_WITH_BROTLI=ON \ + ARROW_WITH_BZ2=ON \ + ARROW_WITH_LZ4=ON \ + ARROW_WITH_SNAPPY=ON \ + ARROW_WITH_ZLIB=ON \ + ARROW_WITH_ZSTD=ON \ + ASAN_SYMBOLIZER_PATH=/usr/lib/llvm-${llvm}/bin/llvm-symbolizer \ + AWSSDK_SOURCE=BUNDLED \ + google_cloud_cpp_storage_SOURCE=BUNDLED \ + GTest_SOURCE=BUNDLED \ + gRPC_SOURCE=BUNDLED \ + ORC_SOURCE=BUNDLED \ + PARQUET_BUILD_EXAMPLES=ON \ + PARQUET_BUILD_EXECUTABLES=ON \ + PATH=/usr/lib/ccache/:$PATH \ + Protobuf_SOURCE=BUNDLED \ + PYTHON=python3 diff --git a/src/arrow/ci/docker/ubuntu-20.10-cpp.dockerfile 
b/src/arrow/ci/docker/ubuntu-20.10-cpp.dockerfile new file mode 100644 index 000000000..59f5fa4c8 --- /dev/null +++ b/src/arrow/ci/docker/ubuntu-20.10-cpp.dockerfile @@ -0,0 +1,140 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +ARG base=amd64/ubuntu:20.10 +FROM ${base} +ARG arch + +SHELL ["/bin/bash", "-o", "pipefail", "-c"] + +RUN echo "debconf debconf/frontend select Noninteractive" | \ + debconf-set-selections + +# Installs LLVM toolchain, for Gandiva and testing other compilers +# +# Note that this is installed before the base packages to improve iteration +# while debugging package list with docker build. 
+ARG clang_tools +ARG llvm +RUN if [ "${llvm}" -gt "10" ]; then \ + apt-get update -y -q && \ + apt-get install -y -q --no-install-recommends \ + apt-transport-https \ + ca-certificates \ + gnupg \ + wget && \ + wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add - && \ + echo "deb https://apt.llvm.org/groovy/ llvm-toolchain-groovy-${llvm} main" > \ + /etc/apt/sources.list.d/llvm.list && \ + if [ "${clang_tools}" != "${llvm}" -a "${clang_tools}" -gt 10 ]; then \ + echo "deb https://apt.llvm.org/groovy/ llvm-toolchain-groovy-${clang_tools} main" > \ + /etc/apt/sources.list.d/clang-tools.list; \ + fi \ + fi && \ + apt-get update -y -q && \ + apt-get install -y -q --no-install-recommends \ + clang-${clang_tools} \ + clang-${llvm} \ + clang-format-${clang_tools} \ + clang-tidy-${clang_tools} \ + llvm-${llvm}-dev && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists* + +# Installs C++ toolchain and dependencies +RUN apt-get update -y -q && \ + apt-get install -y -q --no-install-recommends \ + autoconf \ + ca-certificates \ + ccache \ + cmake \ + g++ \ + gcc \ + gdb \ + git \ + libbenchmark-dev \ + libboost-filesystem-dev \ + libboost-system-dev \ + libbrotli-dev \ + libbz2-dev \ + libc-ares-dev \ + libcurl4-openssl-dev \ + libgflags-dev \ + libgoogle-glog-dev \ + libgrpc++-dev \ + liblz4-dev \ + libprotobuf-dev \ + libprotoc-dev \ + libre2-dev \ + libsnappy-dev \ + libssl-dev \ + libthrift-dev \ + libutf8proc-dev \ + libzstd-dev \ + make \ + ninja-build \ + pkg-config \ + protobuf-compiler \ + protobuf-compiler-grpc \ + python3-pip \ + rapidjson-dev \ + tzdata \ + wget && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists* + +COPY ci/scripts/install_minio.sh /arrow/ci/scripts/ +RUN /arrow/ci/scripts/install_minio.sh ${arch} linux latest /usr/local +COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts/ +RUN /arrow/ci/scripts/install_gcs_testbench.sh ${arch} default + +# Prioritize system packages and local installation +# The following dependencies 
will be downloaded due to missing/invalid packages +# provided by the distribution: +# - libc-ares-dev does not install CMake config files +# - flatbuffer is not packaged +# - libgtest-dev only provide sources +# - libprotobuf-dev only provide sources +ENV ARROW_BUILD_TESTS=ON \ + ARROW_DEPENDENCY_SOURCE=SYSTEM \ + ARROW_DATASET=ON \ + ARROW_FLIGHT=OFF \ + ARROW_GANDIVA=ON \ + ARROW_HDFS=ON \ + ARROW_HOME=/usr/local \ + ARROW_INSTALL_NAME_RPATH=OFF \ + ARROW_NO_DEPRECATED_API=ON \ + ARROW_ORC=ON \ + ARROW_PARQUET=ON \ + ARROW_PLASMA=ON \ + ARROW_S3=ON \ + ARROW_USE_ASAN=OFF \ + ARROW_USE_CCACHE=ON \ + ARROW_USE_UBSAN=OFF \ + ARROW_WITH_BROTLI=ON \ + ARROW_WITH_BZ2=ON \ + ARROW_WITH_LZ4=ON \ + ARROW_WITH_SNAPPY=ON \ + ARROW_WITH_ZLIB=ON \ + ARROW_WITH_ZSTD=ON \ + AWSSDK_SOURCE=BUNDLED \ + GTest_SOURCE=BUNDLED \ + ORC_SOURCE=BUNDLED \ + PARQUET_BUILD_EXAMPLES=ON \ + PARQUET_BUILD_EXECUTABLES=ON \ + PATH=/usr/lib/ccache/:$PATH \ + PYTHON=python3 diff --git a/src/arrow/ci/docker/ubuntu-21.04-cpp.dockerfile b/src/arrow/ci/docker/ubuntu-21.04-cpp.dockerfile new file mode 100644 index 000000000..9fc857c62 --- /dev/null +++ b/src/arrow/ci/docker/ubuntu-21.04-cpp.dockerfile @@ -0,0 +1,162 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +ARG base=amd64/ubuntu:20.04 +FROM ${base} +ARG arch + +SHELL ["/bin/bash", "-o", "pipefail", "-c"] + +RUN echo "debconf debconf/frontend select Noninteractive" | \ + debconf-set-selections + +# Installs LLVM toolchain, for Gandiva and testing other compilers +# +# Note that this is installed before the base packages to improve iteration +# while debugging package list with docker build. +ARG clang_tools +ARG llvm +RUN if [ "${llvm}" -gt "10" ]; then \ + apt-get update -y -q && \ + apt-get install -y -q --no-install-recommends \ + apt-transport-https \ + ca-certificates \ + gnupg \ + wget && \ + wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add - && \ + echo "deb https://apt.llvm.org/hirsute/ llvm-toolchain-hirsute-${llvm} main" > \ + /etc/apt/sources.list.d/llvm.list && \ + if [ "${clang_tools}" != "${llvm}" -a "${clang_tools}" -gt 10 ]; then \ + echo "deb https://apt.llvm.org/hirsute/ llvm-toolchain-hirsute-${clang_tools} main" > \ + /etc/apt/sources.list.d/clang-tools.list; \ + fi \ + fi && \ + apt-get update -y -q && \ + apt-get install -y -q --no-install-recommends \ + clang-${clang_tools} \ + clang-${llvm} \ + clang-format-${clang_tools} \ + clang-tidy-${clang_tools} \ + llvm-${llvm}-dev && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists* + +# Installs C++ toolchain and dependencies +RUN apt-get update -y -q && \ + apt-get install -y -q --no-install-recommends \ + autoconf \ + ca-certificates \ + ccache \ + cmake \ + gdb \ + git \ + libbenchmark-dev \ + libboost-filesystem-dev \ + libboost-system-dev \ + libbrotli-dev \ + libbz2-dev \ + libc-ares-dev \ + libcurl4-openssl-dev \ + libgflags-dev \ + libgoogle-glog-dev \ + libgrpc++-dev \ + liblz4-dev \ + libprotobuf-dev \ + libprotoc-dev \ + libre2-dev \ + libsnappy-dev \ + libssl-dev \ + libthrift-dev \ + libutf8proc-dev \ + libzstd-dev \ + make \ + ninja-build \ + pkg-config \ + protobuf-compiler \ + protobuf-compiler-grpc \ + python3-pip \ + rapidjson-dev \ + tzdata \ + wget && \ + 
apt-get clean && \ + rm -rf /var/lib/apt/lists* + +COPY ci/scripts/install_minio.sh /arrow/ci/scripts/ +RUN /arrow/ci/scripts/install_minio.sh ${arch} linux latest /usr/local +COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts/ +RUN /arrow/ci/scripts/install_gcs_testbench.sh ${arch} default + +# Prioritize system packages and local installation +# The following dependencies will be downloaded due to missing/invalid packages +# provided by the distribution: +# - libc-ares-dev does not install CMake config files +# - flatbuffer is not packaged +# - libgtest-dev only provide sources +# - libprotobuf-dev only provide sources +ENV ARROW_BUILD_TESTS=ON \ + ARROW_DEPENDENCY_SOURCE=SYSTEM \ + ARROW_DATASET=ON \ + ARROW_FLIGHT=OFF \ + ARROW_GANDIVA=ON \ + ARROW_HDFS=ON \ + ARROW_HOME=/usr/local \ + ARROW_INSTALL_NAME_RPATH=OFF \ + ARROW_NO_DEPRECATED_API=ON \ + ARROW_ORC=ON \ + ARROW_PARQUET=ON \ + ARROW_PLASMA=ON \ + ARROW_S3=ON \ + ARROW_USE_ASAN=OFF \ + ARROW_USE_CCACHE=ON \ + ARROW_USE_UBSAN=OFF \ + ARROW_WITH_BROTLI=ON \ + ARROW_WITH_BZ2=ON \ + ARROW_WITH_LZ4=ON \ + ARROW_WITH_SNAPPY=ON \ + ARROW_WITH_ZLIB=ON \ + ARROW_WITH_ZSTD=ON \ + AWSSDK_SOURCE=BUNDLED \ + GTest_SOURCE=BUNDLED \ + ORC_SOURCE=BUNDLED \ + PARQUET_BUILD_EXAMPLES=ON \ + PARQUET_BUILD_EXECUTABLES=ON \ + PATH=/usr/lib/ccache/:$PATH \ + PYTHON=python3 + +ARG gcc_version="" +RUN if [ "${gcc_version}" = "" ]; then \ + apt-get update -y -q && \ + apt-get install -y -q --no-install-recommends \ + g++ \ + gcc; \ + else \ + if [ "${gcc_version}" -gt "10" ]; then \ + apt-get update -y -q && \ + apt-get install -y -q --no-install-recommends software-properties-common && \ + add-apt-repository ppa:ubuntu-toolchain-r/volatile; \ + fi; \ + apt-get update -y -q && \ + apt-get install -y -q --no-install-recommends \ + g++-${gcc_version} \ + gcc-${gcc_version} && \ + update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-${gcc_version} 100 && \ + update-alternatives --install /usr/bin/g++ g++ 
/usr/bin/g++-${gcc_version} 100 && \ + update-alternatives --install /usr/bin/cc cc /usr/bin/gcc 100 && \ + update-alternatives --set cc /usr/bin/gcc && \ + update-alternatives --install /usr/bin/c++ c++ /usr/bin/g++ 100 && \ + update-alternatives --set c++ /usr/bin/g++; \ + fi diff --git a/src/arrow/ci/etc/hdfs-site.xml b/src/arrow/ci/etc/hdfs-site.xml new file mode 100644 index 000000000..97214337f --- /dev/null +++ b/src/arrow/ci/etc/hdfs-site.xml @@ -0,0 +1,52 @@ +<?xml version="1.0" encoding="UTF-8"?> +<?xml-stylesheet type="text/xsl" href="configuration.xsl"?> +<!-- + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. See accompanying LICENSE file. +--> + +<!-- This is the client configuration for the HDFS integration tests. 
--> + +<configuration> + <property> + <name>dfs.replication</name> + <value>2</value> + </property> + <property> + <name>dfs.datanode.data.dir</name> + <value>file:///data/dfs/data</value> + </property> + <property> + <name>dfs.namenode.name.dir</name> + <value>file:///data/dfs/name</value> + </property> + <property> + <name>dfs.namenode.checkpoint.dir</name> + <value>file:///data/dfs/namesecondary </value> + </property> + <property> + <name>dfs.namenode.datanode.registration.ip-hostname-check</name> + <value>false</value> + </property> + <property> + <name>dfs.default.replica</name> + <value>1</value> + </property> + <property> + <name>dfs.support.append</name> + <value>true</value> + </property> + <property> + <name>dfs.client.block.write.replace-datanode-on-failure.enable</name> + <value>false</value> + </property> +</configuration> diff --git a/src/arrow/ci/etc/rprofile b/src/arrow/ci/etc/rprofile new file mode 100644 index 000000000..5ef1dca8f --- /dev/null +++ b/src/arrow/ci/etc/rprofile @@ -0,0 +1,62 @@ + local({ + .pick_cran <- function() { + # Return a CRAN repo URL, preferring RSPM binaries if available for this OS + rspm_template <- "https://packagemanager.rstudio.com/cran/__linux__/%s/latest" + supported_os <- c("focal", "xenial", "bionic", "centos7", "centos8", "opensuse42", "opensuse15", "opensuse152") + + if (nzchar(Sys.which("lsb_release"))) { + os <- tolower(system("lsb_release -cs", intern = TRUE)) + if (os %in% supported_os) { + return(sprintf(rspm_template, os)) + } + } + if (file.exists("/etc/os-release")) { + os_release <- readLines("/etc/os-release") + vals <- sub("^.*=(.*)$", "\\1", os_release) + os <- intersect(vals, supported_os) + if (length(os)) { + # e.g. 
"bionic" + return(sprintf(rspm_template, os)) + } else { + names(vals) <- sub("^(.*)=.*$", "\\1", os_release) + if (vals["ID"] == "opensuse") { + version <- sub('^"?([0-9]+).*"?.*$', "\\1", vals["VERSION_ID"]) + os <- paste0("opensuse", version) + if (os %in% supported_os) { + return(sprintf(rspm_template, os)) + } + } + } + } + if (file.exists("/etc/system-release")) { + # Something like "CentOS Linux release 7.7.1908 (Core)" + system_release <- tolower(utils::head(readLines("/etc/system-release"), 1)) + # Extract from that the distro and the major version number + os <- sub("^([a-z]+) .* ([0-9]+).*$", "\\1\\2", system_release) + if (os %in% supported_os) { + return(sprintf(rspm_template, os)) + } + } + + return("https://cloud.r-project.org") + } + + options( + Ncpus = parallel::detectCores(), + repos = tryCatch(.pick_cran(), error = function(e) "https://cloud.r-project.org"), + HTTPUserAgent = sprintf( + 'R/%s R (%s)', + getRversion(), + paste(getRversion(), R.version$platform, R.version$arch, R.version$os) + ) + ) + + # there's a bug in 3.5 that will warn/error on these, so only set it around that + if (getRversion() >= "3.6.0" || getRversion() < "3.5.0") { + options( + warnPartialMatchAttr = TRUE, + warnPartialMatchDollar = TRUE, + warnPartialMatchArgs = TRUE + ) + } +}) diff --git a/src/arrow/ci/etc/valgrind-cran.supp b/src/arrow/ci/etc/valgrind-cran.supp new file mode 100644 index 000000000..4d2922026 --- /dev/null +++ b/src/arrow/ci/etc/valgrind-cran.supp @@ -0,0 +1,34 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +{ + # `testthat::skip()`s cause a valgrind error that does not show up on CRAN. + <testthat_skip_error> + Memcheck:Cond + fun:gregexpr_Regexc + fun:do_regexpr + fun:bcEval + fun:Rf_eval + fun:R_execClosure + fun:Rf_applyClosure + fun:bcEval + fun:Rf_eval + fun:forcePromise + fun:FORCE_PROMISE + fun:getvar + fun:bcEval +} diff --git a/src/arrow/ci/scripts/PKGBUILD b/src/arrow/ci/scripts/PKGBUILD new file mode 100644 index 000000000..975d1514f --- /dev/null +++ b/src/arrow/ci/scripts/PKGBUILD @@ -0,0 +1,138 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +_realname=arrow +pkgbase=mingw-w64-${_realname} +pkgname="${MINGW_PACKAGE_PREFIX}-${_realname}" +pkgver=6.0.1 +pkgrel=8000 +pkgdesc="Apache Arrow is a cross-language development platform for in-memory data (mingw-w64)" +arch=("any") +url="https://arrow.apache.org/" +license=("Apache-2.0") +depends=("${MINGW_PACKAGE_PREFIX}-aws-sdk-cpp" + "${MINGW_PACKAGE_PREFIX}-libutf8proc" + "${MINGW_PACKAGE_PREFIX}-re2" + "${MINGW_PACKAGE_PREFIX}-thrift" + "${MINGW_PACKAGE_PREFIX}-snappy" + "${MINGW_PACKAGE_PREFIX}-zlib" + "${MINGW_PACKAGE_PREFIX}-lz4" + "${MINGW_PACKAGE_PREFIX}-zstd") +makedepends=("${MINGW_PACKAGE_PREFIX}-ccache" + "${MINGW_PACKAGE_PREFIX}-cmake" + "${MINGW_PACKAGE_PREFIX}-gcc") +options=("staticlibs" "strip" "!buildflags") + +# For installing from a local checkout, set source_dir to . and don't include +# a "source" param below +source_dir="$ARROW_HOME" +# else +# source_dir=apache-${_realname}-${pkgver} + +# For released version: +#source=("https://archive.apache.org/dist/arrow/arrow-${pkgver}/apache-arrow-${pkgver}.tar.gz") +#sha256sums=("ac2a77dd9168e9892e432c474611e86ded0be6dfe15f689c948751d37f81391a") +# For github dev version: +# Append `#commit=54b1b2f688e5e84b4c664b1e12a95f93b94ab2f3` to the URL to select a revision +# source=("${source_dir}"::"git+https://github.com/apache/arrow") +# sha256sums=("SKIP") +# source_dir="${APPVEYOR_BUILD_FOLDER}/${source_dir}" + +cpp_build_dir=build-${CARCH}-cpp + +pkgver() { + # The only purpose of this here is to cause the job to error if the + # version in pkgver is different from what is in r/DESCRIPTION + grep Version "${source_dir}/r/DESCRIPTION" | cut -d " " -f 2 +} + +build() { + ARROW_CPP_DIR="${source_dir}/cpp" + [[ -d ${cpp_build_dir} ]] && rm -rf ${cpp_build_dir} + mkdir -p ${cpp_build_dir} + pushd ${cpp_build_dir} + + # The Rtools libutf8proc is a static lib, but Findutf8proc.cmake doesn't + # set the appropriate compiler definition. 
+ export CPPFLAGS="-DUTF8PROC_STATIC" + + # This is the difference between rtools-packages and rtools-backports + # Remove this when submitting to rtools-packages + if [ "$RTOOLS_VERSION" = "35" ]; then + export CC="/C/Rtools${MINGW_PREFIX/mingw/mingw_}/bin/gcc" + export CXX="/C/Rtools${MINGW_PREFIX/mingw/mingw_}/bin/g++" + export PATH="/C/Rtools${MINGW_PREFIX/mingw/mingw_}/bin:$PATH" + export CPPFLAGS="${CPPFLAGS} -I${MINGW_PREFIX}/include" + export LIBS="-L${MINGW_PREFIX}/libs" + export ARROW_S3=OFF + export ARROW_WITH_RE2=OFF + # Without this, some dataset functionality segfaults + export CMAKE_UNITY_BUILD=ON + else + export ARROW_S3=ON + export ARROW_WITH_RE2=ON + # Without this, some compute functionality segfaults in tests + export CMAKE_UNITY_BUILD=OFF + fi + + MSYS2_ARG_CONV_EXCL="-DCMAKE_INSTALL_PREFIX=" \ + ${MINGW_PREFIX}/bin/cmake.exe \ + ${ARROW_CPP_DIR} \ + -G "MSYS Makefiles" \ + -DARROW_BUILD_SHARED=OFF \ + -DARROW_BUILD_STATIC=ON \ + -DARROW_BUILD_UTILITIES=OFF \ + -DARROW_COMPUTE=ON \ + -DARROW_CSV=ON \ + -DARROW_DATASET=ON \ + -DARROW_FILESYSTEM=ON \ + -DARROW_HDFS=OFF \ + -DARROW_JEMALLOC=OFF \ + -DARROW_JSON=ON \ + -DARROW_LZ4_USE_SHARED=OFF \ + -DARROW_MIMALLOC=ON \ + -DARROW_PACKAGE_PREFIX="${MINGW_PREFIX}" \ + -DARROW_PARQUET=ON \ + -DARROW_S3="${ARROW_S3}" \ + -DARROW_SNAPPY_USE_SHARED=OFF \ + -DARROW_USE_GLOG=OFF \ + -DARROW_WITH_LZ4=ON \ + -DARROW_WITH_RE2="${ARROW_WITH_RE2}" \ + -DARROW_WITH_SNAPPY=ON \ + -DARROW_WITH_ZLIB=ON \ + -DARROW_WITH_ZSTD=ON \ + -DARROW_ZSTD_USE_SHARED=OFF \ + -DARROW_CXXFLAGS="${CPPFLAGS}" \ + -DCMAKE_BUILD_TYPE="release" \ + -DCMAKE_INSTALL_PREFIX=${MINGW_PREFIX} \ + -DCMAKE_UNITY_BUILD=${CMAKE_UNITY_BUILD} \ + -DCMAKE_VERBOSE_MAKEFILE=ON + + make -j3 + popd +} + +package() { + make -C ${cpp_build_dir} DESTDIR="${pkgdir}" install + + local PREFIX_DEPS=$(cygpath -am ${MINGW_PREFIX}) + pushd "${pkgdir}${MINGW_PREFIX}/lib/pkgconfig" + for pc in *.pc; do + sed -s "s|${PREFIX_DEPS}|${MINGW_PREFIX}|g" -i $pc + done 
+ popd +} diff --git a/src/arrow/ci/scripts/c_glib_build.sh b/src/arrow/ci/scripts/c_glib_build.sh new file mode 100755 index 000000000..ce3cea18e --- /dev/null +++ b/src/arrow/ci/scripts/c_glib_build.sh @@ -0,0 +1,45 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +set -ex + +source_dir=${1}/c_glib +build_dir=${2}/c_glib +: ${ARROW_GLIB_GTK_DOC:=false} +: ${ARROW_GLIB_DEVELOPMENT_MODE:=false} + +export PKG_CONFIG_PATH=${ARROW_HOME}/lib/pkgconfig + +export CFLAGS="-DARROW_NO_DEPRECATED_API" +export CXXFLAGS="-DARROW_NO_DEPRECATED_API" + +mkdir -p ${build_dir} + +# Build with Meson +meson --prefix=$ARROW_HOME \ + --libdir=lib \ + -Ddevelopment_mode=${ARROW_GLIB_DEVELOPMENT_MODE} \ + -Dgtk_doc=${ARROW_GLIB_GTK_DOC} \ + ${build_dir} \ + ${source_dir} + +pushd ${build_dir} +ninja +ninja install +popd diff --git a/src/arrow/ci/scripts/c_glib_test.sh b/src/arrow/ci/scripts/c_glib_test.sh new file mode 100755 index 000000000..25c54138e --- /dev/null +++ b/src/arrow/ci/scripts/c_glib_test.sh @@ -0,0 +1,48 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +set -ex + +source_dir=${1}/c_glib +build_dir=${2}/c_glib + +export LD_LIBRARY_PATH=${ARROW_HOME}/lib:${LD_LIBRARY_PATH} +export PKG_CONFIG_PATH=${ARROW_HOME}/lib/pkgconfig +export GI_TYPELIB_PATH=${ARROW_HOME}/lib/girepository-1.0 + +pushd ${source_dir} + +ruby test/run-test.rb + +if [[ "$(uname -s)" == "Linux" ]]; then + # TODO(kszucs): on osx it fails to load 'lgi.corelgilua51' despite that lgi + # was installed by luarocks + pushd example/lua + lua write-batch.lua + lua read-batch.lua + lua write-stream.lua + lua read-stream.lua + popd +fi + +popd + +pushd ${build_dir} +example/extension-type +popd diff --git a/src/arrow/ci/scripts/ccache_setup.sh b/src/arrow/ci/scripts/ccache_setup.sh new file mode 100755 index 000000000..f77fbb373 --- /dev/null +++ b/src/arrow/ci/scripts/ccache_setup.sh @@ -0,0 +1,26 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +set -eux + +echo "ARROW_USE_CCACHE=ON" >> $GITHUB_ENV +echo "CCACHE_COMPILERCHECK=content" >> $GITHUB_ENV +echo "CCACHE_COMPRESS=1" >> $GITHUB_ENV +echo "CCACHE_COMPRESSLEVEL=6" >> $GITHUB_ENV +echo "CCACHE_MAXSIZE=500M" >> $GITHUB_ENV diff --git a/src/arrow/ci/scripts/cpp_build.sh b/src/arrow/ci/scripts/cpp_build.sh new file mode 100755 index 000000000..a11dd23b7 --- /dev/null +++ b/src/arrow/ci/scripts/cpp_build.sh @@ -0,0 +1,164 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +set -ex + +source_dir=${1}/cpp +build_dir=${2}/cpp +with_docs=${3:-false} + +: ${ARROW_USE_CCACHE:=OFF} + +# TODO(kszucs): consider to move these to CMake +if [ ! 
-z "${CONDA_PREFIX}" ]; then + echo -e "===\n=== Conda environment for build\n===" + conda list + + export CMAKE_ARGS="${CMAKE_ARGS} -DCMAKE_AR=${AR} -DCMAKE_RANLIB=${RANLIB}" + export ARROW_GANDIVA_PC_CXX_FLAGS=$(echo | ${CXX} -E -Wp,-v -xc++ - 2>&1 | grep '^ ' | awk '{print "-isystem;" substr($1, 1)}' | tr '\n' ';') +elif [ -x "$(command -v xcrun)" ]; then + export ARROW_GANDIVA_PC_CXX_FLAGS="-isysroot;$(xcrun --show-sdk-path)" +fi + +if [ "${ARROW_USE_CCACHE}" == "ON" ]; then + echo -e "===\n=== ccache statistics before build\n===" + ccache -s +fi + +if [ "${ARROW_USE_TSAN}" == "ON" ] && [ ! -x "${ASAN_SYMBOLIZER_PATH}" ]; then + echo -e "Invalid value for \$ASAN_SYMBOLIZER_PATH: ${ASAN_SYMBOLIZER_PATH}" + exit 1 +fi + +mkdir -p ${build_dir} +pushd ${build_dir} + +cmake -G "${CMAKE_GENERATOR:-Ninja}" \ + -DARROW_BOOST_USE_SHARED=${ARROW_BOOST_USE_SHARED:-ON} \ + -DARROW_BUILD_BENCHMARKS=${ARROW_BUILD_BENCHMARKS:-OFF} \ + -DARROW_BUILD_BENCHMARKS_REFERENCE=${ARROW_BUILD_BENCHMARKS:-OFF} \ + -DARROW_BUILD_EXAMPLES=${ARROW_BUILD_EXAMPLES:-OFF} \ + -DARROW_BUILD_INTEGRATION=${ARROW_BUILD_INTEGRATION:-OFF} \ + -DARROW_BUILD_SHARED=${ARROW_BUILD_SHARED:-ON} \ + -DARROW_BUILD_STATIC=${ARROW_BUILD_STATIC:-ON} \ + -DARROW_BUILD_TESTS=${ARROW_BUILD_TESTS:-OFF} \ + -DARROW_BUILD_UTILITIES=${ARROW_BUILD_UTILITIES:-ON} \ + -DARROW_COMPUTE=${ARROW_COMPUTE:-ON} \ + -DARROW_CSV=${ARROW_CSV:-ON} \ + -DARROW_CUDA=${ARROW_CUDA:-OFF} \ + -DARROW_CXXFLAGS=${ARROW_CXXFLAGS:-} \ + -DARROW_DATASET=${ARROW_DATASET:-ON} \ + -DARROW_ENGINE=${ARROW_ENGINE:-ON} \ + -DARROW_DEPENDENCY_SOURCE=${ARROW_DEPENDENCY_SOURCE:-AUTO} \ + -DARROW_EXTRA_ERROR_CONTEXT=${ARROW_EXTRA_ERROR_CONTEXT:-OFF} \ + -DARROW_ENABLE_TIMING_TESTS=${ARROW_ENABLE_TIMING_TESTS:-ON} \ + -DARROW_FILESYSTEM=${ARROW_FILESYSTEM:-ON} \ + -DARROW_FLIGHT=${ARROW_FLIGHT:-OFF} \ + -DARROW_FUZZING=${ARROW_FUZZING:-OFF} \ + -DARROW_GANDIVA_JAVA=${ARROW_GANDIVA_JAVA:-OFF} \ + 
-DARROW_GANDIVA_PC_CXX_FLAGS=${ARROW_GANDIVA_PC_CXX_FLAGS:-} \ + -DARROW_GANDIVA=${ARROW_GANDIVA:-OFF} \ + -DARROW_GCS=${ARROW_GCS:-OFF} \ + -DARROW_HDFS=${ARROW_HDFS:-ON} \ + -DARROW_HIVESERVER2=${ARROW_HIVESERVER2:-OFF} \ + -DARROW_INSTALL_NAME_RPATH=${ARROW_INSTALL_NAME_RPATH:-ON} \ + -DARROW_JEMALLOC=${ARROW_JEMALLOC:-ON} \ + -DARROW_JNI=${ARROW_JNI:-OFF} \ + -DARROW_JSON=${ARROW_JSON:-ON} \ + -DARROW_LARGE_MEMORY_TESTS=${ARROW_LARGE_MEMORY_TESTS:-OFF} \ + -DARROW_MIMALLOC=${ARROW_MIMALLOC:-OFF} \ + -DARROW_NO_DEPRECATED_API=${ARROW_NO_DEPRECATED_API:-OFF} \ + -DARROW_ORC=${ARROW_ORC:-OFF} \ + -DARROW_PARQUET=${ARROW_PARQUET:-OFF} \ + -DARROW_PLASMA_JAVA_CLIENT=${ARROW_PLASMA_JAVA_CLIENT:-OFF} \ + -DARROW_PLASMA=${ARROW_PLASMA:-OFF} \ + -DARROW_PYTHON=${ARROW_PYTHON:-OFF} \ + -DARROW_RUNTIME_SIMD_LEVEL=${ARROW_RUNTIME_SIMD_LEVEL:-MAX} \ + -DARROW_S3=${ARROW_S3:-OFF} \ + -DARROW_TEST_LINKAGE=${ARROW_TEST_LINKAGE:-shared} \ + -DARROW_TEST_MEMCHECK=${ARROW_TEST_MEMCHECK:-OFF} \ + -DARROW_USE_ASAN=${ARROW_USE_ASAN:-OFF} \ + -DARROW_USE_CCACHE=${ARROW_USE_CCACHE:-ON} \ + -DARROW_USE_GLOG=${ARROW_USE_GLOG:-OFF} \ + -DARROW_USE_LD_GOLD=${ARROW_USE_LD_GOLD:-OFF} \ + -DARROW_USE_PRECOMPILED_HEADERS=${ARROW_USE_PRECOMPILED_HEADERS:-OFF} \ + -DARROW_USE_STATIC_CRT=${ARROW_USE_STATIC_CRT:-OFF} \ + -DARROW_USE_TSAN=${ARROW_USE_TSAN:-OFF} \ + -DARROW_USE_UBSAN=${ARROW_USE_UBSAN:-OFF} \ + -DARROW_VERBOSE_THIRDPARTY_BUILD=${ARROW_VERBOSE_THIRDPARTY_BUILD:-OFF} \ + -DARROW_WITH_BROTLI=${ARROW_WITH_BROTLI:-OFF} \ + -DARROW_WITH_BZ2=${ARROW_WITH_BZ2:-OFF} \ + -DARROW_WITH_LZ4=${ARROW_WITH_LZ4:-OFF} \ + -DARROW_WITH_SNAPPY=${ARROW_WITH_SNAPPY:-OFF} \ + -DARROW_WITH_UTF8PROC=${ARROW_WITH_UTF8PROC:-ON} \ + -DARROW_WITH_ZLIB=${ARROW_WITH_ZLIB:-OFF} \ + -DARROW_WITH_ZSTD=${ARROW_WITH_ZSTD:-OFF} \ + -DAWSSDK_SOURCE=${AWSSDK_SOURCE:-} \ + -Dbenchmark_SOURCE=${benchmark_SOURCE:-} \ + -DBOOST_SOURCE=${BOOST_SOURCE:-} \ + -DBrotli_SOURCE=${Brotli_SOURCE:-} \ + 
-DBUILD_WARNING_LEVEL=${BUILD_WARNING_LEVEL:-CHECKIN} \ + -Dc-ares_SOURCE=${cares_SOURCE:-} \ + -DCMAKE_BUILD_TYPE=${ARROW_BUILD_TYPE:-debug} \ + -DCMAKE_C_FLAGS="${CFLAGS:-}" \ + -DCMAKE_CXX_FLAGS="${CXXFLAGS:-}" \ + -DCMAKE_INSTALL_LIBDIR=${CMAKE_INSTALL_LIBDIR:-lib} \ + -DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX:-${ARROW_HOME}} \ + -DCMAKE_UNITY_BUILD=${CMAKE_UNITY_BUILD:-OFF} \ + -Dgflags_SOURCE=${gflags_SOURCE:-} \ + -Dgoogle_cloud_cpp_storage_SOURCE=${google_cloud_cpp_storage_SOURCE:-} \ + -DgRPC_SOURCE=${gRPC_SOURCE:-} \ + -DGTest_SOURCE=${GTest_SOURCE:-} \ + -DLz4_SOURCE=${Lz4_SOURCE:-} \ + -DORC_SOURCE=${ORC_SOURCE:-} \ + -DPARQUET_BUILD_EXECUTABLES=${PARQUET_BUILD_EXECUTABLES:-OFF} \ + -DPARQUET_BUILD_EXAMPLES=${PARQUET_BUILD_EXAMPLES:-OFF} \ + -DPARQUET_REQUIRE_ENCRYPTION=${PARQUET_REQUIRE_ENCRYPTION:-ON} \ + -DProtobuf_SOURCE=${Protobuf_SOURCE:-} \ + -DRapidJSON_SOURCE=${RapidJSON_SOURCE:-} \ + -Dre2_SOURCE=${re2_SOURCE:-} \ + -DSnappy_SOURCE=${Snappy_SOURCE:-} \ + -DThrift_SOURCE=${Thrift_SOURCE:-} \ + -Dutf8proc_SOURCE=${utf8proc_SOURCE:-} \ + -Dzstd_SOURCE=${zstd_SOURCE:-} \ + ${CMAKE_ARGS} \ + ${source_dir} + +if [ ! -z "${CPP_MAKE_PARALLELISM}" ]; then + time cmake --build . --target install -- -j${CPP_MAKE_PARALLELISM} +else + time cmake --build . --target install +fi + +popd + +if [ -x "$(command -v ldconfig)" ]; then + ldconfig +fi + +if [ "${ARROW_USE_CCACHE}" == "ON" ]; then + echo -e "===\n=== ccache statistics after build\n===" + ccache -s +fi + +if [ "${with_docs}" == "true" ]; then + pushd ${source_dir}/apidoc + doxygen + popd +fi diff --git a/src/arrow/ci/scripts/cpp_test.sh b/src/arrow/ci/scripts/cpp_test.sh new file mode 100755 index 000000000..822557f25 --- /dev/null +++ b/src/arrow/ci/scripts/cpp_test.sh @@ -0,0 +1,118 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +set -ex + +if [[ $# < 2 ]]; then + echo "Usage: $0 <Arrow dir> <build dir> [ctest args ...]" + exit 1 +fi + +arrow_dir=${1}; shift +build_dir=${1}/cpp; shift +source_dir=${arrow_dir}/cpp +binary_output_dir=${build_dir}/${ARROW_BUILD_TYPE:-debug} + +export ARROW_TEST_DATA=${arrow_dir}/testing/data +export PARQUET_TEST_DATA=${source_dir}/submodules/parquet-testing/data +export LD_LIBRARY_PATH=${ARROW_HOME}/${CMAKE_INSTALL_LIBDIR:-lib}:${LD_LIBRARY_PATH} + +# By default, aws-sdk tries to contact a non-existing local ip host +# to retrieve metadata. Disable this so that S3FileSystem tests run faster. +export AWS_EC2_METADATA_DISABLED=TRUE + +ctest_options=() +case "$(uname)" in + Linux) + n_jobs=$(nproc) + ;; + Darwin) + n_jobs=$(sysctl -n hw.ncpu) + ;; + MINGW*) + n_jobs=${NUMBER_OF_PROCESSORS:-1} + # TODO: Enable these crashed tests. 
+ # https://issues.apache.org/jira/browse/ARROW-9072 + exclude_tests="gandiva-internals-test" + exclude_tests="${exclude_tests}|gandiva-projector-test" + exclude_tests="${exclude_tests}|gandiva-utf8-test" + if [ "${MSYSTEM}" = "MINGW32" ]; then + exclude_tests="${exclude_tests}|gandiva-projector-test" + exclude_tests="${exclude_tests}|gandiva-binary-test" + exclude_tests="${exclude_tests}|gandiva-boolean-expr-test" + exclude_tests="${exclude_tests}|gandiva-date-time-test" + exclude_tests="${exclude_tests}|gandiva-decimal-single-test" + exclude_tests="${exclude_tests}|gandiva-decimal-test" + exclude_tests="${exclude_tests}|gandiva-filter-project-test" + exclude_tests="${exclude_tests}|gandiva-filter-test" + exclude_tests="${exclude_tests}|gandiva-hash-test" + exclude_tests="${exclude_tests}|gandiva-if-expr-test" + exclude_tests="${exclude_tests}|gandiva-in-expr-test" + exclude_tests="${exclude_tests}|gandiva-literal-test" + exclude_tests="${exclude_tests}|gandiva-null-validity-test" + fi + ctest_options+=(--exclude-regex "${exclude_tests}") + ;; + *) + n_jobs=${NPROC:-1} + ;; +esac + +pushd ${build_dir} + +if ! which python > /dev/null 2>&1; then + export PYTHON=python3 +fi +ctest \ + --label-regex unittest \ + --output-on-failure \ + --parallel ${n_jobs} \ + --timeout 300 \ + "${ctest_options[@]}" \ + $@ + +if [ "${ARROW_BUILD_EXAMPLES}" == "ON" ]; then + examples=$(find ${binary_output_dir} -executable -name "*example") + if [ "${examples}" == "" ]; then + echo "==================" + echo "No examples found!" 
+ echo "==================" + exit 1 + fi + for ex in ${examples} + do + echo "==================" + echo "Executing ${ex}" + echo "==================" + ${ex} + done +fi + +if [ "${ARROW_FUZZING}" == "ON" ]; then + # Fuzzing regression tests + ${binary_output_dir}/arrow-ipc-stream-fuzz ${ARROW_TEST_DATA}/arrow-ipc-stream/crash-* + ${binary_output_dir}/arrow-ipc-stream-fuzz ${ARROW_TEST_DATA}/arrow-ipc-stream/*-testcase-* + ${binary_output_dir}/arrow-ipc-file-fuzz ${ARROW_TEST_DATA}/arrow-ipc-file/*-testcase-* + ${binary_output_dir}/arrow-ipc-tensor-stream-fuzz ${ARROW_TEST_DATA}/arrow-ipc-tensor-stream/*-testcase-* + if [ "${ARROW_PARQUET}" == "ON" ]; then + ${binary_output_dir}/parquet-arrow-fuzz ${ARROW_TEST_DATA}/parquet/fuzzing/*-testcase-* + fi +fi + +popd diff --git a/src/arrow/ci/scripts/csharp_build.sh b/src/arrow/ci/scripts/csharp_build.sh new file mode 100755 index 000000000..5a3976794 --- /dev/null +++ b/src/arrow/ci/scripts/csharp_build.sh @@ -0,0 +1,26 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +set -ex + +source_dir=${1}/csharp + +pushd ${source_dir} +dotnet build +popd diff --git a/src/arrow/ci/scripts/csharp_pack.sh b/src/arrow/ci/scripts/csharp_pack.sh new file mode 100755 index 000000000..e9dfc664e --- /dev/null +++ b/src/arrow/ci/scripts/csharp_pack.sh @@ -0,0 +1,26 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +set -eux + +source_dir=${1}/csharp + +pushd ${source_dir} +dotnet pack -c Release +popd diff --git a/src/arrow/ci/scripts/csharp_test.sh b/src/arrow/ci/scripts/csharp_test.sh new file mode 100755 index 000000000..9e4e35dd4 --- /dev/null +++ b/src/arrow/ci/scripts/csharp_test.sh @@ -0,0 +1,29 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +set -ex + +source_dir=${1}/csharp + +pushd ${source_dir} +dotnet test +for pdb in artifacts/Apache.Arrow/*/*/Apache.Arrow.pdb; do + sourcelink test ${pdb} +done +popd diff --git a/src/arrow/ci/scripts/docs_build.sh b/src/arrow/ci/scripts/docs_build.sh new file mode 100755 index 000000000..e6ee768ee --- /dev/null +++ b/src/arrow/ci/scripts/docs_build.sh @@ -0,0 +1,48 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +set -ex + +arrow_dir=${1} +build_dir=${2}/docs + +export LD_LIBRARY_PATH=${ARROW_HOME}/lib:${LD_LIBRARY_PATH} +export PKG_CONFIG_PATH=${ARROW_HOME}/lib/pkgconfig:${PKG_CONFIG_PATH} +export GI_TYPELIB_PATH=${ARROW_HOME}/lib/girepository-1.0 +export CFLAGS="-DARROW_NO_DEPRECATED_API" +export CXXFLAGS="-DARROW_NO_DEPRECATED_API" + +ncpus=$(python3 -c "import os; print(os.cpu_count())") + +# Sphinx docs +sphinx-build -b html -j ${ncpus} ${arrow_dir}/docs/source ${build_dir} + +# C++ - original doxygen +# rsync -a ${arrow_dir}/cpp/apidoc/ ${build_dir}/cpp + +# R +rsync -a ${arrow_dir}/r/docs/ ${build_dir}/r + +# C GLib +rsync -a ${ARROW_HOME}/share/gtk-doc/html/ ${build_dir}/c_glib + +# Java +rsync -a ${arrow_dir}/java/target/site/apidocs/ ${build_dir}/java/reference + +# Javascript +rsync -a ${arrow_dir}/js/doc/ ${build_dir}/js diff --git a/src/arrow/ci/scripts/go_build.sh b/src/arrow/ci/scripts/go_build.sh new file mode 100755 index 000000000..267f78e59 --- /dev/null +++ b/src/arrow/ci/scripts/go_build.sh @@ -0,0 +1,40 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +set -ex + +source_dir=${1}/go + +pushd ${source_dir}/arrow + +if [[ -n "${ARROW_GO_TESTCGO}" ]]; then + TAGS="-tags ccalloc" +fi + +go get -d -t -v ./... +go install $TAGS -v ./... + +popd + +pushd ${source_dir}/parquet + +go get -d -t -v ./... +go install -v ./... + +popd diff --git a/src/arrow/ci/scripts/go_cgo_python_test.sh b/src/arrow/ci/scripts/go_cgo_python_test.sh new file mode 100755 index 000000000..5f2032fba --- /dev/null +++ b/src/arrow/ci/scripts/go_cgo_python_test.sh @@ -0,0 +1,45 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +set -ex + +source_dir=${1}/go + +pushd ${source_dir}/arrow/cdata/test + +case "$(uname)" in + Linux) + testlib="cgotest.so" + ;; + Darwin) + testlib="cgotest.so" + ;; + MINGW*) + testlib="cgotest.dll" + ;; +esac + +go build -tags cdata_test,assert -buildmode=c-shared -o $testlib . 
+ +python test_export_to_cgo.py + +rm $testlib +rm "${testlib%.*}.h" + +popd diff --git a/src/arrow/ci/scripts/go_test.sh b/src/arrow/ci/scripts/go_test.sh new file mode 100755 index 000000000..f7b2cd963 --- /dev/null +++ b/src/arrow/ci/scripts/go_test.sh @@ -0,0 +1,60 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +set -ex + +source_dir=${1}/go + +testargs="-race" +case "$(uname)" in + MINGW*) + # -race doesn't work on windows currently + testargs="" + ;; +esac + +if [[ "$(go env GOHOSTARCH)" = "s390x" ]]; then + testargs="" # -race not supported on s390x +fi + +pushd ${source_dir}/arrow + +TAGS="assert,test" +if [[ -n "${ARROW_GO_TESTCGO}" ]]; then + TAGS="${TAGS},ccalloc" +fi + + +# the cgo implementation of the c data interface requires the "test" +# tag in order to run its tests so that the testing functions implemented +# in .c files don't get included in non-test builds. + +for d in $(go list ./... | grep -v vendor); do + go test $testargs -tags $TAGS $d +done + +popd + +pushd ${source_dir}/parquet + +for d in $(go list ./... 
| grep -v vendor); do + go test $testargs -tags assert $d +done + +popd diff --git a/src/arrow/ci/scripts/install_conda.sh b/src/arrow/ci/scripts/install_conda.sh new file mode 100755 index 000000000..f4d313b63 --- /dev/null +++ b/src/arrow/ci/scripts/install_conda.sh @@ -0,0 +1,66 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +set -e + +declare -A archs +archs=([amd64]=x86_64 + [arm32v7]=armv7l + [ppc64le]=ppc64le + [i386]=x86) + +declare -A platforms +platforms=([windows]=Windows + [macos]=MacOSX + [linux]=Linux) + +if [ "$#" -ne 4 ]; then + echo "Usage: $0 <architecture> <platform> <version> <prefix>" + exit 1 +elif [[ -z ${archs[$1]} ]]; then + echo "Unexpected architecture: ${1}" + exit 1 +elif [[ -z ${platforms[$2]} ]]; then + echo "Unexpected platform: ${2}" + exit 1 +fi + +arch=${archs[$1]} +platform=${platforms[$2]} +version=$3 +prefix=$4 + +echo "Downloading Miniconda installer..." 
+wget -nv https://repo.continuum.io/miniconda/Miniconda3-${version}-${platform}-${arch}.sh -O /tmp/miniconda.sh +bash /tmp/miniconda.sh -b -p ${prefix} +rm /tmp/miniconda.sh + +# Like "conda init", but for POSIX sh rather than bash +ln -s ${prefix}/etc/profile.d/conda.sh /etc/profile.d/conda.sh + +# Configure +source /etc/profile.d/conda.sh +conda config --add channels conda-forge +conda config --set channel_priority strict +conda config --set show_channel_urls True +conda config --set remote_connect_timeout_secs 12 + +# Update and clean +conda update --all -y +conda clean --all -y diff --git a/src/arrow/ci/scripts/install_dask.sh b/src/arrow/ci/scripts/install_dask.sh new file mode 100755 index 000000000..954ce3249 --- /dev/null +++ b/src/arrow/ci/scripts/install_dask.sh @@ -0,0 +1,36 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +set -e + +if [ "$#" -ne 1 ]; then + echo "Usage: $0 <dask version>" + exit 1 +fi + +dask=$1 + +if [ "${dask}" = "master" ]; then + pip install https://github.com/dask/dask/archive/main.tar.gz#egg=dask[dataframe] +elif [ "${dask}" = "latest" ]; then + conda install -q dask +else + conda install -q dask=${dask} +fi +conda clean --all diff --git a/src/arrow/ci/scripts/install_gcs_testbench.sh b/src/arrow/ci/scripts/install_gcs_testbench.sh new file mode 100755 index 000000000..579a78944 --- /dev/null +++ b/src/arrow/ci/scripts/install_gcs_testbench.sh @@ -0,0 +1,38 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +set -e + +if [ "$#" -ne 2 ]; then + echo "Usage: $0 <arch> <storage-testbench version>" + exit 1 +fi + +arch=$1 +if [ "${arch}" != "amd64" ]; then + echo "GCS testbench won't install on non-x86 architecture" + exit 0 +fi + +version=$2 +if [[ "${version}" = "default" ]]; then + version="v0.7.0" +fi + +pip install "https://github.com/googleapis/storage-testbench/archive/${version}.tar.gz" diff --git a/src/arrow/ci/scripts/install_iwyu.sh b/src/arrow/ci/scripts/install_iwyu.sh new file mode 100755 index 000000000..3cd2cbc95 --- /dev/null +++ b/src/arrow/ci/scripts/install_iwyu.sh @@ -0,0 +1,48 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +set -eu + +source_dir=${1:-/tmp/iwyu} +install_prefix=${2:-/usr/local} +clang_tools_version=${3:-8} + +iwyu_branch_name="clang_${clang_tools_version}" +if [ ${clang_tools_version} -lt 10 ]; then + iwyu_branch_name="${iwyu_branch_name}.0" +fi + +git clone --single-branch --branch ${iwyu_branch_name} \ + https://github.com/include-what-you-use/include-what-you-use.git ${source_dir} + +mkdir -p ${source_dir}/build +pushd ${source_dir}/build + +# Build IWYU for current Clang +export CC=clang-${clang_tools_version} +export CXX=clang++-${clang_tools_version} + +cmake -DCMAKE_PREFIX_PATH=/usr/lib/llvm-${clang_tools_version} \ + -DCMAKE_INSTALL_PREFIX=${install_prefix} \ + ${source_dir} +make -j4 +make install + +popd + +rm -rf ${source_dir} diff --git a/src/arrow/ci/scripts/install_kartothek.sh b/src/arrow/ci/scripts/install_kartothek.sh new file mode 100755 index 000000000..4d88943b6 --- /dev/null +++ b/src/arrow/ci/scripts/install_kartothek.sh @@ -0,0 +1,41 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +set -e + +if [ "$#" -ne 2 ]; then + echo "Usage: $0 <kartothek version> <target directory>" + exit 1 +fi + +kartothek=$1 +target=$2 + +git clone --recurse-submodules https://github.com/JDASoftwareGroup/kartothek "${target}" +if [ "${kartothek}" = "master" ]; then + git -C "${target}" checkout master; +elif [ "${kartothek}" = "latest" ]; then + git -C "${target}" checkout "$(git -C "${target}" describe --tags --abbrev=0)"; +else + git -C "${target}" checkout "${kartothek}"; +fi + +pushd "${target}" +pip install --no-deps . +popd diff --git a/src/arrow/ci/scripts/install_minio.sh b/src/arrow/ci/scripts/install_minio.sh new file mode 100755 index 000000000..5cda46e59 --- /dev/null +++ b/src/arrow/ci/scripts/install_minio.sh @@ -0,0 +1,62 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +set -e + +declare -A archs +archs=([amd64]=amd64 + [arm64v8]=arm64 + [arm32v7]=arm + [s390x]=s390x) + +declare -A platforms +platforms=([linux]=linux + [macos]=darwin) + +arch=${archs[$1]} +platform=${platforms[$2]} +version=$3 +prefix=$4 + +if [ "$#" -ne 4 ]; then + echo "Usage: $0 <architecture> <platform> <version> <prefix>" + exit 1 +elif [[ -z ${arch} ]]; then + echo "Unexpected architecture: ${1}" + exit 1 +elif [[ -z ${platform} ]]; then + echo "Unexpected platform: ${2}" + exit 1 +elif [[ ${version} != "latest" ]]; then + echo "Cannot fetch specific versions of minio, only latest is supported." + exit 1 +fi + +if [[ ! -x ${prefix}/bin/minio ]]; then + url="https://dl.min.io/server/minio/release/${platform}-${arch}/minio" + echo "Fetching ${url}..." + wget -nv -P ${prefix}/bin ${url} + chmod +x ${prefix}/bin/minio +fi +if [[ ! -x ${prefix}/bin/mc ]]; then + url="https://dl.min.io/client/mc/release/${platform}-${arch}/mc" + echo "Fetching ${url}..." + wget -nv -P ${prefix}/bin ${url} + chmod +x ${prefix}/bin/mc +fi diff --git a/src/arrow/ci/scripts/install_osx_sdk.sh b/src/arrow/ci/scripts/install_osx_sdk.sh new file mode 100755 index 000000000..896d084e0 --- /dev/null +++ b/src/arrow/ci/scripts/install_osx_sdk.sh @@ -0,0 +1,41 @@ +#!/usr/bin/env bash + +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. + +set -ex + +if [ ${using_homebrew} != "yes" ]; then + export MACOSX_DEPLOYMENT_TARGET="10.9" + export CONDA_BUILD_SYSROOT="$(xcode-select -p)/Platforms/MacOSX.platform/Developer/SDKs/MacOSX${MACOSX_DEPLOYMENT_TARGET}.sdk" + + if [[ ! -d ${CONDA_BUILD_SYSROOT} || "$OSX_FORCE_SDK_DOWNLOAD" == "1" ]]; then + echo "downloading ${MACOSX_DEPLOYMENT_TARGET} sdk" + curl -L -O https://github.com/phracker/MacOSX-SDKs/releases/download/10.13/MacOSX${MACOSX_DEPLOYMENT_TARGET}.sdk.tar.xz + tar -xf MacOSX${MACOSX_DEPLOYMENT_TARGET}.sdk.tar.xz -C "$(dirname "$CONDA_BUILD_SYSROOT")" + # set minimum sdk version to our target + plutil -replace MinimumSDKVersion -string ${MACOSX_DEPLOYMENT_TARGET} $(xcode-select -p)/Platforms/MacOSX.platform/Info.plist + plutil -replace DTSDKName -string macosx${MACOSX_DEPLOYMENT_TARGET}internal $(xcode-select -p)/Platforms/MacOSX.platform/Info.plist + fi + + if [ -d "${CONDA_BUILD_SYSROOT}" ]; then + echo "Found CONDA_BUILD_SYSROOT: ${CONDA_BUILD_SYSROOT}" + else + echo "Missing CONDA_BUILD_SYSROOT: ${CONDA_BUILD_SYSROOT}" + exit 1 + fi +fi diff --git a/src/arrow/ci/scripts/install_pandas.sh b/src/arrow/ci/scripts/install_pandas.sh new file mode 100755 index 000000000..5aca65f82 --- /dev/null +++ b/src/arrow/ci/scripts/install_pandas.sh @@ -0,0 +1,46 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +set -e + +if [ "$#" -lt 1 ]; then + echo "Usage: $0 <pandas version> <optional numpy version = latest>" + exit 1 +fi + +pandas=$1 +numpy=${2:-"latest"} + +if [ "${numpy}" = "nightly" ]; then + pip install --extra-index-url https://pypi.anaconda.org/scipy-wheels-nightly/simple --pre numpy +elif [ "${numpy}" = "latest" ]; then + pip install numpy +else + pip install numpy==${numpy} +fi + +if [ "${pandas}" = "master" ]; then + pip install git+https://github.com/pandas-dev/pandas.git --no-build-isolation +elif [ "${pandas}" = "nightly" ]; then + pip install --extra-index-url https://pypi.anaconda.org/scipy-wheels-nightly/simple --pre pandas +elif [ "${pandas}" = "latest" ]; then + pip install pandas +else + pip install pandas==${pandas} +fi diff --git a/src/arrow/ci/scripts/install_python.sh b/src/arrow/ci/scripts/install_python.sh new file mode 100755 index 000000000..babb2c1e8 --- /dev/null +++ b/src/arrow/ci/scripts/install_python.sh @@ -0,0 +1,68 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +set -eu + +declare -A platforms +platforms=([windows]=Windows + [macos]=MacOSX + [linux]=Linux) + +declare -A versions +versions=([3.6]=3.6.8 + [3.7]=3.7.9 + [3.8]=3.8.10 + [3.9]=3.9.6 + [3.10]=3.10.0) + +if [ "$#" -ne 2 ]; then + echo "Usage: $0 <platform> <version>" + exit 1 +elif [[ -z ${platforms[$1]} ]]; then + echo "Unexpected platform: ${1}" + exit 1 +fi + +platform=${platforms[$1]} +version=$2 +full_version=${versions[$2]} + +if [ $platform = "MacOSX" ]; then + echo "Downloading Python installer..." + + if [ "$(uname -m)" = "arm64" ] || [ "$version" = "3.10" ]; then + fname="python-${full_version}-macos11.pkg" + else + fname="python-${full_version}-macosx10.9.pkg" + fi + wget "https://www.python.org/ftp/python/${full_version}/${fname}" + + echo "Installing Python..." + installer -pkg $fname -target / + rm $fname + + echo "Installing Pip..." + python="/Library/Frameworks/Python.framework/Versions/${version}/bin/python${version}" + pip="${python} -m pip" + + $python -m ensurepip + $pip install -U pip setuptools virtualenv +else + echo "Unsupported platform: $platform" +fi diff --git a/src/arrow/ci/scripts/install_spark.sh b/src/arrow/ci/scripts/install_spark.sh new file mode 100755 index 000000000..936313fd8 --- /dev/null +++ b/src/arrow/ci/scripts/install_spark.sh @@ -0,0 +1,31 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +set -e + +if [ "$#" -ne 2 ]; then + echo "Usage: $0 <spark version> <target directory>" + exit 1 +fi + +spark=$1 +target=$2 + +git clone https://github.com/apache/spark "${target}" +git -C "${target}" checkout "${spark}" diff --git a/src/arrow/ci/scripts/install_turbodbc.sh b/src/arrow/ci/scripts/install_turbodbc.sh new file mode 100755 index 000000000..3e644a3e2 --- /dev/null +++ b/src/arrow/ci/scripts/install_turbodbc.sh @@ -0,0 +1,43 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +set -e + +if [ "$#" -ne 2 ]; then + echo "Usage: $0 <turbodbc version> <target directory>" + exit 1 +fi + +turbodbc=$1 +target=$2 + +git clone --recurse-submodules https://github.com/blue-yonder/turbodbc "${target}" +if [ "${turbodbc}" = "master" ]; then + git -C "${target}" checkout master; +elif [ "${turbodbc}" = "latest" ]; then + git -C "${target}" checkout "$(git -C "${target}" describe --tags --abbrev=0)"; +else + git -C "${target}" checkout "${turbodbc}"; +fi + +pushd "${target}" +wget -q https://github.com/pybind/pybind11/archive/v2.6.2.tar.gz +tar xvf v2.6.2.tar.gz +mv pybind11-2.6.2 pybind11 +popd diff --git a/src/arrow/ci/scripts/install_vcpkg.sh b/src/arrow/ci/scripts/install_vcpkg.sh new file mode 100755 index 000000000..fe99a7fea --- /dev/null +++ b/src/arrow/ci/scripts/install_vcpkg.sh @@ -0,0 +1,39 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +set -e + +if [ "$#" -ne 2 ]; then + echo "Usage: $0 <vcpkg version> <target directory>" + exit 1 +fi + +vcpkg_version=$1 +vcpkg_destination=$2 +vcpkg_patch=$(realpath $(dirname "${0}")/../vcpkg/ports.patch) + +git clone --depth 1 --branch ${vcpkg_version} https://github.com/microsoft/vcpkg ${vcpkg_destination} + +pushd ${vcpkg_destination} + +./bootstrap-vcpkg.sh -useSystemBinaries -disableMetrics +git apply --ignore-whitespace ${vcpkg_patch} +echo "Patch successfully applied!" + +popd diff --git a/src/arrow/ci/scripts/integration_arrow.sh b/src/arrow/ci/scripts/integration_arrow.sh new file mode 100755 index 000000000..30cbb2d63 --- /dev/null +++ b/src/arrow/ci/scripts/integration_arrow.sh @@ -0,0 +1,40 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +set -ex + +arrow_dir=${1} +gold_dir=$arrow_dir/testing/data/arrow-ipc-stream/integration + +pip install -e $arrow_dir/dev/archery + +# Rust can be enabled by exporting ARCHERY_INTEGRATION_WITH_RUST=1 +archery integration \ + --run-flight \ + --with-cpp=1 \ + --with-csharp=1 \ + --with-java=1 \ + --with-js=1 \ + --with-go=1 \ + --gold-dirs=$gold_dir/0.14.1 \ + --gold-dirs=$gold_dir/0.17.1 \ + --gold-dirs=$gold_dir/1.0.0-bigendian \ + --gold-dirs=$gold_dir/1.0.0-littleendian \ + --gold-dirs=$gold_dir/2.0.0-compression \ + --gold-dirs=$gold_dir/4.0.0-shareddict \ diff --git a/src/arrow/ci/scripts/integration_dask.sh b/src/arrow/ci/scripts/integration_dask.sh new file mode 100755 index 000000000..e67a02945 --- /dev/null +++ b/src/arrow/ci/scripts/integration_dask.sh @@ -0,0 +1,40 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +set -e + +# check that optional pyarrow modules are available +# because pytest would just skip the dask tests +python -c "import pyarrow.orc" +python -c "import pyarrow.parquet" + +# check that dask.dataframe is correctly installed +python -c "import dask.dataframe" + +# TODO(kszucs): the following tests are also uses pyarrow +# pytest -sv --pyargs dask.bytes.tests.test_s3 +# pytest -sv --pyargs dask.bytes.tests.test_hdfs +# pytest -sv --pyargs dask.bytes.tests.test_local + +# skip failing pickle test, see https://github.com/dask/dask/issues/6374 +pytest -v --pyargs dask.dataframe.tests.test_dataframe -k "not test_dataframe_picklable and not test_describe_empty" +pytest -v --pyargs dask.dataframe.io.tests.test_orc +# skip failing parquet tests, see https://github.com/dask/dask/issues/6243 +pytest -v --pyargs dask.dataframe.io.tests.test_parquet \ + -k "not test_to_parquet_pyarrow_w_inconsistent_schema_by_partition_fails_by_default and not test_timeseries_nulls_in_schema" diff --git a/src/arrow/ci/scripts/integration_hdfs.sh b/src/arrow/ci/scripts/integration_hdfs.sh new file mode 100755 index 000000000..c95449379 --- /dev/null +++ b/src/arrow/ci/scripts/integration_hdfs.sh @@ -0,0 +1,69 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. + +set -e + +source_dir=${1}/cpp +build_dir=${2}/cpp + +export CLASSPATH=$($HADOOP_HOME/bin/hadoop classpath --glob) +export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop +export LIBHDFS3_CONF=$HADOOP_CONF_DIR/hdfs-site.xml +export ARROW_LIBHDFS3_DIR=$CONDA_PREFIX/lib + +libhdfs_dir=$HADOOP_HOME/lib/native +hadoop_home=$HADOOP_HOME + +function use_hadoop_home() { + unset ARROW_LIBHDFS_DIR + export HADOOP_HOME=$hadoop_home +} + +function use_libhdfs_dir() { + unset HADOOP_HOME + export ARROW_LIBHDFS_DIR=$libhdfs_dir +} + +# execute cpp tests +export ARROW_HDFS_TEST_LIBHDFS_REQUIRE=ON +pushd ${build_dir} + +debug/arrow-io-hdfs-test +debug/arrow-hdfs-test + +use_libhdfs_dir +debug/arrow-io-hdfs-test +debug/arrow-hdfs-test +use_hadoop_home + +popd + +# cannot use --pyargs with custom arguments like --hdfs or --only-hdfs, because +# pytest ignores them, see https://github.com/pytest-dev/pytest/issues/3517 +export PYARROW_TEST_HDFS=ON + +export PYARROW_HDFS_TEST_LIBHDFS_REQUIRE=ON + +pytest -vs --pyargs pyarrow.tests.test_fs +pytest -vs --pyargs pyarrow.tests.test_hdfs + +use_libhdfs_dir +pytest -vs --pyargs pyarrow.tests.test_fs +pytest -vs --pyargs pyarrow.tests.test_hdfs +use_hadoop_home diff --git a/src/arrow/ci/scripts/integration_hiveserver2.sh b/src/arrow/ci/scripts/integration_hiveserver2.sh new file mode 100755 index 000000000..36fba5ca8 --- /dev/null +++ b/src/arrow/ci/scripts/integration_hiveserver2.sh @@ -0,0 +1,32 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +set -e + +arrow_dir=${1} +source_dir=${1}/cpp +build_dir=${2}/cpp + +${arrow_dir}/ci/scripts/util_wait_for_it.sh impala:21050 -t 300 -s -- echo "impala is up" + +pushd ${build_dir} + +# ninja hiveserver2-test +debug/hiveserver2-test + +popd diff --git a/src/arrow/ci/scripts/integration_kartothek.sh b/src/arrow/ci/scripts/integration_kartothek.sh new file mode 100755 index 000000000..379569b9c --- /dev/null +++ b/src/arrow/ci/scripts/integration_kartothek.sh @@ -0,0 +1,31 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +set -e + +# check that optional pyarrow modules are available +# because pytest would just skip the pyarrow tests +python -c "import pyarrow.parquet" + +# check that kartothek is correctly installed +python -c "import kartothek" + +pushd /kartothek +# See ARROW-12314, test_load_dataframes_columns_raises_missing skipped because of changed error message +pytest -n0 --ignore tests/cli/test_query.py -k "not test_load_dataframes_columns_raises_missing" diff --git a/src/arrow/ci/scripts/integration_spark.sh b/src/arrow/ci/scripts/integration_spark.sh new file mode 100755 index 000000000..90ecbce39 --- /dev/null +++ b/src/arrow/ci/scripts/integration_spark.sh @@ -0,0 +1,87 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# exit on any error +set -eu + +source_dir=${1} +spark_dir=${2} + +# Test Spark with latest PyArrow only, don't build with latest Arrow Java +test_pyarrow_only=${3:-false} + +# Spark branch to checkout +spark_version=${SPARK_VERSION:-master} + +# Use old behavior that always dropped tiemzones. +export PYARROW_IGNORE_TIMEZONE=1 + +if [ "${SPARK_VERSION:0:2}" == "2." 
]; then + # https://github.com/apache/spark/blob/master/docs/sql-pyspark-pandas-with-arrow.md#compatibility-setting-for-pyarrow--0150-and-spark-23x-24x + export ARROW_PRE_0_15_IPC_FORMAT=1 +fi + +# Get Arrow Java version +pushd ${source_dir}/java + arrow_version=`mvn org.apache.maven.plugins:maven-help-plugin:2.1.1:evaluate -Dexpression=project.version | sed -n -e '/^\[.*\]/ !{ /^[0-9]/ { p; q } }'` +popd + +export MAVEN_OPTS="-Xss256m -Xmx2g -XX:ReservedCodeCacheSize=1g -Dorg.slf4j.simpleLogger.defaultLogLevel=warn" +export MAVEN_OPTS="${MAVEN_OPTS} -Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer.Slf4jMavenTransferListener=warn" + +pushd ${spark_dir} + + if [ "${test_pyarrow_only}" == "true" ]; then + echo "Building Spark ${SPARK_VERSION} to test pyarrow only" + + # Build Spark only + build/mvn -B -DskipTests package + + else + + # Update Spark pom with the Arrow version just installed and build Spark, need package phase for pyspark + echo "Building Spark ${SPARK_VERSION} with Arrow ${arrow_version}" + build/mvn versions:set-property -Dproperty=arrow.version -DnewVersion=${arrow_version} + + # Build Spark with new Arrow Java + build/mvn -B -DskipTests package + + spark_scala_tests=( + "org.apache.spark.sql.execution.arrow" + "org.apache.spark.sql.execution.vectorized.ColumnarBatchSuite" + "org.apache.spark.sql.execution.vectorized.ArrowColumnVectorSuite") + + (echo "Testing Spark:"; IFS=$'\n'; echo "${spark_scala_tests[*]}") + + # TODO: should be able to only build spark-sql tests with adding "-pl sql/core" but not currently working + build/mvn -B -Dtest=none -DwildcardSuites=$(IFS=,; echo "${spark_scala_tests[*]}") test + fi + + # Run pyarrow related Python tests only + spark_python_tests=( + "pyspark.sql.tests.test_arrow" + "pyspark.sql.tests.test_pandas_map" + "pyspark.sql.tests.test_pandas_cogrouped_map" + "pyspark.sql.tests.test_pandas_grouped_map" + "pyspark.sql.tests.test_pandas_udf" + "pyspark.sql.tests.test_pandas_udf_scalar" + 
"pyspark.sql.tests.test_pandas_udf_grouped_agg" + "pyspark.sql.tests.test_pandas_udf_window") + + (echo "Testing PySpark:"; IFS=$'\n'; echo "${spark_python_tests[*]}") + python/run-tests --testnames "$(IFS=,; echo "${spark_python_tests[*]}")" --python-executables python +popd diff --git a/src/arrow/ci/scripts/integration_turbodbc.sh b/src/arrow/ci/scripts/integration_turbodbc.sh new file mode 100755 index 000000000..f0fafd512 --- /dev/null +++ b/src/arrow/ci/scripts/integration_turbodbc.sh @@ -0,0 +1,47 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+
+set -ex
+# Usage: integration_turbodbc.sh <source dir> <build root dir>
+source_dir=${1}
+build_dir=${2}/turbodbc
+
+# Fail early if the optional pyarrow modules are not importable,
+# because pytest would just skip the pyarrow tests
+python -c "import pyarrow.orc"
+python -c "import pyarrow.parquet"
+
+mkdir -p "${build_dir}"
+pushd "${build_dir}"
+
+cmake -DCMAKE_INSTALL_PREFIX="${ARROW_HOME}" \
+      -DCMAKE_CXX_FLAGS="${CXXFLAGS}" \
+      -DPYTHON_EXECUTABLE="$(which python)" \
+      -GNinja \
+      "${source_dir}"
+ninja install
+
+# TODO(ARROW-5074)
+export LD_LIBRARY_PATH="${ARROW_HOME}/lib:${LD_LIBRARY_PATH}"
+export ODBCSYSINI="${source_dir}/earthly/odbc/"
+
+service postgresql start
+ctest --output-on-failure
+
+popd
diff --git a/src/arrow/ci/scripts/java_build.sh b/src/arrow/ci/scripts/java_build.sh
new file mode 100755
index 000000000..1ba37606d
--- /dev/null
+++ b/src/arrow/ci/scripts/java_build.sh
@@ -0,0 +1,107 @@
+#!/usr/bin/env bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -ex
+# Usage: java_build.sh <arrow dir> <build root dir> [with_docs (default: false)]
+arrow_dir=${1}
+source_dir=${1}/java
+cpp_build_dir=${2}/cpp/${ARROW_BUILD_TYPE:-debug}
+cdata_dist_dir=${2}/java/c
+with_docs=${3:-false}
+
+if [[ "$(uname -s)" == "Linux" ]] && [[ "$(uname -m)" == "s390x" ]]; then
+  # Since some files for s390_64 are not available at maven central,
+  # download pre-built files from Artifactory and install them explicitly
+  mvn_install="mvn install:install-file"
+  wget="wget"
+  artifactory_base_url="https://apache.jfrog.io/artifactory/arrow"
+
+  artifactory_dir="protoc-binary"
+  group="com.google.protobuf"
+  artifact="protoc"
+  ver="3.7.1"
+  classifier="linux-s390_64"
+  extension="exe"
+  target=${artifact}-${ver}-${classifier}.${extension}
+  ${wget} "${artifactory_base_url}/${artifactory_dir}/${ver}/${target}"
+  ${mvn_install} -DgroupId=${group} -DartifactId=${artifact} -Dversion=${ver} -Dclassifier=${classifier} -Dpackaging=${extension} -Dfile="$(pwd)/${target}"
+  # protoc requires libprotoc.so.18 libprotobuf.so.18
+  ${wget} "${artifactory_base_url}/${artifactory_dir}/${ver}/libprotoc.so.18"
+  ${wget} "${artifactory_base_url}/${artifactory_dir}/${ver}/libprotobuf.so.18"
+  mkdir -p "${ARROW_HOME}/lib"
+  cp lib*.so.18 "${ARROW_HOME}/lib"
+  export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:${ARROW_HOME}/lib
+
+  artifactory_dir="protoc-gen-grpc-java-binary"
+  group="io.grpc"
+  artifact="protoc-gen-grpc-java"
+  ver="1.30.2"
+  classifier="linux-s390_64"
+  extension="exe"
+  target=${artifact}-${ver}-${classifier}.${extension}
+  ${wget} "${artifactory_base_url}/${artifactory_dir}/${ver}/${target}"
+  ${mvn_install} -DgroupId=${group} -DartifactId=${artifact} -Dversion=${ver} -Dclassifier=${classifier} -Dpackaging=${extension} -Dfile="$(pwd)/${target}"
+
+  artifactory_dir="netty-binary"
+  group="io.netty"
+  artifact="netty-transport-native-unix-common"
+  ver="4.1.48.Final"
+  classifier="linux-s390_64"
+  extension="jar"
+  target=${artifact}-${ver}-${classifier}.${extension}
+  ${wget} "${artifactory_base_url}/${artifactory_dir}/${ver}/${target}"
+  ${mvn_install} -DgroupId=${group} -DartifactId=${artifact} -Dversion=${ver} -Dclassifier=${classifier} -Dpackaging=${extension} -Dfile="$(pwd)/${target}"
+  artifact="netty-transport-native-epoll"
+  extension="jar"
+  target=${artifact}-${ver}-${classifier}.${extension}
+  ${wget} "${artifactory_base_url}/${artifactory_dir}/${ver}/${target}"
+  ${mvn_install} -DgroupId=${group} -DartifactId=${artifact} -Dversion=${ver} -Dclassifier=${classifier} -Dpackaging=${extension} -Dfile="$(pwd)/${target}"
+fi
+
+mvn="mvn -B -DskipTests -Drat.skip=true -Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer.Slf4jMavenTransferListener=warn"
+# Use `2 * ncores` threads (${mvn} is expanded unquoted on purpose so it splits into arguments)
+mvn="${mvn} -T 2C"
+
+pushd "${source_dir}"
+
+${mvn} install
+
+if [ "${ARROW_JAVA_SHADE_FLATBUFFERS}" == "ON" ]; then
+  ${mvn} -Pshade-flatbuffers install
+fi
+
+if [ "${ARROW_JAVA_CDATA}" = "ON" ]; then
+  ${mvn} -Darrow.c.jni.dist.dir="${cdata_dist_dir}" -Parrow-c-data install
+fi
+
+if [ "${ARROW_GANDIVA_JAVA}" = "ON" ]; then
+  ${mvn} -Darrow.cpp.build.dir="${cpp_build_dir}" -Parrow-jni install
+fi
+
+if [ "${ARROW_PLASMA}" = "ON" ]; then
+  pushd "${source_dir}/plasma"
+  ${mvn} clean install
+  popd
+fi
+
+if [ "${with_docs}" == "true" ]; then
+  # HTTP pooling is turned off to avoid download issues https://issues.apache.org/jira/browse/ARROW-11633
+  ${mvn} -Dcheckstyle.skip=true -Dhttp.keepAlive=false -Dmaven.wagon.http.pool=false install site
+fi
+
+popd
diff --git a/src/arrow/ci/scripts/java_cdata_build.sh b/src/arrow/ci/scripts/java_cdata_build.sh
new file mode 100755
index 000000000..730c775d4
--- /dev/null
+++ b/src/arrow/ci/scripts/java_cdata_build.sh
@@ -0,0 +1,45 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.
The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -ex
+# Usage: java_cdata_build.sh <arrow dir> <build dir> <dist dir>
+arrow_dir=${1}
+build_dir=${2}
+# The directory where the final binaries will be stored when scripts finish
+dist_dir=${3}
+
+echo "=== Clear output directories and leftovers ==="
+# Clear output directories and leftovers (quoted so the path is never word-split or globbed)
+rm -rf "${build_dir}"
+
+echo "=== Building Arrow Java C Data Interface native library ==="
+mkdir -p "${build_dir}"
+pushd "${build_dir}"
+
+cmake \
+  -DCMAKE_BUILD_TYPE="${ARROW_BUILD_TYPE:-release}" \
+  -DCMAKE_INSTALL_LIBDIR=lib \
+  -DCMAKE_INSTALL_PREFIX="${build_dir}" \
+  "${arrow_dir}/java/c"
+cmake --build . --target install --config "${ARROW_BUILD_TYPE:-release}"
+popd
+
+echo "=== Copying libraries to the distribution folder ==="
+mkdir -p "${dist_dir}"
+cp -L "${build_dir}"/lib/*arrow_cdata_jni.* "${dist_dir}"
diff --git a/src/arrow/ci/scripts/java_full_build.sh b/src/arrow/ci/scripts/java_full_build.sh
new file mode 100755
index 000000000..e452b8098
--- /dev/null
+++ b/src/arrow/ci/scripts/java_full_build.sh
@@ -0,0 +1,42 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.
You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -e
+# Usage: java_full_build.sh <arrow dir> <dist dir>
+arrow_dir=${1}
+dist_dir=${2}
+
+export ARROW_TEST_DATA=${arrow_dir}/testing/data
+
+pushd "${arrow_dir}/java"
+
+# build the entire project; both native-library properties point at the dist dir
+mvn clean install \
+  -Parrow-c-data \
+  -Parrow-jni \
+  -Darrow.cpp.build.dir="$dist_dir" \
+  -Darrow.c.jni.dist.dir="$dist_dir"
+
+# copy all jars and pom files to the distribution folder
+find ~/.m2/repository/org/apache/arrow \
+  "(" -name "*.jar" -o -name "*.pom" ")" \
+  -exec echo {} ";" \
+  -exec cp {} "$dist_dir" ";"
+
+popd
diff --git a/src/arrow/ci/scripts/java_jni_macos_build.sh b/src/arrow/ci/scripts/java_jni_macos_build.sh
new file mode 100755
index 000000000..218d2d396
--- /dev/null
+++ b/src/arrow/ci/scripts/java_jni_macos_build.sh
@@ -0,0 +1,115 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -ex
+# Usage: java_jni_macos_build.sh <arrow dir> <build dir> <dist dir>
+arrow_dir=${1}
+build_dir=${2}
+# The directory where the final binaries will be stored when scripts finish
+dist_dir=${3}
+
+echo "=== Clear output directories and leftovers ==="
+# Clear output directories and leftovers (quoted so the path is never word-split or globbed)
+rm -rf "${build_dir}"
+
+echo "=== Building Arrow C++ libraries ==="
+: ${ARROW_BUILD_TESTS:=OFF}
+: ${ARROW_DATASET:=ON}
+: ${ARROW_FILESYSTEM:=ON}
+: ${ARROW_GANDIVA_JAVA:=ON}
+: ${ARROW_GANDIVA:=ON}
+: ${ARROW_ORC:=ON}
+: ${ARROW_PARQUET:=ON}
+: ${ARROW_PLASMA_JAVA_CLIENT:=ON}
+: ${ARROW_PLASMA:=ON}
+: ${ARROW_PYTHON:=OFF}
+: ${CMAKE_BUILD_TYPE:=Release}
+: ${CMAKE_UNITY_BUILD:=ON}
+
+export ARROW_TEST_DATA="${arrow_dir}/testing/data"
+export PARQUET_TEST_DATA="${arrow_dir}/cpp/submodules/parquet-testing/data"
+export AWS_EC2_METADATA_DISABLED=TRUE
+
+mkdir -p "${build_dir}"
+pushd "${build_dir}"
+
+cmake \
+  -DARROW_BOOST_USE_SHARED=OFF \
+  -DARROW_BROTLI_USE_SHARED=OFF \
+  -DARROW_BUILD_TESTS="${ARROW_BUILD_TESTS}" \
+  -DARROW_BUILD_UTILITIES=OFF \
+  -DARROW_BZ2_USE_SHARED=OFF \
+  -DARROW_DATASET="${ARROW_DATASET}" \
+  -DARROW_FILESYSTEM="${ARROW_FILESYSTEM}" \
+  -DARROW_GANDIVA_JAVA="${ARROW_GANDIVA_JAVA}" \
+  -DARROW_GANDIVA_STATIC_LIBSTDCPP=ON \
+  -DARROW_GANDIVA="${ARROW_GANDIVA}" \
+  -DARROW_GFLAGS_USE_SHARED=OFF \
+  -DARROW_GRPC_USE_SHARED=OFF \
+  -DARROW_JNI=ON \
+  -DARROW_LZ4_USE_SHARED=OFF \
+  -DARROW_OPENSSL_USE_SHARED=OFF \
+  -DARROW_ORC="${ARROW_ORC}" \
+  -DARROW_PARQUET="${ARROW_PARQUET}" \
+  -DARROW_PLASMA_JAVA_CLIENT="${ARROW_PLASMA_JAVA_CLIENT}" \
+  -DARROW_PLASMA="${ARROW_PLASMA}" \
+  -DARROW_PROTOBUF_USE_SHARED=OFF \
+  -DARROW_PYTHON="${ARROW_PYTHON}" \
+  -DARROW_SNAPPY_USE_SHARED=OFF \
+  -DARROW_THRIFT_USE_SHARED=OFF \
+  -DARROW_UTF8PROC_USE_SHARED=OFF \
+  -DARROW_ZSTD_USE_SHARED=OFF \
+  -DCMAKE_BUILD_TYPE="${CMAKE_BUILD_TYPE}" \
+  -DCMAKE_INSTALL_LIBDIR=lib \
+  -DCMAKE_INSTALL_PREFIX="${build_dir}" \
+  -DCMAKE_UNITY_BUILD="${CMAKE_UNITY_BUILD}" \
+  -DPARQUET_BUILD_EXAMPLES=OFF \
+  -DPARQUET_BUILD_EXECUTABLES=OFF \
+  -DPARQUET_REQUIRE_ENCRYPTION=OFF \
+  -Dre2_SOURCE=BUNDLED \
+  "${arrow_dir}/cpp"
+cmake --build . --target install
+
+if [ "${ARROW_BUILD_TESTS}" == "ON" ]; then
+  ctest
+fi
+
+popd
+
+echo "=== Copying libraries to the distribution folder ==="
+mkdir -p "${dist_dir}"
+cp -L "${build_dir}/lib/libgandiva_jni.dylib" "${dist_dir}"
+cp -L "${build_dir}/lib/libarrow_dataset_jni.dylib" "${dist_dir}"
+cp -L "${build_dir}/lib/libarrow_orc_jni.dylib" "${dist_dir}"
+
+echo "=== Checking shared dependencies for libraries ==="
+
+pushd "${dist_dir}"
+archery linking check-dependencies \
+  --allow libarrow_dataset_jni \
+  --allow libarrow_orc_jni \
+  --allow libc++ \
+  --allow libgandiva_jni \
+  --allow libncurses \
+  --allow libSystem \
+  --allow libz \
+  libgandiva_jni.dylib \
+  libarrow_dataset_jni.dylib \
+  libarrow_orc_jni.dylib
+popd
diff --git a/src/arrow/ci/scripts/java_jni_manylinux_build.sh b/src/arrow/ci/scripts/java_jni_manylinux_build.sh
new file mode 100755
index 000000000..396c8fc19
--- /dev/null
+++ b/src/arrow/ci/scripts/java_jni_manylinux_build.sh
@@ -0,0 +1,137 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -ex
+# Usage: java_jni_manylinux_build.sh <arrow dir> <build dir> <dist dir>
+arrow_dir=${1}
+build_dir=${2}
+# The directory where the final binaries will be stored when scripts finish
+dist_dir=${3}
+
+echo "=== Clear output directories and leftovers ==="
+# Clear output directories and leftovers (quoted so the path is never word-split or globbed)
+rm -rf "${build_dir}"
+
+echo "=== Building Arrow C++ libraries ==="
+devtoolset_version=$(rpm -qa "devtoolset-*-gcc" --queryformat %{VERSION} | \
+                       grep -o "^[0-9]*")
+devtoolset_include_cpp="/opt/rh/devtoolset-${devtoolset_version}/root/usr/include/c++/${devtoolset_version}"
+: ${ARROW_DATASET:=ON}
+: ${ARROW_GANDIVA:=ON}
+: ${ARROW_GANDIVA_JAVA:=ON}
+: ${ARROW_FILESYSTEM:=ON}
+: ${ARROW_JEMALLOC:=ON}
+: ${ARROW_RPATH_ORIGIN:=ON}
+: ${ARROW_ORC:=ON}
+: ${ARROW_PARQUET:=ON}
+: ${ARROW_PLASMA:=ON}
+: ${ARROW_PLASMA_JAVA_CLIENT:=ON}
+: ${ARROW_PYTHON:=OFF}
+: ${ARROW_BUILD_TESTS:=OFF}
+: ${CMAKE_BUILD_TYPE:=Release}
+: ${CMAKE_UNITY_BUILD:=ON}
+: ${VCPKG_FEATURE_FLAGS:=-manifests}
+: ${VCPKG_TARGET_TRIPLET:=${VCPKG_DEFAULT_TRIPLET:-x64-linux-static-${CMAKE_BUILD_TYPE}}}
+: ${GANDIVA_CXX_FLAGS:=-isystem;${devtoolset_include_cpp};-isystem;${devtoolset_include_cpp}/x86_64-redhat-linux;-isystem;-lpthread}
+
+export ARROW_TEST_DATA="${arrow_dir}/testing/data"
+export PARQUET_TEST_DATA="${arrow_dir}/cpp/submodules/parquet-testing/data"
+export AWS_EC2_METADATA_DISABLED=TRUE
+
+mkdir -p "${build_dir}"
+pushd "${build_dir}"
+
+cmake \
+  -DARROW_BOOST_USE_SHARED=OFF \
+  -DARROW_BROTLI_USE_SHARED=OFF \
+  -DARROW_BUILD_SHARED=ON \
+  -DARROW_BUILD_TESTS="${ARROW_BUILD_TESTS}" \
+  -DARROW_BUILD_UTILITIES=OFF \
+  -DARROW_BZ2_USE_SHARED=OFF \
+  -DARROW_DATASET="${ARROW_DATASET}" \
+  -DARROW_DEPENDENCY_SOURCE="VCPKG" \
+  -DARROW_FILESYSTEM="${ARROW_FILESYSTEM}" \
+  -DARROW_GANDIVA_JAVA="${ARROW_GANDIVA_JAVA}" \
+  -DARROW_GANDIVA_PC_CXX_FLAGS="${GANDIVA_CXX_FLAGS}" \
+  -DARROW_GANDIVA="${ARROW_GANDIVA}" \
+  -DARROW_GRPC_USE_SHARED=OFF \
+  -DARROW_JEMALLOC="${ARROW_JEMALLOC}" \
+  -DARROW_JNI=ON \
+  -DARROW_LZ4_USE_SHARED=OFF \
+  -DARROW_OPENSSL_USE_SHARED=OFF \
+  -DARROW_ORC="${ARROW_ORC}" \
+  -DARROW_PARQUET="${ARROW_PARQUET}" \
+  -DARROW_PLASMA_JAVA_CLIENT="${ARROW_PLASMA_JAVA_CLIENT}" \
+  -DARROW_PLASMA="${ARROW_PLASMA}" \
+  -DARROW_PROTOBUF_USE_SHARED=OFF \
+  -DARROW_PYTHON="${ARROW_PYTHON}" \
+  -DARROW_RPATH_ORIGIN="${ARROW_RPATH_ORIGIN}" \
+  -DARROW_SNAPPY_USE_SHARED=OFF \
+  -DARROW_THRIFT_USE_SHARED=OFF \
+  -DARROW_UTF8PROC_USE_SHARED=OFF \
+  -DARROW_ZSTD_USE_SHARED=OFF \
+  -DCMAKE_BUILD_TYPE="${CMAKE_BUILD_TYPE}" \
+  -DCMAKE_INSTALL_LIBDIR=lib \
+  -DCMAKE_INSTALL_PREFIX="${build_dir}" \
+  -DCMAKE_UNITY_BUILD="${CMAKE_UNITY_BUILD}" \
+  -DPARQUET_BUILD_EXAMPLES=OFF \
+  -DPARQUET_BUILD_EXECUTABLES=OFF \
+  -DPARQUET_REQUIRE_ENCRYPTION=OFF \
+  -DPythonInterp_FIND_VERSION_MAJOR=3 \
+  -DPythonInterp_FIND_VERSION=ON \
+  -DVCPKG_MANIFEST_MODE=OFF \
+  -DVCPKG_TARGET_TRIPLET="${VCPKG_TARGET_TRIPLET}" \
+  -GNinja \
+  "${arrow_dir}/cpp"
+ninja install
+
+if [ "${ARROW_BUILD_TESTS}" = "ON" ]; then
+    ctest \
+        --label-regex unittest \
+        --output-on-failure \
+        --parallel "$(nproc)" \
+        --timeout 300
+fi
+
+popd
+
+echo "=== Copying libraries to the distribution folder ==="
+mkdir -p "${dist_dir}"
+cp -L "${build_dir}/lib/libgandiva_jni.so" "${dist_dir}"
+cp -L "${build_dir}/lib/libarrow_dataset_jni.so" "${dist_dir}"
+cp -L "${build_dir}/lib/libarrow_orc_jni.so" "${dist_dir}"
+
+echo "=== Checking shared dependencies for libraries ==="
+
+pushd "${dist_dir}"
+archery linking check-dependencies \
+  --allow ld-linux-x86-64 \
+  --allow libc \
+  --allow libdl \
+  --allow libgcc_s \
+  --allow libm \
+  --allow libpthread \
+  --allow librt \
+  --allow libstdc++ \
+  --allow libz \
+  --allow linux-vdso \
+  libgandiva_jni.so \
+  libarrow_dataset_jni.so \
+  libarrow_orc_jni.so
+popd
diff --git a/src/arrow/ci/scripts/java_test.sh b/src/arrow/ci/scripts/java_test.sh
new file mode 100755
index 000000000..0e755bcaf
--- /dev/null
+++ b/src/arrow/ci/scripts/java_test.sh
@@ -0,0 +1,54 @@
+#!/usr/bin/env bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more
contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -ex
+# Usage: java_test.sh <arrow dir> <build root dir>
+arrow_dir=${1}
+source_dir=${1}/java
+cpp_build_dir=${2}/cpp/${ARROW_BUILD_TYPE:-debug}
+cdata_dist_dir=${2}/java/c
+
+# For JNI and Plasma tests
+export LD_LIBRARY_PATH=${ARROW_HOME}/lib:${LD_LIBRARY_PATH}
+export PLASMA_STORE=${ARROW_HOME}/bin/plasma-store-server
+
+mvn="mvn -B -Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer.Slf4jMavenTransferListener=warn"
+# Use `2 * ncores` threads (${mvn} is expanded unquoted on purpose so it splits into arguments)
+mvn="${mvn} -T 2C"
+
+pushd "${source_dir}"
+
+${mvn} test
+
+if [ "${ARROW_JNI}" = "ON" ]; then
+  ${mvn} test -Parrow-jni -pl adapter/orc,gandiva,dataset -Darrow.cpp.build.dir="${cpp_build_dir}"
+fi
+
+if [ "${ARROW_JAVA_CDATA}" = "ON" ]; then
+  ${mvn} test -Parrow-c-data -pl c -Darrow.c.jni.dist.dir="${cdata_dist_dir}"
+fi
+
+if [ "${ARROW_PLASMA}" = "ON" ]; then
+  pushd "${source_dir}/plasma"
+  java -cp target/test-classes:target/classes \
+       -Djava.library.path="${cpp_build_dir}" \
+       org.apache.arrow.plasma.PlasmaClientTest
+  popd
+fi
+
+popd
diff --git a/src/arrow/ci/scripts/js_build.sh b/src/arrow/ci/scripts/js_build.sh
new file mode 100755
index 000000000..10ceb41ee
--- /dev/null
+++ b/src/arrow/ci/scripts/js_build.sh
@@ -0,0 +1,36 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license
agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -ex
+# Usage: js_build.sh <arrow dir> [with_docs (default: false)]
+source_dir=${1}/js
+with_docs=${2:-false}
+
+pushd "${source_dir}"
+
+yarn --frozen-lockfile
+# TODO(kszucs): linting should be moved to archery
+yarn lint:ci
+yarn build
+
+if [ "${with_docs}" == "true" ]; then
+  yarn doc
+fi
+
+popd
diff --git a/src/arrow/ci/scripts/js_test.sh b/src/arrow/ci/scripts/js_test.sh
new file mode 100755
index 000000000..345d6cb81
--- /dev/null
+++ b/src/arrow/ci/scripts/js_test.sh
@@ -0,0 +1,29 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -ex
+# Usage: js_test.sh <arrow dir>
+source_dir=${1}/js
+
+pushd "${source_dir}"
+
+yarn lint
+yarn test
+
+popd
diff --git a/src/arrow/ci/scripts/matlab_build.sh b/src/arrow/ci/scripts/matlab_build.sh
new file mode 100755
index 000000000..5e9bdd2a9
--- /dev/null
+++ b/src/arrow/ci/scripts/matlab_build.sh
@@ -0,0 +1,29 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.
The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Exit on error (-e) and print all commands (-x).
+set -ex
+# Usage: matlab_build.sh <arrow dir>
+base_dir=${1}
+source_dir=${base_dir}/matlab
+build_dir=${base_dir}/matlab/build
+
+cmake -S "${source_dir}" -B "${build_dir}" -G Ninja -D MATLAB_BUILD_TESTS=ON
+cmake --build "${build_dir}" --config Release
+ctest --test-dir "${build_dir}"
diff --git a/src/arrow/ci/scripts/msys2_setup.sh b/src/arrow/ci/scripts/msys2_setup.sh
new file mode 100755
index 000000000..6f6012c87
--- /dev/null
+++ b/src/arrow/ci/scripts/msys2_setup.sh
@@ -0,0 +1,79 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -eux
+# Usage: msys2_setup.sh <target>; targets handled below: cpp, c_glib, ruby, cgo
+target=$1
+
+packages=()
+case "${target}" in
+  cpp|c_glib|ruby)
+    packages+=(${MINGW_PACKAGE_PREFIX}-aws-sdk-cpp)
+    packages+=(${MINGW_PACKAGE_PREFIX}-boost)
+    packages+=(${MINGW_PACKAGE_PREFIX}-brotli)
+    packages+=(${MINGW_PACKAGE_PREFIX}-ccache)
+    packages+=(${MINGW_PACKAGE_PREFIX}-clang)
+    packages+=(${MINGW_PACKAGE_PREFIX}-cmake)
+    packages+=(${MINGW_PACKAGE_PREFIX}-gcc)
+    packages+=(${MINGW_PACKAGE_PREFIX}-gflags)
+    packages+=(${MINGW_PACKAGE_PREFIX}-grpc)
+    packages+=(${MINGW_PACKAGE_PREFIX}-gtest)
+    packages+=(${MINGW_PACKAGE_PREFIX}-libutf8proc)
+    packages+=(${MINGW_PACKAGE_PREFIX}-libxml2)
+    packages+=(${MINGW_PACKAGE_PREFIX}-llvm)
+    packages+=(${MINGW_PACKAGE_PREFIX}-lz4)
+    packages+=(${MINGW_PACKAGE_PREFIX}-make)
+    packages+=(${MINGW_PACKAGE_PREFIX}-mlir)
+    packages+=(${MINGW_PACKAGE_PREFIX}-ninja)
+    packages+=(${MINGW_PACKAGE_PREFIX}-polly)
+    packages+=(${MINGW_PACKAGE_PREFIX}-protobuf)
+    packages+=(${MINGW_PACKAGE_PREFIX}-python3-numpy)
+    packages+=(${MINGW_PACKAGE_PREFIX}-rapidjson)
+    packages+=(${MINGW_PACKAGE_PREFIX}-re2)
+    packages+=(${MINGW_PACKAGE_PREFIX}-snappy)
+    packages+=(${MINGW_PACKAGE_PREFIX}-thrift)
+    packages+=(${MINGW_PACKAGE_PREFIX}-zlib)
+    packages+=(${MINGW_PACKAGE_PREFIX}-zstd)
+    ;;
+esac
+
+case "${target}" in
+  c_glib|ruby)
+    packages+=(${MINGW_PACKAGE_PREFIX}-gobject-introspection)
+    packages+=(${MINGW_PACKAGE_PREFIX}-gtk-doc)
+    packages+=(${MINGW_PACKAGE_PREFIX}-meson)
+    ;;
+esac
+
+case "${target}" in
+  cgo)
+    packages+=(${MINGW_PACKAGE_PREFIX}-arrow)
+    packages+=(${MINGW_PACKAGE_PREFIX}-gcc)
+    ;;
+esac
+
+pacman \
+  --needed \
+  --noconfirm \
+  --refresh \
+  --sync \
+  "${packages[@]}"
+
+"$(dirname "$0")/ccache_setup.sh"
+echo "CCACHE_DIR=$(cygpath --absolute --windows ccache)" >> "${GITHUB_ENV}"
diff --git a/src/arrow/ci/scripts/msys2_system_clean.sh b/src/arrow/ci/scripts/msys2_system_clean.sh
new file mode 100755
index 000000000..a356aee66
--- /dev/null
+++ b/src/arrow/ci/scripts/msys2_system_clean.sh
@@ -0,0 +1,33 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -eux
+# Remove the MinGW packages listed below without prompting (--noconfirm).
+pacman \
+  --cascade \
+  --noconfirm \
+  --nosave \
+  --recursive \
+  --remove \
+  ${MINGW_PACKAGE_PREFIX}-clang-tools-extra \
+  ${MINGW_PACKAGE_PREFIX}-gcc-ada \
+  ${MINGW_PACKAGE_PREFIX}-gcc-fortran \
+  ${MINGW_PACKAGE_PREFIX}-gcc-libgfortran \
+  ${MINGW_PACKAGE_PREFIX}-gcc-objc \
+  ${MINGW_PACKAGE_PREFIX}-libgccjit
diff --git a/src/arrow/ci/scripts/msys2_system_upgrade.sh b/src/arrow/ci/scripts/msys2_system_upgrade.sh
new file mode 100755
index 000000000..646428fbb
--- /dev/null
+++ b/src/arrow/ci/scripts/msys2_system_upgrade.sh
@@ -0,0 +1,28 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -eux
+# Sync and upgrade; --refresh and --sysupgrade are each passed twice (presumably deliberate, like `pacman -Syyuu` -- confirm).
+pacman \
+  --noconfirm \
+  --refresh \
+  --refresh \
+  --sync \
+  --sysupgrade \
+  --sysupgrade
diff --git a/src/arrow/ci/scripts/python_benchmark.sh b/src/arrow/ci/scripts/python_benchmark.sh
new file mode 100755
index 000000000..3a35298dc
--- /dev/null
+++ b/src/arrow/ci/scripts/python_benchmark.sh
@@ -0,0 +1,40 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.
The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Check the ASV benchmarking setup.
+# Unfortunately this won't ensure that all benchmarks succeed
+# (see https://github.com/airspeed-velocity/asv/issues/449)
+source deactivate
+conda create -y -q -n pyarrow_asv python="${PYTHON_VERSION}"
+conda activate pyarrow_asv
+pip install -q git+https://github.com/pitrou/asv.git@customize_commands
+
+export PYARROW_WITH_PARQUET=1
+export PYARROW_WITH_PLASMA=1
+export PYARROW_WITH_ORC=0
+export PYARROW_WITH_GANDIVA=0
+
+pushd "${ARROW_PYTHON_DIR}"
+# Workaround for https://github.com/airspeed-velocity/asv/issues/631
+git fetch --depth=100 origin master:master
+# Generate machine information (mandatory)
+asv machine --yes
+# Run benchmarks on the changeset being tested
+asv run --no-pull --show-stderr --quick HEAD^!
+popd  # $ARROW_PYTHON_DIR
diff --git a/src/arrow/ci/scripts/python_build.sh b/src/arrow/ci/scripts/python_build.sh
new file mode 100755
index 000000000..ec6d723b2
--- /dev/null
+++ b/src/arrow/ci/scripts/python_build.sh
@@ -0,0 +1,54 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.
You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -ex
+# Usage: python_build.sh <arrow dir> <build root dir>
+source_dir=${1}/python
+build_dir=${2}/python
+
+if [ -n "${CONDA_PREFIX}" ]; then
+  echo -e "===\n=== Conda environment for build\n==="
+  conda list
+fi
+
+export PYARROW_CMAKE_GENERATOR=${CMAKE_GENERATOR:-Ninja}
+export PYARROW_BUILD_TYPE=${CMAKE_BUILD_TYPE:-debug}
+export PYARROW_WITH_S3=${ARROW_S3:-OFF}
+export PYARROW_WITH_ORC=${ARROW_ORC:-OFF}
+export PYARROW_WITH_CUDA=${ARROW_CUDA:-OFF}
+export PYARROW_WITH_HDFS=${ARROW_HDFS:-OFF}
+export PYARROW_WITH_FLIGHT=${ARROW_FLIGHT:-OFF}
+export PYARROW_WITH_PLASMA=${ARROW_PLASMA:-OFF}
+export PYARROW_WITH_GANDIVA=${ARROW_GANDIVA:-OFF}
+export PYARROW_WITH_PARQUET=${ARROW_PARQUET:-OFF}
+export PYARROW_WITH_DATASET=${ARROW_DATASET:-OFF}
+
+export LD_LIBRARY_PATH=${ARROW_HOME}/lib:${LD_LIBRARY_PATH}
+
+pushd "${source_dir}"
+
+relative_build_dir=$(realpath --relative-to=. "${build_dir}")
+
+# not nice, but prevents mutating the mounted source directory for docker
+${PYTHON:-python} \
+   setup.py build --build-base "${build_dir}" \
+            install --single-version-externally-managed \
+                    --record "${relative_build_dir}/record.txt"
+
+popd
diff --git a/src/arrow/ci/scripts/python_sdist_build.sh b/src/arrow/ci/scripts/python_sdist_build.sh
new file mode 100755
index 000000000..f9e9359b6
--- /dev/null
+++ b/src/arrow/ci/scripts/python_sdist_build.sh
@@ -0,0 +1,27 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +set -eux + +source_dir=${1}/python + +pushd ${source_dir} +export SETUPTOOLS_SCM_PRETEND_VERSION=${PYARROW_VERSION:-} +${PYTHON:-python} setup.py sdist +popd diff --git a/src/arrow/ci/scripts/python_sdist_test.sh b/src/arrow/ci/scripts/python_sdist_test.sh new file mode 100755 index 000000000..3dd7d7ddd --- /dev/null +++ b/src/arrow/ci/scripts/python_sdist_test.sh @@ -0,0 +1,58 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +set -eux + +arrow_dir=${1} + +export ARROW_SOURCE_DIR=${arrow_dir} +export ARROW_TEST_DATA=${arrow_dir}/testing/data +export PARQUET_TEST_DATA=${arrow_dir}/cpp/submodules/parquet-testing/data + +export PYARROW_CMAKE_GENERATOR=${CMAKE_GENERATOR:-Ninja} +export PYARROW_BUILD_TYPE=${CMAKE_BUILD_TYPE:-debug} +export PYARROW_WITH_S3=${ARROW_S3:-OFF} +export PYARROW_WITH_ORC=${ARROW_ORC:-OFF} +export PYARROW_WITH_CUDA=${ARROW_CUDA:-OFF} +export PYARROW_WITH_HDFS=${ARROW_HDFS:-OFF} +export PYARROW_WITH_FLIGHT=${ARROW_FLIGHT:-OFF} +export PYARROW_WITH_PLASMA=${ARROW_PLASMA:-OFF} +export PYARROW_WITH_GANDIVA=${ARROW_GANDIVA:-OFF} +export PYARROW_WITH_PARQUET=${ARROW_PARQUET:-OFF} +export PYARROW_WITH_DATASET=${ARROW_DATASET:-OFF} + +# TODO: Users should not require ARROW_HOME and pkg-config to find Arrow C++. +# Related: ARROW-9171 +# unset ARROW_HOME +# apt purge -y pkg-config + +# ARROW-12619 +if command -v git &> /dev/null; then + echo "Git exists, remove it from PATH before executing this script." + exit 1 +fi + +if [ -n "${PYARROW_VERSION:-}" ]; then + sdist="${arrow_dir}/python/dist/pyarrow-${PYARROW_VERSION}.tar.gz" +else + sdist=$(ls ${arrow_dir}/python/dist/pyarrow-*.tar.gz | sort -r | head -n1) +fi +${PYTHON:-python} -m pip install ${sdist} + +pytest -r s ${PYTEST_ARGS:-} --pyargs pyarrow diff --git a/src/arrow/ci/scripts/python_test.sh b/src/arrow/ci/scripts/python_test.sh new file mode 100755 index 000000000..6e05af89a --- /dev/null +++ b/src/arrow/ci/scripts/python_test.sh @@ -0,0 +1,32 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +set -ex + +arrow_dir=${1} + +export ARROW_SOURCE_DIR=${arrow_dir} +export ARROW_TEST_DATA=${arrow_dir}/testing/data +export PARQUET_TEST_DATA=${arrow_dir}/cpp/submodules/parquet-testing/data +export LD_LIBRARY_PATH=${ARROW_HOME}/lib:${LD_LIBRARY_PATH} + +# Enable some checks inside Python itself +export PYTHONDEVMODE=1 + +pytest -r s -v ${PYTEST_ARGS} --pyargs pyarrow diff --git a/src/arrow/ci/scripts/python_wheel_macos_build.sh b/src/arrow/ci/scripts/python_wheel_macos_build.sh new file mode 100755 index 000000000..1a52a2ad5 --- /dev/null +++ b/src/arrow/ci/scripts/python_wheel_macos_build.sh @@ -0,0 +1,166 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +set -ex + +arch=${1} +source_dir=${2} +build_dir=${3} + +echo "=== (${PYTHON_VERSION}) Clear output directories and leftovers ===" +# Clear output directories and leftovers +rm -rf ${build_dir}/install +rm -rf ${source_dir}/python/dist +rm -rf ${source_dir}/python/build +rm -rf ${source_dir}/python/repaired_wheels +rm -rf ${source_dir}/python/pyarrow/*.so +rm -rf ${source_dir}/python/pyarrow/*.so.* + +echo "=== (${PYTHON_VERSION}) Set SDK, C++ and Wheel flags ===" +export MACOSX_DEPLOYMENT_TARGET=${MACOSX_DEPLOYMENT_TARGET:-10.9} # apply the default first: it is interpolated into _PYTHON_HOST_PLATFORM below +export _PYTHON_HOST_PLATFORM="macosx-${MACOSX_DEPLOYMENT_TARGET}-${arch}" +export SDKROOT=${SDKROOT:-$(xcrun --sdk macosx --show-sdk-path)} + +if [ $arch = "arm64" ]; then + export CMAKE_OSX_ARCHITECTURES="arm64" +elif [ $arch = "x86_64" ]; then + export CMAKE_OSX_ARCHITECTURES="x86_64" +elif [ $arch = "universal2" ]; then + export CMAKE_OSX_ARCHITECTURES="x86_64;arm64" +else + echo "Unexpected architecture: $arch" + exit 1 +fi + +echo "=== (${PYTHON_VERSION}) Install Python build dependencies ===" +export PIP_SITE_PACKAGES=$(python -c 'import site; print(site.getsitepackages()[0])') +export PIP_TARGET_PLATFORM="macosx_${MACOSX_DEPLOYMENT_TARGET//./_}_${arch}" + +pip install \ + --upgrade \ + --only-binary=:all: \ + --target $PIP_SITE_PACKAGES \ + --platform $PIP_TARGET_PLATFORM \ + -r ${source_dir}/python/requirements-wheel-build.txt +pip install "delocate>=0.9" + +echo "=== (${PYTHON_VERSION}) Building Arrow C++ libraries ===" +: ${ARROW_DATASET:=ON} +: ${ARROW_FLIGHT:=ON} +: ${ARROW_GANDIVA:=OFF} +: ${ARROW_HDFS:=ON} +: ${ARROW_JEMALLOC:=ON} +: ${ARROW_MIMALLOC:=ON} +: ${ARROW_ORC:=ON} +: ${ARROW_PARQUET:=ON} +: ${ARROW_PLASMA:=ON} +: ${ARROW_S3:=ON} +: ${ARROW_SIMD_LEVEL:="SSE4_2"} +: ${ARROW_TENSORFLOW:=ON} +: ${ARROW_WITH_BROTLI:=ON} +: ${ARROW_WITH_BZ2:=ON} +: ${ARROW_WITH_LZ4:=ON} +: ${ARROW_WITH_SNAPPY:=ON} +: ${ARROW_WITH_ZLIB:=ON} +: ${ARROW_WITH_ZSTD:=ON} +: ${CMAKE_BUILD_TYPE:=release} +: ${CMAKE_GENERATOR:=Ninja} +:
${CMAKE_UNITY_BUILD:=ON} +: ${VCPKG_FEATURE_FLAGS:=-manifests} +: ${VCPKG_TARGET_TRIPLET:=${VCPKG_DEFAULT_TRIPLET:-x64-osx-static-${CMAKE_BUILD_TYPE}}} + +mkdir -p ${build_dir}/build +pushd ${build_dir}/build + +cmake \ + -DARROW_BUILD_SHARED=ON \ + -DCMAKE_APPLE_SILICON_PROCESSOR=arm64 \ + -DCMAKE_OSX_ARCHITECTURES=${CMAKE_OSX_ARCHITECTURES} \ + -DARROW_BUILD_STATIC=OFF \ + -DARROW_BUILD_TESTS=OFF \ + -DARROW_DATASET=${ARROW_DATASET} \ + -DARROW_DEPENDENCY_SOURCE="VCPKG" \ + -DARROW_DEPENDENCY_USE_SHARED=OFF \ + -DARROW_FLIGHT=${ARROW_FLIGHT} \ + -DARROW_GANDIVA=${ARROW_GANDIVA} \ + -DARROW_HDFS=${ARROW_HDFS} \ + -DARROW_JEMALLOC=${ARROW_JEMALLOC} \ + -DARROW_MIMALLOC=${ARROW_MIMALLOC} \ + -DARROW_ORC=${ARROW_ORC} \ + -DARROW_PACKAGE_KIND="python-wheel-macos" \ + -DARROW_PARQUET=${ARROW_PARQUET} \ + -DARROW_PLASMA=${ARROW_PLASMA} \ + -DARROW_PYTHON=ON \ + -DARROW_RPATH_ORIGIN=ON \ + -DARROW_S3=${ARROW_S3} \ + -DARROW_SIMD_LEVEL=${ARROW_SIMD_LEVEL} \ + -DARROW_TENSORFLOW=${ARROW_TENSORFLOW} \ + -DARROW_USE_CCACHE=ON \ + -DARROW_WITH_BROTLI=${ARROW_WITH_BROTLI} \ + -DARROW_WITH_BZ2=${ARROW_WITH_BZ2} \ + -DARROW_WITH_LZ4=${ARROW_WITH_LZ4} \ + -DARROW_WITH_SNAPPY=${ARROW_WITH_SNAPPY} \ + -DARROW_WITH_ZLIB=${ARROW_WITH_ZLIB} \ + -DARROW_WITH_ZSTD=${ARROW_WITH_ZSTD} \ + -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} \ + -DCMAKE_INSTALL_LIBDIR=lib \ + -DCMAKE_INSTALL_PREFIX=${build_dir}/install \ + -DCMAKE_UNITY_BUILD=${CMAKE_UNITY_BUILD} \ + -DOPENSSL_USE_STATIC_LIBS=ON \ + -DVCPKG_MANIFEST_MODE=OFF \ + -DVCPKG_TARGET_TRIPLET=${VCPKG_TARGET_TRIPLET} \ + -G ${CMAKE_GENERATOR} \ + ${source_dir}/cpp +cmake --build . 
--target install +popd + +echo "=== (${PYTHON_VERSION}) Building wheel ===" +export PYARROW_BUILD_TYPE=${CMAKE_BUILD_TYPE} +export PYARROW_BUNDLE_ARROW_CPP=1 +export PYARROW_CMAKE_GENERATOR=${CMAKE_GENERATOR} +export PYARROW_INSTALL_TESTS=1 +export PYARROW_WITH_DATASET=${ARROW_DATASET} +export PYARROW_WITH_FLIGHT=${ARROW_FLIGHT} +export PYARROW_WITH_GANDIVA=${ARROW_GANDIVA} +export PYARROW_WITH_HDFS=${ARROW_HDFS} +export PYARROW_WITH_ORC=${ARROW_ORC} +export PYARROW_WITH_PARQUET=${ARROW_PARQUET} +export PYARROW_WITH_PLASMA=${ARROW_PLASMA} +export PYARROW_WITH_S3=${ARROW_S3} +export PYARROW_CMAKE_OPTIONS="-DCMAKE_OSX_ARCHITECTURES=${CMAKE_OSX_ARCHITECTURES} -DARROW_SIMD_LEVEL=${ARROW_SIMD_LEVEL}" +# PyArrow build configuration +export PKG_CONFIG_PATH=/usr/lib/pkgconfig:${build_dir}/install/lib/pkgconfig +# Set PyArrow version explicitly +export SETUPTOOLS_SCM_PRETEND_VERSION=${PYARROW_VERSION} + +pushd ${source_dir}/python +python setup.py bdist_wheel +popd + +echo "=== (${PYTHON_VERSION}) Show dynamic libraries the wheel depend on ===" +deps=$(delocate-listdeps ${source_dir}/python/dist/*.whl) + +if echo "$deps" | grep -v "^pyarrow/lib\(arrow\|gandiva\|parquet\|plasma\)"; then # quoted so grep tests every listed dependency, not one joined line + echo "There are non-bundled shared library dependencies." + exit 1 +fi + +# Move the verified wheels +mkdir -p ${source_dir}/python/repaired_wheels +mv ${source_dir}/python/dist/*.whl ${source_dir}/python/repaired_wheels/ diff --git a/src/arrow/ci/scripts/python_wheel_manylinux_build.sh b/src/arrow/ci/scripts/python_wheel_manylinux_build.sh new file mode 100755 index 000000000..434605cf2 --- /dev/null +++ b/src/arrow/ci/scripts/python_wheel_manylinux_build.sh @@ -0,0 +1,149 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership.
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +set -ex + +function check_arrow_visibility { + nm --demangle --dynamic /tmp/arrow-dist/lib/libarrow.so > nm_arrow.log + + # Filter out Arrow symbols and see if anything remains. + # '_init' and '_fini' symbols may or not be present, we don't care. + # (note we must ignore the grep exit status when no match is found) + grep ' T ' nm_arrow.log | grep -v -E '(arrow|\b_init\b|\b_fini\b)' | cat - > visible_symbols.log + + if [[ -f visible_symbols.log && `cat visible_symbols.log | wc -l` -eq 0 ]]; then + return 0 + else + echo "== Unexpected symbols exported by libarrow.so ==" + cat visible_symbols.log + echo "================================================" + + exit 1 + fi +} + +echo "=== (${PYTHON_VERSION}) Clear output directories and leftovers ===" +# Clear output directories and leftovers +rm -rf /tmp/arrow-build +rm -rf /arrow/python/dist +rm -rf /arrow/python/build +rm -rf /arrow/python/repaired_wheels +rm -rf /arrow/python/pyarrow/*.so +rm -rf /arrow/python/pyarrow/*.so.* + +echo "=== (${PYTHON_VERSION}) Building Arrow C++ libraries ===" +: ${ARROW_DATASET:=ON} +: ${ARROW_FLIGHT:=ON} +: ${ARROW_GANDIVA:=OFF} +: ${ARROW_HDFS:=ON} +: ${ARROW_JEMALLOC:=ON} +: ${ARROW_MIMALLOC:=ON} +: ${ARROW_ORC:=ON} +: ${ARROW_PARQUET:=ON} +: ${ARROW_PLASMA:=ON} +: ${ARROW_S3:=ON} +: ${ARROW_TENSORFLOW:=ON} +: ${ARROW_WITH_BROTLI:=ON} +: ${ARROW_WITH_BZ2:=ON} +: ${ARROW_WITH_LZ4:=ON} +: 
${ARROW_WITH_SNAPPY:=ON} +: ${ARROW_WITH_ZLIB:=ON} +: ${ARROW_WITH_ZSTD:=ON} +: ${CMAKE_BUILD_TYPE:=release} +: ${CMAKE_UNITY_BUILD:=ON} +: ${CMAKE_GENERATOR:=Ninja} +: ${VCPKG_FEATURE_FLAGS:=-manifests} +: ${VCPKG_TARGET_TRIPLET:=${VCPKG_DEFAULT_TRIPLET:-x64-linux-static-${CMAKE_BUILD_TYPE}}} + +if [[ "$(uname -m)" == arm* ]] || [[ "$(uname -m)" == aarch* ]]; then + # Build jemalloc --with-lg-page=16 in order to make the wheel work on both + # 4k and 64k page arm64 systems. For more context see + # https://github.com/apache/arrow/issues/10929 + export ARROW_EXTRA_CMAKE_FLAGS="-DARROW_JEMALLOC_LG_PAGE=16" +fi + +mkdir /tmp/arrow-build +pushd /tmp/arrow-build +cmake \ + -DARROW_BROTLI_USE_SHARED=OFF \ + -DARROW_BUILD_SHARED=ON \ + -DARROW_BUILD_STATIC=OFF \ + -DARROW_BUILD_TESTS=OFF \ + -DARROW_DATASET=${ARROW_DATASET} \ + -DARROW_DEPENDENCY_SOURCE="VCPKG" \ + -DARROW_DEPENDENCY_USE_SHARED=OFF \ + -DARROW_FLIGHT=${ARROW_FLIGHT} \ + -DARROW_GANDIVA=${ARROW_GANDIVA} \ + -DARROW_HDFS=${ARROW_HDFS} \ + -DARROW_JEMALLOC=${ARROW_JEMALLOC} \ + -DARROW_MIMALLOC=${ARROW_MIMALLOC} \ + -DARROW_ORC=${ARROW_ORC} \ + -DARROW_PACKAGE_KIND="python-wheel-manylinux${MANYLINUX_VERSION}" \ + -DARROW_PARQUET=${ARROW_PARQUET} \ + -DARROW_PLASMA=${ARROW_PLASMA} \ + -DARROW_PYTHON=ON \ + -DARROW_RPATH_ORIGIN=ON \ + -DARROW_S3=${ARROW_S3} \ + -DARROW_TENSORFLOW=${ARROW_TENSORFLOW} \ + -DARROW_USE_CCACHE=ON \ + -DARROW_UTF8PROC_USE_SHARED=OFF \ + -DARROW_WITH_BROTLI=${ARROW_WITH_BROTLI} \ + -DARROW_WITH_BZ2=${ARROW_WITH_BZ2} \ + -DARROW_WITH_LZ4=${ARROW_WITH_LZ4} \ + -DARROW_WITH_SNAPPY=${ARROW_WITH_SNAPPY} \ + -DARROW_WITH_ZLIB=${ARROW_WITH_ZLIB} \ + -DARROW_WITH_ZSTD=${ARROW_WITH_ZSTD} \ + -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} \ + -DCMAKE_INSTALL_LIBDIR=lib \ + -DCMAKE_INSTALL_PREFIX=/tmp/arrow-dist \ + -DCMAKE_UNITY_BUILD=${CMAKE_UNITY_BUILD} \ + -DOPENSSL_USE_STATIC_LIBS=ON \ + -DVCPKG_MANIFEST_MODE=OFF \ + -DVCPKG_TARGET_TRIPLET=${VCPKG_TARGET_TRIPLET} \ + ${ARROW_EXTRA_CMAKE_FLAGS} \
+ -G ${CMAKE_GENERATOR} \ + /arrow/cpp +cmake --build . --target install +popd + +# Check that we don't expose any unwanted symbols +check_arrow_visibility + +echo "=== (${PYTHON_VERSION}) Building wheel ===" +export PYARROW_BUILD_TYPE=${CMAKE_BUILD_TYPE} +export PYARROW_BUNDLE_ARROW_CPP=1 +export PYARROW_CMAKE_GENERATOR=${CMAKE_GENERATOR} +export PYARROW_INSTALL_TESTS=1 +export PYARROW_WITH_DATASET=${ARROW_DATASET} +export PYARROW_WITH_FLIGHT=${ARROW_FLIGHT} +export PYARROW_WITH_GANDIVA=${ARROW_GANDIVA} +export PYARROW_WITH_HDFS=${ARROW_HDFS} +export PYARROW_WITH_ORC=${ARROW_ORC} +export PYARROW_WITH_PARQUET=${ARROW_PARQUET} +export PYARROW_WITH_PLASMA=${ARROW_PLASMA} +export PYARROW_WITH_S3=${ARROW_S3} +# PyArrow build configuration +export PKG_CONFIG_PATH=/usr/lib/pkgconfig:/tmp/arrow-dist/lib/pkgconfig + +pushd /arrow/python +python setup.py bdist_wheel + +echo "=== (${PYTHON_VERSION}) Tag the wheel with manylinux${MANYLINUX_VERSION} ===" +auditwheel repair -L . dist/pyarrow-*.whl -w repaired_wheels +popd diff --git a/src/arrow/ci/scripts/python_wheel_unix_test.sh b/src/arrow/ci/scripts/python_wheel_unix_test.sh new file mode 100755 index 000000000..ec703abfc --- /dev/null +++ b/src/arrow/ci/scripts/python_wheel_unix_test.sh @@ -0,0 +1,84 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. + +set -e +set -x +set -o pipefail + +if [ "$#" -ne 1 ]; then + echo "Usage: $0 <arrow-src-dir>" + exit 1 +fi + +source_dir=${1} + +: ${ARROW_FLIGHT:=ON} +: ${ARROW_S3:=ON} +: ${CHECK_IMPORTS:=ON} +: ${CHECK_UNITTESTS:=ON} +: ${INSTALL_PYARROW:=ON} + +export PYARROW_TEST_CYTHON=OFF +export PYARROW_TEST_DATASET=ON +export PYARROW_TEST_FLIGHT=${ARROW_FLIGHT} +export PYARROW_TEST_GANDIVA=OFF +export PYARROW_TEST_HDFS=ON +export PYARROW_TEST_ORC=ON +export PYARROW_TEST_PANDAS=ON +export PYARROW_TEST_PARQUET=ON +export PYARROW_TEST_PLASMA=ON +export PYARROW_TEST_S3=${ARROW_S3} +export PYARROW_TEST_TENSORFLOW=ON + +export ARROW_TEST_DATA=${source_dir}/testing/data +export PARQUET_TEST_DATA=${source_dir}/cpp/submodules/parquet-testing/data + +if [ "${INSTALL_PYARROW}" == "ON" ]; then + # Install the built wheels + pip install --force-reinstall ${source_dir}/python/repaired_wheels/*.whl +fi + +if [ "${CHECK_IMPORTS}" == "ON" ]; then + # Test that the modules are importable + python -c " +import pyarrow +import pyarrow._hdfs +import pyarrow.csv +import pyarrow.dataset +import pyarrow.fs +import pyarrow.json +import pyarrow.orc +import pyarrow.parquet +import pyarrow.plasma +" + if [ "${PYARROW_TEST_S3}" == "ON" ]; then + python -c "import pyarrow._s3fs" + fi + if [ "${PYARROW_TEST_FLIGHT}" == "ON" ]; then + python -c "import pyarrow.flight" + fi +fi + +if [ "${CHECK_UNITTESTS}" == "ON" ]; then + # Install testing dependencies + pip install -U -r ${source_dir}/python/requirements-wheel-test.txt + # Execute unittest, test dependencies must be installed + python -c 'import pyarrow; pyarrow.create_library_symlinks()' + python -m pytest -r s --pyargs pyarrow +fi diff --git a/src/arrow/ci/scripts/python_wheel_windows_build.bat b/src/arrow/ci/scripts/python_wheel_windows_build.bat new file mode 100644 index 000000000..23be7f512 --- /dev/null +++
b/src/arrow/ci/scripts/python_wheel_windows_build.bat @@ -0,0 +1,109 @@ +@rem Licensed to the Apache Software Foundation (ASF) under one +@rem or more contributor license agreements. See the NOTICE file +@rem distributed with this work for additional information +@rem regarding copyright ownership. The ASF licenses this file +@rem to you under the Apache License, Version 2.0 (the +@rem "License"); you may not use this file except in compliance +@rem with the License. You may obtain a copy of the License at +@rem +@rem http://www.apache.org/licenses/LICENSE-2.0 +@rem +@rem Unless required by applicable law or agreed to in writing, +@rem software distributed under the License is distributed on an +@rem "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +@rem KIND, either express or implied. See the License for the +@rem specific language governing permissions and limitations +@rem under the License. + +@echo on + +echo "Building windows wheel..." + +call "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Auxiliary\Build\vcvars64.bat" + +echo "=== (%PYTHON_VERSION%) Clear output directories and leftovers ===" +del /s /q C:\arrow-build +del /s /q C:\arrow-dist +del /s /q C:\arrow\python\dist +del /s /q C:\arrow\python\build +del /s /q C:\arrow\python\pyarrow\*.so +del /s /q C:\arrow\python\pyarrow\*.so.* + +echo "=== (%PYTHON_VERSION%) Building Arrow C++ libraries ===" +set ARROW_DATASET=ON +set ARROW_FLIGHT=ON +set ARROW_GANDIVA=OFF +set ARROW_HDFS=ON +set ARROW_ORC=OFF +set ARROW_PARQUET=ON +set ARROW_MIMALLOC=ON +set ARROW_S3=ON +set ARROW_TENSORFLOW=ON +set ARROW_WITH_BROTLI=ON +set ARROW_WITH_BZ2=ON +set ARROW_WITH_LZ4=ON +set ARROW_WITH_SNAPPY=ON +set ARROW_WITH_ZLIB=ON +set ARROW_WITH_ZSTD=ON +set CMAKE_UNITY_BUILD=ON +set CMAKE_GENERATOR=Visual Studio 15 2017 Win64 +set VCPKG_FEATURE_FLAGS=-manifests + +mkdir C:\arrow-build +pushd C:\arrow-build +cmake ^ + -DARROW_BUILD_SHARED=ON ^ + -DARROW_BUILD_STATIC=OFF ^ + -DARROW_BUILD_TESTS=OFF ^ + 
-DARROW_CXXFLAGS="/MP" ^ + -DARROW_DATASET=%ARROW_DATASET% ^ + -DARROW_DEPENDENCY_SOURCE=VCPKG ^ + -DARROW_DEPENDENCY_USE_SHARED=OFF ^ + -DARROW_FLIGHT=%ARROW_FLIGHT% ^ + -DARROW_GANDIVA=%ARROW_GANDIVA% ^ + -DARROW_HDFS=%ARROW_HDFS% ^ + -DARROW_MIMALLOC=%ARROW_MIMALLOC% ^ + -DARROW_ORC=%ARROW_ORC% ^ + -DARROW_PACKAGE_KIND="python-wheel-windows" ^ + -DARROW_PARQUET=%ARROW_PARQUET% ^ + -DARROW_PYTHON=ON ^ + -DARROW_S3=%ARROW_S3% ^ + -DARROW_TENSORFLOW=%ARROW_TENSORFLOW% ^ + -DARROW_WITH_BROTLI=%ARROW_WITH_BROTLI% ^ + -DARROW_WITH_BZ2=%ARROW_WITH_BZ2% ^ + -DARROW_WITH_LZ4=%ARROW_WITH_LZ4% ^ + -DARROW_WITH_SNAPPY=%ARROW_WITH_SNAPPY% ^ + -DARROW_WITH_ZLIB=%ARROW_WITH_ZLIB% ^ + -DARROW_WITH_ZSTD=%ARROW_WITH_ZSTD% ^ + -DCMAKE_BUILD_TYPE=%CMAKE_BUILD_TYPE% ^ + -DCMAKE_CXX_COMPILER=clcache ^ + -DCMAKE_INSTALL_PREFIX=C:\arrow-dist ^ + -DCMAKE_UNITY_BUILD=%CMAKE_UNITY_BUILD% ^ + -DMSVC_LINK_VERBOSE=ON ^ + -DVCPKG_MANIFEST_MODE=OFF ^ + -DVCPKG_TARGET_TRIPLET=x64-windows-static-md-%CMAKE_BUILD_TYPE% ^ + -G "%CMAKE_GENERATOR%" ^ + C:\arrow\cpp || exit /B +cmake --build . 
--config %CMAKE_BUILD_TYPE% --target install || exit /B +popd + +echo "=== (%PYTHON_VERSION%) Building wheel ===" +set PYARROW_BUILD_TYPE=%CMAKE_BUILD_TYPE% +set PYARROW_BUNDLE_ARROW_CPP=ON +set PYARROW_BUNDLE_BOOST=OFF +set PYARROW_CMAKE_GENERATOR=%CMAKE_GENERATOR% +set PYARROW_INSTALL_TESTS=ON +set PYARROW_WITH_DATASET=%ARROW_DATASET% +set PYARROW_WITH_FLIGHT=%ARROW_FLIGHT% +set PYARROW_WITH_GANDIVA=%ARROW_GANDIVA% +set PYARROW_WITH_HDFS=%ARROW_HDFS% +set PYARROW_WITH_ORC=%ARROW_ORC% +set PYARROW_WITH_PARQUET=%ARROW_PARQUET% +set PYARROW_WITH_S3=%ARROW_S3% +set ARROW_HOME=C:\arrow-dist + +pushd C:\arrow\python +@REM bundle the msvc runtime +cp "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Redist\MSVC\14.16.27012\x64\Microsoft.VC141.CRT\msvcp140.dll" pyarrow\ +python setup.py bdist_wheel || exit /B +popd diff --git a/src/arrow/ci/scripts/python_wheel_windows_test.bat b/src/arrow/ci/scripts/python_wheel_windows_test.bat new file mode 100755 index 000000000..1ea0f8acd --- /dev/null +++ b/src/arrow/ci/scripts/python_wheel_windows_test.bat @@ -0,0 +1,55 @@ +@rem Licensed to the Apache Software Foundation (ASF) under one +@rem or more contributor license agreements. See the NOTICE file +@rem distributed with this work for additional information +@rem regarding copyright ownership. The ASF licenses this file +@rem to you under the Apache License, Version 2.0 (the +@rem "License"); you may not use this file except in compliance +@rem with the License. You may obtain a copy of the License at +@rem +@rem http://www.apache.org/licenses/LICENSE-2.0 +@rem +@rem Unless required by applicable law or agreed to in writing, +@rem software distributed under the License is distributed on an +@rem "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +@rem KIND, either express or implied. See the License for the +@rem specific language governing permissions and limitations +@rem under the License. 
+ +@echo on + +set PYARROW_TEST_CYTHON=OFF +set PYARROW_TEST_DATASET=ON +set PYARROW_TEST_FLIGHT=ON +set PYARROW_TEST_GANDIVA=OFF +set PYARROW_TEST_HDFS=ON +set PYARROW_TEST_ORC=OFF +set PYARROW_TEST_PARQUET=ON +set PYARROW_TEST_PLASMA=OFF +set PYARROW_TEST_S3=OFF +set PYARROW_TEST_TENSORFLOW=ON + +@REM Enable again once https://github.com/scipy/oldest-supported-numpy/pull/27 gets merged +@REM set PYARROW_TEST_PANDAS=ON + +set ARROW_TEST_DATA=C:\arrow\testing\data +set PARQUET_TEST_DATA=C:\arrow\cpp\submodules\parquet-testing\data + +@REM Install testing dependencies +pip install -r C:\arrow\python\requirements-wheel-test.txt || exit /B + +@REM Install the built wheels +python -m pip install --no-index --find-links=C:\arrow\python\dist\ pyarrow || exit /B + +@REM Test that the modules are importable; abort on the first failed import +python -c "import pyarrow" || exit /B +python -c "import pyarrow._hdfs" || exit /B +python -c "import pyarrow._s3fs" || exit /B +python -c "import pyarrow.csv" || exit /B +python -c "import pyarrow.dataset" || exit /B +python -c "import pyarrow.flight" || exit /B +python -c "import pyarrow.fs" || exit /B +python -c "import pyarrow.json" || exit /B +python -c "import pyarrow.parquet" || exit /B + +@REM Execute unittest +pytest -r s --pyargs pyarrow || exit /B diff --git a/src/arrow/ci/scripts/r_build.sh b/src/arrow/ci/scripts/r_build.sh new file mode 100755 index 000000000..2a2b9d7d1 --- /dev/null +++ b/src/arrow/ci/scripts/r_build.sh @@ -0,0 +1,33 @@ +#!/usr/bin/env bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License.
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +set -ex + +: ${R_BIN:=R} +source_dir=${1}/r +with_docs=${2:-false} + +pushd ${source_dir} + +${R_BIN} CMD INSTALL ${INSTALL_ARGS} . + +if [ "${with_docs}" == "true" ]; then + ${R_BIN} -e "pkgdown::build_site(install = FALSE)" +fi + +popd
\ No newline at end of file diff --git a/src/arrow/ci/scripts/r_deps.sh b/src/arrow/ci/scripts/r_deps.sh new file mode 100755 index 000000000..ad1b5ecc1 --- /dev/null +++ b/src/arrow/ci/scripts/r_deps.sh @@ -0,0 +1,45 @@ +#!/usr/bin/env bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +set -ex + +: ${R_BIN:=R} + +source_dir=${1}/r + +pushd ${source_dir} + +if [ ${R_BIN} = "RDsan" ]; then + # To prevent the build from timing out, let's prune some optional deps (and their possible version requirements) + ${R_BIN} -e 'd <- read.dcf("DESCRIPTION") + to_prune <- c("duckdb", "DBI", "dbplyr", "decor", "knitr", "rmarkdown", "pkgload", "reticulate") + pattern <- paste0("\\n?", to_prune, " (\\\\(.*\\\\))?,?", collapse = "|") + d[,"Suggests"] <- gsub(pattern, "", d[,"Suggests"]) + write.dcf(d, "DESCRIPTION")' +fi + +# Install R package dependencies +# install.packages() emits warnings if packages fail to install, +# but we want to error/fail the build. 
+# options(warn=2) turns warnings into errors +${R_BIN} -e "options(warn=2); install.packages('remotes'); remotes::install_cran(c('glue', 'rcmdcheck', 'sys')); remotes::install_deps(INSTALL_opts = '"${INSTALL_ARGS}"')" +# Separately install the optional/test dependencies but don't error on them, +# they're not available everywhere and that's ok +${R_BIN} -e "remotes::install_deps(dependencies = TRUE, INSTALL_opts = '"${INSTALL_ARGS}"')" + +popd diff --git a/src/arrow/ci/scripts/r_docker_configure.sh b/src/arrow/ci/scripts/r_docker_configure.sh new file mode 100755 index 000000000..20c987085 --- /dev/null +++ b/src/arrow/ci/scripts/r_docker_configure.sh @@ -0,0 +1,81 @@ +#!/usr/bin/env bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +set -ex + +: ${R_BIN:=R} + +# The Dockerfile should have put this file here +if [ -f "/arrow/ci/etc/rprofile" ]; then + # Ensure parallel R package installation, set CRAN repo mirror, + # and use pre-built binaries where possible + cat /arrow/ci/etc/rprofile >> $(${R_BIN} RHOME)/etc/Rprofile.site +fi + +# Ensure parallel compilation of C/C++ code (written to the Renviron.site of the R selected by R_BIN) +echo "MAKEFLAGS=-j$(${R_BIN} -s -e 'cat(parallel::detectCores())')" >> $(${R_BIN} RHOME)/etc/Renviron.site + +# Special hacking to try to reproduce quirks on fedora-clang-devel on CRAN +# which uses a bespoke clang compiled to use libc++ +# https://www.stats.ox.ac.uk/pub/bdr/Rconfig/r-devel-linux-x86_64-fedora-clang +if [ "$RHUB_PLATFORM" = "linux-x86_64-fedora-clang" ]; then + dnf install -y libcxx-devel + sed -i.bak -E -e 's/(CXX1?1? =.*)/\1 -stdlib=libc++/g' $(${R_BIN} RHOME)/etc/Makeconf + rm -rf $(${R_BIN} RHOME)/etc/Makeconf.bak + + sed -i.bak -E -e 's/(CXXFLAGS = )(.*)/\1 -g -O3 -Wall -pedantic -frtti -fPIC/' $(${R_BIN} RHOME)/etc/Makeconf + rm -rf $(${R_BIN} RHOME)/etc/Makeconf.bak +fi + +# Special hacking to try to reproduce quirks on centos using non-default build +# tooling.
+if [[ "$DEVTOOLSET_VERSION" -gt 0 ]]; then + if [ "`which dnf`" ]; then + dnf install -y centos-release-scl + dnf install -y "devtoolset-$DEVTOOLSET_VERSION" + else + yum install -y centos-release-scl + yum install -y "devtoolset-$DEVTOOLSET_VERSION" + fi +fi + +# Install openssl for S3 support +if [ "$ARROW_S3" == "ON" ] || [ "$ARROW_R_DEV" == "TRUE" ]; then + if [ "`which dnf`" ]; then + dnf install -y libcurl-devel openssl-devel + elif [ "`which yum`" ]; then + yum install -y libcurl-devel openssl-devel + elif [ "`which zypper`" ]; then + zypper install -y libcurl-devel libopenssl-devel + else + apt-get update + apt-get install -y libcurl4-openssl-dev libssl-dev + fi + + # The Dockerfile should have put this file here + if [ -f "/arrow/ci/scripts/install_minio.sh" ] && [ "`which wget`" ]; then + /arrow/ci/scripts/install_minio.sh amd64 linux latest /usr/local + fi + + if [ -f "/arrow/ci/scripts/install_gcs_testbench.sh" ] && [ "`which pip`" ]; then + /arrow/ci/scripts/install_gcs_testbench.sh amd64 default + fi +fi + +# Workaround for html help install failure; see https://github.com/r-lib/devtools/issues/2084#issuecomment-530912786 +Rscript -e 'x <- file.path(R.home("doc"), "html"); if (!file.exists(x)) {dir.create(x, recursive=TRUE); file.copy(system.file("html/R.css", package="stats"), x)}' diff --git a/src/arrow/ci/scripts/r_pkgdown_check.sh b/src/arrow/ci/scripts/r_pkgdown_check.sh new file mode 100755 index 000000000..327480a6b --- /dev/null +++ b/src/arrow/ci/scripts/r_pkgdown_check.sh @@ -0,0 +1,41 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# Use this script to prevent errors in the pkgdown site being rendered due to missing YAML entries + +# all .Rd files in the repo +all_rd_files=`find ./r/man -maxdepth 1 -name "*.Rd" | sed -e 's/.\/r\/man\///g' | sed -e 's/.Rd//g' | sort` + +# .Rd files to exclude from search (i.e. are internal) +exclusions=`grep "\keyword{internal}" -rl ./r/man --include=*.Rd | sed -e 's/.\/r\/man\///g' | sed -e 's/.Rd//g' | sort` + +# .Rd files to check against pkgdown.yml +rd_files=`echo ${exclusions[@]} ${all_rd_files[@]} | tr ' ' '\n' | sort | uniq -u` + +# pkgdown sections +pkgdown_sections=`awk '/^[^ ]/{ f=/reference:/; next } f{ if (sub(/:$/,"")) pkg=$2; else print pkg, $2 }' ./r/_pkgdown.yml | grep -v "title:" | sort` + +# get things that appear in man files that don't appear in pkgdown sections +pkgdown_missing=`echo ${pkgdown_sections[@]} ${pkgdown_sections[@]} ${rd_files[@]} | tr ' ' '\n' | sort | uniq -u` + +# if any sections are missing raise an error +if ([ ${#pkgdown_missing} -ge 1 ]); then + echo "Error! $pkgdown_missing missing from ./r/_pkgdown.yml" + exit 1 +fi diff --git a/src/arrow/ci/scripts/r_revdepcheck.sh b/src/arrow/ci/scripts/r_revdepcheck.sh new file mode 100755 index 000000000..b0a2bab64 --- /dev/null +++ b/src/arrow/ci/scripts/r_revdepcheck.sh @@ -0,0 +1,88 @@ +#!/usr/bin/env bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +set -ex + +: ${R_BIN:=R} + +source_dir=${1}/r + +# cpp building dependencies +apt install -y cmake + +# system dependencies needed for arrow's reverse dependencies +apt install -y libxml2-dev \ + libfontconfig1-dev \ + libcairo2-dev \ + libglpk-dev \ + libmysqlclient-dev \ + unixodbc-dev \ + libpq-dev \ + coinor-libsymphony-dev \ + coinor-libcgl-dev \ + coinor-symphony \ + libzmq3-dev \ + libudunits2-dev \ + libgdal-dev \ + libgeos-dev \ + libproj-dev + +pushd ${source_dir} + +printenv + +# By default, aws-sdk tries to contact a non-existing local ip host +# to retrieve metadata. Disable this so that S3FileSystem tests run faster. 
+export AWS_EC2_METADATA_DISABLED=TRUE + +# Set crancache dir so we can cache it +export CRANCACHE_DIR="/arrow/.crancache" + +SCRIPT=" + # We can't use RSPM binaries because we need source packages + options('repos' = c(CRAN = 'https://packagemanager.rstudio.com/all/latest')) + remotes::install_github('r-lib/revdepcheck') + + # zoo is needed by RcisTarget tests, though only listed in enhances so not installed by revdepcheck + install.packages('zoo') + + # actually run revdepcheck + revdepcheck::revdep_check( + quiet = FALSE, + timeout = as.difftime(120, units = 'mins'), + num_workers = 1, + env = c( + ARROW_R_DEV = '$ARROW_R_DEV', + LIBARROW_DOWNLOAD = TRUE, + LIBARROW_MINIMAL = FALSE, + revdepcheck::revdep_env_vars() + )) + revdepcheck::revdep_report(all = TRUE) + + # Go through the summary and fail if any of the statuses include - + summary <- revdepcheck::revdep_summary() + failed <- lapply(summary, function(check) grepl('-', check[['status']])) + + if (any(unlist(failed))) { + quit(status = 1) + } + " + +echo "$SCRIPT" | ${R_BIN} --no-save + +popd diff --git a/src/arrow/ci/scripts/r_sanitize.sh b/src/arrow/ci/scripts/r_sanitize.sh new file mode 100755 index 000000000..6c79c0851 --- /dev/null +++ b/src/arrow/ci/scripts/r_sanitize.sh @@ -0,0 +1,46 @@ +#!/usr/bin/env bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. + +set -ex + +: ${R_BIN:=RDsan} + +source_dir=${1}/r + +pushd ${source_dir} + +# Unity builds were causing the CI job to run out of memory +export CMAKE_UNITY_BUILD=OFF +# Make installation verbose so that the CI job doesn't time out due to silence +export ARROW_R_DEV=TRUE +${R_BIN} CMD INSTALL ${INSTALL_ARGS} . +# But unset the env var so that it doesn't cause us to run extra dev tests +unset ARROW_R_DEV + +export UBSAN_OPTIONS="print_stacktrace=1,suppressions=/arrow/r/tools/ubsan.supp" + +pushd tests +${R_BIN} < testthat.R > testthat.out 2>&1 || { cat testthat.out; exit 1; } +popd +${R_BIN} -e 'library(arrow); testthat::test_examples(".")' >> testthat.out 2>&1 || { cat testthat.out; exit 1; } + +cat testthat.out +if grep -q "runtime error" testthat.out; then + exit 1 +fi +popd diff --git a/src/arrow/ci/scripts/r_test.sh b/src/arrow/ci/scripts/r_test.sh new file mode 100755 index 000000000..62e423cf5 --- /dev/null +++ b/src/arrow/ci/scripts/r_test.sh @@ -0,0 +1,106 @@ +#!/usr/bin/env bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +set -ex + +: ${R_BIN:=R} + +source_dir=${1}/r + +pushd ${source_dir} + +printenv + +if [ "$ARROW_USE_PKG_CONFIG" != "false" ]; then + export LD_LIBRARY_PATH=${ARROW_HOME}/lib:${LD_LIBRARY_PATH} + export R_LD_LIBRARY_PATH=${LD_LIBRARY_PATH} +fi +export _R_CHECK_COMPILATION_FLAGS_KNOWN_=${ARROW_R_CXXFLAGS} +if [ "$ARROW_R_DEV" = "TRUE" ]; then + # These are sometimes used in the Arrow C++ build and are not a problem + export _R_CHECK_COMPILATION_FLAGS_KNOWN_="${_R_CHECK_COMPILATION_FLAGS_KNOWN_} -Wno-attributes -msse4.2 -Wno-noexcept-type -Wno-subobject-linkage" + if [ "$NOT_CRAN" = "" ]; then + # Note that NOT_CRAN=true means (among other things) that optional dependencies are built + # You can set NOT_CRAN=false for the CRAN build and then + # ARROW_R_DEV=TRUE just adds verbosity + export NOT_CRAN=true + fi +fi + +export _R_CHECK_CRAN_INCOMING_REMOTE_=FALSE +if [ "$TEST_R_WITHOUT_LIBARROW" != "TRUE" ]; then + # --run-donttest was used in R < 4.0, this is used now + export _R_CHECK_DONTTEST_EXAMPLES_=TRUE +fi +# Not all Suggested packages are needed for checking, so in case they aren't installed don't fail +export _R_CHECK_FORCE_SUGGESTS_=FALSE +export _R_CHECK_LIMIT_CORES_=FALSE +export _R_CHECK_TESTS_NLINES_=0 + +# By default, aws-sdk tries to contact a non-existing local ip host +# to retrieve metadata. Disable this so that S3FileSystem tests run faster. 
+export AWS_EC2_METADATA_DISABLED=TRUE + +# Hack so that texlive2020 doesn't pollute the home dir +export TEXMFCONFIG=/tmp/texmf-config +export TEXMFVAR=/tmp/texmf-var + +if [[ "$DEVTOOLSET_VERSION" -gt 0 ]]; then + # enable the devtoolset version to use it + source /opt/rh/devtoolset-$DEVTOOLSET_VERSION/enable +fi + +# Make sure we aren't writing to the home dir (CRAN _hates_ this but there is no official check) +BEFORE=$(ls -alh ~/) + +SCRIPT="as_cran <- !identical(tolower(Sys.getenv('NOT_CRAN')), 'true') + if (as_cran) { + args <- '--as-cran' + build_args <- character() + } else { + args <- c('--no-manual', '--ignore-vignettes') + build_args <- '--no-build-vignettes' + + if (nzchar(Sys.which('minio'))) { + message('Running minio for S3 tests (if build supports them)') + minio_dir <- tempfile() + dir.create(minio_dir) + pid <- sys::exec_background('minio', c('server', minio_dir)) + on.exit(tools::pskill(pid)) + } + } + + run_donttest <- identical(tolower(Sys.getenv('_R_CHECK_DONTTEST_EXAMPLES_', 'true')), 'true') + if (run_donttest) { + args <- c(args, '--run-donttest') + } + + install_args <- Sys.getenv('INSTALL_ARGS') + if (nzchar(install_args)) { + args <- c(args, paste0('--install-args=\"', install_args, '\"')) + } + + rcmdcheck::rcmdcheck(build_args = build_args, args = args, error_on = 'warning', check_dir = 'check', timeout = 3600)" +echo "$SCRIPT" | ${R_BIN} --no-save + +AFTER=$(ls -alh ~/) +if [ "$NOT_CRAN" != "true" ] && [ "$BEFORE" != "$AFTER" ]; then + ls -alh ~/.cmake/packages + exit 1 +fi +popd diff --git a/src/arrow/ci/scripts/r_valgrind.sh b/src/arrow/ci/scripts/r_valgrind.sh new file mode 100755 index 000000000..772d8f44e --- /dev/null +++ b/src/arrow/ci/scripts/r_valgrind.sh @@ -0,0 +1,46 @@ +#!/usr/bin/env bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +set -ex + +: ${R_BIN:=RDvalgrind} + +source_dir=${1}/r + +export CMAKE_BUILD_TYPE=RelWithDebInfo + +${R_BIN} CMD INSTALL ${INSTALL_ARGS} ${source_dir} +pushd ${source_dir}/tests + +# to generate suppression files run: +# ${R_BIN} --vanilla -d "valgrind --tool=memcheck --leak-check=full --track-origins=yes --gen-suppressions=all --log-file=memcheck.log" -f testtthat.supp +${R_BIN} --vanilla -d "valgrind --tool=memcheck --leak-check=full --track-origins=yes --suppressions=/${1}/ci/etc/valgrind-cran.supp" -f testthat.R |& tee testthat.out + +# valgrind --error-exitcode=1 should return an erroring exit code that we can catch, +# but R eats that and returns 0, so we need to look at the output and make sure that +# we have 0 errors instead. 
+if [ $(grep -c "ERROR SUMMARY: 0 errors" testthat.out) != 1 ]; then + cat testthat.out + echo "Found Valgrind errors" + exit 1 +fi + +# We might also considering using the greps that LibthGBM uses: +# https://github.com/microsoft/LightGBM/blob/fa6d356555f9ef888acf5f5e259dca958ca24f6d/.ci/test_r_package_valgrind.sh#L20-L85 + +popd diff --git a/src/arrow/ci/scripts/r_windows_build.sh b/src/arrow/ci/scripts/r_windows_build.sh new file mode 100755 index 000000000..5bb58c760 --- /dev/null +++ b/src/arrow/ci/scripts/r_windows_build.sh @@ -0,0 +1,116 @@ +#!/usr/bin/env bash + +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +set -ex + +: ${ARROW_HOME:=$(pwd)} +# Make sure it is absolute and exported +export ARROW_HOME="$(cd "${ARROW_HOME}" && pwd)" + +if [ "$RTOOLS_VERSION" = "35" ]; then + # Use rtools-backports if building with rtools35 + curl https://raw.githubusercontent.com/r-windows/rtools-backports/master/pacman.conf > /etc/pacman.conf + pacman --noconfirm -Syy + # lib-4.9.3 is for libraries compiled with gcc 4.9 (Rtools 3.5) + RWINLIB_LIB_DIR="lib-4.9.3" + # This is the default (will build for each arch) but we can set up CI to + # do these in parallel + : ${MINGW_ARCH:="mingw32 mingw64"} +else + # Uncomment L38-41 if you're testing a new rtools dependency that hasn't yet sync'd to CRAN + # curl https://raw.githubusercontent.com/r-windows/rtools-packages/master/pacman.conf > /etc/pacman.conf + # curl -OSsl "http://repo.msys2.org/msys/x86_64/msys2-keyring-r21.b39fb11-1-any.pkg.tar.xz" + # pacman -U --noconfirm msys2-keyring-r21.b39fb11-1-any.pkg.tar.xz && rm msys2-keyring-r21.b39fb11-1-any.pkg.tar.xz + # pacman --noconfirm -Scc + + pacman --noconfirm -Syy + RWINLIB_LIB_DIR="lib" + : ${MINGW_ARCH:="mingw32 mingw64 ucrt64"} +fi + +export MINGW_ARCH + +cp $ARROW_HOME/ci/scripts/PKGBUILD . +printenv +makepkg-mingw --noconfirm --noprogressbar --skippgpcheck --nocheck --syncdeps --cleanbuild + +VERSION=$(grep Version $ARROW_HOME/r/DESCRIPTION | cut -d " " -f 2) +DST_DIR="arrow-$VERSION" + +# Collect the build artifacts and make the shape of zip file that rwinlib expects +ls +mkdir -p build +mv mingw* build +cd build + +# This may vary by system/CI provider +MSYS_LIB_DIR="/c/rtools40" + +# Untar the builds we made +ls *.xz | xargs -n 1 tar -xJf +mkdir -p $DST_DIR +# Grab the headers from one, either one is fine +# (if we're building twice to combine old and new toolchains, this may already exist) +if [ ! 
-d $DST_DIR/include ]; then + mv $(echo $MINGW_ARCH | cut -d ' ' -f 1)/include $DST_DIR +fi + +# mingw64 -> x64 +# mingw32 -> i386 +# ucrt64 -> x64-ucrt + +if [ -d mingw64/lib/ ]; then + ls $MSYS_LIB_DIR/mingw64/lib/ + # Make the rest of the directory structure + # lib-4.9.3 is for libraries compiled with gcc 4.9 (Rtools 3.5) + mkdir -p $DST_DIR/${RWINLIB_LIB_DIR}/x64 + # lib is for the new gcc 8 toolchain (Rtools 4.0) + mkdir -p $DST_DIR/lib/x64 + # Move the 64-bit versions of libarrow into the expected location + mv mingw64/lib/*.a $DST_DIR/${RWINLIB_LIB_DIR}/x64 + # These may be from https://dl.bintray.com/rtools/backports/ + cp $MSYS_LIB_DIR/mingw64/lib/lib{thrift,snappy}.a $DST_DIR/${RWINLIB_LIB_DIR}/x64 + # These are from https://dl.bintray.com/rtools/mingw{32,64}/ + cp $MSYS_LIB_DIR/mingw64/lib/lib{zstd,lz4,crypto,utf8proc,re2,aws*}.a $DST_DIR/lib/x64 +fi + +# Same for the 32-bit versions +if [ -d mingw32/lib/ ]; then + ls $MSYS_LIB_DIR/mingw32/lib/ + mkdir -p $DST_DIR/${RWINLIB_LIB_DIR}/i386 + mkdir -p $DST_DIR/lib/i386 + mv mingw32/lib/*.a $DST_DIR/${RWINLIB_LIB_DIR}/i386 + cp $MSYS_LIB_DIR/mingw32/lib/lib{thrift,snappy}.a $DST_DIR/${RWINLIB_LIB_DIR}/i386 + cp $MSYS_LIB_DIR/mingw32/lib/lib{zstd,lz4,crypto,utf8proc,re2,aws*}.a $DST_DIR/lib/i386 +fi + +# Do the same also for ucrt64 +if [ -d ucrt64/lib/ ]; then + ls $MSYS_LIB_DIR/ucrt64/lib/ + mkdir -p $DST_DIR/lib/x64-ucrt + mv ucrt64/lib/*.a $DST_DIR/lib/x64-ucrt + cp $MSYS_LIB_DIR/ucrt64/lib/lib{thrift,snappy,zstd,lz4,crypto,utf8proc,re2,aws*}.a $DST_DIR/lib/x64-ucrt +fi + +# Create build artifact +zip -r ${DST_DIR}.zip $DST_DIR + +# Copy that to a file name/path that does not vary by version number so we +# can easily find it in the R package tests on CI +cp ${DST_DIR}.zip ../libarrow.zip diff --git a/src/arrow/ci/scripts/release_test.sh b/src/arrow/ci/scripts/release_test.sh new file mode 100755 index 000000000..ae2ab3288 --- /dev/null +++ b/src/arrow/ci/scripts/release_test.sh @@ -0,0 +1,28 @@ 
+#!/usr/bin/env bash + +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +set -eux + +arrow_dir=${1} + +pushd ${arrow_dir} + +dev/release/run-test.rb + +popd diff --git a/src/arrow/ci/scripts/ruby_test.sh b/src/arrow/ci/scripts/ruby_test.sh new file mode 100755 index 000000000..03d20e198 --- /dev/null +++ b/src/arrow/ci/scripts/ruby_test.sh @@ -0,0 +1,29 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +set -ex + +source_dir=${1}/ruby +build_dir=${2}/ruby + +export LD_LIBRARY_PATH=${ARROW_HOME}/lib:${LD_LIBRARY_PATH} +export PKG_CONFIG_PATH=${ARROW_HOME}/lib/pkgconfig +export GI_TYPELIB_PATH=${ARROW_HOME}/lib/girepository-1.0 + +rake -f ${source_dir}/Rakefile BUILD_DIR=${build_dir} USE_BUNDLER=yes diff --git a/src/arrow/ci/scripts/rust_build.sh b/src/arrow/ci/scripts/rust_build.sh new file mode 100755 index 000000000..3532ea3d5 --- /dev/null +++ b/src/arrow/ci/scripts/rust_build.sh @@ -0,0 +1,59 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +set -e + +arrow_dir=${1} +source_dir=${1}/rust + +# This file is used to build the rust binaries needed for the archery +# integration tests. Testing of the rust implementation in normal CI is handled +# by github workflows in the arrow-rs repository. 
+ +# Disable full debug symbol generation to speed up CI build / reduce memory required +export RUSTFLAGS="-C debuginfo=1" + +export ARROW_TEST_DATA=${arrow_dir}/testing/data +export PARQUET_TEST_DATA=${arrow_dir}/cpp/submodules/parquet-testing/data + +if [ "${ARCHERY_INTEGRATION_WITH_RUST}" -eq "0" ]; then + echo "=====================================================================" + echo "Not building the Rust implementation." + echo "=====================================================================" + exit 0; +elif [ ! -d "${source_dir}" ]; then + echo "=====================================================================" + echo "The Rust source is missing. Please clone the arrow-rs repository" + echo "to arrow/rust before running the integration tests:" + echo " git clone https://github.com/apache/arrow-rs.git path/to/arrow/rust" + echo "=====================================================================" + exit 1; +fi + +set -x + +# show activated toolchain +rustup show + +pushd ${source_dir} + +# build only the integration testing binaries +cargo build -p arrow-integration-testing + +popd diff --git a/src/arrow/ci/scripts/util_checkout.sh b/src/arrow/ci/scripts/util_checkout.sh new file mode 100755 index 000000000..25fe69aa1 --- /dev/null +++ b/src/arrow/ci/scripts/util_checkout.sh @@ -0,0 +1,28 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# this script is github actions specific to check out the submodules and tags + +# TODO(kszucs): remove it once the "submodules: recursive" feature is released +auth_header="$(git config --local --get http.https://github.com/.extraheader)" +git submodule sync --recursive +git -c "http.extraheader=$auth_header" -c protocol.version=2 submodule update --init --force --recursive --depth=1 + +# fetch all the tags +git fetch --depth=1 origin +refs/tags/*:refs/tags/* diff --git a/src/arrow/ci/scripts/util_cleanup.sh b/src/arrow/ci/scripts/util_cleanup.sh new file mode 100755 index 000000000..3a13a1a78 --- /dev/null +++ b/src/arrow/ci/scripts/util_cleanup.sh @@ -0,0 +1,54 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +# This script is Github Actions-specific to free up disk space, +# to avoid disk full errors on some builds + +if [ $RUNNER_OS = "Linux" ]; then + df -h + + # remove swap + sudo swapoff -a + sudo rm -f /swapfile + + # clean apt cache + sudo apt clean + + # remove haskell, consumes 8.6 GB + sudo rm -rf /opt/ghc + + # 1 GB + sudo rm -rf /home/linuxbrew/.linuxbrew + + # 1+ GB + sudo rm -rf /opt/hostedtoolcache/CodeQL + + # 1+ GB + sudo rm -rf /usr/share/swift + + # 12 GB, but takes a lot of time to delete + #sudo rm -rf /usr/local/lib/android + + # remove cached docker images, around 13 GB + docker rmi $(docker image ls -aq) + + # NOTE: /usr/share/dotnet is 25 GB +fi + +df -h diff --git a/src/arrow/ci/scripts/util_download_apache.sh b/src/arrow/ci/scripts/util_download_apache.sh new file mode 100755 index 000000000..d8e9b6ca7 --- /dev/null +++ b/src/arrow/ci/scripts/util_download_apache.sh @@ -0,0 +1,55 @@ +#!/usr/bin/env bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +set -x + +if [ "$#" -ne 2 ]; then + echo "Usage: $0 <apache tarball path> <target directory>" + exit 1 +fi + +tarball_path=$1 +target_dir=$2 + +APACHE_MIRRORS=( + "http://www.apache.org/dyn/closer.cgi?action=download&filename=" + "https://downloads.apache.org" + "https://apache.claz.org" + "https://apache.cs.utah.edu" + "https://apache.mirrors.lucidnetworks.net" + "https://apache.osuosl.org" + "https://ftp.wayne.edu/apache" + "https://mirror.olnevhost.net/pub/apache" + "https://mirrors.gigenet.com/apache" + "https://mirrors.koehn.com/apache" + "https://mirrors.ocf.berkeley.edu/apache" + "https://mirrors.sonic.net/apache" + "https://us.mirrors.quenda.co/apache" +) + +mkdir -p "${target_dir}" + +for mirror in ${APACHE_MIRRORS[*]} +do + curl -SL "${mirror}/${tarball_path}" | tar -xzf - -C "${target_dir}" + if [ $? == 0 ]; then + exit 0 + fi +done + +exit 1 diff --git a/src/arrow/ci/scripts/util_wait_for_it.sh b/src/arrow/ci/scripts/util_wait_for_it.sh new file mode 100755 index 000000000..51ce816eb --- /dev/null +++ b/src/arrow/ci/scripts/util_wait_for_it.sh @@ -0,0 +1,199 @@ +#!/usr/bin/env bash + +# The MIT License (MIT) +# Copyright (c) 2016 Giles Hall +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of +# this software and associated documentation files (the "Software"), to deal in +# the Software without restriction, including without limitation the rights to +# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +# of the Software, and to permit persons to whom the Software is furnished to do +# so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# +# Use this script to test if a given TCP host/port are available + +cmdname=$(basename $0) + +echoerr() { if [[ $QUIET -ne 1 ]]; then echo "$@" 1>&2; fi } + +usage() +{ + cat << USAGE >&2 +Usage: + $cmdname host:port [-s] [-t timeout] [-- command args] + -h HOST | --host=HOST Host or IP under test + -p PORT | --port=PORT TCP port under test + Alternatively, you specify the host and port as host:port + -s | --strict Only execute subcommand if the test succeeds + -q | --quiet Don't output any status messages + -t TIMEOUT | --timeout=TIMEOUT + Timeout in seconds, zero for no timeout + -- COMMAND ARGS Execute command with args after the test finishes +USAGE + exit 1 +} + +wait_for() +{ + if [[ $TIMEOUT -gt 0 ]]; then + echoerr "$cmdname: waiting $TIMEOUT seconds for $HOST:$PORT" + else + echoerr "$cmdname: waiting for $HOST:$PORT without a timeout" + fi + start_ts=$(date +%s) + while : + do + if [[ $ISBUSY -eq 1 ]]; then + nc -z $HOST $PORT + result=$? + else + (echo > /dev/tcp/$HOST/$PORT) >/dev/null 2>&1 + result=$? + fi + if [[ $result -eq 0 ]]; then + end_ts=$(date +%s) + echoerr "$cmdname: $HOST:$PORT is available after $((end_ts - start_ts)) seconds" + break + fi + sleep 1 + done + return $result +} + +wait_for_wrapper() +{ + # In order to support SIGINT during timeout: http://unix.stackexchange.com/a/57692 + if [[ $QUIET -eq 1 ]]; then + timeout $BUSYTIMEFLAG $TIMEOUT $0 --quiet --child --host=$HOST --port=$PORT --timeout=$TIMEOUT & + else + timeout $BUSYTIMEFLAG $TIMEOUT $0 --child --host=$HOST --port=$PORT --timeout=$TIMEOUT & + fi + PID=$! + trap "kill -INT -$PID" INT + wait $PID + RESULT=$? 
+ if [[ $RESULT -ne 0 ]]; then + echoerr "$cmdname: timeout occurred after waiting $TIMEOUT seconds for $HOST:$PORT" + fi + return $RESULT +} + +# process arguments +while [[ $# -gt 0 ]] +do + case "$1" in + *:* ) + hostport=(${1//:/ }) + HOST=${hostport[0]} + PORT=${hostport[1]} + shift 1 + ;; + --child) + CHILD=1 + shift 1 + ;; + -q | --quiet) + QUIET=1 + shift 1 + ;; + -s | --strict) + STRICT=1 + shift 1 + ;; + -h) + HOST="$2" + if [[ $HOST == "" ]]; then break; fi + shift 2 + ;; + --host=*) + HOST="${1#*=}" + shift 1 + ;; + -p) + PORT="$2" + if [[ $PORT == "" ]]; then break; fi + shift 2 + ;; + --port=*) + PORT="${1#*=}" + shift 1 + ;; + -t) + TIMEOUT="$2" + if [[ $TIMEOUT == "" ]]; then break; fi + shift 2 + ;; + --timeout=*) + TIMEOUT="${1#*=}" + shift 1 + ;; + --) + shift + CLI=("$@") + break + ;; + --help) + usage + ;; + *) + echoerr "Unknown argument: $1" + usage + ;; + esac +done + +if [[ "$HOST" == "" || "$PORT" == "" ]]; then + echoerr "Error: you need to provide a host and port to test." + usage +fi + +TIMEOUT=${TIMEOUT:-15} +STRICT=${STRICT:-0} +CHILD=${CHILD:-0} +QUIET=${QUIET:-0} + +# check to see if timeout is from busybox? +# check to see if timeout is from busybox? +TIMEOUT_PATH=$(realpath $(which timeout)) +if [[ $TIMEOUT_PATH =~ "busybox" ]]; then + ISBUSY=1 + BUSYTIMEFLAG="-t" +else + ISBUSY=0 + BUSYTIMEFLAG="" +fi + +if [[ $CHILD -gt 0 ]]; then + wait_for + RESULT=$? + exit $RESULT +else + if [[ $TIMEOUT -gt 0 ]]; then + wait_for_wrapper + RESULT=$? + else + wait_for + RESULT=$? 
+ fi +fi + +if [[ $CLI != "" ]]; then + if [[ $RESULT -ne 0 && $STRICT -eq 1 ]]; then + echoerr "$cmdname: strict mode, refusing to execute subprocess" + exit $RESULT + fi + exec "${CLI[@]}" +else + exit $RESULT +fi diff --git a/src/arrow/ci/vcpkg/arm64-linux-static-debug.cmake b/src/arrow/ci/vcpkg/arm64-linux-static-debug.cmake new file mode 100644 index 000000000..6fea43694 --- /dev/null +++ b/src/arrow/ci/vcpkg/arm64-linux-static-debug.cmake @@ -0,0 +1,28 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +set(VCPKG_TARGET_ARCHITECTURE arm64) +set(VCPKG_CRT_LINKAGE dynamic) +set(VCPKG_LIBRARY_LINKAGE static) +set(VCPKG_CMAKE_SYSTEM_NAME Linux) +set(VCPKG_BUILD_TYPE debug) + +if(NOT CMAKE_HOST_SYSTEM_PROCESSOR) + execute_process(COMMAND "uname" "-m" + OUTPUT_VARIABLE CMAKE_HOST_SYSTEM_PROCESSOR + OUTPUT_STRIP_TRAILING_WHITESPACE) +endif() diff --git a/src/arrow/ci/vcpkg/arm64-linux-static-release.cmake b/src/arrow/ci/vcpkg/arm64-linux-static-release.cmake new file mode 100644 index 000000000..4012848b8 --- /dev/null +++ b/src/arrow/ci/vcpkg/arm64-linux-static-release.cmake @@ -0,0 +1,28 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +set(VCPKG_TARGET_ARCHITECTURE arm64) +set(VCPKG_CRT_LINKAGE dynamic) +set(VCPKG_LIBRARY_LINKAGE static) +set(VCPKG_CMAKE_SYSTEM_NAME Linux) +set(VCPKG_BUILD_TYPE release) + +if(NOT CMAKE_HOST_SYSTEM_PROCESSOR) + execute_process(COMMAND "uname" "-m" + OUTPUT_VARIABLE CMAKE_HOST_SYSTEM_PROCESSOR + OUTPUT_STRIP_TRAILING_WHITESPACE) +endif() diff --git a/src/arrow/ci/vcpkg/arm64-osx-static-debug.cmake b/src/arrow/ci/vcpkg/arm64-osx-static-debug.cmake new file mode 100644 index 000000000..f511819a2 --- /dev/null +++ b/src/arrow/ci/vcpkg/arm64-osx-static-debug.cmake @@ -0,0 +1,26 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. + +set(VCPKG_TARGET_ARCHITECTURE arm64) +set(VCPKG_CRT_LINKAGE dynamic) +set(VCPKG_LIBRARY_LINKAGE static) + +set(VCPKG_CMAKE_SYSTEM_NAME Darwin) +set(VCPKG_OSX_ARCHITECTURES arm64) +set(VCPKG_OSX_DEPLOYMENT_TARGET "11.0") + +set(VCPKG_BUILD_TYPE debug) diff --git a/src/arrow/ci/vcpkg/arm64-osx-static-release.cmake b/src/arrow/ci/vcpkg/arm64-osx-static-release.cmake new file mode 100644 index 000000000..43d65efb2 --- /dev/null +++ b/src/arrow/ci/vcpkg/arm64-osx-static-release.cmake @@ -0,0 +1,26 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +set(VCPKG_TARGET_ARCHITECTURE arm64) +set(VCPKG_CRT_LINKAGE dynamic) +set(VCPKG_LIBRARY_LINKAGE static) + +set(VCPKG_CMAKE_SYSTEM_NAME Darwin) +set(VCPKG_OSX_ARCHITECTURES arm64) +set(VCPKG_OSX_DEPLOYMENT_TARGET "11.0") + +set(VCPKG_BUILD_TYPE release) diff --git a/src/arrow/ci/vcpkg/ports.patch b/src/arrow/ci/vcpkg/ports.patch new file mode 100644 index 000000000..7bcba49c1 --- /dev/null +++ b/src/arrow/ci/vcpkg/ports.patch @@ -0,0 +1,63 @@ +diff --git a/ports/aws-c-common/portfile.cmake b/ports/aws-c-common/portfile.cmake +index f3704ef05b..3af543058d 100644 +--- a/ports/aws-c-common/portfile.cmake ++++ b/ports/aws-c-common/portfile.cmake +@@ -1,8 +1,8 @@ + vcpkg_from_github( + OUT_SOURCE_PATH SOURCE_PATH + REPO awslabs/aws-c-common +- REF 4a21a1c0757083a16497fea27886f5f20ccdf334 # v0.4.56 +- SHA512 68898a8ac15d5490f45676eabfbe0df9e45370a74c543a28909fd0d85fed48dfcf4bcd6ea2d01d1a036dd352e2e4e0b08c48c63ab2a2b477fe150b46a827136e ++ REF 13adef72b7813ec878817c6d50a7a3f241015d8a # v0.4.57 ++ SHA512 28256522ac6af544d7464e3e7dcd4dc802ae2b09728bf8f167f86a6487bb756d0cad5eb4a2480610b2967b9c24c4a7f70621894517aa2828ffdeb0479453803b + HEAD_REF master + PATCHES + disable-error-4068.patch # This patch fixes dependency port compilation failure +diff --git a/ports/curl/portfile.cmake b/ports/curl/portfile.cmake +index be66d452be..a5ce325e9d 100644 +--- a/ports/curl/portfile.cmake ++++ b/ports/curl/portfile.cmake +@@ -94,6 +94,8 @@ vcpkg_configure_cmake( + -DCMAKE_DISABLE_FIND_PACKAGE_Perl=ON + -DENABLE_DEBUG=ON + -DCURL_CA_FALLBACK=ON ++ -DCURL_CA_PATH=none ++ -DCURL_CA_BUNDLE=none + OPTIONS_DEBUG + ${EXTRA_ARGS_DEBUG} + OPTIONS_RELEASE +diff --git a/ports/snappy/portfile.cmake b/ports/snappy/portfile.cmake +index 75dd133027..84345c7caa 100644 +--- a/ports/snappy/portfile.cmake ++++ b/ports/snappy/portfile.cmake +@@ -4,6 +4,7 @@ vcpkg_from_github( + REF 537f4ad6240e586970fe554614542e9717df7902 # 1.1.8 + SHA512 
555d3b69a6759592736cbaae8f41654f0cf14e8be693b5dde37640191e53daec189f895872557b173e905d10681ef502f3e6ed8566811add963ffef96ce4016d + HEAD_REF master ++ PATCHES "snappy-disable-bmi.patch" + ) + + vcpkg_configure_cmake( +diff --git a/ports/snappy/snappy-disable-bmi.patch b/ports/snappy/snappy-disable-bmi.patch +new file mode 100644 +index 0000000000..2cbb1533a8 +--- /dev/null ++++ b/ports/snappy/snappy-disable-bmi.patch +@@ -0,0 +1,17 @@ ++--- snappy.cc 2020-06-27 17:38:49.718993748 -0500 +++++ snappy.cc 2020-06-27 17:37:57.543268213 -0500 ++@@ -717,14 +717,10 @@ ++ static inline uint32 ExtractLowBytes(uint32 v, int n) { ++ assert(n >= 0); ++ assert(n <= 4); ++-#if SNAPPY_HAVE_BMI2 ++- return _bzhi_u32(v, 8 * n); ++-#else ++ // This needs to be wider than uint32 otherwise `mask << 32` will be ++ // undefined. ++ uint64 mask = 0xffffffff; ++ return v & ~(mask << (8 * n)); ++-#endif ++ } ++ ++ static inline bool LeftShiftOverflows(uint8 value, uint32 shift) { diff --git a/src/arrow/ci/vcpkg/universal2-osx-static-debug.cmake b/src/arrow/ci/vcpkg/universal2-osx-static-debug.cmake new file mode 100644 index 000000000..706ac47a7 --- /dev/null +++ b/src/arrow/ci/vcpkg/universal2-osx-static-debug.cmake @@ -0,0 +1,26 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. + +set(VCPKG_TARGET_ARCHITECTURE arm64) +set(VCPKG_CRT_LINKAGE dynamic) +set(VCPKG_LIBRARY_LINKAGE static) + +set(VCPKG_CMAKE_SYSTEM_NAME Darwin) +set(VCPKG_OSX_ARCHITECTURES "x86_64\;arm64") +set(VCPKG_OSX_DEPLOYMENT_TARGET "10.13") + +set(VCPKG_BUILD_TYPE debug) diff --git a/src/arrow/ci/vcpkg/universal2-osx-static-release.cmake b/src/arrow/ci/vcpkg/universal2-osx-static-release.cmake new file mode 100644 index 000000000..867069017 --- /dev/null +++ b/src/arrow/ci/vcpkg/universal2-osx-static-release.cmake @@ -0,0 +1,26 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +set(VCPKG_TARGET_ARCHITECTURE arm64) +set(VCPKG_CRT_LINKAGE dynamic) +set(VCPKG_LIBRARY_LINKAGE static) + +set(VCPKG_CMAKE_SYSTEM_NAME Darwin) +set(VCPKG_OSX_ARCHITECTURES "x86_64\;arm64") +set(VCPKG_OSX_DEPLOYMENT_TARGET "10.13") + +set(VCPKG_BUILD_TYPE release) diff --git a/src/arrow/ci/vcpkg/x64-linux-static-debug.cmake b/src/arrow/ci/vcpkg/x64-linux-static-debug.cmake new file mode 100644 index 000000000..3acee2ee4 --- /dev/null +++ b/src/arrow/ci/vcpkg/x64-linux-static-debug.cmake @@ -0,0 +1,24 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +set(VCPKG_TARGET_ARCHITECTURE x64) +set(VCPKG_CRT_LINKAGE dynamic) +set(VCPKG_LIBRARY_LINKAGE static) + +set(VCPKG_CMAKE_SYSTEM_NAME Linux) + +set(VCPKG_BUILD_TYPE debug) diff --git a/src/arrow/ci/vcpkg/x64-linux-static-release.cmake b/src/arrow/ci/vcpkg/x64-linux-static-release.cmake new file mode 100644 index 000000000..c2caa49fa --- /dev/null +++ b/src/arrow/ci/vcpkg/x64-linux-static-release.cmake @@ -0,0 +1,24 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +set(VCPKG_TARGET_ARCHITECTURE x64) +set(VCPKG_CRT_LINKAGE dynamic) +set(VCPKG_LIBRARY_LINKAGE static) + +set(VCPKG_CMAKE_SYSTEM_NAME Linux) + +set(VCPKG_BUILD_TYPE release) diff --git a/src/arrow/ci/vcpkg/x64-osx-static-debug.cmake b/src/arrow/ci/vcpkg/x64-osx-static-debug.cmake new file mode 100644 index 000000000..e8a321ec7 --- /dev/null +++ b/src/arrow/ci/vcpkg/x64-osx-static-debug.cmake @@ -0,0 +1,25 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +set(VCPKG_TARGET_ARCHITECTURE x64) +set(VCPKG_CRT_LINKAGE dynamic) +set(VCPKG_LIBRARY_LINKAGE static) + +set(VCPKG_CMAKE_SYSTEM_NAME Darwin) +set(VCPKG_OSX_ARCHITECTURES x86_64) + +set(VCPKG_BUILD_TYPE debug) diff --git a/src/arrow/ci/vcpkg/x64-osx-static-release.cmake b/src/arrow/ci/vcpkg/x64-osx-static-release.cmake new file mode 100644 index 000000000..956d5b92e --- /dev/null +++ b/src/arrow/ci/vcpkg/x64-osx-static-release.cmake @@ -0,0 +1,25 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +set(VCPKG_TARGET_ARCHITECTURE x64) +set(VCPKG_CRT_LINKAGE dynamic) +set(VCPKG_LIBRARY_LINKAGE static) + +set(VCPKG_CMAKE_SYSTEM_NAME Darwin) +set(VCPKG_OSX_ARCHITECTURES x86_64) + +set(VCPKG_BUILD_TYPE release) diff --git a/src/arrow/ci/vcpkg/x64-windows-static-md-debug.cmake b/src/arrow/ci/vcpkg/x64-windows-static-md-debug.cmake new file mode 100644 index 000000000..3eae3cfda --- /dev/null +++ b/src/arrow/ci/vcpkg/x64-windows-static-md-debug.cmake @@ -0,0 +1,22 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +set(VCPKG_TARGET_ARCHITECTURE x64) +set(VCPKG_CRT_LINKAGE dynamic) +set(VCPKG_LIBRARY_LINKAGE static) + +set(VCPKG_BUILD_TYPE debug) diff --git a/src/arrow/ci/vcpkg/x64-windows-static-md-release.cmake b/src/arrow/ci/vcpkg/x64-windows-static-md-release.cmake new file mode 100644 index 000000000..b8dfbc884 --- /dev/null +++ b/src/arrow/ci/vcpkg/x64-windows-static-md-release.cmake @@ -0,0 +1,22 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +set(VCPKG_TARGET_ARCHITECTURE x64) +set(VCPKG_CRT_LINKAGE dynamic) +set(VCPKG_LIBRARY_LINKAGE static) + +set(VCPKG_BUILD_TYPE release) |