summaryrefslogtreecommitdiffstats
path: root/src/arrow/r/tools
diff options
context:
space:
mode:
Diffstat (limited to 'src/arrow/r/tools')
-rw-r--r--src/arrow/r/tools/autobrew66
-rw-r--r--src/arrow/r/tools/nixlibs.R601
-rw-r--r--src/arrow/r/tools/ubsan.supp18
-rw-r--r--src/arrow/r/tools/winlibs.R65
4 files changed, 750 insertions, 0 deletions
diff --git a/src/arrow/r/tools/autobrew b/src/arrow/r/tools/autobrew
new file mode 100644
index 000000000..d40729e18
--- /dev/null
+++ b/src/arrow/r/tools/autobrew
@@ -0,0 +1,66 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# https://github.com/jeroen/autobrew/blob/gh-pages/apache-arrow
+export HOMEBREW_NO_ANALYTICS=1
+export HOMEBREW_NO_AUTO_UPDATE=1
+
+# Official Homebrew no longer supports El-Capitan
+UPSTREAM_ORG="autobrew"
+
+if [ "$DISABLE_AUTOBREW" ]; then return 0; fi
+AUTOBREW=${TMPDIR-/tmp}
+export HOMEBREW_TEMP="$AUTOBREW/hbtmp"
+BREWDIR="$AUTOBREW/build-$PKG_BREW_NAME"
+BREW="$BREWDIR/bin/brew"
+rm -Rf $BREWDIR
+mkdir -p $BREWDIR
+echo "$(date): Auto-brewing $PKG_BREW_NAME in $BREWDIR..."
+curl -fsSL https://github.com/$UPSTREAM_ORG/brew/tarball/master | tar xz --strip 1 -C $BREWDIR
+
+# Install bottle + dependencies
+export HOMEBREW_CACHE="$AUTOBREW"
+LOCAL_FORMULA="tools/${PKG_BREW_NAME}.rb"
+if [ -f "$LOCAL_FORMULA" ]; then
+ # Use the local brew formula and install --HEAD
+ $BREW deps -n "$LOCAL_FORMULA" 2>/dev/null
+ BREW_DEPS=$($BREW deps -n "$LOCAL_FORMULA" 2>/dev/null)
+ $BREW install --force-bottle $BREW_DEPS 2>&1 | perl -pe 's/Warning/Note/gi'
+ $BREW install -v --build-from-source --HEAD "$LOCAL_FORMULA" 2>&1 | perl -pe 's/Warning/Note/gi'
+else
+ $BREW install --force-bottle $BREW_DEPS $PKG_BREW_NAME 2>&1 | perl -pe 's/Warning/Note/gi'
+fi
+
+# Hardcode this for my custom autobrew build
+rm -f $BREWDIR/lib/*.dylib
+AWS_LIBS="-laws-cpp-sdk-config -laws-cpp-sdk-transfer -laws-cpp-sdk-identity-management -laws-cpp-sdk-cognito-identity -laws-cpp-sdk-sts -laws-cpp-sdk-s3 -laws-cpp-sdk-core -laws-c-event-stream -laws-checksums -laws-c-common -lpthread -lcurl"
+PKG_LIBS="-lparquet -larrow_dataset -larrow -larrow_bundled_dependencies -lthrift -llz4 -lsnappy -lzstd $AWS_LIBS"
+PKG_DIRS="-L$BREWDIR/lib"
+
+# Prevent CRAN builder from linking against old libs in /usr/local/lib
+for FILE in $BREWDIR/Cellar/*/*/lib/*.a; do
+ BASENAME=`basename $FILE`
+ LIBNAME=`echo "${BASENAME%.*}" | cut -c4-`
+ cp -f $FILE $BREWDIR/lib/libbrew$LIBNAME.a
+ echo "created $BREWDIR/lib/libbrew$LIBNAME.a"
+ PKG_LIBS=`echo $PKG_LIBS | sed "s/-l$LIBNAME/-lbrew$LIBNAME/g"`
+done
+
+PKG_CFLAGS="-I$BREWDIR/opt/$PKG_BREW_NAME/include -DARROW_R_WITH_PARQUET -DARROW_R_WITH_DATASET -DARROW_R_WITH_JSON -DARROW_R_WITH_S3"
+
+unset HOMEBREW_NO_ANALYTICS
+unset HOMEBREW_NO_AUTO_UPDATE
diff --git a/src/arrow/r/tools/nixlibs.R b/src/arrow/r/tools/nixlibs.R
new file mode 100644
index 000000000..869e0abcf
--- /dev/null
+++ b/src/arrow/r/tools/nixlibs.R
@@ -0,0 +1,601 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+args <- commandArgs(TRUE)
+VERSION <- args[1]
+dst_dir <- paste0("libarrow/arrow-", VERSION)
+
+arrow_repo <- "https://arrow-r-nightly.s3.amazonaws.com/libarrow/"
+
+if (getRversion() < 3.4 && is.null(getOption("download.file.method"))) {
+ # default method doesn't work on R 3.3, nor does libcurl
+ options(download.file.method = "wget")
+}
+
+options(.arrow.cleanup = character()) # To collect dirs to rm on exit
+on.exit(unlink(getOption(".arrow.cleanup")))
+
+env_is <- function(var, value) identical(tolower(Sys.getenv(var)), value)
+
+try_download <- function(from_url, to_file) {
+ status <- try(
+ suppressWarnings(
+ download.file(from_url, to_file, quiet = quietly)
+ ),
+ silent = quietly
+ )
+ # Return whether the download was successful
+ !inherits(status, "try-error") && status == 0
+}
+
+# For local debugging, set ARROW_R_DEV=TRUE to make this script print more
+quietly <- !env_is("ARROW_R_DEV", "true")
+
+# Default is build from source, not download a binary
+build_ok <- !env_is("LIBARROW_BUILD", "false")
+binary_ok <- env_is("LIBARROW_BINARY", "true")
+
+# Check if we're doing an offline build.
+# (Note that cmake will still be downloaded if necessary
+# https://arrow.apache.org/docs/developers/cpp/building.html#offline-builds)
+download_ok <- !env_is("TEST_OFFLINE_BUILD", "true") && try_download("https://github.com", tempfile())
+
+# This "tools/thirdparty_dependencies" path, within the tar file, might exist if
+# create_package_with_all_dependencies() was run, or if someone has created it
+# manually before running make build.
+# If you change this path, you also need to edit
+# `create_package_with_all_dependencies()` in install-arrow.R
+thirdparty_dependency_dir <- Sys.getenv("ARROW_THIRDPARTY_DEPENDENCY_DIR", "tools/thirdparty_dependencies")
+
+
+download_binary <- function(os = identify_os()) {
+ libfile <- tempfile()
+ if (!is.null(os)) {
+ # See if we can map this os-version to one we have binaries for
+ os <- find_available_binary(os)
+ binary_url <- paste0(arrow_repo, "bin/", os, "/arrow-", VERSION, ".zip")
+ if (try_download(binary_url, libfile)) {
+ cat(sprintf("*** Successfully retrieved C++ binaries for %s\n", os))
+ if (!identical(os, "centos-7")) {
+ # centos-7 uses gcc 4.8 so the binary doesn't have ARROW_S3=ON but the others do
+ # TODO: actually check for system requirements?
+ cat("**** Binary package requires libcurl and openssl\n")
+ cat("**** If installation fails, retry after installing those system requirements\n")
+ }
+ } else {
+ cat(sprintf("*** No C++ binaries found for %s\n", os))
+ libfile <- NULL
+ }
+ } else {
+ libfile <- NULL
+ }
+ libfile
+}
+
+# Function to figure out which flavor of binary we should download, if at all.
+# By default (unset or "FALSE"), it will not download a precompiled library,
+# but you can override this by setting the env var LIBARROW_BINARY to:
+# * `TRUE` (not case-sensitive), to try to discover your current OS, or
+# * some other string, presumably a related "distro-version" that has binaries
+# built that work for your OS
+identify_os <- function(os = Sys.getenv("LIBARROW_BINARY")) {
+ if (tolower(os) %in% c("", "false")) {
+ # Env var says not to download a binary
+ return(NULL)
+ } else if (!identical(tolower(os), "true")) {
+ # Env var provided an os-version to use--maybe you're on Ubuntu 18.10 but
+ # we only build for 18.04 and that's fine--so use what the user set
+ return(os)
+ }
+
+ linux <- distro()
+ if (is.null(linux)) {
+ cat("*** Unable to identify current OS/version\n")
+ return(NULL)
+ }
+ paste(linux$id, linux$short_version, sep = "-")
+}
+
+#### start distro ####
+
+distro <- function() {
+ # The code in this script is a (potentially stale) copy of the distro package
+ if (requireNamespace("distro", quietly = TRUE)) {
+ # Use the version from the package, which may be updated from this
+ return(distro::distro())
+ }
+
+ out <- lsb_release()
+ if (is.null(out)) {
+ out <- os_release()
+ if (is.null(out)) {
+ out <- system_release()
+ }
+ }
+ if (is.null(out)) {
+ return(NULL)
+ }
+
+ out$id <- tolower(out$id)
+ # debian unstable & testing lsb_release `version` don't include numbers but we can map from pretty name
+ if (is.null(out$version) || out$version %in% c("testing", "unstable")) {
+ if (grepl("bullseye", out$codename)) {
+ out$short_version <- "11"
+ } else if (grepl("bookworm", out$codename)) {
+ out$short_version <- "12"
+ }
+ } else if (out$id == "ubuntu") {
+ # Keep major.minor version
+ out$short_version <- sub('^"?([0-9]+\\.[0-9]+).*"?.*$', "\\1", out$version)
+ } else {
+ # Only major version number
+ out$short_version <- sub('^"?([0-9]+).*"?.*$', "\\1", out$version)
+ }
+ out
+}
+
+lsb_release <- function() {
+ if (have_lsb_release()) {
+ list(
+ id = call_lsb("-is"),
+ version = call_lsb("-rs"),
+ codename = call_lsb("-cs")
+ )
+ } else {
+ NULL
+ }
+}
+
+have_lsb_release <- function() nzchar(Sys.which("lsb_release"))
+call_lsb <- function(args) system(paste("lsb_release", args), intern = TRUE)
+
+os_release <- function() {
+ rel_data <- read_os_release()
+ if (!is.null(rel_data)) {
+ vals <- as.list(sub('^.*="?(.*?)"?$', "\\1", rel_data))
+ names(vals) <- sub("^(.*)=.*$", "\\1", rel_data)
+
+ out <- list(
+ id = vals[["ID"]],
+ version = vals[["VERSION_ID"]]
+ )
+ if ("VERSION_CODENAME" %in% names(vals)) {
+ out$codename <- vals[["VERSION_CODENAME"]]
+ } else {
+ # This probably isn't right, maybe could extract codename from pretty name?
+ out$codename <- vals[["PRETTY_NAME"]]
+ }
+ out
+ } else {
+ NULL
+ }
+}
+
+read_os_release <- function() {
+ if (file.exists("/etc/os-release")) {
+ readLines("/etc/os-release")
+ }
+}
+
+system_release <- function() {
+ rel_data <- read_system_release()
+ if (!is.null(rel_data)) {
+ # Something like "CentOS Linux release 7.7.1908 (Core)"
+ list(
+ id = sub("^([a-zA-Z]+) .* ([0-9.]+).*$", "\\1", rel_data),
+ version = sub("^([a-zA-Z]+) .* ([0-9.]+).*$", "\\2", rel_data),
+ codename = NA
+ )
+ } else {
+ NULL
+ }
+}
+
+read_system_release <- function() {
+ if (file.exists("/etc/system-release")) {
+ readLines("/etc/system-release")[1]
+ }
+}
+
+#### end distro ####
+
+find_available_binary <- function(os) {
+ # Download a csv that maps one to the other, columns "actual" and "use_this"
+ u <- "https://raw.githubusercontent.com/ursa-labs/arrow-r-nightly/master/linux/distro-map.csv"
+ lookup <- try(utils::read.csv(u, stringsAsFactors = FALSE), silent = quietly)
+ if (!inherits(lookup, "try-error") && os %in% lookup$actual) {
+ new <- lookup$use_this[lookup$actual == os]
+ if (length(new) == 1 && !is.na(new)) { # Just some sanity checking
+ cat(sprintf("*** Using %s binary for %s\n", new, os))
+ os <- new
+ }
+ }
+ os
+}
+
+find_local_source <- function() {
+ # We'll take the first of these that exists
+ # The first case probably occurs if we're in the arrow git repo
+ # The second probably occurs if we're installing the arrow R package
+ cpp_dir_options <- c(
+ file.path(Sys.getenv("ARROW_SOURCE_HOME", ".."), "cpp"),
+ "tools/cpp"
+ )
+ for (cpp_dir in cpp_dir_options) {
+ if (file.exists(file.path(cpp_dir, "src/arrow/api.h"))) {
+ cat(paste0("*** Found local C++ source: '", cpp_dir, "'\n"))
+ return(cpp_dir)
+ }
+ }
+ NULL
+}
+
+env_vars_as_string <- function(env_var_list) {
+ # Do some basic checks on env_var_list:
+ # Check that env_var_list has names, that those names are valid POSIX
+ # environment variables, and that none of the values contain `'`.
+ stopifnot(
+ length(env_var_list) == length(names(env_var_list)),
+ all(grepl("^[^0-9]", names(env_var_list))),
+ all(grepl("^[A-Z0-9_]+$", names(env_var_list))),
+ !any(grepl("'", env_var_list, fixed = TRUE))
+ )
+ env_var_string <- paste0(names(env_var_list), "='", env_var_list, "'", collapse = " ")
+ if (nchar(env_var_string) > 30000) {
+ # This could happen if the full paths in *_SOURCE_URL were *very* long.
+ # A more formal check would look at getconf ARG_MAX, but this shouldn't matter
+ cat("*** Warning: Environment variables are very long. This could cause issues on some shells.\n")
+ }
+ env_var_string
+}
+
+build_libarrow <- function(src_dir, dst_dir) {
+ # We'll need to compile R bindings with these libs, so delete any .o files
+ system("rm src/*.o", ignore.stdout = TRUE, ignore.stderr = TRUE)
+ # Set up make for parallel building
+ makeflags <- Sys.getenv("MAKEFLAGS")
+ if (makeflags == "") {
+ # CRAN policy says not to use more than 2 cores during checks
+ # If you have more and want to use more, set MAKEFLAGS
+ ncores <- min(parallel::detectCores(), 2)
+ makeflags <- sprintf("-j%s", ncores)
+ Sys.setenv(MAKEFLAGS = makeflags)
+ }
+ if (!quietly) {
+ cat("*** Building with MAKEFLAGS=", makeflags, "\n")
+ }
+ # Check for libarrow build dependencies:
+ # * cmake
+ cmake <- ensure_cmake()
+
+ # Optionally build somewhere not in tmp so we can dissect the build if it fails
+ debug_dir <- Sys.getenv("LIBARROW_DEBUG_DIR")
+ if (nzchar(debug_dir)) {
+ build_dir <- debug_dir
+ } else {
+ # But normally we'll just build in a tmp dir
+ build_dir <- tempfile()
+ }
+ options(.arrow.cleanup = c(getOption(".arrow.cleanup"), build_dir))
+
+ R_CMD_config <- function(var) {
+ if (getRversion() < 3.4) {
+ # var names were called CXX1X instead of CXX11
+ var <- sub("^CXX11", "CXX1X", var)
+ }
+ # tools::Rcmd introduced R 3.3
+ tools::Rcmd(paste("config", var), stdout = TRUE)
+ }
+ env_var_list <- c(
+ SOURCE_DIR = src_dir,
+ BUILD_DIR = build_dir,
+ DEST_DIR = dst_dir,
+ CMAKE = cmake,
+ # EXTRA_CMAKE_FLAGS will often be "", but it's convenient later to have it defined
+ EXTRA_CMAKE_FLAGS = Sys.getenv("EXTRA_CMAKE_FLAGS"),
+ # Make sure we build with the same compiler settings that R is using
+ CC = R_CMD_config("CC"),
+ CXX = paste(R_CMD_config("CXX11"), R_CMD_config("CXX11STD")),
+ # CXXFLAGS = R_CMD_config("CXX11FLAGS"), # We don't want the same debug symbols
+ LDFLAGS = R_CMD_config("LDFLAGS")
+ )
+ env_var_list <- with_s3_support(env_var_list)
+ env_var_list <- with_mimalloc(env_var_list)
+
+ # turn_off_all_optional_features() needs to happen after with_mimalloc() and
+ # with_s3_support(), since those might turn features ON.
+ thirdparty_deps_unavailable <- !download_ok &&
+ !dir.exists(thirdparty_dependency_dir) &&
+ !env_is("ARROW_DEPENDENCY_SOURCE", "system")
+ on_solaris <- tolower(Sys.info()[["sysname"]]) %in% "sunos"
+ do_minimal_build <- on_solaris || env_is("LIBARROW_MINIMAL", "true")
+
+ if (do_minimal_build) {
+ # Note that JSON support does work on Solaris, but will be turned off with
+ # the rest of the optional dependencies.
+ # All other dependencies don't compile (e.g thrift, jemalloc, and xsimd)
+ # or do compile but `ar` fails to build
+ # libarrow_bundled_dependencies (e.g. re2 and utf8proc).
+ env_var_list <- turn_off_all_optional_features(env_var_list)
+ } else if (thirdparty_deps_unavailable) {
+ cat(paste0(
+ "*** Building C++ library from source, but downloading thirdparty dependencies\n",
+ " is not possible, so this build will turn off all thirdparty features.\n",
+ " See install vignette for details:\n",
+ " https://cran.r-project.org/web/packages/arrow/vignettes/install.html\n"
+ ))
+ env_var_list <- turn_off_all_optional_features(env_var_list)
+ } else if (dir.exists(thirdparty_dependency_dir)) {
+ # Add the *_SOURCE_URL env vars
+ env_var_list <- set_thirdparty_urls(env_var_list)
+ }
+ env_vars <- env_vars_as_string(env_var_list)
+
+ cat("**** arrow", ifelse(quietly, "", paste("with", env_vars)), "\n")
+ status <- suppressWarnings(system(
+ paste(env_vars, "inst/build_arrow_static.sh"),
+ ignore.stdout = quietly, ignore.stderr = quietly
+ ))
+ if (status != 0) {
+ # It failed :(
+ cat(
+ "**** Error building Arrow C++.",
+ ifelse(env_is("ARROW_R_DEV", "true"), "", "Re-run with ARROW_R_DEV=true for debug information."),
+ "\n"
+ )
+ }
+ invisible(status)
+}
+
+ensure_cmake <- function() {
+ cmake <- find_cmake(c(
+ Sys.getenv("CMAKE"),
+ Sys.which("cmake"),
+ Sys.which("cmake3")
+ ))
+
+ if (is.null(cmake)) {
+ # If not found, download it
+ cat("**** cmake\n")
+ CMAKE_VERSION <- Sys.getenv("CMAKE_VERSION", "3.19.2")
+ if (tolower(Sys.info()[["sysname"]]) %in% "darwin") {
+ postfix <- "-macos-universal.tar.gz"
+ } else {
+ postfix <- "-Linux-x86_64.tar.gz"
+ }
+ cmake_binary_url <- paste0(
+ "https://github.com/Kitware/CMake/releases/download/v", CMAKE_VERSION,
+ "/cmake-", CMAKE_VERSION, postfix
+ )
+ cmake_tar <- tempfile()
+ cmake_dir <- tempfile()
+ download_successful <- try_download(cmake_binary_url, cmake_tar)
+ if (!download_successful) {
+ cat(paste0(
+ "*** cmake was not found locally and download failed.\n",
+ " Make sure cmake >= 3.10 is installed and available on your PATH,\n",
+ " or download ", cmake_binary_url, "\n",
+ " and define the CMAKE environment variable.\n"
+ ))
+ }
+ untar(cmake_tar, exdir = cmake_dir)
+ unlink(cmake_tar)
+ options(.arrow.cleanup = c(getOption(".arrow.cleanup"), cmake_dir))
+ cmake <- paste0(
+ cmake_dir,
+ "/cmake-", CMAKE_VERSION, sub(".tar.gz", "", postfix, fixed = TRUE),
+ "/bin/cmake"
+ )
+ }
+ cmake
+}
+
+find_cmake <- function(paths, version_required = 3.10) {
+ # Given a list of possible cmake paths, return the first one that exists and is new enough
+ for (path in paths) {
+ if (nzchar(path) && cmake_version(path) >= version_required) {
+ # Sys.which() returns a named vector, but that plays badly with c() later
+ names(path) <- NULL
+ return(path)
+ }
+ }
+ # If none found, return NULL
+ NULL
+}
+
+cmake_version <- function(cmd = "cmake") {
+ tryCatch(
+ {
+ raw_version <- system(paste(cmd, "--version"), intern = TRUE, ignore.stderr = TRUE)
+ pat <- ".* ([0-9\\.]+).*?"
+ which_line <- grep(pat, raw_version)
+ package_version(sub(pat, "\\1", raw_version[which_line]))
+ },
+ error = function(e) {
+ return(0)
+ }
+ )
+}
+
+turn_off_all_optional_features <- function(env_var_list) {
+ # Because these are done as environment variables (as opposed to build flags),
+ # setting these to "OFF" overrides any previous setting. We don't need to
+ # check the existing value.
+ turn_off <- c(
+ "ARROW_MIMALLOC" = "OFF",
+ "ARROW_JEMALLOC" = "OFF",
+ "ARROW_JSON" = "OFF",
+ "ARROW_PARQUET" = "OFF", # depends on thrift
+ "ARROW_DATASET" = "OFF", # depends on parquet
+ "ARROW_S3" = "OFF",
+ "ARROW_WITH_BROTLI" = "OFF",
+ "ARROW_WITH_BZ2" = "OFF",
+ "ARROW_WITH_LZ4" = "OFF",
+ "ARROW_WITH_SNAPPY" = "OFF",
+ "ARROW_WITH_ZLIB" = "OFF",
+ "ARROW_WITH_ZSTD" = "OFF",
+ "ARROW_WITH_RE2" = "OFF",
+ "ARROW_WITH_UTF8PROC" = "OFF",
+ # The syntax to turn off XSIMD is different.
+ # Pull existing value of EXTRA_CMAKE_FLAGS first (must be defined)
+ "EXTRA_CMAKE_FLAGS" = paste(
+ env_var_list[["EXTRA_CMAKE_FLAGS"]],
+ "-DARROW_SIMD_LEVEL=NONE -DARROW_RUNTIME_SIMD_LEVEL=NONE"
+ )
+ )
+ # Create a new env_var_list, with the values of turn_off set.
+ # replace() also adds new values if they didn't exist before
+ replace(env_var_list, names(turn_off), turn_off)
+}
+
+set_thirdparty_urls <- function(env_var_list) {
+ # This function does *not* check if existing *_SOURCE_URL variables are set.
+ # The directory tools/thirdparty_dependencies is created by
+ # create_package_with_all_dependencies() and saved in the tar file.
+ files <- list.files(thirdparty_dependency_dir, full.names = FALSE)
+ url_env_varname <- toupper(sub("(.*?)-.*", "ARROW_\\1_URL", files))
+ # Special handling for the aws dependencies, which have extra `-`
+ aws <- grepl("^aws", files)
+ url_env_varname[aws] <- sub(
+ "AWS_SDK_CPP", "AWSSDK",
+ gsub(
+ "-", "_",
+ sub(
+ "(AWS.*)-.*", "ARROW_\\1_URL",
+ toupper(files[aws])
+ )
+ )
+ )
+ full_filenames <- file.path(normalizePath(thirdparty_dependency_dir), files)
+
+ env_var_list <- replace(env_var_list, url_env_varname, full_filenames)
+ if (!quietly) {
+ env_var_list <- replace(env_var_list, "ARROW_VERBOSE_THIRDPARTY_BUILD", "ON")
+ }
+ env_var_list
+}
+
+is_feature_requested <- function(env_varname, default = env_is("LIBARROW_MINIMAL", "false")) {
+ env_value <- tolower(Sys.getenv(env_varname))
+ if (identical(env_value, "off")) {
+ # If e.g. ARROW_MIMALLOC=OFF explicitly, override default
+ requested <- FALSE
+ } else if (identical(env_value, "on")) {
+ requested <- TRUE
+ } else {
+ requested <- default
+ }
+ requested
+}
+
+with_mimalloc <- function(env_var_list) {
+ arrow_mimalloc <- is_feature_requested("ARROW_MIMALLOC")
+ if (arrow_mimalloc) {
+ # User wants mimalloc. If they're using gcc, let's make sure the version is >= 4.9
+ if (isTRUE(cmake_gcc_version(env_var_list) < "4.9")) {
+ cat("**** mimalloc support not available for gcc < 4.9; building with ARROW_MIMALLOC=OFF\n")
+ arrow_mimalloc <- FALSE
+ }
+ }
+ replace(env_var_list, "ARROW_MIMALLOC", ifelse(arrow_mimalloc, "ON", "OFF"))
+}
+
+with_s3_support <- function(env_var_list) {
+ arrow_s3 <- is_feature_requested("ARROW_S3")
+ if (arrow_s3) {
+ # User wants S3 support. If they're using gcc, let's make sure the version is >= 4.9
+ # and make sure that we have curl and openssl system libs
+ if (isTRUE(cmake_gcc_version(env_var_list) < "4.9")) {
+ cat("**** S3 support not available for gcc < 4.9; building with ARROW_S3=OFF\n")
+ arrow_s3 <- FALSE
+ } else if (!cmake_find_package("CURL", NULL, env_var_list)) {
+ # curl on macos should be installed, so no need to alter this for macos
+ cat("**** S3 support requires libcurl-devel (rpm) or libcurl4-openssl-dev (deb); building with ARROW_S3=OFF\n")
+ arrow_s3 <- FALSE
+ } else if (!cmake_find_package("OpenSSL", "1.0.2", env_var_list)) {
+ cat("**** S3 support requires version >= 1.0.2 of openssl-devel (rpm), libssl-dev (deb), or openssl (brew); building with ARROW_S3=OFF\n")
+ arrow_s3 <- FALSE
+ }
+ }
+ replace(env_var_list, "ARROW_S3", ifelse(arrow_s3, "ON", "OFF"))
+}
+
+cmake_gcc_version <- function(env_var_list) {
+ # This function returns NA if using a non-gcc compiler
+ # Always enclose calls to it in isTRUE() or isFALSE()
+ vals <- cmake_cxx_compiler_vars(env_var_list)
+ if (!identical(vals[["CMAKE_CXX_COMPILER_ID"]], "GNU")) {
+ return(NA)
+ }
+ package_version(vals[["CMAKE_CXX_COMPILER_VERSION"]])
+}
+
+cmake_cxx_compiler_vars <- function(env_var_list) {
+ env_vars <- env_vars_as_string(env_var_list)
+ info <- system(paste("export", env_vars, "&& $CMAKE --system-information"), intern = TRUE)
+ info <- grep("^[A-Z_]* .*$", info, value = TRUE)
+ vals <- as.list(sub('^.*? "?(.*?)"?$', "\\1", info))
+ names(vals) <- sub("^(.*?) .*$", "\\1", info)
+ vals[grepl("^CMAKE_CXX_COMPILER_?", names(vals))]
+}
+
+cmake_find_package <- function(pkg, version = NULL, env_var_list) {
+ td <- tempfile()
+ dir.create(td)
+ options(.arrow.cleanup = c(getOption(".arrow.cleanup"), td))
+ find_package <- paste0("find_package(", pkg, " ", version, " REQUIRED)")
+ writeLines(find_package, file.path(td, "CMakeLists.txt"))
+ env_vars <- env_vars_as_string(env_var_list)
+ cmake_cmd <- paste0(
+ "export ", env_vars,
+ " && cd ", td,
+ " && $CMAKE ",
+ " -DCMAKE_EXPORT_NO_PACKAGE_REGISTRY=ON",
+ " -DCMAKE_FIND_PACKAGE_NO_PACKAGE_REGISTRY=ON",
+ " ."
+ )
+ system(cmake_cmd, ignore.stdout = TRUE, ignore.stderr = TRUE) == 0
+}
+
+#####
+
+if (!file.exists(paste0(dst_dir, "/include/arrow/api.h"))) {
+ # If we're working in a local checkout and have already built the libs, we
+ # don't need to do anything. Otherwise,
+ # (1) Look for a prebuilt binary for this version
+ bin_file <- src_dir <- NULL
+ if (download_ok && binary_ok) {
+ bin_file <- download_binary()
+ }
+ if (!is.null(bin_file)) {
+ # Extract them
+ dir.create(dst_dir, showWarnings = !quietly, recursive = TRUE)
+ unzip(bin_file, exdir = dst_dir)
+ unlink(bin_file)
+ } else if (build_ok) {
+ # (2) Find source and build it
+ src_dir <- find_local_source()
+ if (!is.null(src_dir)) {
+ cat("*** Building C++ libraries\n")
+ build_libarrow(src_dir, dst_dir)
+ } else {
+ cat("*** Proceeding without C++ dependencies\n")
+ }
+ } else {
+ cat("*** Proceeding without C++ dependencies\n")
+ }
+}
diff --git a/src/arrow/r/tools/ubsan.supp b/src/arrow/r/tools/ubsan.supp
new file mode 100644
index 000000000..ff88cf984
--- /dev/null
+++ b/src/arrow/r/tools/ubsan.supp
@@ -0,0 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+vptr:include/c++/8/bits/shared_ptr_base.h
diff --git a/src/arrow/r/tools/winlibs.R b/src/arrow/r/tools/winlibs.R
new file mode 100644
index 000000000..ccaa5c95d
--- /dev/null
+++ b/src/arrow/r/tools/winlibs.R
@@ -0,0 +1,65 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+args <- commandArgs(TRUE)
+VERSION <- args[1]
+if (!file.exists(sprintf("windows/arrow-%s/include/arrow/api.h", VERSION))) {
+ if (length(args) > 1) {
+ # Arg 2 would be the path/to/lib.zip
+ localfile <- args[2]
+ cat(sprintf("*** Using RWINLIB_LOCAL %s\n", localfile))
+ if (!file.exists(localfile)) {
+ cat(sprintf("*** %s does not exist; build will fail\n", localfile))
+ }
+ file.copy(localfile, "lib.zip")
+ } else {
+ # Download static arrow from rwinlib
+ if (getRversion() < "3.3.0") setInternet2()
+ quietly <- !identical(tolower(Sys.getenv("ARROW_R_DEV")), "true")
+ get_file <- function(template, version) {
+ try(
+ suppressWarnings(
+ download.file(sprintf(template, version), "lib.zip", quiet = quietly)
+ ),
+ silent = quietly
+ )
+ }
+ # URL templates
+ nightly <- "https://arrow-r-nightly.s3.amazonaws.com/libarrow/bin/windows/arrow-%s.zip"
+ rwinlib <- "https://github.com/rwinlib/arrow/archive/v%s.zip"
+ # First look for a nightly
+ get_file(nightly, VERSION)
+ # If not found, then check rwinlib
+ if (!file.exists("lib.zip")) {
+ get_file(rwinlib, VERSION)
+ }
+ if (!file.exists("lib.zip")) {
+ # Try a different version
+ # First, try pruning off a dev number, i.e. go from 0.14.1.1 to 0.14.1
+ VERSION <- sub("^([0-9]+\\.[0-9]+\\.[0-9]+).*$", "\\1", VERSION)
+ get_file(rwinlib, VERSION)
+ }
+ if (!file.exists("lib.zip")) {
+ # Next, try without a patch release, i.e. go from 0.14.1 to 0.14.0
+ VERSION <- sub("^([0-9]+\\.[0-9]+\\.).*$", "\\10", VERSION)
+ get_file(rwinlib, VERSION)
+ }
+ }
+ dir.create("windows", showWarnings = FALSE)
+ unzip("lib.zip", exdir = "windows")
+ unlink("lib.zip")
+}