# Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information # regarding copyright ownership. The ASF licenses this file # to you under the Apache License, Version 2.0 (the # "License"); you may not use this file except in compliance # with the License. You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, # software distributed under the License is distributed on an # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. args <- commandArgs(TRUE) VERSION <- args[1] dst_dir <- paste0("libarrow/arrow-", VERSION) arrow_repo <- "https://arrow-r-nightly.s3.amazonaws.com/libarrow/" if (getRversion() < 3.4 && is.null(getOption("download.file.method"))) { # default method doesn't work on R 3.3, nor does libcurl options(download.file.method = "wget") } options(.arrow.cleanup = character()) # To collect dirs to rm on exit on.exit(unlink(getOption(".arrow.cleanup"))) env_is <- function(var, value) identical(tolower(Sys.getenv(var)), value) try_download <- function(from_url, to_file) { status <- try( suppressWarnings( download.file(from_url, to_file, quiet = quietly) ), silent = quietly ) # Return whether the download was successful !inherits(status, "try-error") && status == 0 } # For local debugging, set ARROW_R_DEV=TRUE to make this script print more quietly <- !env_is("ARROW_R_DEV", "true") # Default is build from source, not download a binary build_ok <- !env_is("LIBARROW_BUILD", "false") binary_ok <- env_is("LIBARROW_BINARY", "true") # Check if we're doing an offline build. # (Note that cmake will still be downloaded if necessary # https://arrow.apache.org/docs/developers/cpp/building.html#offline-builds) download_ok <- !env_is("TEST_OFFLINE_BUILD", "true") && try_download("https://github.com", tempfile()) # This "tools/thirdparty_dependencies" path, within the tar file, might exist if # create_package_with_all_dependencies() was run, or if someone has created it # manually before running make build. # If you change this path, you also need to edit # `create_package_with_all_dependencies()` in install-arrow.R thirdparty_dependency_dir <- Sys.getenv("ARROW_THIRDPARTY_DEPENDENCY_DIR", "tools/thirdparty_dependencies") download_binary <- function(os = identify_os()) { libfile <- tempfile() if (!is.null(os)) { # See if we can map this os-version to one we have binaries for os <- find_available_binary(os) binary_url <- paste0(arrow_repo, "bin/", os, "/arrow-", VERSION, ".zip") if (try_download(binary_url, libfile)) { cat(sprintf("*** Successfully retrieved C++ binaries for %s\n", os)) if (!identical(os, "centos-7")) { # centos-7 uses gcc 4.8 so the binary doesn't have ARROW_S3=ON but the others do # TODO: actually check for system requirements? cat("**** Binary package requires libcurl and openssl\n") cat("**** If installation fails, retry after installing those system requirements\n") } } else { cat(sprintf("*** No C++ binaries found for %s\n", os)) libfile <- NULL } } else { libfile <- NULL } libfile } # Function to figure out which flavor of binary we should download, if at all. # By default (unset or "FALSE"), it will not download a precompiled library, # but you can override this by setting the env var LIBARROW_BINARY to: # * `TRUE` (not case-sensitive), to try to discover your current OS, or # * some other string, presumably a related "distro-version" that has binaries # built that work for your OS identify_os <- function(os = Sys.getenv("LIBARROW_BINARY")) { if (tolower(os) %in% c("", "false")) { # Env var says not to download a binary return(NULL) } else if (!identical(tolower(os), "true")) { # Env var provided an os-version to use--maybe you're on Ubuntu 18.10 but # we only build for 18.04 and that's fine--so use what the user set return(os) } linux <- distro() if (is.null(linux)) { cat("*** Unable to identify current OS/version\n") return(NULL) } paste(linux$id, linux$short_version, sep = "-") } #### start distro #### distro <- function() { # The code in this script is a (potentially stale) copy of the distro package if (requireNamespace("distro", quietly = TRUE)) { # Use the version from the package, which may be updated from this return(distro::distro()) } out <- lsb_release() if (is.null(out)) { out <- os_release() if (is.null(out)) { out <- system_release() } } if (is.null(out)) { return(NULL) } out$id <- tolower(out$id) # debian unstable & testing lsb_release `version` don't include numbers but we can map from pretty name if (is.null(out$version) || out$version %in% c("testing", "unstable")) { if (grepl("bullseye", out$codename)) { out$short_version <- "11" } else if (grepl("bookworm", out$codename)) { out$short_version <- "12" } } else if (out$id == "ubuntu") { # Keep major.minor version out$short_version <- sub('^"?([0-9]+\\.[0-9]+).*"?.*$', "\\1", out$version) } else { # Only major version number out$short_version <- sub('^"?([0-9]+).*"?.*$', "\\1", out$version) } out } lsb_release <- function() { if (have_lsb_release()) { list( id = call_lsb("-is"), version = call_lsb("-rs"), codename = call_lsb("-cs") ) } else { NULL } } have_lsb_release <- function() nzchar(Sys.which("lsb_release")) call_lsb <- function(args) system(paste("lsb_release", args), intern = TRUE) os_release <- function() { rel_data <- read_os_release() if (!is.null(rel_data)) { vals <- as.list(sub('^.*="?(.*?)"?$', "\\1", rel_data)) names(vals) <- sub("^(.*)=.*$", "\\1", rel_data) out <- list( id = vals[["ID"]], version = vals[["VERSION_ID"]] ) if ("VERSION_CODENAME" %in% names(vals)) { out$codename <- vals[["VERSION_CODENAME"]] } else { # This probably isn't right, maybe could extract codename from pretty name? out$codename <- vals[["PRETTY_NAME"]] } out } else { NULL } } read_os_release <- function() { if (file.exists("/etc/os-release")) { readLines("/etc/os-release") } } system_release <- function() { rel_data <- read_system_release() if (!is.null(rel_data)) { # Something like "CentOS Linux release 7.7.1908 (Core)" list( id = sub("^([a-zA-Z]+) .* ([0-9.]+).*$", "\\1", rel_data), version = sub("^([a-zA-Z]+) .* ([0-9.]+).*$", "\\2", rel_data), codename = NA ) } else { NULL } } read_system_release <- function() { if (file.exists("/etc/system-release")) { readLines("/etc/system-release")[1] } } #### end distro #### find_available_binary <- function(os) { # Download a csv that maps one to the other, columns "actual" and "use_this" u <- "https://raw.githubusercontent.com/ursa-labs/arrow-r-nightly/master/linux/distro-map.csv" lookup <- try(utils::read.csv(u, stringsAsFactors = FALSE), silent = quietly) if (!inherits(lookup, "try-error") && os %in% lookup$actual) { new <- lookup$use_this[lookup$actual == os] if (length(new) == 1 && !is.na(new)) { # Just some sanity checking cat(sprintf("*** Using %s binary for %s\n", new, os)) os <- new } } os } find_local_source <- function() { # We'll take the first of these that exists # The first case probably occurs if we're in the arrow git repo # The second probably occurs if we're installing the arrow R package cpp_dir_options <- c( file.path(Sys.getenv("ARROW_SOURCE_HOME", ".."), "cpp"), "tools/cpp" ) for (cpp_dir in cpp_dir_options) { if (file.exists(file.path(cpp_dir, "src/arrow/api.h"))) { cat(paste0("*** Found local C++ source: '", cpp_dir, "'\n")) return(cpp_dir) } } NULL } env_vars_as_string <- function(env_var_list) { # Do some basic checks on env_var_list: # Check that env_var_list has names, that those names are valid POSIX # environment variables, and that none of the values contain `'`. stopifnot( length(env_var_list) == length(names(env_var_list)), all(grepl("^[^0-9]", names(env_var_list))), all(grepl("^[A-Z0-9_]+$", names(env_var_list))), !any(grepl("'", env_var_list, fixed = TRUE)) ) env_var_string <- paste0(names(env_var_list), "='", env_var_list, "'", collapse = " ") if (nchar(env_var_string) > 30000) { # This could happen if the full paths in *_SOURCE_URL were *very* long. # A more formal check would look at getconf ARG_MAX, but this shouldn't matter cat("*** Warning: Environment variables are very long. This could cause issues on some shells.\n") } env_var_string } build_libarrow <- function(src_dir, dst_dir) { # We'll need to compile R bindings with these libs, so delete any .o files system("rm src/*.o", ignore.stdout = TRUE, ignore.stderr = TRUE) # Set up make for parallel building makeflags <- Sys.getenv("MAKEFLAGS") if (makeflags == "") { # CRAN policy says not to use more than 2 cores during checks # If you have more and want to use more, set MAKEFLAGS ncores <- min(parallel::detectCores(), 2) makeflags <- sprintf("-j%s", ncores) Sys.setenv(MAKEFLAGS = makeflags) } if (!quietly) { cat("*** Building with MAKEFLAGS=", makeflags, "\n") } # Check for libarrow build dependencies: # * cmake cmake <- ensure_cmake() # Optionally build somewhere not in tmp so we can dissect the build if it fails debug_dir <- Sys.getenv("LIBARROW_DEBUG_DIR") if (nzchar(debug_dir)) { build_dir <- debug_dir } else { # But normally we'll just build in a tmp dir build_dir <- tempfile() } options(.arrow.cleanup = c(getOption(".arrow.cleanup"), build_dir)) R_CMD_config <- function(var) { if (getRversion() < 3.4) { # var names were called CXX1X instead of CXX11 var <- sub("^CXX11", "CXX1X", var) } # tools::Rcmd introduced R 3.3 tools::Rcmd(paste("config", var), stdout = TRUE) } env_var_list <- c( SOURCE_DIR = src_dir, BUILD_DIR = build_dir, DEST_DIR = dst_dir, CMAKE = cmake, # EXTRA_CMAKE_FLAGS will often be "", but it's convenient later to have it defined EXTRA_CMAKE_FLAGS = Sys.getenv("EXTRA_CMAKE_FLAGS"), # Make sure we build with the same compiler settings that R is using CC = R_CMD_config("CC"), CXX = paste(R_CMD_config("CXX11"), R_CMD_config("CXX11STD")), # CXXFLAGS = R_CMD_config("CXX11FLAGS"), # We don't want the same debug symbols LDFLAGS = R_CMD_config("LDFLAGS") ) env_var_list <- with_s3_support(env_var_list) env_var_list <- with_mimalloc(env_var_list) # turn_off_all_optional_features() needs to happen after with_mimalloc() and # with_s3_support(), since those might turn features ON. thirdparty_deps_unavailable <- !download_ok && !dir.exists(thirdparty_dependency_dir) && !env_is("ARROW_DEPENDENCY_SOURCE", "system") on_solaris <- tolower(Sys.info()[["sysname"]]) %in% "sunos" do_minimal_build <- on_solaris || env_is("LIBARROW_MINIMAL", "true") if (do_minimal_build) { # Note that JSON support does work on Solaris, but will be turned off with # the rest of the optional dependencies. # All other dependencies don't compile (e.g thrift, jemalloc, and xsimd) # or do compile but `ar` fails to build # libarrow_bundled_dependencies (e.g. re2 and utf8proc). env_var_list <- turn_off_all_optional_features(env_var_list) } else if (thirdparty_deps_unavailable) { cat(paste0( "*** Building C++ library from source, but downloading thirdparty dependencies\n", " is not possible, so this build will turn off all thirdparty features.\n", " See install vignette for details:\n", " https://cran.r-project.org/web/packages/arrow/vignettes/install.html\n" )) env_var_list <- turn_off_all_optional_features(env_var_list) } else if (dir.exists(thirdparty_dependency_dir)) { # Add the *_SOURCE_URL env vars env_var_list <- set_thirdparty_urls(env_var_list) } env_vars <- env_vars_as_string(env_var_list) cat("**** arrow", ifelse(quietly, "", paste("with", env_vars)), "\n") status <- suppressWarnings(system( paste(env_vars, "inst/build_arrow_static.sh"), ignore.stdout = quietly, ignore.stderr = quietly )) if (status != 0) { # It failed :( cat( "**** Error building Arrow C++.", ifelse(env_is("ARROW_R_DEV", "true"), "", "Re-run with ARROW_R_DEV=true for debug information."), "\n" ) } invisible(status) } ensure_cmake <- function() { cmake <- find_cmake(c( Sys.getenv("CMAKE"), Sys.which("cmake"), Sys.which("cmake3") )) if (is.null(cmake)) { # If not found, download it cat("**** cmake\n") CMAKE_VERSION <- Sys.getenv("CMAKE_VERSION", "3.19.2") if (tolower(Sys.info()[["sysname"]]) %in% "darwin") { postfix <- "-macos-universal.tar.gz" } else { postfix <- "-Linux-x86_64.tar.gz" } cmake_binary_url <- paste0( "https://github.com/Kitware/CMake/releases/download/v", CMAKE_VERSION, "/cmake-", CMAKE_VERSION, postfix ) cmake_tar <- tempfile() cmake_dir <- tempfile() download_successful <- try_download(cmake_binary_url, cmake_tar) if (!download_successful) { cat(paste0( "*** cmake was not found locally and download failed.\n", " Make sure cmake >= 3.10 is installed and available on your PATH,\n", " or download ", cmake_binary_url, "\n", " and define the CMAKE environment variable.\n" )) } untar(cmake_tar, exdir = cmake_dir) unlink(cmake_tar) options(.arrow.cleanup = c(getOption(".arrow.cleanup"), cmake_dir)) cmake <- paste0( cmake_dir, "/cmake-", CMAKE_VERSION, sub(".tar.gz", "", postfix, fixed = TRUE), "/bin/cmake" ) } cmake } find_cmake <- function(paths, version_required = 3.10) { # Given a list of possible cmake paths, return the first one that exists and is new enough for (path in paths) { if (nzchar(path) && cmake_version(path) >= version_required) { # Sys.which() returns a named vector, but that plays badly with c() later names(path) <- NULL return(path) } } # If none found, return NULL NULL } cmake_version <- function(cmd = "cmake") { tryCatch( { raw_version <- system(paste(cmd, "--version"), intern = TRUE, ignore.stderr = TRUE) pat <- ".* ([0-9\\.]+).*?" which_line <- grep(pat, raw_version) package_version(sub(pat, "\\1", raw_version[which_line])) }, error = function(e) { return(0) } ) } turn_off_all_optional_features <- function(env_var_list) { # Because these are done as environment variables (as opposed to build flags), # setting these to "OFF" overrides any previous setting. We don't need to # check the existing value. turn_off <- c( "ARROW_MIMALLOC" = "OFF", "ARROW_JEMALLOC" = "OFF", "ARROW_JSON" = "OFF", "ARROW_PARQUET" = "OFF", # depends on thrift "ARROW_DATASET" = "OFF", # depends on parquet "ARROW_S3" = "OFF", "ARROW_WITH_BROTLI" = "OFF", "ARROW_WITH_BZ2" = "OFF", "ARROW_WITH_LZ4" = "OFF", "ARROW_WITH_SNAPPY" = "OFF", "ARROW_WITH_ZLIB" = "OFF", "ARROW_WITH_ZSTD" = "OFF", "ARROW_WITH_RE2" = "OFF", "ARROW_WITH_UTF8PROC" = "OFF", # The syntax to turn off XSIMD is different. # Pull existing value of EXTRA_CMAKE_FLAGS first (must be defined) "EXTRA_CMAKE_FLAGS" = paste( env_var_list[["EXTRA_CMAKE_FLAGS"]], "-DARROW_SIMD_LEVEL=NONE -DARROW_RUNTIME_SIMD_LEVEL=NONE" ) ) # Create a new env_var_list, with the values of turn_off set. # replace() also adds new values if they didn't exist before replace(env_var_list, names(turn_off), turn_off) } set_thirdparty_urls <- function(env_var_list) { # This function does *not* check if existing *_SOURCE_URL variables are set. # The directory tools/thirdparty_dependencies is created by # create_package_with_all_dependencies() and saved in the tar file. files <- list.files(thirdparty_dependency_dir, full.names = FALSE) url_env_varname <- toupper(sub("(.*?)-.*", "ARROW_\\1_URL", files)) # Special handling for the aws dependencies, which have extra `-` aws <- grepl("^aws", files) url_env_varname[aws] <- sub( "AWS_SDK_CPP", "AWSSDK", gsub( "-", "_", sub( "(AWS.*)-.*", "ARROW_\\1_URL", toupper(files[aws]) ) ) ) full_filenames <- file.path(normalizePath(thirdparty_dependency_dir), files) env_var_list <- replace(env_var_list, url_env_varname, full_filenames) if (!quietly) { env_var_list <- replace(env_var_list, "ARROW_VERBOSE_THIRDPARTY_BUILD", "ON") } env_var_list } is_feature_requested <- function(env_varname, default = env_is("LIBARROW_MINIMAL", "false")) { env_value <- tolower(Sys.getenv(env_varname)) if (identical(env_value, "off")) { # If e.g. ARROW_MIMALLOC=OFF explicitly, override default requested <- FALSE } else if (identical(env_value, "on")) { requested <- TRUE } else { requested <- default } requested } with_mimalloc <- function(env_var_list) { arrow_mimalloc <- is_feature_requested("ARROW_MIMALLOC") if (arrow_mimalloc) { # User wants mimalloc. If they're using gcc, let's make sure the version is >= 4.9 if (isTRUE(cmake_gcc_version(env_var_list) < "4.9")) { cat("**** mimalloc support not available for gcc < 4.9; building with ARROW_MIMALLOC=OFF\n") arrow_mimalloc <- FALSE } } replace(env_var_list, "ARROW_MIMALLOC", ifelse(arrow_mimalloc, "ON", "OFF")) } with_s3_support <- function(env_var_list) { arrow_s3 <- is_feature_requested("ARROW_S3") if (arrow_s3) { # User wants S3 support. If they're using gcc, let's make sure the version is >= 4.9 # and make sure that we have curl and openssl system libs if (isTRUE(cmake_gcc_version(env_var_list) < "4.9")) { cat("**** S3 support not available for gcc < 4.9; building with ARROW_S3=OFF\n") arrow_s3 <- FALSE } else if (!cmake_find_package("CURL", NULL, env_var_list)) { # curl on macos should be installed, so no need to alter this for macos cat("**** S3 support requires libcurl-devel (rpm) or libcurl4-openssl-dev (deb); building with ARROW_S3=OFF\n") arrow_s3 <- FALSE } else if (!cmake_find_package("OpenSSL", "1.0.2", env_var_list)) { cat("**** S3 support requires version >= 1.0.2 of openssl-devel (rpm), libssl-dev (deb), or openssl (brew); building with ARROW_S3=OFF\n") arrow_s3 <- FALSE } } replace(env_var_list, "ARROW_S3", ifelse(arrow_s3, "ON", "OFF")) } cmake_gcc_version <- function(env_var_list) { # This function returns NA if using a non-gcc compiler # Always enclose calls to it in isTRUE() or isFALSE() vals <- cmake_cxx_compiler_vars(env_var_list) if (!identical(vals[["CMAKE_CXX_COMPILER_ID"]], "GNU")) { return(NA) } package_version(vals[["CMAKE_CXX_COMPILER_VERSION"]]) } cmake_cxx_compiler_vars <- function(env_var_list) { env_vars <- env_vars_as_string(env_var_list) info <- system(paste("export", env_vars, "&& $CMAKE --system-information"), intern = TRUE) info <- grep("^[A-Z_]* .*$", info, value = TRUE) vals <- as.list(sub('^.*? "?(.*?)"?$', "\\1", info)) names(vals) <- sub("^(.*?) .*$", "\\1", info) vals[grepl("^CMAKE_CXX_COMPILER_?", names(vals))] } cmake_find_package <- function(pkg, version = NULL, env_var_list) { td <- tempfile() dir.create(td) options(.arrow.cleanup = c(getOption(".arrow.cleanup"), td)) find_package <- paste0("find_package(", pkg, " ", version, " REQUIRED)") writeLines(find_package, file.path(td, "CMakeLists.txt")) env_vars <- env_vars_as_string(env_var_list) cmake_cmd <- paste0( "export ", env_vars, " && cd ", td, " && $CMAKE ", " -DCMAKE_EXPORT_NO_PACKAGE_REGISTRY=ON", " -DCMAKE_FIND_PACKAGE_NO_PACKAGE_REGISTRY=ON", " ." ) system(cmake_cmd, ignore.stdout = TRUE, ignore.stderr = TRUE) == 0 } ##### if (!file.exists(paste0(dst_dir, "/include/arrow/api.h"))) { # If we're working in a local checkout and have already built the libs, we # don't need to do anything. Otherwise, # (1) Look for a prebuilt binary for this version bin_file <- src_dir <- NULL if (download_ok && binary_ok) { bin_file <- download_binary() } if (!is.null(bin_file)) { # Extract them dir.create(dst_dir, showWarnings = !quietly, recursive = TRUE) unzip(bin_file, exdir = dst_dir) unlink(bin_file) } else if (build_ok) { # (2) Find source and build it src_dir <- find_local_source() if (!is.null(src_dir)) { cat("*** Building C++ libraries\n") build_libarrow(src_dir, dst_dir) } else { cat("*** Proceeding without C++ dependencies\n") } } else { cat("*** Proceeding without C++ dependencies\n") } }