diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000 |
commit | e6918187568dbd01842d8d1d2c808ce16a894239 (patch) | |
tree | 64f88b554b444a49f656b6c656111a145cbbaa28 /src/arrow/r/tools/nixlibs.R | |
parent | Initial commit. (diff) | |
download | ceph-e6918187568dbd01842d8d1d2c808ce16a894239.tar.xz ceph-e6918187568dbd01842d8d1d2c808ce16a894239.zip |
Adding upstream version 18.2.2. upstream/18.2.2
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/arrow/r/tools/nixlibs.R')
-rw-r--r-- | src/arrow/r/tools/nixlibs.R | 601 |
1 files changed, 601 insertions, 0 deletions
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

# The first trailing command-line argument is the Arrow C++ version; the
# libraries are unpacked or built under "libarrow/arrow-<VERSION>".
args <- commandArgs(TRUE)
VERSION <- args[1]
dst_dir <- paste0("libarrow/arrow-", VERSION)

arrow_repo <- "https://arrow-r-nightly.s3.amazonaws.com/libarrow/"

# Compare against a version *string*: a numeric literal is only turned into a
# version through its decimal printout, which is fragile (3.10 prints as 3.1).
if (getRversion() < "3.4" && is.null(getOption("download.file.method"))) {
  # default method doesn't work on R 3.3, nor does libcurl
  options(download.file.method = "wget")
}

options(.arrow.cleanup = character()) # To collect dirs to rm on exit
# NOTE(review): unlink() is called without `recursive = TRUE`, and ?unlink says
# directories are not deleted in that case. Confirm the intent before changing
# it: build_libarrow() also registers a user-supplied LIBARROW_DEBUG_DIR here.
on.exit(unlink(getOption(".arrow.cleanup")))

# TRUE when environment variable `var`, lower-cased, is exactly `value`.
# An unset variable reads as "".
env_is <- function(var, value) identical(tolower(Sys.getenv(var)), value)

# Download `from_url` into `to_file`.
# Returns TRUE only if download.file() neither raised an error nor returned a
# non-zero status. Warnings are suppressed; errors are printed only when the
# script is not running quietly.
try_download <- function(from_url, to_file) {
  status <- try(
    suppressWarnings(
      download.file(from_url, to_file, quiet = quietly)
    ),
    silent = quietly
  )
  # Return whether the download was successful
  !inherits(status, "try-error") && status == 0
}

# For local debugging, set ARROW_R_DEV=TRUE to make this script print more
quietly <- !env_is("ARROW_R_DEV", "true")

# Default is build from source, not download a binary
build_ok <- !env_is("LIBARROW_BUILD", "false")
binary_ok <- env_is("LIBARROW_BINARY", "true")

# Check if we're doing an offline build.
# (Note that cmake will still be downloaded if necessary
#  https://arrow.apache.org/docs/developers/cpp/building.html#offline-builds)
download_ok <- !env_is("TEST_OFFLINE_BUILD", "true") &&
  try_download("https://github.com", tempfile())

# This "tools/thirdparty_dependencies" path, within the tar file, might exist if
# create_package_with_all_dependencies() was run, or if someone has created it
# manually before running make build.
# If you change this path, you also need to edit
# `create_package_with_all_dependencies()` in install-arrow.R
thirdparty_dependency_dir <- Sys.getenv(
  "ARROW_THIRDPARTY_DEPENDENCY_DIR",
  "tools/thirdparty_dependencies"
)

# Try to fetch a prebuilt libarrow zip for `os` (an "id-version" string, or
# NULL when no binary should be used).
# Returns the path of the downloaded temp file, or NULL if `os` is NULL or the
# download failed.
download_binary <- function(os = identify_os()) {
  if (is.null(os)) {
    return(NULL)
  }
  # See if we can map this os-version to one we have binaries for
  os <- find_available_binary(os)
  binary_url <- paste0(arrow_repo, "bin/", os, "/arrow-", VERSION, ".zip")
  libfile <- tempfile()
  if (!try_download(binary_url, libfile)) {
    cat(sprintf("*** No C++ binaries found for %s\n", os))
    return(NULL)
  }
  cat(sprintf("*** Successfully retrieved C++ binaries for %s\n", os))
  if (!identical(os, "centos-7")) {
    # centos-7 uses gcc 4.8 so the binary doesn't have ARROW_S3=ON but the others do
    # TODO: actually check for system requirements?
    cat("**** Binary package requires libcurl and openssl\n")
    cat("**** If installation fails, retry after installing those system requirements\n")
  }
  libfile
}

# Function to figure out which flavor of binary we should download, if at all.
# By default (unset or "FALSE"), it will not download a precompiled library,
# but you can override this by setting the env var LIBARROW_BINARY to:
# * `TRUE` (not case-sensitive), to try to discover your current OS, or
# * some other string, presumably a related "distro-version" that has binaries
#   built that work for your OS
#
# Returns NULL (no binary), the user-supplied string, or "<id>-<short_version>"
# as detected by distro().
identify_os <- function(os = Sys.getenv("LIBARROW_BINARY")) {
  if (tolower(os) %in% c("", "false")) {
    # Env var says not to download a binary
    return(NULL)
  } else if (!identical(tolower(os), "true")) {
    # Env var provided an os-version to use--maybe you're on Ubuntu 18.10 but
    # we only build for 18.04 and that's fine--so use what the user set
    return(os)
  }

  linux <- distro()
  if (is.null(linux)) {
    cat("*** Unable to identify current OS/version\n")
    return(NULL)
  }
  paste(linux$id, linux$short_version, sep = "-")
}

#### start distro ####

# Identify the Linux distribution.
# Returns a list with `id`, `version`, `codename` and (when it can be derived)
# `short_version`, or NULL if none of the probes below yields anything.
distro <- function() {
  # The code in this script is a (potentially stale) copy of the distro package
  if (requireNamespace("distro", quietly = TRUE)) {
    # Use the version from the package, which may be updated from this
    return(distro::distro())
  }

  # Probe in order of preference: lsb_release, /etc/os-release,
  # /etc/system-release
  out <- lsb_release()
  if (is.null(out)) {
    out <- os_release()
  }
  if (is.null(out)) {
    out <- system_release()
  }
  if (is.null(out)) {
    return(NULL)
  }

  out$id <- tolower(out$id)
  # debian unstable & testing lsb_release `version` don't include numbers but we can map from pretty name
  if (is.null(out$version) || out$version %in% c("testing", "unstable")) {
    # isTRUE() guards against a missing codename: grepl() on NULL returns
    # logical(0), which `if` cannot evaluate
    if (isTRUE(grepl("bullseye", out$codename))) {
      out$short_version <- "11"
    } else if (isTRUE(grepl("bookworm", out$codename))) {
      out$short_version <- "12"
    }
  } else if (identical(out$id, "ubuntu")) {
    # Keep major.minor version
    out$short_version <- sub('^"?([0-9]+\\.[0-9]+).*"?.*$', "\\1", out$version)
  } else {
    # Only major version number
    out$short_version <- sub('^"?([0-9]+).*"?.*$', "\\1", out$version)
  }
  out
}

# Query the `lsb_release` command for id, version and codename; NULL when the
# command is not on the PATH.
lsb_release <- function() {
  if (!have_lsb_release()) {
    return(NULL)
  }
  list(
    id = call_lsb("-is"),
    version = call_lsb("-rs"),
    codename = call_lsb("-cs")
  )
}

have_lsb_release <- function() nzchar(Sys.which("lsb_release"))
call_lsb <- function(args) system(paste("lsb_release", args), intern = TRUE)

# Parse the KEY=value lines of /etc/os-release into id, version and codename;
# NULL when the file does not exist.
os_release <- function() {
  rel_data <- read_os_release()
  if (is.null(rel_data)) {
    return(NULL)
  }
  # Values may be wrapped in double quotes; strip them
  vals <- as.list(sub('^.*="?(.*?)"?$', "\\1", rel_data))
  names(vals) <- sub("^(.*)=.*$", "\\1", rel_data)

  out <- list(
    id = vals[["ID"]],
    version = vals[["VERSION_ID"]]
  )
  if ("VERSION_CODENAME" %in% names(vals)) {
    out$codename <- vals[["VERSION_CODENAME"]]
  } else {
    # This probably isn't right, maybe could extract codename from pretty name?
    out$codename <- vals[["PRETTY_NAME"]]
  }
  out
}

# Lines of /etc/os-release, or NULL (invisibly) when the file is absent.
read_os_release <- function() {
  if (file.exists("/etc/os-release")) {
    readLines("/etc/os-release")
  }
}

# Parse the first line of /etc/system-release; NULL when the file is absent.
system_release <- function() {
  rel_data <- read_system_release()
  if (is.null(rel_data)) {
    return(NULL)
  }
  # Something like "CentOS Linux release 7.7.1908 (Core)"
  list(
    id = sub("^([a-zA-Z]+) .* ([0-9.]+).*$", "\\1", rel_data),
    version = sub("^([a-zA-Z]+) .* ([0-9.]+).*$", "\\2", rel_data),
    codename = NA
  )
}

# First line of /etc/system-release, or NULL (invisibly) when it is absent.
read_system_release <- function() {
  if (file.exists("/etc/system-release")) {
    readLines("/etc/system-release")[1]
  }
}

#### end distro ####

# Map the detected `os` to the distro-version that binaries are published for.
# Falls back to returning `os` unchanged if the lookup table cannot be read or
# has no single, non-NA entry for it.
find_available_binary <- function(os) {
  # Download a csv that maps one to the other, columns "actual" and "use_this"
  u <- "https://raw.githubusercontent.com/ursa-labs/arrow-r-nightly/master/linux/distro-map.csv"
  lookup <- try(utils::read.csv(u, stringsAsFactors = FALSE), silent = quietly)
  if (!inherits(lookup, "try-error") && os %in% lookup$actual) {
    new <- lookup$use_this[lookup$actual == os]
    if (length(new) == 1 && !is.na(new)) { # Just some sanity checking
      cat(sprintf("*** Using %s binary for %s\n", new, os))
      os <- new
    }
  }
  os
}

# Locate a directory containing the Arrow C++ sources (identified by
# "src/arrow/api.h"). Returns its path, or NULL if none of the candidates match.
find_local_source <- function() {
  # We'll take the first of these that exists
  # The first case probably occurs if we're in the arrow git repo
  # The second probably occurs if we're installing the arrow R package
  cpp_dir_options <- c(
    file.path(Sys.getenv("ARROW_SOURCE_HOME", ".."), "cpp"),
    "tools/cpp"
  )
  for (cpp_dir in cpp_dir_options) {
    if (file.exists(file.path(cpp_dir, "src/arrow/api.h"))) {
      cat(paste0("*** Found local C++ source: '", cpp_dir, "'\n"))
      return(cpp_dir)
    }
  }
  NULL
}

# Render a named character vector as a shell fragment: NAME='value' NAME2='...'
# Stops if any element is unnamed, a name is not upper-case/digits/underscore
# or starts with a digit, or a value contains a single quote.
env_vars_as_string <- function(env_var_list) {
  # Do some basic checks on env_var_list:
  # Check that env_var_list has names, that those names are valid POSIX
  # environment variables, and that none of the values contain `'`.
  stopifnot(
    length(env_var_list) == length(names(env_var_list)),
    all(grepl("^[^0-9]", names(env_var_list))),
    all(grepl("^[A-Z0-9_]+$", names(env_var_list))),
    !any(grepl("'", env_var_list, fixed = TRUE))
  )
  env_var_string <- paste0(names(env_var_list), "='", env_var_list, "'", collapse = " ")
  if (nchar(env_var_string) > 30000) {
    # This could happen if the full paths in *_SOURCE_URL were *very* long.
    # A more formal check would look at getconf ARG_MAX, but this shouldn't matter
    cat("*** Warning: Environment variables are very long. This could cause issues on some shells.\n")
  }
  env_var_string
}

# Build libarrow from `src_dir` into `dst_dir` by running
# inst/build_arrow_static.sh with the assembled environment variables.
# Returns (invisibly) the exit status of that script; a non-zero status is
# reported with cat() but not raised as an error.
build_libarrow <- function(src_dir, dst_dir) {
  # We'll need to compile R bindings with these libs, so delete any .o files
  system("rm src/*.o", ignore.stdout = TRUE, ignore.stderr = TRUE)
  # Set up make for parallel building
  makeflags <- Sys.getenv("MAKEFLAGS")
  if (makeflags == "") {
    # CRAN policy says not to use more than 2 cores during checks
    # If you have more and want to use more, set MAKEFLAGS
    # detectCores() may return NA; na.rm keeps us from producing "-jNA"
    ncores <- min(parallel::detectCores(), 2, na.rm = TRUE)
    makeflags <- sprintf("-j%s", ncores)
    Sys.setenv(MAKEFLAGS = makeflags)
  }
  if (!quietly) {
    cat("*** Building with MAKEFLAGS=", makeflags, "\n")
  }
  # Check for libarrow build dependencies:
  # * cmake
  cmake <- ensure_cmake()

  # Optionally build somewhere not in tmp so we can dissect the build if it fails
  debug_dir <- Sys.getenv("LIBARROW_DEBUG_DIR")
  if (nzchar(debug_dir)) {
    build_dir <- debug_dir
  } else {
    # But normally we'll just build in a tmp dir
    build_dir <- tempfile()
  }
  # NOTE(review): this registers the debug dir for cleanup as well -- confirm
  # that is intended before making the cleanup handler recursive.
  options(.arrow.cleanup = c(getOption(".arrow.cleanup"), build_dir))

  # Ask `R CMD config` for the value of `var`
  R_CMD_config <- function(var) {
    # String comparison: a numeric literal is not a reliable version spec
    if (getRversion() < "3.4") {
      # var names were called CXX1X instead of CXX11
      var <- sub("^CXX11", "CXX1X", var)
    }
    # tools::Rcmd introduced R 3.3
    tools::Rcmd(paste("config", var), stdout = TRUE)
  }
  env_var_list <- c(
    SOURCE_DIR = src_dir,
    BUILD_DIR = build_dir,
    DEST_DIR = dst_dir,
    CMAKE = cmake,
    # EXTRA_CMAKE_FLAGS will often be "", but it's convenient later to have it defined
    EXTRA_CMAKE_FLAGS = Sys.getenv("EXTRA_CMAKE_FLAGS"),
    # Make sure we build with the same compiler settings that R is using
    CC = R_CMD_config("CC"),
    CXX = paste(R_CMD_config("CXX11"), R_CMD_config("CXX11STD")),
    # CXXFLAGS = R_CMD_config("CXX11FLAGS"), # We don't want the same debug symbols
    LDFLAGS = R_CMD_config("LDFLAGS")
  )
  env_var_list <- with_s3_support(env_var_list)
  env_var_list <- with_mimalloc(env_var_list)

  # turn_off_all_optional_features() needs to happen after with_mimalloc() and
  # with_s3_support(), since those might turn features ON.
  thirdparty_deps_unavailable <- !download_ok &&
    !dir.exists(thirdparty_dependency_dir) &&
    !env_is("ARROW_DEPENDENCY_SOURCE", "system")
  on_solaris <- tolower(Sys.info()[["sysname"]]) %in% "sunos"
  do_minimal_build <- on_solaris || env_is("LIBARROW_MINIMAL", "true")

  if (do_minimal_build) {
    # Note that JSON support does work on Solaris, but will be turned off with
    # the rest of the optional dependencies.
    # All other dependencies don't compile (e.g thrift, jemalloc, and xsimd)
    # or do compile but `ar` fails to build
    # libarrow_bundled_dependencies (e.g. re2 and utf8proc).
    env_var_list <- turn_off_all_optional_features(env_var_list)
  } else if (thirdparty_deps_unavailable) {
    cat(paste0(
      "*** Building C++ library from source, but downloading thirdparty dependencies\n",
      "    is not possible, so this build will turn off all thirdparty features.\n",
      "    See install vignette for details:\n",
      "    https://cran.r-project.org/web/packages/arrow/vignettes/install.html\n"
    ))
    env_var_list <- turn_off_all_optional_features(env_var_list)
  } else if (dir.exists(thirdparty_dependency_dir)) {
    # Add the *_SOURCE_URL env vars
    env_var_list <- set_thirdparty_urls(env_var_list)
  }
  env_vars <- env_vars_as_string(env_var_list)

  cat("**** arrow", if (quietly) "" else paste("with", env_vars), "\n")
  status <- suppressWarnings(system(
    paste(env_vars, "inst/build_arrow_static.sh"),
    ignore.stdout = quietly, ignore.stderr = quietly
  ))
  if (status != 0) {
    # It failed :(
    cat(
      "**** Error building Arrow C++.",
      if (env_is("ARROW_R_DEV", "true")) "" else "Re-run with ARROW_R_DEV=true for debug information.",
      "\n"
    )
  }
  invisible(status)
}

# Return the path of a usable cmake: the first of $CMAKE, `cmake`, `cmake3`
# that is new enough, otherwise a freshly downloaded binary release.
ensure_cmake <- function() {
  cmake <- find_cmake(c(
    Sys.getenv("CMAKE"),
    Sys.which("cmake"),
    Sys.which("cmake3")
  ))

  if (is.null(cmake)) {
    # If not found, download it
    cat("**** cmake\n")
    CMAKE_VERSION <- Sys.getenv("CMAKE_VERSION", "3.19.2")
    if (tolower(Sys.info()[["sysname"]]) %in% "darwin") {
      postfix <- "-macos-universal.tar.gz"
    } else {
      postfix <- "-Linux-x86_64.tar.gz"
    }
    cmake_binary_url <- paste0(
      "https://github.com/Kitware/CMake/releases/download/v", CMAKE_VERSION,
      "/cmake-", CMAKE_VERSION, postfix
    )
    cmake_tar <- tempfile()
    cmake_dir <- tempfile()
    download_successful <- try_download(cmake_binary_url, cmake_tar)
    if (!download_successful) {
      cat(paste0(
        "*** cmake was not found locally and download failed.\n",
        "    Make sure cmake >= 3.10 is installed and available on your PATH,\n",
        "    or download ", cmake_binary_url, "\n",
        "    and define the CMAKE environment variable.\n"
      ))
    }
    # NOTE(review): execution continues to untar() even when the download
    # failed -- confirm whether an early exit is wanted here.
    untar(cmake_tar, exdir = cmake_dir)
    unlink(cmake_tar)
    options(.arrow.cleanup = c(getOption(".arrow.cleanup"), cmake_dir))
    cmake <- paste0(
      cmake_dir,
      "/cmake-", CMAKE_VERSION, sub(".tar.gz", "", postfix, fixed = TRUE),
      "/bin/cmake"
    )
  }
  cmake
}

# Given candidate cmake paths, return the first non-empty one whose version is
# at least `version_required`, or NULL if there is none.
# `version_required` should be a version string. A numeric default of 3.10 is
# the number 3.1, which silently accepted any cmake >= 3.1.
find_cmake <- function(paths, version_required = "3.10") {
  # as.character() keeps numeric input from older callers working
  version_required <- numeric_version(as.character(version_required))
  # Given a list of possible cmake paths, return the first one that exists and is new enough
  for (path in paths) {
    if (nzchar(path) && cmake_version(path) >= version_required) {
      # Sys.which() returns a named vector, but that plays badly with c() later
      names(path) <- NULL
      return(path)
    }
  }
  # If none found, return NULL
  NULL
}

# Version reported by `<cmd> --version` as a single package_version.
# Returns version 0.0 if the command fails or no version can be parsed, so the
# result is always a length-one version that compares below any requirement.
cmake_version <- function(cmd = "cmake") {
  no_version <- package_version("0.0")
  tryCatch(
    {
      raw_version <- system(paste(cmd, "--version"), intern = TRUE, ignore.stderr = TRUE)
      # Anchor the tail so suffixes such as "-rc1" are dropped, and require the
      # captured token to start with a digit
      pat <- "^.* ([0-9][0-9.]*).*$"
      which_line <- grep(pat, raw_version)
      if (length(which_line) == 0) {
        no_version
      } else {
        # Only the first matching line, so the result is always length one
        package_version(sub(pat, "\\1", raw_version[which_line[1]]))
      }
    },
    error = function(e) no_version
  )
}

# Return `env_var_list` with every optional Arrow feature forced to "OFF" and
# the SIMD-disabling flags appended to EXTRA_CMAKE_FLAGS.
turn_off_all_optional_features <- function(env_var_list) {
  # Because these are done as environment variables (as opposed to build flags),
  # setting these to "OFF" overrides any previous setting. We don't need to
  # check the existing value.
  turn_off <- c(
    "ARROW_MIMALLOC" = "OFF",
    "ARROW_JEMALLOC" = "OFF",
    "ARROW_JSON" = "OFF",
    "ARROW_PARQUET" = "OFF", # depends on thrift
    "ARROW_DATASET" = "OFF", # depends on parquet
    "ARROW_S3" = "OFF",
    "ARROW_WITH_BROTLI" = "OFF",
    "ARROW_WITH_BZ2" = "OFF",
    "ARROW_WITH_LZ4" = "OFF",
    "ARROW_WITH_SNAPPY" = "OFF",
    "ARROW_WITH_ZLIB" = "OFF",
    "ARROW_WITH_ZSTD" = "OFF",
    "ARROW_WITH_RE2" = "OFF",
    "ARROW_WITH_UTF8PROC" = "OFF",
    # The syntax to turn off XSIMD is different.
    # Pull existing value of EXTRA_CMAKE_FLAGS first (must be defined)
    "EXTRA_CMAKE_FLAGS" = paste(
      env_var_list[["EXTRA_CMAKE_FLAGS"]],
      "-DARROW_SIMD_LEVEL=NONE -DARROW_RUNTIME_SIMD_LEVEL=NONE"
    )
  )
  # Create a new env_var_list, with the values of turn_off set.
  # replace() also adds new values if they didn't exist before
  replace(env_var_list, names(turn_off), turn_off)
}

# Return `env_var_list` with one ARROW_<NAME>_URL entry per file found in
# `thirdparty_dependency_dir`, each pointing at that file's full path.
set_thirdparty_urls <- function(env_var_list) {
  # This function does *not* check if existing *_SOURCE_URL variables are set.
  # The directory tools/thirdparty_dependencies is created by
  # create_package_with_all_dependencies() and saved in the tar file.
  files <- list.files(thirdparty_dependency_dir, full.names = FALSE)
  # The part of the file name before the first "-" becomes the variable stem
  url_env_varname <- toupper(sub("(.*?)-.*", "ARROW_\\1_URL", files))
  # Special handling for the aws dependencies, which have extra `-`
  aws <- grepl("^aws", files)
  url_env_varname[aws] <- sub(
    "AWS_SDK_CPP", "AWSSDK",
    gsub(
      "-", "_",
      sub(
        "(AWS.*)-.*", "ARROW_\\1_URL",
        toupper(files[aws])
      )
    )
  )
  full_filenames <- file.path(normalizePath(thirdparty_dependency_dir), files)

  env_var_list <- replace(env_var_list, url_env_varname, full_filenames)
  if (!quietly) {
    env_var_list <- replace(env_var_list, "ARROW_VERBOSE_THIRDPARTY_BUILD", "ON")
  }
  env_var_list
}

# Decide whether a feature is requested: an explicit "on"/"off" (any case) in
# environment variable `env_varname` wins, anything else yields `default`.
is_feature_requested <- function(env_varname, default = env_is("LIBARROW_MINIMAL", "false")) {
  env_value <- tolower(Sys.getenv(env_varname))
  if (identical(env_value, "off")) {
    # If e.g. ARROW_MIMALLOC=OFF explicitly, override default
    requested <- FALSE
  } else if (identical(env_value, "on")) {
    requested <- TRUE
  } else {
    requested <- default
  }
  requested
}

# Return `env_var_list` with ARROW_MIMALLOC set to "ON" or "OFF".
with_mimalloc <- function(env_var_list) {
  arrow_mimalloc <- is_feature_requested("ARROW_MIMALLOC")
  if (arrow_mimalloc) {
    # User wants mimalloc. If they're using gcc, let's make sure the version is >= 4.9
    if (isTRUE(cmake_gcc_version(env_var_list) < "4.9")) {
      cat("**** mimalloc support not available for gcc < 4.9; building with ARROW_MIMALLOC=OFF\n")
      arrow_mimalloc <- FALSE
    }
  }
  replace(env_var_list, "ARROW_MIMALLOC", if (arrow_mimalloc) "ON" else "OFF")
}

# Return `env_var_list` with ARROW_S3 set to "ON" or "OFF".
with_s3_support <- function(env_var_list) {
  arrow_s3 <- is_feature_requested("ARROW_S3")
  if (arrow_s3) {
    # User wants S3 support. If they're using gcc, let's make sure the version is >= 4.9
    # and make sure that we have curl and openssl system libs
    if (isTRUE(cmake_gcc_version(env_var_list) < "4.9")) {
      cat("**** S3 support not available for gcc < 4.9; building with ARROW_S3=OFF\n")
      arrow_s3 <- FALSE
    } else if (!cmake_find_package("CURL", NULL, env_var_list)) {
      # curl on macos should be installed, so no need to alter this for macos
      cat("**** S3 support requires libcurl-devel (rpm) or libcurl4-openssl-dev (deb); building with ARROW_S3=OFF\n")
      arrow_s3 <- FALSE
    } else if (!cmake_find_package("OpenSSL", "1.0.2", env_var_list)) {
      cat("**** S3 support requires version >= 1.0.2 of openssl-devel (rpm), libssl-dev (deb), or openssl (brew); building with ARROW_S3=OFF\n")
      arrow_s3 <- FALSE
    }
  }
  replace(env_var_list, "ARROW_S3", if (arrow_s3) "ON" else "OFF")
}

# Version of the C++ compiler as reported by cmake, or NA if cmake does not
# identify it as "GNU".
cmake_gcc_version <- function(env_var_list) {
  # This function returns NA if using a non-gcc compiler
  # Always enclose calls to it in isTRUE() or isFALSE()
  vals <- cmake_cxx_compiler_vars(env_var_list)
  if (!identical(vals[["CMAKE_CXX_COMPILER_ID"]], "GNU")) {
    return(NA)
  }
  package_version(vals[["CMAKE_CXX_COMPILER_VERSION"]])
}

# Run `$CMAKE --system-information` with `env_var_list` exported and return
# the reported variables whose names start with CMAKE_CXX_COMPILER, as a
# named list.
cmake_cxx_compiler_vars <- function(env_var_list) {
  env_vars <- env_vars_as_string(env_var_list)
  info <- system(paste("export", env_vars, "&& $CMAKE --system-information"), intern = TRUE)
  # Keep only `NAME value` lines, then split each into name and unquoted value
  info <- grep("^[A-Z_]* .*$", info, value = TRUE)
  vals <- as.list(sub('^.*? "?(.*?)"?$', "\\1", info))
  names(vals) <- sub("^(.*?) .*$", "\\1", info)
  vals[grepl("^CMAKE_CXX_COMPILER_?", names(vals))]
}

# TRUE if cmake can satisfy `find_package(<pkg> <version> REQUIRED)`.
# Works by configuring a throwaway one-line project in a temp directory.
cmake_find_package <- function(pkg, version = NULL, env_var_list) {
  td <- tempfile()
  dir.create(td)
  options(.arrow.cleanup = c(getOption(".arrow.cleanup"), td))
  find_package <- paste0("find_package(", pkg, " ", version, " REQUIRED)")
  writeLines(find_package, file.path(td, "CMakeLists.txt"))
  env_vars <- env_vars_as_string(env_var_list)
  cmake_cmd <- paste0(
    "export ", env_vars,
    # Quote the directory so a temp path containing spaces still works
    " && cd ", shQuote(td),
    " && $CMAKE ",
    " -DCMAKE_EXPORT_NO_PACKAGE_REGISTRY=ON",
    " -DCMAKE_FIND_PACKAGE_NO_PACKAGE_REGISTRY=ON",
    " ."
  )
  system(cmake_cmd, ignore.stdout = TRUE, ignore.stderr = TRUE) == 0
}

#####

if (!file.exists(paste0(dst_dir, "/include/arrow/api.h"))) {
  # If we're working in a local checkout and have already built the libs, we
  # don't need to do anything. Otherwise,
  # (1) Look for a prebuilt binary for this version
  bin_file <- src_dir <- NULL
  if (download_ok && binary_ok) {
    bin_file <- download_binary()
  }
  if (!is.null(bin_file)) {
    # Extract them
    dir.create(dst_dir, showWarnings = !quietly, recursive = TRUE)
    unzip(bin_file, exdir = dst_dir)
    unlink(bin_file)
  } else if (build_ok) {
    # (2) Find source and build it
    src_dir <- find_local_source()
    if (!is.null(src_dir)) {
      cat("*** Building C++ libraries\n")
      build_libarrow(src_dir, dst_dir)
    } else {
      cat("*** Proceeding without C++ dependencies\n")
    }
  } else {
    cat("*** Proceeding without C++ dependencies\n")
  }
}