diff options
Diffstat (limited to 'src/arrow/r/R/install-arrow.R')
-rw-r--r-- | src/arrow/r/R/install-arrow.R | 239 |
1 files changed, 239 insertions, 0 deletions
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

#' Install or upgrade the Arrow library
#'
#' Use this function to install the latest release of `arrow`, to switch to or
#' from a nightly development version, or on Linux to try reinstalling with
#' all necessary C++ dependencies.
#'
#' Note that, unlike packages like `tensorflow`, `blogdown`, and others that
#' require external dependencies, you do not need to run `install_arrow()`
#' after a successful `arrow` installation.
#'
#' @param nightly logical: Should we install a development version of the
#' package, or should we install from CRAN (the default).
#' @param binary On Linux, value to set for the environment variable
#' `LIBARROW_BINARY`, which governs how C++ binaries are used, if at all.
#' The default value, `TRUE`, tells the installation script to detect the
#' Linux distribution and version and find an appropriate C++ library. `FALSE`
#' would tell the script not to retrieve a binary and instead build Arrow C++
#' from source. Other valid values are strings corresponding to a Linux
#' distribution-version, to override the value that would be detected.
#' See `vignette("install", package = "arrow")` for further details.
#' @param use_system logical: Should we use `pkg-config` to look for Arrow
#' system packages? Default is `FALSE`. If `TRUE`, source installation may be
#' faster, but there is a risk of version mismatch. This sets the
#' `ARROW_USE_PKG_CONFIG` environment variable.
#' @param minimal logical: If building from source, should we build without
#' optional dependencies (compression libraries, for example)? Default is
#' `FALSE`. This sets the `LIBARROW_MINIMAL` environment variable.
#' @param verbose logical: Print more debugging output when installing? Default
#' is `FALSE`. This sets the `ARROW_R_DEV` environment variable.
#' @param repos character vector of base URLs of the repositories to install
#' from (passed to `install.packages()`)
#' @param ... Additional arguments passed to `install.packages()`
#' @export
#' @importFrom utils install.packages
#' @seealso [arrow_available()] to see if the package was configured with
#' necessary C++ dependencies. `vignette("install", package = "arrow")` for
#' more ways to tune installation on Linux.
install_arrow <- function(nightly = FALSE,
                          binary = Sys.getenv("LIBARROW_BINARY", TRUE),
                          use_system = Sys.getenv("ARROW_USE_PKG_CONFIG", FALSE),
                          minimal = Sys.getenv("LIBARROW_MINIMAL", FALSE),
                          verbose = Sys.getenv("ARROW_R_DEV", FALSE),
                          repos = getOption("repos"),
                          ...) {
  conda <- isTRUE(grepl("conda", R.Version()$platform))

  if (conda) {
    if (nightly) {
      system("conda install -y -c arrow-nightlies -c conda-forge --strict-channel-priority r-arrow")
    } else {
      system("conda install -y -c conda-forge --strict-channel-priority r-arrow")
    }
  } else {
    # These variables are read by the package's configure/build scripts. They
    # are not reset afterwards, so they stay set for the rest of the session.
    Sys.setenv(
      LIBARROW_BINARY = binary,
      LIBARROW_MINIMAL = minimal,
      ARROW_R_DEV = verbose,
      ARROW_USE_PKG_CONFIG = use_system
    )
    # On the M1, we can't use the usual autobrew, which pulls Intel dependencies
    apple_m1 <- grepl("arm-apple|aarch64.*darwin", R.Version()$platform)
    # On Rosetta, we have to build without JEMALLOC, so we also can't autobrew
    rosetta <- on_rosetta()
    if (rosetta) {
      Sys.setenv(ARROW_JEMALLOC = "OFF")
    }
    if (apple_m1 || rosetta) {
      Sys.setenv(FORCE_BUNDLED_BUILD = "true")
    }

    # Sys.getenv() coerces its `unset` value to character, so the default
    # `binary` arrives as the string "TRUE" rather than logical TRUE. Accept
    # both spellings so the default really does prefer binaries.
    binary_is_true <- isTRUE(binary) ||
      identical(toupper(as.character(binary)), "TRUE")

    opts <- list()
    if (apple_m1 || rosetta) {
      # Skip binaries (esp. for rosetta)
      opts$pkgType <- "source"
    } else if (binary_is_true) {
      # Unless otherwise directed, don't consider newer source packages when
      # options(pkgType) == "both" (default on win/mac)
      opts$install.packages.check.source <- "no"
      opts$install.packages.compile.from.source <- "never"
    }
    if (length(opts) > 0) {
      # Temporarily override the options; restore them when we return
      old <- options(opts)
      on.exit(options(old), add = TRUE)
    }
    install.packages("arrow", repos = arrow_repos(repos, nightly), ...)
  }
  if ("arrow" %in% loadedNamespaces()) {
    # If you've just sourced this file, "arrow" won't be (re)loaded
    reload_arrow()
  }
}

# Is this R process running on macOS under Rosetta translation?
#
# Queries `sysctl.proc_translated`. That key does not exist on every Mac, in
# which case `sysctl` writes to stderr and exits non-zero; both the stderr
# output and the resulting R warning are silenced so that the check is quiet.
# Returns a single TRUE/FALSE.
on_rosetta <- function() {
  if (!identical(tolower(Sys.info()[["sysname"]]), "darwin")) {
    return(FALSE)
  }
  translated <- tryCatch(
    suppressWarnings(
      system(
        "sysctl -n sysctl.proc_translated",
        intern = TRUE,
        ignore.stderr = TRUE
      )
    ),
    error = function(e) character(0)
  )
  identical(translated, "1")
}

# Build the vector of repositories to hand to install.packages().
#
# * An empty `repos`, or the unconfigured placeholder c(CRAN = "@CRAN@"), is
#   replaced by the cloud CRAN mirror.
# * The nightly repository (option "arrow.dev_repo", with a built-in default)
#   is always removed from `repos`, and put back in first position only when
#   `nightly` is TRUE.
arrow_repos <- function(repos = getOption("repos"), nightly = FALSE) {
  if (length(repos) == 0 || identical(repos, c(CRAN = "@CRAN@"))) {
    # Set the default/CDN
    repos <- "https://cloud.r-project.org/"
  }
  dev_repo <- getOption("arrow.dev_repo", "https://arrow-r-nightly.s3.amazonaws.com")
  # Remove it if it's there (so nightly=FALSE won't accidentally pull from it)
  repos <- setdiff(repos, dev_repo)
  if (nightly) {
    # Add it first
    repos <- c(dev_repo, repos)
  }
  repos
}

# Unload and reload an already-loaded `arrow` so that a fresh install is picked
# up in the current session. This needs the optional `pkgload` package; without
# it the user is asked to restart R instead.
reload_arrow <- function() {
  if (requireNamespace("pkgload", quietly = TRUE)) {
    # Remember whether the package was attached (not merely loaded) so we can
    # put the session back in the same state after unloading.
    is_attached <- "package:arrow" %in% search()
    pkgload::unload("arrow")
    if (is_attached) {
      require("arrow", character.only = TRUE, quietly = TRUE)
    } else {
      requireNamespace("arrow", quietly = TRUE)
    }
  } else {
    message("Please restart R to use the 'arrow' package.")
  }
}


#' Create a source bundle that includes all thirdparty dependencies
#'
#' This function is used for setting up an offline build. If it's possible to
#' download at build time, don't use this function. Instead, let `cmake`
#' download the required dependencies for you.
#' These downloaded dependencies are only used in the build if
#' `ARROW_DEPENDENCY_SOURCE` is unset, `BUNDLED`, or `AUTO`.
#' https://arrow.apache.org/docs/developers/cpp/building.html#offline-builds
#'
#' If you're using binary packages you shouldn't need to use this function. You
#' should download the appropriate binary from your package repository, transfer
#' that to the offline computer, and install that. Any OS can create the source
#' bundle, but it cannot be installed on Windows. (Instead, use a standard
#' Windows binary package.)
#'
#' Note if you're using RStudio Package Manager on Linux: If you still want to
#' make a source bundle with this function, make sure to set the first repo in
#' `options("repos")` to be a mirror that contains source packages (that is:
#' something other than the RSPM binary mirror URLs).
#'
#' ## Steps for an offline install with optional dependencies:
#'
#' ### Using a computer with internet access, pre-download the dependencies:
#' * Install the `arrow` package _or_ run
#'   `source("https://raw.githubusercontent.com/apache/arrow/master/r/R/install-arrow.R")`
#' * Run `create_package_with_all_dependencies("my_arrow_pkg.tar.gz")`
#' * Copy the newly created `my_arrow_pkg.tar.gz` to the computer without internet access
#'
#' ### On the computer without internet access, install the prepared package:
#' * Install the `arrow` package from the copied file
#'   * `install.packages("my_arrow_pkg.tar.gz", dependencies = c("Depends", "Imports", "LinkingTo"))`
#'   * This installation will build from source, so `cmake` must be available
#' * Run [arrow_info()] to check installed capabilities
#'
#' @param dest_file File path for the new tar.gz package. Defaults to
#' `arrow_V.V.V_with_deps.tar.gz` in the current directory (`V.V.V` is the version)
#' @param source_file File path for the input tar.gz package. Defaults to
#' downloading the package from CRAN (or whatever you have set as the first in
#' `getOption("repos")`)
#' @return The full path to `dest_file`, invisibly
#'
#' @examples
#' \dontrun{
#' new_pkg <- create_package_with_all_dependencies()
#' # Note: this works when run in the same R session, but it's meant to be
#' # copied to a different computer.
#' install.packages(new_pkg, dependencies = c("Depends", "Imports", "LinkingTo"))
#' }
#' @export
create_package_with_all_dependencies <- function(dest_file = NULL, source_file = NULL) {
  if (is.null(source_file)) {
    pkg_download_dir <- tempfile()
    dir.create(pkg_download_dir)
    on.exit(unlink(pkg_download_dir, recursive = TRUE), add = TRUE)
    message("Downloading Arrow source file")
    downloaded <- utils::download.packages("arrow", destdir = pkg_download_dir, type = "source")
    # download.packages() returns a matrix with one row per package actually
    # fetched; zero rows means the download did not succeed.
    if (NROW(downloaded) < 1) {
      stop("Failed to download the Arrow source package", call. = FALSE)
    }
    source_file <- downloaded[1, 2, drop = TRUE]
  }
  if (!file.exists(source_file) || !endsWith(source_file, "tar.gz")) {
    stop("Arrow package .tar.gz file not found")
  }
  if (is.null(dest_file)) {
    # e.g. convert /path/to/arrow_5.0.0.tar.gz to ./arrow_5.0.0_with_deps.tar.gz
    # (add 'with_deps' for clarity if the file was downloaded locally)
    dest_file <- paste0(
      sub("\\.tar\\.gz$", "", basename(source_file)),
      "_with_deps.tar.gz"
    )
  }
  untar_dir <- tempfile()
  on.exit(unlink(untar_dir, recursive = TRUE), add = TRUE)
  utils::untar(source_file, exdir = untar_dir)
  tools_dir <- file.path(untar_dir, "arrow/tools")
  download_dependencies_sh <- file.path(tools_dir, "cpp/thirdparty/download_dependencies.sh")
  # If you change this path, also need to edit nixlibs.R
  download_dir <- file.path(tools_dir, "thirdparty_dependencies")
  dir.create(download_dir)

  message("Downloading files to ", download_dir)
  # system2() quotes the command itself but not its arguments, so quote the
  # target directory in case the temp path contains spaces.
  download_successful <- system2(
    download_dependencies_sh,
    shQuote(download_dir),
    stdout = FALSE
  ) == 0
  if (!download_successful) {
    stop("Failed to download thirdparty dependencies")
  }
  # Need to change directory to untar_dir so tar() will use relative paths. That
  # means we'll need a full, non-relative path for dest_file. (extra_flags="-C"
  # doesn't work with R's internal tar)
  orig_wd <- getwd()
  on.exit(setwd(orig_wd), add = TRUE)
  # normalizePath() may return the input unchanged if dest_file doesn't exist,
  # so create it first.
  file.create(dest_file)
  dest_file <- normalizePath(dest_file, mustWork = TRUE)
  # Don't leave an empty or partial archive behind if repacking fails
  repacked <- FALSE
  on.exit(if (!repacked) unlink(dest_file), add = TRUE)
  setwd(untar_dir)

  message("Repacking tar.gz file to ", dest_file)
  tar_successful <- utils::tar(dest_file, compression = "gz") == 0
  if (!tar_successful) {
    stop("Failed to create new tar.gz file")
  }
  repacked <- TRUE
  invisible(dest_file)
}