summary refs log tree commit diff stats
path: root/src/arrow/r/R/install-arrow.R
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r-- src/arrow/r/R/install-arrow.R 239
1 files changed, 239 insertions, 0 deletions
diff --git a/src/arrow/r/R/install-arrow.R b/src/arrow/r/R/install-arrow.R
new file mode 100644
index 000000000..3e295c543
--- /dev/null
+++ b/src/arrow/r/R/install-arrow.R
@@ -0,0 +1,239 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+#' Install or upgrade the Arrow library
+#'
+#' Use this function to install the latest release of `arrow`, to switch to or
+#' from a nightly development version, or on Linux to try reinstalling with
+#' all necessary C++ dependencies.
+#'
+#' Note that, unlike packages like `tensorflow`, `blogdown`, and others that
+#' require external dependencies, you do not need to run `install_arrow()`
+#' after a successful `arrow` installation.
+#'
+#' @param nightly logical: Should we install a development version of the
+#' package, or should we install from CRAN (the default).
+#' @param binary On Linux, value to set for the environment variable
+#' `LIBARROW_BINARY`, which governs how C++ binaries are used, if at all.
+#' The default value, `TRUE`, tells the installation script to detect the
+#' Linux distribution and version and find an appropriate C++ library. `FALSE`
+#' would tell the script not to retrieve a binary and instead build Arrow C++
+#' from source. Other valid values are strings corresponding to a Linux
+#' distribution-version, to override the value that would be detected.
+#' See `vignette("install", package = "arrow")` for further details.
+#' @param use_system logical: Should we use `pkg-config` to look for Arrow
+#' system packages? Default is `FALSE`. If `TRUE`, source installation may be
+#' faster, but there is a risk of version mismatch. This sets the
+#' `ARROW_USE_PKG_CONFIG` environment variable.
+#' @param minimal logical: If building from source, should we build without
+#' optional dependencies (compression libraries, for example)? Default is
+#' `FALSE`. This sets the `LIBARROW_MINIMAL` environment variable.
+#' @param verbose logical: Print more debugging output when installing? Default
+#' is `FALSE`. This sets the `ARROW_R_DEV` environment variable.
+#' @param repos character vector of base URLs of the repositories to install
+#' from (passed to `install.packages()`)
+#' @param ... Additional arguments passed to `install.packages()`
+#' @export
+#' @importFrom utils install.packages
+#' @seealso [arrow_available()] to see if the package was configured with
+#' necessary C++ dependencies. `vignette("install", package = "arrow")` for
+#' more ways to tune installation on Linux.
+install_arrow <- function(nightly = FALSE,
+                          binary = Sys.getenv("LIBARROW_BINARY", TRUE),
+                          use_system = Sys.getenv("ARROW_USE_PKG_CONFIG", FALSE),
+                          minimal = Sys.getenv("LIBARROW_MINIMAL", FALSE),
+                          verbose = Sys.getenv("ARROW_R_DEV", FALSE),
+                          repos = getOption("repos"),
+                          ...) {
+  sysname <- tolower(Sys.info()[["sysname"]])
+  conda <- isTRUE(grepl("conda", R.Version()$platform))
+
+  if (conda) {
+    # An R built by conda gets the conda package rather than install.packages()
+    if (nightly) {
+      system("conda install -y -c arrow-nightlies -c conda-forge --strict-channel-priority r-arrow")
+    } else {
+      system("conda install -y -c conda-forge --strict-channel-priority r-arrow")
+    }
+  } else {
+    # The arguments are handed to the installation via environment variables
+    # (Sys.setenv() coerces each value to character)
+    Sys.setenv(
+      LIBARROW_BINARY = binary,
+      LIBARROW_MINIMAL = minimal,
+      ARROW_R_DEV = verbose,
+      ARROW_USE_PKG_CONFIG = use_system
+    )
+    # Sys.getenv() always returns character, so the default value of `binary`
+    # arrives here as the string "TRUE", not the logical TRUE. Treat both
+    # spellings (in any letter case) as "yes, use a binary".
+    wants_binary <- identical(tolower(as.character(binary)), "true")
+
+    # On the M1, we can't use the usual autobrew, which pulls Intel dependencies
+    apple_m1 <- grepl("arm-apple|aarch64.*darwin", R.Version()$platform)
+    # On Rosetta, we have to build without JEMALLOC, so we also can't autobrew.
+    # The sysctl key does not exist on every Mac; when it is missing the command
+    # fails, so silence its stderr and the non-zero-exit warning and simply
+    # treat that case as "not Rosetta".
+    rosetta <- identical(sysname, "darwin") && identical(
+      suppressWarnings(
+        system("sysctl -n sysctl.proc_translated", intern = TRUE, ignore.stderr = TRUE)
+      ),
+      "1"
+    )
+    if (rosetta) {
+      Sys.setenv(ARROW_JEMALLOC = "OFF")
+    }
+    if (apple_m1 || rosetta) {
+      Sys.setenv(FORCE_BUNDLED_BUILD = "true")
+    }
+
+    opts <- list()
+    if (apple_m1 || rosetta) {
+      # Skip binaries (esp. for rosetta)
+      opts$pkgType <- "source"
+    } else if (wants_binary) {
+      # Unless otherwise directed, don't consider newer source packages when
+      # options(pkgType) == "both" (default on win/mac)
+      opts$install.packages.check.source <- "no"
+      opts$install.packages.compile.from.source <- "never"
+    }
+    if (length(opts) > 0) {
+      # Temporarily override the options; restore them when the function exits
+      old <- options(opts)
+      on.exit(options(old), add = TRUE)
+    }
+    install.packages("arrow", repos = arrow_repos(repos, nightly), ...)
+  }
+  if ("arrow" %in% loadedNamespaces()) {
+    # If you've just sourced this file, "arrow" won't be (re)loaded
+    reload_arrow()
+  }
+}
+
+arrow_repos <- function(repos = getOption("repos"), nightly = FALSE) {
+  # Build the repository list handed to install.packages(): the release
+  # repositories, with the nightly repository in front only when `nightly`.
+  nightly_url <- getOption("arrow.dev_repo", "https://arrow-r-nightly.s3.amazonaws.com")
+
+  cran_unset <- identical(repos, c(CRAN = "@CRAN@"))
+  if (length(repos) == 0 || cran_unset) {
+    # No usable repository configured: fall back to the default/CDN
+    repos <- "https://cloud.r-project.org/"
+  }
+
+  # Always strip the nightly URL first, so nightly = FALSE can never pull
+  # from it by accident
+  release_repos <- setdiff(repos, nightly_url)
+  if (nightly) {
+    c(nightly_url, release_repos)
+  } else {
+    release_repos
+  }
+}
+
+reload_arrow <- function() {
+  # Swap the loaded 'arrow' for the freshly installed one. This needs the
+  # optional 'pkgload' package; without it, all we can do is ask for a restart.
+  if (!requireNamespace("pkgload", quietly = TRUE)) {
+    return(message("Please restart R to use the 'arrow' package."))
+  }
+
+  # Record whether arrow was on the search path before unloading it, so it
+  # comes back in the same state: attached, or merely loaded.
+  was_attached <- is.element("package:arrow", search())
+  pkgload::unload("arrow")
+  if (!was_attached) {
+    return(requireNamespace("arrow", quietly = TRUE))
+  }
+  require("arrow", character.only = TRUE, quietly = TRUE)
+}
+
+
+#' Create a source bundle that includes all thirdparty dependencies
+#'
+#' @param dest_file File path for the new tar.gz package. Defaults to
+#' `arrow_V.V.V_with_deps.tar.gz` in the current directory (`V.V.V` is the version)
+#' @param source_file File path for the input tar.gz package. Defaults to
+#' downloading the package from CRAN (or whatever you have set as the first in
+#' `getOption("repos")`)
+#' @return The full path to `dest_file`, invisibly
+#'
+#' This function is used for setting up an offline build. If it's possible to
+#' download at build time, don't use this function. Instead, let `cmake`
+#' download the required dependencies for you.
+#' These downloaded dependencies are only used in the build if
+#' `ARROW_DEPENDENCY_SOURCE` is unset, `BUNDLED`, or `AUTO`.
+#' https://arrow.apache.org/docs/developers/cpp/building.html#offline-builds
+#'
+#' If you're using binary packages you shouldn't need to use this function. You
+#' should download the appropriate binary from your package repository, transfer
+#' that to the offline computer, and install that. Any OS can create the source
+#' bundle, but it cannot be installed on Windows. (Instead, use a standard
+#' Windows binary package.)
+#'
+#' Note if you're using RStudio Package Manager on Linux: If you still want to
+#' make a source bundle with this function, make sure to set the first repo in
+#' `options("repos")` to be a mirror that contains source packages (that is:
+#' something other than the RSPM binary mirror URLs).
+#'
+#' ## Steps for an offline install with optional dependencies:
+#'
+#' ### Using a computer with internet access, pre-download the dependencies:
+#' * Install the `arrow` package _or_ run
+#' `source("https://raw.githubusercontent.com/apache/arrow/master/r/R/install-arrow.R")`
+#' * Run `create_package_with_all_dependencies("my_arrow_pkg.tar.gz")`
+#' * Copy the newly created `my_arrow_pkg.tar.gz` to the computer without internet access
+#'
+#' ### On the computer without internet access, install the prepared package:
+#' * Install the `arrow` package from the copied file
+#' * `install.packages("my_arrow_pkg.tar.gz", dependencies = c("Depends", "Imports", "LinkingTo"))`
+#' * This installation will build from source, so `cmake` must be available
+#' * Run [arrow_info()] to check installed capabilities
+#'
+#'
+#' @examples
+#' \dontrun{
+#' new_pkg <- create_package_with_all_dependencies()
+#' # Note: this works when run in the same R session, but it's meant to be
+#' # copied to a different computer.
+#' install.packages(new_pkg, dependencies = c("Depends", "Imports", "LinkingTo"))
+#' }
+#' @export
+create_package_with_all_dependencies <- function(dest_file = NULL, source_file = NULL) {
+  if (is.null(source_file)) {
+    pkg_download_dir <- tempfile()
+    dir.create(pkg_download_dir)
+    on.exit(unlink(pkg_download_dir, recursive = TRUE), add = TRUE)
+    message("Downloading Arrow source file")
+    downloaded <- utils::download.packages("arrow", destdir = pkg_download_dir, type = "source")
+    # download.packages() only warns when it cannot fetch the package and
+    # returns a matrix with no rows; fail clearly instead of with a
+    # "subscript out of bounds" error on the next line.
+    if (nrow(downloaded) < 1) {
+      stop("Failed to download the Arrow source package")
+    }
+    source_file <- downloaded[1, 2, drop = TRUE]
+  }
+  if (!file.exists(source_file) || !endsWith(source_file, "tar.gz")) {
+    stop("Arrow package .tar.gz file not found")
+  }
+  if (is.null(dest_file)) {
+    # e.g. convert /path/to/arrow_5.0.0.tar.gz to ./arrow_5.0.0_with_deps.tar.gz
+    # (add 'with_deps' for clarity if the file was downloaded locally)
+    # The dots are escaped so that only a literal ".tar.gz" suffix is stripped.
+    dest_file <- paste0(sub("\\.tar\\.gz$", "", basename(source_file)), "_with_deps.tar.gz")
+  }
+  untar_dir <- tempfile()
+  on.exit(unlink(untar_dir, recursive = TRUE), add = TRUE)
+  utils::untar(source_file, exdir = untar_dir)
+  tools_dir <- file.path(untar_dir, "arrow/tools")
+  download_dependencies_sh <- file.path(tools_dir, "cpp/thirdparty/download_dependencies.sh")
+  # If you change this path, also need to edit nixlibs.R
+  download_dir <- file.path(tools_dir, "thirdparty_dependencies")
+  dir.create(download_dir)
+
+  message("Downloading files to ", download_dir)
+  # system2() quotes the command but not its arguments, so quote the target
+  # directory ourselves in case the temporary path contains spaces.
+  download_successful <- system2(
+    download_dependencies_sh,
+    shQuote(download_dir),
+    stdout = FALSE
+  ) == 0
+  if (!download_successful) {
+    stop("Failed to download thirdparty dependencies")
+  }
+  # Need to change directory to untar_dir so tar() will use relative paths. That
+  # means we'll need a full, non-relative path for dest_file. (extra_flags="-C"
+  # doesn't work with R's internal tar)
+  orig_wd <- getwd()
+  on.exit(setwd(orig_wd), add = TRUE)
+  # normalizePath() may return the input unchanged if dest_file doesn't exist,
+  # so create it first.
+  file.create(dest_file)
+  dest_file <- normalizePath(dest_file, mustWork = TRUE)
+  setwd(untar_dir)
+
+  message("Repacking tar.gz file to ", dest_file)
+  tar_successful <- utils::tar(dest_file, compression = "gz") == 0
+  if (!tar_successful) {
+    stop("Failed to create new tar.gz file")
+  }
+  invisible(dest_file)
+}