diff options
Diffstat (limited to 'src/arrow/r/R/util.R')
-rw-r--r-- | src/arrow/r/R/util.R | 195 |
1 files changed, 195 insertions, 0 deletions
diff --git a/src/arrow/r/R/util.R b/src/arrow/r/R/util.R new file mode 100644 index 000000000..9e3ade6a9 --- /dev/null +++ b/src/arrow/r/R/util.R @@ -0,0 +1,195 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# for compatibility with R versions earlier than 4.0.0 +if (!exists("deparse1")) { + deparse1 <- function(expr, collapse = " ", width.cutoff = 500L, ...) { + paste(deparse(expr, width.cutoff, ...), collapse = collapse) + } +} + +# for compatibility with R versions earlier than 3.6.0 +if (!exists("str2lang")) { + str2lang <- function(s) { + parse(text = s, keep.source = FALSE)[[1]] + } +} + +oxford_paste <- function(x, conjunction = "and", quote = TRUE) { + if (quote && is.character(x)) { + x <- paste0('"', x, '"') + } + if (length(x) < 2) { + return(x) + } + x[length(x)] <- paste(conjunction, x[length(x)]) + if (length(x) > 2) { + return(paste(x, collapse = ", ")) + } else { + return(paste(x, collapse = " ")) + } +} + +assert_is <- function(object, class) { + msg <- paste(substitute(object), "must be a", oxford_paste(class, "or")) + assert_that(inherits(object, class), msg = msg) +} + +assert_is_list_of <- function(object, class) { + msg <- paste(substitute(object), "must be a list of", oxford_paste(class, "or")) + assert_that(is_list_of(object, class), msg = msg) +} + +is_list_of <- function(object, class) { + is.list(object) && all(map_lgl(object, ~ inherits(., class))) +} + +empty_named_list <- function() structure(list(), .Names = character(0)) + +r_symbolic_constants <- c( + "pi", "TRUE", "FALSE", "NULL", "Inf", "NA", "NaN", + "NA_integer_", "NA_real_", "NA_complex_", "NA_character_" +) + +is_function <- function(expr, name) { + # We could have a quosure here if we have an expression like `sum({{ var }})` + if (is_quosure(expr)) { + expr <- quo_get_expr(expr) + } + if (!is.call(expr)) { + return(FALSE) + } else { + if (deparse(expr[[1]]) == name) { + return(TRUE) + } + out <- lapply(expr, is_function, name) + } + any(map_lgl(out, isTRUE)) +} + +all_funs <- function(expr) { + # It is not sufficient to simply do: setdiff(all.names, all.vars) + # here because that would fail to return the names of functions that + # share names with variables. + # To preserve duplicates, call `all.names()` not `all_names()` here. + if (is_quosure(expr)) { + expr <- quo_get_expr(expr) + } + names <- all.names(expr) + names[map_lgl(names, ~ is_function(expr, .))] +} + +all_vars <- function(expr) { + setdiff(all.vars(expr), r_symbolic_constants) +} + +all_names <- function(expr) { + setdiff(all.names(expr), r_symbolic_constants) +} + +is_constant <- function(expr) { + length(all_vars(expr)) == 0 +} + +read_compressed_error <- function(e) { + msg <- conditionMessage(e) + if (grepl(" codec ", msg)) { + compression <- sub(".*Support for codec '(.*)'.*", "\\1", msg) + e$message <- paste0( + msg, + "\nIn order to read this file, you will need to reinstall arrow with additional features enabled.", + "\nSet one of these environment variables before installing:", + sprintf("\n\n * LIBARROW_MINIMAL=false (for all optional features, including '%s')", compression), + sprintf("\n * ARROW_WITH_%s=ON (for just '%s')", toupper(compression), compression), + "\n\nSee https://arrow.apache.org/docs/r/articles/install.html for details" + ) + } + stop(e) +} + +handle_parquet_io_error <- function(e, format) { + msg <- conditionMessage(e) + if (grepl("Parquet magic bytes not found in footer", msg) && length(format) > 1 && is_character(format)) { + # If length(format) > 1, that means it is (almost certainly) the default/not specified value + # so let the user know that they should specify the actual (not parquet) format + abort(c( + msg, + i = "Did you mean to specify a 'format' other than the default (parquet)?" + )) + } + stop(e) +} + +is_writable_table <- function(x) { + inherits(x, c("data.frame", "ArrowTabular")) +} + +# This attribute is used when is_writable is passed into assert_that, and allows +# the call to form part of the error message when is_writable is FALSE +attr(is_writable_table, "fail") <- function(call, env) { + paste0( + deparse(call$x), + " must be an object of class 'data.frame', 'RecordBatch', or 'Table', not '", + class(env[[deparse(call$x)]])[[1]], + "'." + ) +} + +#' Recycle scalar values in a list of arrays +#' +#' @param arrays List of arrays +#' @return List of arrays with any vector/Scalar/Array/ChunkedArray values of length 1 recycled +#' @keywords internal +recycle_scalars <- function(arrays) { + # Get lengths of items in arrays + arr_lens <- map_int(arrays, NROW) + + is_scalar <- arr_lens == 1 + + if (length(arrays) > 1 && any(is_scalar) && !all(is_scalar)) { + + # Recycling not supported for tibbles and data.frames + if (all(map_lgl(arrays, ~ inherits(.x, "data.frame")))) { + abort(c( + "All input tibbles or data.frames must have the same number of rows", + x = paste( + "Number of rows in longest and shortest inputs:", + oxford_paste(c(max(arr_lens), min(arr_lens))) + ) + )) + } + + max_array_len <- max(arr_lens) + arrays[is_scalar] <- lapply(arrays[is_scalar], repeat_value_as_array, max_array_len) + } + arrays +} + +#' Take an object of length 1 and repeat it. +#' +#' @param object Object of length 1 to be repeated - vector, `Scalar`, `Array`, or `ChunkedArray` +#' @param n Number of repetitions +#' +#' @return `Array` of length `n` +#' +#' @keywords internal +repeat_value_as_array <- function(object, n) { + if (inherits(object, "ChunkedArray")) { + return(Scalar$create(object$chunks[[1]])$as_array(n)) + } + return(Scalar$create(object)$as_array(n)) +} |