diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000 |
commit | e6918187568dbd01842d8d1d2c808ce16a894239 (patch) | |
tree | 64f88b554b444a49f656b6c656111a145cbbaa28 /src/arrow/r/R/dplyr-eval.R | |
parent | Initial commit. (diff) | |
download | ceph-b26c4052f3542036551aa9dec9caa4226e456195.tar.xz ceph-b26c4052f3542036551aa9dec9caa4226e456195.zip |
Adding upstream version 18.2.2.upstream/18.2.2
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/arrow/r/R/dplyr-eval.R')
-rw-r--r-- | src/arrow/r/R/dplyr-eval.R | 123 |
1 files changed, 123 insertions, 0 deletions
diff --git a/src/arrow/r/R/dplyr-eval.R b/src/arrow/r/R/dplyr-eval.R new file mode 100644 index 000000000..9d944ab80 --- /dev/null +++ b/src/arrow/r/R/dplyr-eval.R @@ -0,0 +1,123 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +arrow_eval <- function(expr, mask) { + # filter(), mutate(), etc. work by evaluating the quoted `exprs` to generate Expressions + # with references to Arrays (if .data is Table/RecordBatch) or Fields (if + # .data is a Dataset). + + # This yields an Expression as long as the `exprs` are implemented in Arrow. + # Otherwise, it returns a try-error + tryCatch(eval_tidy(expr, mask), error = function(e) { + # Look for the cases where bad input was given, i.e. this would fail + # in regular dplyr anyway, and let those raise those as errors; + # else, for things not supported by Arrow return a "try-error", + # which we'll handle differently + msg <- conditionMessage(e) + if (getOption("arrow.debug", FALSE)) print(msg) + patterns <- .cache$i18ized_error_pattern + if (is.null(patterns)) { + patterns <- i18ize_error_messages() + # Memoize it + .cache$i18ized_error_pattern <- patterns + } + if (grepl(patterns, msg)) { + stop(e) + } + + out <- structure(msg, class = "try-error", condition = e) + if (grepl("not supported.*Arrow", msg) || getOption("arrow.debug", FALSE)) { + # One of ours. Mark it so that consumers can handle it differently + class(out) <- c("arrow-try-error", class(out)) + } + invisible(out) + }) +} + +handle_arrow_not_supported <- function(err, lab) { + # Look for informative message from the Arrow function version (see above) + if (inherits(err, "arrow-try-error")) { + # Include it if found + paste0("In ", lab, ", ", as.character(err)) + } else { + # Otherwise be opaque (the original error is probably not useful) + paste("Expression", lab, "not supported in Arrow") + } +} + +i18ize_error_messages <- function() { + # Figure out what the error messages will be with this LANGUAGE + # so that we can look for them + out <- list( + obj = tryCatch(eval(parse(text = "X_____X")), error = function(e) conditionMessage(e)), + fun = tryCatch(eval(parse(text = "X_____X()")), error = function(e) conditionMessage(e)) + ) + paste(map(out, ~ sub("X_____X", ".*", .)), collapse = "|") +} + +# Helper to raise a common error +arrow_not_supported <- function(msg) { + # TODO: raise a classed error? + stop(paste(msg, "not supported by Arrow"), call. = FALSE) +} + +# Create a data mask for evaluating a dplyr expression +arrow_mask <- function(.data, aggregation = FALSE) { + f_env <- new_environment(.cache$functions) + + # Add functions that need to error hard and clear. + # Some R functions will still try to evaluate on an Expression + # and return NA with a warning + fail <- function(...) stop("Not implemented") + for (f in c("mean", "sd")) { + f_env[[f]] <- fail + } + + if (aggregation) { + # This should probably be done with an environment inside an environment + # but a first attempt at that had scoping problems (ARROW-13499) + for (f in names(agg_funcs)) { + f_env[[f]] <- agg_funcs[[f]] + } + } + + # Assign the schema to the expressions + map(.data$selected_columns, ~ (.$schema <- .data$.data$schema)) + + # Add the column references and make the mask + out <- new_data_mask( + new_environment(.data$selected_columns, parent = f_env), + f_env + ) + # Then insert the data pronoun + # TODO: figure out what rlang::as_data_pronoun does/why we should use it + # (because if we do we get `Error: Can't modify the data pronoun` in mutate()) + out$.data <- .data$selected_columns + out +} + +format_expr <- function(x) { + if (is_quosure(x)) { + x <- quo_get_expr(x) + } + out <- deparse(x) + if (length(out) > 1) { + # Add ellipses because we are going to truncate + out[1] <- paste0(out[1], "...") + } + head(out, 1) +} |