summaryrefslogtreecommitdiffstats
path: root/src/arrow/r/R/dplyr-eval.R
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/arrow/r/R/dplyr-eval.R123
1 files changed, 123 insertions, 0 deletions
diff --git a/src/arrow/r/R/dplyr-eval.R b/src/arrow/r/R/dplyr-eval.R
new file mode 100644
index 000000000..9d944ab80
--- /dev/null
+++ b/src/arrow/r/R/dplyr-eval.R
@@ -0,0 +1,123 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+arrow_eval <- function(expr, mask) {
+ # filter(), mutate(), etc. work by evaluating the quoted `exprs` to generate Expressions
+ # with references to Arrays (if .data is Table/RecordBatch) or Fields (if
+ # .data is a Dataset).
+
+ # This yields an Expression as long as the `exprs` are implemented in Arrow.
+ # Otherwise, it returns a try-error
+ tryCatch(eval_tidy(expr, mask), error = function(e) {
+ # Look for the cases where bad input was given, i.e. this would fail
+ # in regular dplyr anyway, and let those raise those as errors;
+ # else, for things not supported by Arrow return a "try-error",
+ # which we'll handle differently
+ msg <- conditionMessage(e)
+ if (getOption("arrow.debug", FALSE)) print(msg)
+ patterns <- .cache$i18ized_error_pattern
+ if (is.null(patterns)) {
+ patterns <- i18ize_error_messages()
+ # Memoize it
+ .cache$i18ized_error_pattern <- patterns
+ }
+ if (grepl(patterns, msg)) {
+ stop(e)
+ }
+
+ out <- structure(msg, class = "try-error", condition = e)
+ if (grepl("not supported.*Arrow", msg) || getOption("arrow.debug", FALSE)) {
+ # One of ours. Mark it so that consumers can handle it differently
+ class(out) <- c("arrow-try-error", class(out))
+ }
+ invisible(out)
+ })
+}
+
+handle_arrow_not_supported <- function(err, lab) {
+ # Look for informative message from the Arrow function version (see above)
+ if (inherits(err, "arrow-try-error")) {
+ # Include it if found
+ paste0("In ", lab, ", ", as.character(err))
+ } else {
+ # Otherwise be opaque (the original error is probably not useful)
+ paste("Expression", lab, "not supported in Arrow")
+ }
+}
+
+i18ize_error_messages <- function() {
+ # Figure out what the error messages will be with this LANGUAGE
+ # so that we can look for them
+ out <- list(
+ obj = tryCatch(eval(parse(text = "X_____X")), error = function(e) conditionMessage(e)),
+ fun = tryCatch(eval(parse(text = "X_____X()")), error = function(e) conditionMessage(e))
+ )
+ paste(map(out, ~ sub("X_____X", ".*", .)), collapse = "|")
+}
+
+# Helper to raise a common error
+arrow_not_supported <- function(msg) {
+ # TODO: raise a classed error?
+ stop(paste(msg, "not supported by Arrow"), call. = FALSE)
+}
+
+# Create a data mask for evaluating a dplyr expression
+arrow_mask <- function(.data, aggregation = FALSE) {
+ f_env <- new_environment(.cache$functions)
+
+ # Add functions that need to error hard and clear.
+ # Some R functions will still try to evaluate on an Expression
+ # and return NA with a warning
+ fail <- function(...) stop("Not implemented")
+ for (f in c("mean", "sd")) {
+ f_env[[f]] <- fail
+ }
+
+ if (aggregation) {
+ # This should probably be done with an environment inside an environment
+ # but a first attempt at that had scoping problems (ARROW-13499)
+ for (f in names(agg_funcs)) {
+ f_env[[f]] <- agg_funcs[[f]]
+ }
+ }
+
+ # Assign the schema to the expressions
+ map(.data$selected_columns, ~ (.$schema <- .data$.data$schema))
+
+ # Add the column references and make the mask
+ out <- new_data_mask(
+ new_environment(.data$selected_columns, parent = f_env),
+ f_env
+ )
+ # Then insert the data pronoun
+ # TODO: figure out what rlang::as_data_pronoun does/why we should use it
+ # (because if we do we get `Error: Can't modify the data pronoun` in mutate())
+ out$.data <- .data$selected_columns
+ out
+}
+
+format_expr <- function(x) {
+ if (is_quosure(x)) {
+ x <- quo_get_expr(x)
+ }
+ out <- deparse(x)
+ if (length(out) > 1) {
+ # Add ellipses because we are going to truncate
+ out[1] <- paste0(out[1], "...")
+ }
+ head(out, 1)
+}