1 files changed, 123 insertions, 0 deletions
diff --git a/src/arrow/r/R/dplyr-eval.R b/src/arrow/r/R/dplyr-eval.R
new file mode 100644
index 000000000..9d944ab80
--- /dev/null
+++ b/src/arrow/r/R/dplyr-eval.R
@@ -0,0 +1,123 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+arrow_eval <- function(expr, mask) {
+  # filter(), mutate(), etc. work by evaluating the quoted `exprs` to generate Expressions
+  # with references to Arrays (if .data is Table/RecordBatch) or Fields (if
+  # .data is a Dataset).
+
+  # This yields an Expression as long as the `exprs` are implemented in Arrow.
+  # Otherwise, it returns a try-error
+  tryCatch(eval_tidy(expr, mask), error = function(e) {
+    # Look for the cases where bad input was given, i.e. this would fail
+    # in regular dplyr anyway, and let those raise those as errors;
+    # else, for things not supported by Arrow return a "try-error",
+    # which we'll handle differently
+    msg <- conditionMessage(e)
+    if (getOption("arrow.debug", FALSE)) print(msg)
+    patterns <- .cache$i18ized_error_pattern
+    if (is.null(patterns)) {
+      patterns <- i18ize_error_messages()
+      # Memoize it
+      .cache$i18ized_error_pattern <- patterns
+    }
+    if (grepl(patterns, msg)) {
+      stop(e)
+    }
+
+    out <- structure(msg, class = "try-error", condition = e)
+    if (grepl("not supported.*Arrow", msg) || getOption("arrow.debug", FALSE)) {
+      # One of ours. Mark it so that consumers can handle it differently
+      class(out) <- c("arrow-try-error", class(out))
+    }
+    invisible(out)
+  })
+}
+
+handle_arrow_not_supported <- function(err, lab) {
+  # Look for informative message from the Arrow function version (see above)
+  if (inherits(err, "arrow-try-error")) {
+    # Include it if found
+    paste0("In ", lab, ", ", as.character(err))
+  } else {
+    # Otherwise be opaque (the original error is probably not useful)
+    paste("Expression", lab, "not supported in Arrow")
+  }
+}
+
+i18ize_error_messages <- function() {
+  # Figure out what the error messages will be with this LANGUAGE
+  # so that we can look for them
+  out <- list(
+    obj = tryCatch(eval(parse(text = "X_____X")), error = function(e) conditionMessage(e)),
+    fun = tryCatch(eval(parse(text = "X_____X()")), error = function(e) conditionMessage(e))
+  )
+  paste(map(out, ~ sub("X_____X", ".*", .)), collapse = "|")
+}
+
+# Helper to raise a common error
+arrow_not_supported <- function(msg) {
+  # TODO: raise a classed error?
+  stop(paste(msg, "not supported by Arrow"), call. = FALSE)
+}
+
+# Create a data mask for evaluating a dplyr expression
+arrow_mask <- function(.data, aggregation = FALSE) {
+  f_env <- new_environment(.cache$functions)
+
+  # Add functions that need to error hard and clear.
+  # Some R functions will still try to evaluate on an Expression
+  # and return NA with a warning
+  fail <- function(...) stop("Not implemented")
+  for (f in c("mean", "sd")) {
+    f_env[[f]] <- fail
+  }
+
+  if (aggregation) {
+    # This should probably be done with an environment inside an environment
+    # but a first attempt at that had scoping problems (ARROW-13499)
+    for (f in names(agg_funcs)) {
+      f_env[[f]] <- agg_funcs[[f]]
+    }
+  }
+
+  # Assign the schema to the expressions
+  map(.data$selected_columns, ~ (.$schema <- .data$.data$schema))
+
+  # Add the column references and make the mask
+  out <- new_data_mask(
+    new_environment(.data$selected_columns, parent = f_env),
+    f_env
+  )
+  # Then insert the data pronoun
+  # TODO: figure out what rlang::as_data_pronoun does/why we should use it
+  # (because if we do we get `Error: Can't modify the data pronoun` in mutate())
+  out$.data <- .data$selected_columns
+  out
+}
+
+format_expr <- function(x) {
+  if (is_quosure(x)) {
+    x <- quo_get_expr(x)
+  }
+  out <- deparse(x)
+  if (length(out) > 1) {
+    # Add ellipses because we are going to truncate
+    out[1] <- paste0(out[1], "...")
+  }
+  head(out, 1)
+}