1 files changed, 320 insertions, 0 deletions
diff --git a/src/arrow/r/tests/testthat/helper-expectation.R b/src/arrow/r/tests/testthat/helper-expectation.R
new file mode 100644
index 000000000..ef6142bb4
--- /dev/null
+++ b/src/arrow/r/tests/testthat/helper-expectation.R
@@ -0,0 +1,320 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+expect_as_vector <- function(x, y, ...) {
+  expect_equal(as.vector(x), y, ...)
+}
+
+expect_data_frame <- function(x, y, ...) {
+  expect_equal(as.data.frame(x), y, ...)
+}
+
+expect_r6_class <- function(object, class) {
+  expect_s3_class(object, class)
+  expect_s3_class(object, "R6")
+}
+
+#' Mask `testthat::expect_equal()` in order to compare ArrowObjects using their
+#' `Equals` methods from the C++ library.
+expect_equal <- function(object, expected, ignore_attr = FALSE, ..., info = NULL, label = NULL) {
+  if (inherits(object, "ArrowObject") && inherits(expected, "ArrowObject")) {
+    mc <- match.call()
+    expect_true(
+      all.equal(object, expected, check.attributes = !ignore_attr),
+      info = info,
+      label = paste(rlang::as_label(mc[["object"]]), "==", rlang::as_label(mc[["expected"]]))
+    )
+  } else {
+    testthat::expect_equal(object, expected, ignore_attr = ignore_attr, ..., info = info, label = label)
+  }
+}
+
+expect_type_equal <- function(object, expected, ...) {
+  if (is.Array(object)) {
+    object <- object$type
+  }
+  if (is.Array(expected)) {
+    expected <- expected$type
+  }
+  expect_equal(object, expected, ...)
+}
+
+expect_match_arg_error <- function(object, values = c()) {
+  expect_error(object, paste0("'arg' .*", paste(dQuote(values), collapse = ", ")))
+}
+
+expect_deprecated <- expect_warning
+
+verify_output <- function(...) {
+  if (isTRUE(grepl("conda", R.Version()$platform))) {
+    skip("On conda")
+  }
+  testthat::verify_output(...)
+}
+
+#' Ensure that dplyr methods on Arrow objects return the same as for data frames
+#'
+#' This function compares the output of running a dplyr expression on a tibble
+#' or data.frame object against the output of the same expression run on
+#' Arrow Table and RecordBatch objects.
+#'
+#'
+#' @param expr A dplyr pipeline which must have `.input` as its start
+#' @param tbl A tibble or data.frame which will be substituted for `.input`
+#' @param skip_record_batch The skip message to show (if you should skip the
+#' RecordBatch test)
+#' @param skip_table The skip message to show (if you should skip the Table test)
+#' @param warning The expected warning from the RecordBatch and Table comparison
+#'  paths, passed to `expect_warning()`. Special values:
+#'     * `NA` (the default) for ensuring no warning message
+#'     * `TRUE` is a special case to mean to check for the
+#'      "not supported in Arrow; pulling data into R" message.
+#' @param ... additional arguments, passed to `expect_equal()`
+compare_dplyr_binding <- function(expr,
+                                  tbl,
+                                  skip_record_batch = NULL,
+                                  skip_table = NULL,
+                                  warning = NA,
+                                  ...) {
+
+  # Quote the contents of `expr` so that we can evaluate it a few different ways
+  expr <- rlang::enquo(expr)
+  # Get the expected output by evaluating expr on the .input data.frame using regular dplyr
+  expected <- rlang::eval_tidy(expr, rlang::new_data_mask(rlang::env(.input = tbl)))
+
+  if (isTRUE(warning)) {
+    # Special-case the simple warning:
+    # TODO: ARROW-13362 pick one of in or by and use it everywhere
+    warning <- "not supported (in|by) Arrow; pulling data into R"
+  }
+
+  skip_msg <- NULL
+
+  # Evaluate `expr` on a RecordBatch object and compare with `expected`
+  if (is.null(skip_record_batch)) {
+    expect_warning(
+      via_batch <- rlang::eval_tidy(
+        expr,
+        rlang::new_data_mask(rlang::env(.input = record_batch(tbl)))
+      ),
+      warning
+    )
+    expect_equal(via_batch, expected, ...)
+  } else {
+    skip_msg <- c(skip_msg, skip_record_batch)
+  }
+
+  # Evaluate `expr` on a Table object and compare with `expected`
+  if (is.null(skip_table)) {
+    expect_warning(
+      via_table <- rlang::eval_tidy(
+        expr,
+        rlang::new_data_mask(rlang::env(.input = arrow_table(tbl)))
+      ),
+      warning
+    )
+    expect_equal(via_table, expected, ...)
+  } else {
+    skip_msg <- c(skip_msg, skip_table)
+  }
+
+  if (!is.null(skip_msg)) {
+    skip(paste(skip_msg, collapse = "\n"))
+  }
+}
+
+#' Assert that Arrow dplyr methods error in the same way as methods on data.frame
+#'
+#' Comparing the error message generated when running expressions on R objects
+#' against the error message generated by running the same expression on Arrow
+#' Tables and RecordBatches.
+#'
+#' @param expr A dplyr pipeline which must have `.input` as its start
+#' @param tbl A tibble or data.frame which will be substituted for `.input`
+#' @param ... additional arguments, passed to `expect_error()`
+compare_dplyr_error <- function(expr,
+                                tbl,
+                                ...) {
+  # ensure we have supplied tbl
+  force(tbl)
+
+  expr <- rlang::enquo(expr)
+  msg <- tryCatch(
+    rlang::eval_tidy(expr, rlang::new_data_mask(rlang::env(.input = tbl))),
+    error = function(e) {
+      msg <- conditionMessage(e)
+
+      # The error here is of the form:
+      #
+      # Problem with `filter()` .input `..1`.
+      # x object 'b_var' not found
+      # ℹ Input `..1` is `chr == b_var`.
+      #
+      # but what we really care about is the `x` block
+      # so (temporarily) let's pull those blocks out when we find them
+      pattern <- i18ize_error_messages()
+
+      if (grepl(pattern, msg)) {
+        msg <- sub(paste0("^.*(", pattern, ").*$"), "\\1", msg)
+      }
+      msg
+    }
+  )
+  # make sure msg is a character object (i.e. there has been an error)
+  # If it did not error, we would get a data.frame or whatever
+  # This expectation will tell us "dplyr on data.frame errored is not TRUE"
+  expect_true(identical(typeof(msg), "character"), label = "dplyr on data.frame errored")
+
+  expect_error(
+    rlang::eval_tidy(
+      expr,
+      rlang::new_data_mask(rlang::env(.input = record_batch(tbl)))
+    ),
+    msg,
+    ...
+  )
+  expect_error(
+    rlang::eval_tidy(
+      expr,
+      rlang::new_data_mask(rlang::env(.input = arrow_table(tbl)))
+    ),
+    msg,
+    ...
+  )
+}
+
+#' Comparing the output of running expressions on R vectors against the same
+#' expression run on Arrow Arrays and ChunkedArrays.
+#'
+#' @param expr A vectorized R expression which must have `.input` as its start
+#' @param vec A vector which will be substituted for `.input`
+#' @param skip_array The skip message to show (if you should skip the Array test)
+#' @param skip_chunked_array The skip message to show (if you should skip the ChunkedArray test)
+#' @param ignore_attr Ignore differences in specified attributes?
+#' @param ... additional arguments, passed to `expect_as_vector()`
+compare_expression <- function(expr,
+                           vec,
+                           skip_array = NULL,
+                           skip_chunked_array = NULL,
+                           ignore_attr = FALSE,
+                           ...) {
+  expr <- rlang::enquo(expr)
+  expected <- rlang::eval_tidy(expr, rlang::new_data_mask(rlang::env(.input = vec)))
+  skip_msg <- NULL
+
+  if (is.null(skip_array)) {
+    via_array <- rlang::eval_tidy(
+      expr,
+      rlang::new_data_mask(rlang::env(.input = Array$create(vec)))
+    )
+    expect_as_vector(via_array, expected, ignore_attr, ...)
+  } else {
+    skip_msg <- c(skip_msg, skip_array)
+  }
+
+  if (is.null(skip_chunked_array)) {
+    # split input vector into two to exercise ChunkedArray with >1 chunk
+    split_vector <- split_vector_as_list(vec)
+
+    via_chunked <- rlang::eval_tidy(
+      expr,
+      rlang::new_data_mask(rlang::env(.input = ChunkedArray$create(split_vector[[1]], split_vector[[2]])))
+    )
+    expect_as_vector(via_chunked, expected, ignore_attr, ...)
+  } else {
+    skip_msg <- c(skip_msg, skip_chunked_array)
+  }
+
+  if (!is.null(skip_msg)) {
+    skip(paste(skip_msg, collapse = "\n"))
+  }
+}
+
+#' Comparing the error message generated when running expressions on R objects
+#' against the error message generated by running the same expression on Arrow
+#' Arrays and ChunkedArrays.
+#'
+#' @param expr An R expression which must have `.input` as its start
+#' @param vec A vector which will be substituted for `.input`
+#' @param skip_array The skip message to show (if you should skip the Array test)
+#' @param skip_chunked_array The skip message to show (if you should skip the ChunkedArray test)
+#' @param ... additional arguments, passed to `expect_error()`
+compare_expression_error <- function(expr,
+                                 vec,
+                                 skip_array = NULL,
+                                 skip_chunked_array = NULL,
+                                 ...) {
+  expr <- rlang::enquo(expr)
+
+  msg <- tryCatch(
+    rlang::eval_tidy(expr, rlang::new_data_mask(rlang::env(.input = vec))),
+    error = function(e) {
+      msg <- conditionMessage(e)
+
+      pattern <- i18ize_error_messages()
+
+      if (grepl(pattern, msg)) {
+        msg <- sub(paste0("^.*(", pattern, ").*$"), "\\1", msg)
+      }
+      msg
+    }
+  )
+
+  expect_true(identical(typeof(msg), "character"), label = "vector errored")
+
+  skip_msg <- NULL
+
+  if (is.null(skip_array)) {
+    expect_error(
+      rlang::eval_tidy(
+        expr,
+        rlang::new_data_mask(rlang::env(.input = Array$create(vec)))
+      ),
+      msg,
+      ...
+    )
+  } else {
+    skip_msg <- c(skip_msg, skip_array)
+  }
+
+  if (is.null(skip_chunked_array)) {
+    # split input vector into two to exercise ChunkedArray with >1 chunk
+    split_vector <- split_vector_as_list(vec)
+
+    expect_error(
+      rlang::eval_tidy(
+        expr,
+        rlang::new_data_mask(rlang::env(.input = ChunkedArray$create(split_vector[[1]], split_vector[[2]])))
+      ),
+      msg,
+      ...
+    )
+  } else {
+    skip_msg <- c(skip_msg, skip_chunked_array)
+  }
+
+  if (!is.null(skip_msg)) {
+    skip(paste(skip_msg, collapse = "\n"))
+  }
+}
+
+split_vector_as_list <- function(vec) {
+  vec_split <- length(vec) %/% 2
+  vec1 <- vec[seq(from = min(1, length(vec) - 1), to = min(length(vec) - 1, vec_split), by = 1)]
+  vec2 <- vec[seq(from = min(length(vec), vec_split + 1), to = length(vec), by = 1)]
+  list(vec1, vec2)
+}