summaryrefslogtreecommitdiffstats
path: root/src/arrow/r/tests/testthat/helper-expectation.R
diff options
context:
space:
mode:
Diffstat (limited to 'src/arrow/r/tests/testthat/helper-expectation.R')
-rw-r--r--src/arrow/r/tests/testthat/helper-expectation.R320
1 files changed, 320 insertions, 0 deletions
diff --git a/src/arrow/r/tests/testthat/helper-expectation.R b/src/arrow/r/tests/testthat/helper-expectation.R
new file mode 100644
index 000000000..ef6142bb4
--- /dev/null
+++ b/src/arrow/r/tests/testthat/helper-expectation.R
@@ -0,0 +1,320 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+expect_as_vector <- function(x, y, ...) {
+ expect_equal(as.vector(x), y, ...)
+}
+
+expect_data_frame <- function(x, y, ...) {
+ expect_equal(as.data.frame(x), y, ...)
+}
+
+expect_r6_class <- function(object, class) {
+ expect_s3_class(object, class)
+ expect_s3_class(object, "R6")
+}
+
+#' Mask `testthat::expect_equal()` in order to compare ArrowObjects using their
+#' `Equals` methods from the C++ library.
+expect_equal <- function(object, expected, ignore_attr = FALSE, ..., info = NULL, label = NULL) {
+ if (inherits(object, "ArrowObject") && inherits(expected, "ArrowObject")) {
+ mc <- match.call()
+ expect_true(
+ all.equal(object, expected, check.attributes = !ignore_attr),
+ info = info,
+ label = paste(rlang::as_label(mc[["object"]]), "==", rlang::as_label(mc[["expected"]]))
+ )
+ } else {
+ testthat::expect_equal(object, expected, ignore_attr = ignore_attr, ..., info = info, label = label)
+ }
+}
+
+expect_type_equal <- function(object, expected, ...) {
+ if (is.Array(object)) {
+ object <- object$type
+ }
+ if (is.Array(expected)) {
+ expected <- expected$type
+ }
+ expect_equal(object, expected, ...)
+}
+
+expect_match_arg_error <- function(object, values = c()) {
+ expect_error(object, paste0("'arg' .*", paste(dQuote(values), collapse = ", ")))
+}
+
+expect_deprecated <- expect_warning
+
+verify_output <- function(...) {
+ if (isTRUE(grepl("conda", R.Version()$platform))) {
+ skip("On conda")
+ }
+ testthat::verify_output(...)
+}
+
+#' Ensure that dplyr methods on Arrow objects return the same as for data frames
+#'
+#' This function compares the output of running a dplyr expression on a tibble
+#' or data.frame object against the output of the same expression run on
+#' Arrow Table and RecordBatch objects.
+#'
+#'
+#' @param expr A dplyr pipeline which must have `.input` as its start
+#' @param tbl A tibble or data.frame which will be substituted for `.input`
+#' @param skip_record_batch The skip message to show (if you should skip the
+#' RecordBatch test)
+#' @param skip_table The skip message to show (if you should skip the Table test)
+#' @param warning The expected warning from the RecordBatch and Table comparison
+#' paths, passed to `expect_warning()`. Special values:
+#' * `NA` (the default) for ensuring no warning message
+#' * `TRUE` is a special case to mean to check for the
+#' "not supported in Arrow; pulling data into R" message.
+#' @param ... additional arguments, passed to `expect_equal()`
+compare_dplyr_binding <- function(expr,
+ tbl,
+ skip_record_batch = NULL,
+ skip_table = NULL,
+ warning = NA,
+ ...) {
+
+ # Quote the contents of `expr` so that we can evaluate it a few different ways
+ expr <- rlang::enquo(expr)
+ # Get the expected output by evaluating expr on the .input data.frame using regular dplyr
+ expected <- rlang::eval_tidy(expr, rlang::new_data_mask(rlang::env(.input = tbl)))
+
+ if (isTRUE(warning)) {
+ # Special-case the simple warning:
+ # TODO: ARROW-13362 pick one of in or by and use it everywhere
+ warning <- "not supported (in|by) Arrow; pulling data into R"
+ }
+
+ skip_msg <- NULL
+
+ # Evaluate `expr` on a RecordBatch object and compare with `expected`
+ if (is.null(skip_record_batch)) {
+ expect_warning(
+ via_batch <- rlang::eval_tidy(
+ expr,
+ rlang::new_data_mask(rlang::env(.input = record_batch(tbl)))
+ ),
+ warning
+ )
+ expect_equal(via_batch, expected, ...)
+ } else {
+ skip_msg <- c(skip_msg, skip_record_batch)
+ }
+
+ # Evaluate `expr` on a Table object and compare with `expected`
+ if (is.null(skip_table)) {
+ expect_warning(
+ via_table <- rlang::eval_tidy(
+ expr,
+ rlang::new_data_mask(rlang::env(.input = arrow_table(tbl)))
+ ),
+ warning
+ )
+ expect_equal(via_table, expected, ...)
+ } else {
+ skip_msg <- c(skip_msg, skip_table)
+ }
+
+ if (!is.null(skip_msg)) {
+ skip(paste(skip_msg, collapse = "\n"))
+ }
+}
+
+#' Assert that Arrow dplyr methods error in the same way as methods on data.frame
+#'
+#' Comparing the error message generated when running expressions on R objects
+#' against the error message generated by running the same expression on Arrow
+#' Tables and RecordBatches.
+#'
+#' @param expr A dplyr pipeline which must have `.input` as its start
+#' @param tbl A tibble or data.frame which will be substituted for `.input`
+#' @param ... additional arguments, passed to `expect_error()`
+compare_dplyr_error <- function(expr,
+ tbl,
+ ...) {
+ # ensure we have supplied tbl
+ force(tbl)
+
+ expr <- rlang::enquo(expr)
+ msg <- tryCatch(
+ rlang::eval_tidy(expr, rlang::new_data_mask(rlang::env(.input = tbl))),
+ error = function(e) {
+ msg <- conditionMessage(e)
+
+ # The error here is of the form:
+ #
+ # Problem with `filter()` .input `..1`.
+ # x object 'b_var' not found
+ # ℹ Input `..1` is `chr == b_var`.
+ #
+ # but what we really care about is the `x` block
+ # so (temporarily) let's pull those blocks out when we find them
+ pattern <- i18ize_error_messages()
+
+ if (grepl(pattern, msg)) {
+ msg <- sub(paste0("^.*(", pattern, ").*$"), "\\1", msg)
+ }
+ msg
+ }
+ )
+ # make sure msg is a character object (i.e. there has been an error)
+ # If it did not error, we would get a data.frame or whatever
+ # This expectation will tell us "dplyr on data.frame errored is not TRUE"
+ expect_true(identical(typeof(msg), "character"), label = "dplyr on data.frame errored")
+
+ expect_error(
+ rlang::eval_tidy(
+ expr,
+ rlang::new_data_mask(rlang::env(.input = record_batch(tbl)))
+ ),
+ msg,
+ ...
+ )
+ expect_error(
+ rlang::eval_tidy(
+ expr,
+ rlang::new_data_mask(rlang::env(.input = arrow_table(tbl)))
+ ),
+ msg,
+ ...
+ )
+}
+
+#' Comparing the output of running expressions on R vectors against the same
+#' expression run on Arrow Arrays and ChunkedArrays.
+#'
+#' @param expr A vectorized R expression which must have `.input` as its start
+#' @param vec A vector which will be substituted for `.input`
+#' @param skip_array The skip message to show (if you should skip the Array test)
+#' @param skip_chunked_array The skip message to show (if you should skip the ChunkedArray test)
+#' @param ignore_attr Ignore differences in specified attributes?
+#' @param ... additional arguments, passed to `expect_as_vector()`
+compare_expression <- function(expr,
+ vec,
+ skip_array = NULL,
+ skip_chunked_array = NULL,
+ ignore_attr = FALSE,
+ ...) {
+ expr <- rlang::enquo(expr)
+ expected <- rlang::eval_tidy(expr, rlang::new_data_mask(rlang::env(.input = vec)))
+ skip_msg <- NULL
+
+ if (is.null(skip_array)) {
+ via_array <- rlang::eval_tidy(
+ expr,
+ rlang::new_data_mask(rlang::env(.input = Array$create(vec)))
+ )
+ expect_as_vector(via_array, expected, ignore_attr, ...)
+ } else {
+ skip_msg <- c(skip_msg, skip_array)
+ }
+
+ if (is.null(skip_chunked_array)) {
+ # split input vector into two to exercise ChunkedArray with >1 chunk
+ split_vector <- split_vector_as_list(vec)
+
+ via_chunked <- rlang::eval_tidy(
+ expr,
+ rlang::new_data_mask(rlang::env(.input = ChunkedArray$create(split_vector[[1]], split_vector[[2]])))
+ )
+ expect_as_vector(via_chunked, expected, ignore_attr, ...)
+ } else {
+ skip_msg <- c(skip_msg, skip_chunked_array)
+ }
+
+ if (!is.null(skip_msg)) {
+ skip(paste(skip_msg, collapse = "\n"))
+ }
+}
+
+#' Comparing the error message generated when running expressions on R objects
+#' against the error message generated by running the same expression on Arrow
+#' Arrays and ChunkedArrays.
+#'
+#' @param expr An R expression which must have `.input` as its start
+#' @param vec A vector which will be substituted for `.input`
+#' @param skip_array The skip message to show (if you should skip the Array test)
+#' @param skip_chunked_array The skip message to show (if you should skip the ChunkedArray test)
+#' @param ... additional arguments, passed to `expect_error()`
+compare_expression_error <- function(expr,
+ vec,
+ skip_array = NULL,
+ skip_chunked_array = NULL,
+ ...) {
+ expr <- rlang::enquo(expr)
+
+ msg <- tryCatch(
+ rlang::eval_tidy(expr, rlang::new_data_mask(rlang::env(.input = vec))),
+ error = function(e) {
+ msg <- conditionMessage(e)
+
+ pattern <- i18ize_error_messages()
+
+ if (grepl(pattern, msg)) {
+ msg <- sub(paste0("^.*(", pattern, ").*$"), "\\1", msg)
+ }
+ msg
+ }
+ )
+
+ expect_true(identical(typeof(msg), "character"), label = "vector errored")
+
+ skip_msg <- NULL
+
+ if (is.null(skip_array)) {
+ expect_error(
+ rlang::eval_tidy(
+ expr,
+ rlang::new_data_mask(rlang::env(.input = Array$create(vec)))
+ ),
+ msg,
+ ...
+ )
+ } else {
+ skip_msg <- c(skip_msg, skip_array)
+ }
+
+ if (is.null(skip_chunked_array)) {
+ # split input vector into two to exercise ChunkedArray with >1 chunk
+ split_vector <- split_vector_as_list(vec)
+
+ expect_error(
+ rlang::eval_tidy(
+ expr,
+ rlang::new_data_mask(rlang::env(.input = ChunkedArray$create(split_vector[[1]], split_vector[[2]])))
+ ),
+ msg,
+ ...
+ )
+ } else {
+ skip_msg <- c(skip_msg, skip_chunked_array)
+ }
+
+ if (!is.null(skip_msg)) {
+ skip(paste(skip_msg, collapse = "\n"))
+ }
+}
+
+split_vector_as_list <- function(vec) {
+ vec_split <- length(vec) %/% 2
+ vec1 <- vec[seq(from = min(1, length(vec) - 1), to = min(length(vec) - 1, vec_split), by = 1)]
+ vec2 <- vec[seq(from = min(length(vec), vec_split + 1), to = length(vec), by = 1)]
+ list(vec1, vec2)
+}