Diffstat (limited to 'src/arrow/r/tests')
-rw-r--r--  src/arrow/r/tests/testthat.R | 27
-rw-r--r--  src/arrow/r/tests/testthat/golden-files/data-arrow-extra-meta_3.0.0.parquet | bin 0 -> 7862 bytes
-rw-r--r--  src/arrow/r/tests/testthat/golden-files/data-arrow_0.17.0_lz4.feather | bin 0 -> 1650 bytes
-rw-r--r--  src/arrow/r/tests/testthat/golden-files/data-arrow_0.17.0_uncompressed.feather | bin 0 -> 1354 bytes
-rw-r--r--  src/arrow/r/tests/testthat/golden-files/data-arrow_0.17.0_zstd.feather | bin 0 -> 1626 bytes
-rw-r--r--  src/arrow/r/tests/testthat/golden-files/data-arrow_1.0.1.parquet | bin 0 -> 3603 bytes
-rw-r--r--  src/arrow/r/tests/testthat/golden-files/data-arrow_1.0.1_lz4.feather | bin 0 -> 2858 bytes
-rw-r--r--  src/arrow/r/tests/testthat/golden-files/data-arrow_1.0.1_uncompressed.feather | bin 0 -> 2626 bytes
-rw-r--r--  src/arrow/r/tests/testthat/golden-files/data-arrow_1.0.1_zstd.feather | bin 0 -> 2842 bytes
-rw-r--r--  src/arrow/r/tests/testthat/golden-files/data-arrow_2.0.0.parquet | bin 0 -> 3965 bytes
-rw-r--r--  src/arrow/r/tests/testthat/golden-files/data-arrow_2.0.0_lz4.feather | bin 0 -> 3162 bytes
-rw-r--r--  src/arrow/r/tests/testthat/golden-files/data-arrow_2.0.0_uncompressed.feather | bin 0 -> 2930 bytes
-rw-r--r--  src/arrow/r/tests/testthat/golden-files/data-arrow_2.0.0_zstd.feather | bin 0 -> 3146 bytes
-rw-r--r--  src/arrow/r/tests/testthat/helper-arrow.R | 69
-rw-r--r--  src/arrow/r/tests/testthat/helper-data.R | 191
-rw-r--r--  src/arrow/r/tests/testthat/helper-expectation.R | 320
-rw-r--r--  src/arrow/r/tests/testthat/helper-parquet.R | 29
-rw-r--r--  src/arrow/r/tests/testthat/helper-roundtrip.R | 44
-rw-r--r--  src/arrow/r/tests/testthat/helper-skip.R | 81
-rw-r--r--  src/arrow/r/tests/testthat/latin1.R | 76
-rw-r--r--  src/arrow/r/tests/testthat/test-Array.R | 963
-rw-r--r--  src/arrow/r/tests/testthat/test-RecordBatch.R | 690
-rw-r--r--  src/arrow/r/tests/testthat/test-Table.R | 549
-rw-r--r--  src/arrow/r/tests/testthat/test-altrep.R | 243
-rw-r--r--  src/arrow/r/tests/testthat/test-array-data.R | 33
-rw-r--r--  src/arrow/r/tests/testthat/test-arrow-info.R | 23
-rw-r--r--  src/arrow/r/tests/testthat/test-arrow.R | 78
-rw-r--r--  src/arrow/r/tests/testthat/test-backwards-compatibility.R | 121
-rw-r--r--  src/arrow/r/tests/testthat/test-buffer-reader.R | 38
-rw-r--r--  src/arrow/r/tests/testthat/test-buffer.R | 97
-rw-r--r--  src/arrow/r/tests/testthat/test-chunked-array.R | 468
-rw-r--r--  src/arrow/r/tests/testthat/test-chunked-array.txt | 103
-rw-r--r--  src/arrow/r/tests/testthat/test-compressed.R | 73
-rw-r--r--  src/arrow/r/tests/testthat/test-compute-aggregate.R | 434
-rw-r--r--  src/arrow/r/tests/testthat/test-compute-arith.R | 129
-rw-r--r--  src/arrow/r/tests/testthat/test-compute-no-bindings.R | 201
-rw-r--r--  src/arrow/r/tests/testthat/test-compute-sort.R | 155
-rw-r--r--  src/arrow/r/tests/testthat/test-compute-vector.R | 133
-rw-r--r--  src/arrow/r/tests/testthat/test-csv.R | 357
-rw-r--r--  src/arrow/r/tests/testthat/test-data-type.R | 429
-rw-r--r--  src/arrow/r/tests/testthat/test-dataset-csv.R | 290
-rw-r--r--  src/arrow/r/tests/testthat/test-dataset-dplyr.R | 340
-rw-r--r--  src/arrow/r/tests/testthat/test-dataset-uri.R | 123
-rw-r--r--  src/arrow/r/tests/testthat/test-dataset-write.R | 454
-rw-r--r--  src/arrow/r/tests/testthat/test-dataset.R | 696
-rw-r--r--  src/arrow/r/tests/testthat/test-dplyr-arrange.R | 205
-rw-r--r--  src/arrow/r/tests/testthat/test-dplyr-collapse.R | 235
-rw-r--r--  src/arrow/r/tests/testthat/test-dplyr-count.R | 92
-rw-r--r--  src/arrow/r/tests/testthat/test-dplyr-distinct.R | 104
-rw-r--r--  src/arrow/r/tests/testthat/test-dplyr-filter.R | 412
-rw-r--r--  src/arrow/r/tests/testthat/test-dplyr-funcs-conditional.R | 409
-rw-r--r--  src/arrow/r/tests/testthat/test-dplyr-funcs-datetime.R | 304
-rw-r--r--  src/arrow/r/tests/testthat/test-dplyr-funcs-math.R | 309
-rw-r--r--  src/arrow/r/tests/testthat/test-dplyr-funcs-string.R | 1399
-rw-r--r--  src/arrow/r/tests/testthat/test-dplyr-funcs-type.R | 627
-rw-r--r--  src/arrow/r/tests/testthat/test-dplyr-group-by.R | 158
-rw-r--r--  src/arrow/r/tests/testthat/test-dplyr-join.R | 175
-rw-r--r--  src/arrow/r/tests/testthat/test-dplyr-mutate.R | 522
-rw-r--r--  src/arrow/r/tests/testthat/test-dplyr-query.R | 296
-rw-r--r--  src/arrow/r/tests/testthat/test-dplyr-select.R | 146
-rw-r--r--  src/arrow/r/tests/testthat/test-dplyr-summarize.R | 881
-rw-r--r--  src/arrow/r/tests/testthat/test-duckdb.R | 217
-rw-r--r--  src/arrow/r/tests/testthat/test-expression.R | 128
-rw-r--r--  src/arrow/r/tests/testthat/test-feather.R | 256
-rw-r--r--  src/arrow/r/tests/testthat/test-field.R | 67
-rw-r--r--  src/arrow/r/tests/testthat/test-filesystem.R | 178
-rw-r--r--  src/arrow/r/tests/testthat/test-install-arrow.R | 37
-rw-r--r--  src/arrow/r/tests/testthat/test-json.R | 255
-rw-r--r--  src/arrow/r/tests/testthat/test-memory-pool.R | 26
-rw-r--r--  src/arrow/r/tests/testthat/test-message-reader.R | 85
-rw-r--r--  src/arrow/r/tests/testthat/test-message.R | 63
-rw-r--r--  src/arrow/r/tests/testthat/test-metadata.R | 369
-rw-r--r--  src/arrow/r/tests/testthat/test-na-omit.R | 94
-rw-r--r--  src/arrow/r/tests/testthat/test-parquet.R | 274
-rw-r--r--  src/arrow/r/tests/testthat/test-python-flight.R | 62
-rw-r--r--  src/arrow/r/tests/testthat/test-python.R | 145
-rw-r--r--  src/arrow/r/tests/testthat/test-read-record-batch.R | 78
-rw-r--r--  src/arrow/r/tests/testthat/test-read-write.R | 125
-rw-r--r--  src/arrow/r/tests/testthat/test-record-batch-reader.R | 141
-rw-r--r--  src/arrow/r/tests/testthat/test-s3-minio.R | 228
-rw-r--r--  src/arrow/r/tests/testthat/test-s3.R | 55
-rw-r--r--  src/arrow/r/tests/testthat/test-scalar.R | 112
-rw-r--r--  src/arrow/r/tests/testthat/test-schema.R | 220
-rw-r--r--  src/arrow/r/tests/testthat/test-thread-pool.R | 33
-rw-r--r--  src/arrow/r/tests/testthat/test-type.R | 211
-rw-r--r--  src/arrow/r/tests/testthat/test-utf.R | 24
86 files changed, 17814 insertions, 0 deletions
diff --git a/src/arrow/r/tests/testthat.R b/src/arrow/r/tests/testthat.R
new file mode 100644
index 000000000..d0f5b1e0d
--- /dev/null
+++ b/src/arrow/r/tests/testthat.R
@@ -0,0 +1,27 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+library(testthat)
+library(arrow)
+library(tibble)
+
+if (identical(tolower(Sys.getenv("ARROW_R_DEV", "false")), "true")) {
+ arrow_reporter <- MultiReporter$new(list(CheckReporter$new(), LocationReporter$new()))
+} else {
+ arrow_reporter <- check_reporter()
+}
+test_check("arrow", reporter = arrow_reporter)
diff --git a/src/arrow/r/tests/testthat/golden-files/data-arrow-extra-meta_3.0.0.parquet b/src/arrow/r/tests/testthat/golden-files/data-arrow-extra-meta_3.0.0.parquet
new file mode 100644
index 000000000..3394be241
--- /dev/null
+++ b/src/arrow/r/tests/testthat/golden-files/data-arrow-extra-meta_3.0.0.parquet
Binary files differ
diff --git a/src/arrow/r/tests/testthat/golden-files/data-arrow_0.17.0_lz4.feather b/src/arrow/r/tests/testthat/golden-files/data-arrow_0.17.0_lz4.feather
new file mode 100644
index 000000000..d91acd0cc
--- /dev/null
+++ b/src/arrow/r/tests/testthat/golden-files/data-arrow_0.17.0_lz4.feather
Binary files differ
diff --git a/src/arrow/r/tests/testthat/golden-files/data-arrow_0.17.0_uncompressed.feather b/src/arrow/r/tests/testthat/golden-files/data-arrow_0.17.0_uncompressed.feather
new file mode 100644
index 000000000..0198024ec
--- /dev/null
+++ b/src/arrow/r/tests/testthat/golden-files/data-arrow_0.17.0_uncompressed.feather
Binary files differ
diff --git a/src/arrow/r/tests/testthat/golden-files/data-arrow_0.17.0_zstd.feather b/src/arrow/r/tests/testthat/golden-files/data-arrow_0.17.0_zstd.feather
new file mode 100644
index 000000000..f6788231c
--- /dev/null
+++ b/src/arrow/r/tests/testthat/golden-files/data-arrow_0.17.0_zstd.feather
Binary files differ
diff --git a/src/arrow/r/tests/testthat/golden-files/data-arrow_1.0.1.parquet b/src/arrow/r/tests/testthat/golden-files/data-arrow_1.0.1.parquet
new file mode 100644
index 000000000..e1d589bf0
--- /dev/null
+++ b/src/arrow/r/tests/testthat/golden-files/data-arrow_1.0.1.parquet
Binary files differ
diff --git a/src/arrow/r/tests/testthat/golden-files/data-arrow_1.0.1_lz4.feather b/src/arrow/r/tests/testthat/golden-files/data-arrow_1.0.1_lz4.feather
new file mode 100644
index 000000000..f3a71435a
--- /dev/null
+++ b/src/arrow/r/tests/testthat/golden-files/data-arrow_1.0.1_lz4.feather
Binary files differ
diff --git a/src/arrow/r/tests/testthat/golden-files/data-arrow_1.0.1_uncompressed.feather b/src/arrow/r/tests/testthat/golden-files/data-arrow_1.0.1_uncompressed.feather
new file mode 100644
index 000000000..1188ac669
--- /dev/null
+++ b/src/arrow/r/tests/testthat/golden-files/data-arrow_1.0.1_uncompressed.feather
Binary files differ
diff --git a/src/arrow/r/tests/testthat/golden-files/data-arrow_1.0.1_zstd.feather b/src/arrow/r/tests/testthat/golden-files/data-arrow_1.0.1_zstd.feather
new file mode 100644
index 000000000..056b26c17
--- /dev/null
+++ b/src/arrow/r/tests/testthat/golden-files/data-arrow_1.0.1_zstd.feather
Binary files differ
diff --git a/src/arrow/r/tests/testthat/golden-files/data-arrow_2.0.0.parquet b/src/arrow/r/tests/testthat/golden-files/data-arrow_2.0.0.parquet
new file mode 100644
index 000000000..6c5911560
--- /dev/null
+++ b/src/arrow/r/tests/testthat/golden-files/data-arrow_2.0.0.parquet
Binary files differ
diff --git a/src/arrow/r/tests/testthat/golden-files/data-arrow_2.0.0_lz4.feather b/src/arrow/r/tests/testthat/golden-files/data-arrow_2.0.0_lz4.feather
new file mode 100644
index 000000000..b65da7234
--- /dev/null
+++ b/src/arrow/r/tests/testthat/golden-files/data-arrow_2.0.0_lz4.feather
Binary files differ
diff --git a/src/arrow/r/tests/testthat/golden-files/data-arrow_2.0.0_uncompressed.feather b/src/arrow/r/tests/testthat/golden-files/data-arrow_2.0.0_uncompressed.feather
new file mode 100644
index 000000000..508903cb4
--- /dev/null
+++ b/src/arrow/r/tests/testthat/golden-files/data-arrow_2.0.0_uncompressed.feather
Binary files differ
diff --git a/src/arrow/r/tests/testthat/golden-files/data-arrow_2.0.0_zstd.feather b/src/arrow/r/tests/testthat/golden-files/data-arrow_2.0.0_zstd.feather
new file mode 100644
index 000000000..39c829fda
--- /dev/null
+++ b/src/arrow/r/tests/testthat/golden-files/data-arrow_2.0.0_zstd.feather
Binary files differ
diff --git a/src/arrow/r/tests/testthat/helper-arrow.R b/src/arrow/r/tests/testthat/helper-arrow.R
new file mode 100644
index 000000000..545f2d044
--- /dev/null
+++ b/src/arrow/r/tests/testthat/helper-arrow.R
@@ -0,0 +1,69 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Wrap testthat::test_that with a check for the C++ library
+options(..skip.tests = !arrow:::arrow_available())
+
+set.seed(1)
+
+MAX_INT <- 2147483647L
+
+# Make sure this is unset
+Sys.setenv(ARROW_PRE_0_15_IPC_FORMAT = "")
+
+# use the C locale for string collation (ARROW-12046)
+Sys.setlocale("LC_COLLATE", "C")
+
+# Set English language so that error messages aren't internationalized
+# (R CMD check does this, but in case you're running outside of check)
+Sys.setenv(LANGUAGE = "en")
+
+with_language <- function(lang, expr) {
+ old <- Sys.getenv("LANGUAGE")
+ # Check what this message is before changing languages; this will
+ # trigger caching the transations if the OS does that (some do).
+ # If the OS does cache, then we can't test changing languages safely.
+ before <- i18ize_error_messages()
+ Sys.setenv(LANGUAGE = lang)
+ on.exit({
+ Sys.setenv(LANGUAGE = old)
+ .cache$i18ized_error_pattern <<- NULL
+ })
+ if (!identical(before, i18ize_error_messages())) {
+ skip(paste("This OS either does not support changing languages to", lang, "or it caches translations"))
+ }
+ force(expr)
+}
+
+test_that <- function(what, code) {
+ testthat::test_that(what, {
+ skip_if(getOption("..skip.tests", TRUE), "arrow C++ library not available")
+ code
+ })
+}
+
+# Wrapper to run tests that only touch R code even when the C++ library isn't
+# available (so that at least some tests are run on those platforms)
+r_only <- function(code) {
+ withr::with_options(list(..skip.tests = FALSE), code)
+}
+
+make_temp_dir <- function() {
+ path <- tempfile()
+ dir.create(path)
+ normalizePath(path, winslash = "/")
+}
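
A usage sketch for the wrappers above (hypothetical test bodies; `example_data` is defined in helper-data.R, which follows):

    # The masked test_that() skips itself when the Arrow C++ library is unavailable
    test_that("Tables roundtrip through data frames", {
      expect_identical(as.data.frame(arrow::arrow_table(example_data)), example_data)
    })

    # r_only() forces a pure-R test to run even on builds without libarrow
    r_only(
      test_that("make_temp_dir() returns an existing directory", {
        expect_true(dir.exists(make_temp_dir()))
      })
    )
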
diff --git a/src/arrow/r/tests/testthat/helper-data.R b/src/arrow/r/tests/testthat/helper-data.R
new file mode 100644
index 000000000..c693e84b2
--- /dev/null
+++ b/src/arrow/r/tests/testthat/helper-data.R
@@ -0,0 +1,191 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+example_data <- tibble::tibble(
+ int = c(1:3, NA_integer_, 5:10),
+ dbl = c(1:8, NA, 10) + .1,
+ dbl2 = rep(5, 10),
+ lgl = sample(c(TRUE, FALSE, NA), 10, replace = TRUE),
+ false = logical(10),
+ chr = letters[c(1:5, NA, 7:10)],
+ fct = factor(letters[c(1:4, NA, NA, 7:10)])
+)
+
+example_with_metadata <- tibble::tibble(
+ a = structure("one", class = "special_string"),
+ b = 2,
+ c = tibble::tibble(
+ c1 = structure("inner", extra_attr = "something"),
+ c2 = 4,
+ c3 = 50
+ ),
+ d = "four"
+)
+
+attr(example_with_metadata, "top_level") <- list(
+ field_one = 12,
+ field_two = "more stuff"
+)
+
+haven_data <- tibble::tibble(
+ num = structure(c(5.1, 4.9),
+ format.spss = "F8.2"
+ ),
+ cat_int = structure(c(3, 1),
+ format.spss = "F8.0",
+ labels = c(first = 1, second = 2, third = 3),
+ class = c("haven_labelled", "vctrs_vctr", "double")
+ ),
+ cat_chr = structure(c("B", "B"),
+ labels = c(Alpha = "A", Beta = "B"),
+ class = c("haven_labelled", "vctrs_vctr", "character")
+ )
+)
+
+example_with_times <- tibble::tibble(
+ date = Sys.Date() + 1:10,
+ posixct = lubridate::ymd_hms("2018-10-07 19:04:05") + 1:10,
+ posixct_tz = lubridate::ymd_hms("2018-10-07 19:04:05", tz = "US/Eastern") + 1:10,
+ posixlt = as.POSIXlt(lubridate::ymd_hms("2018-10-07 19:04:05") + 1:10),
+ posixlt_tz = as.POSIXlt(lubridate::ymd_hms("2018-10-07 19:04:05", tz = "US/Eastern") + 1:10)
+)
+
+verses <- list(
+ # Since we tend to test with dataframes with 10 rows, here are verses from
+ # "Milonga del moro judío", by Jorge Drexler. They are décimas, 10-line
+ # poems with a particular meter and rhyme scheme.
+ # (They also have non-ASCII characters, which is nice for testing)
+ c(
+ "Por cada muro, un lamento",
+ "En Jerusalén la dorada",
+ "Y mil vidas malgastadas",
+ "Por cada mandamiento",
+ "Yo soy polvo de tu viento",
+ "Y aunque sangro de tu herida",
+ "Y cada piedra querida",
+ "Guarda mi amor más profundo",
+ "No hay una piedra en el mundo",
+ "Que valga lo que una vida"
+ ),
+ c(
+ "No hay muerto que no me duela",
+ "No hay un bando ganador",
+ "No hay nada más que dolor",
+ "Y otra vida que se vuela",
+ "La guerra es muy mala escuela",
+ "No importa el disfraz que viste",
+ "Perdonen que no me aliste",
+ "Bajo ninguna bandera",
+ "Vale más cualquier quimera",
+ "Que un trozo de tela triste"
+ ),
+ c(
+ "Y a nadie le di permiso",
+ "Para matar en mi nombre",
+ "Un hombre no es más que un hombre",
+ "Y si hay Dios, así lo quiso",
+ "El mismo suelo que piso",
+ "Seguirá, yo me habré ido",
+ "Rumbo también del olvido",
+ "No hay doctrina que no vaya",
+ "Y no hay pueblo que no se haya",
+ "Creído el pueblo elegido"
+ )
+)
+
+make_big_string <- function() {
+ # This creates a character vector that would exceed the capacity of BinaryArray
+ rep(purrr::map_chr(2047:2050, ~ paste(sample(letters, ., replace = TRUE), collapse = "")), 2^18)
+}
+
+make_random_string_of_size <- function(size = 1) {
+ purrr::map_chr(1000 * size, ~ paste(sample(letters, ., replace = TRUE), collapse = ""))
+}
+
+make_string_of_size <- function(size = 1) {
+ paste(rep(letters, length.out = 1000 * size), collapse = "")
+}
+
+example_with_extra_metadata <- example_with_metadata
+attributes(example_with_extra_metadata$b) <- list(lots = rep(make_string_of_size(1), 100))
+
+example_with_logical_factors <- tibble::tibble(
+ starting_a_fight = factor(c(FALSE, TRUE, TRUE, TRUE)),
+ consoling_a_child = factor(c(TRUE, FALSE, TRUE, TRUE)),
+ petting_a_dog = factor(c(TRUE, TRUE, FALSE, TRUE)),
+ saying = c(
+ "shhhhh, it's ok",
+ "you wanna go outside?",
+ "you want your mommy?",
+ "hey buddy"
+ )
+)
+
+# The values in each column of this tibble are in ascending order. There are
+# some ties, so tests should use two or more columns to ensure deterministic
+# sort order. The Arrow C++ library orders strings lexicographically as byte
+# strings. The order of a string array sorted by Arrow will not match the order
+# of an equivalent character vector sorted by R unless you set the R collation
+# locale to "C" by running: Sys.setlocale("LC_COLLATE", "C")
+# These test scripts set that, but if you are running individual tests you might
+# need to set it manually. When finished, you can restore the default
+# collation locale by running: Sys.setlocale("LC_COLLATE")
+# In the future, the string collation locale used by the Arrow C++ library might
+# be configurable (ARROW-12046).
+example_data_for_sorting <- tibble::tibble(
+ int = c(-.Machine$integer.max, -101L, -100L, 0L, 0L, 1L, 100L, 1000L, .Machine$integer.max, NA_integer_),
+ dbl = c(
+ -Inf, -.Machine$double.xmax, -.Machine$double.xmin, 0, .Machine$double.xmin,
+ pi, .Machine$double.xmax, Inf, NaN, NA_real_
+ ),
+ chr = c("", "", "\"", "&", "ABC", "NULL", "a", "abc", "zzz", NA_character_),
+ lgl = c(rep(FALSE, 4L), rep(TRUE, 5L), NA),
+ dttm = lubridate::ymd_hms(c(
+ "0000-01-01 00:00:00",
+ "1919-05-29 13:08:55",
+ "1955-06-20 04:10:42",
+ "1973-06-30 11:38:41",
+ "1987-03-29 12:49:47",
+ "1991-06-11 19:07:01",
+ NA_character_,
+ "2017-08-21 18:26:40",
+ "2017-08-21 18:26:40",
+ "9999-12-31 23:59:59"
+ )),
+ grp = c(rep("A", 5), rep("B", 5))
+)
+
+# For Dataset tests
+first_date <- lubridate::ymd_hms("2015-04-29 03:12:39")
+df1 <- tibble::tibble(
+ int = 1:10,
+ dbl = as.numeric(1:10),
+ lgl = rep(c(TRUE, FALSE, NA, TRUE, FALSE), 2),
+ chr = letters[1:10],
+ fct = factor(LETTERS[1:10]),
+ ts = first_date + lubridate::days(1:10)
+)
+
+second_date <- lubridate::ymd_hms("2017-03-09 07:01:02")
+df2 <- tibble::tibble(
+ int = 101:110,
+ dbl = c(as.numeric(51:59), NaN),
+ lgl = rep(c(TRUE, FALSE, NA, TRUE, FALSE), 2),
+ chr = letters[10:1],
+ fct = factor(LETTERS[10:1]),
+ ts = second_date + lubridate::days(10:1)
+)
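
An illustration of the collation caveat documented above, using base R only (the "en_US.UTF-8" locale name is an assumption and varies by OS):

    old <- Sys.setlocale("LC_COLLATE", "C")
    sort(c("a", "B"))  # "B" "a" -- byte order, matching how Arrow sorts strings
    Sys.setlocale("LC_COLLATE", "en_US.UTF-8")
    sort(c("a", "B"))  # "a" "B" -- natural-language order, differing from Arrow
    Sys.setlocale("LC_COLLATE", old)  # restore the previous collation locale
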
diff --git a/src/arrow/r/tests/testthat/helper-expectation.R b/src/arrow/r/tests/testthat/helper-expectation.R
new file mode 100644
index 000000000..ef6142bb4
--- /dev/null
+++ b/src/arrow/r/tests/testthat/helper-expectation.R
@@ -0,0 +1,320 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+expect_as_vector <- function(x, y, ...) {
+ expect_equal(as.vector(x), y, ...)
+}
+
+expect_data_frame <- function(x, y, ...) {
+ expect_equal(as.data.frame(x), y, ...)
+}
+
+expect_r6_class <- function(object, class) {
+ expect_s3_class(object, class)
+ expect_s3_class(object, "R6")
+}
+
+#' Mask `testthat::expect_equal()` in order to compare ArrowObjects using their
+#' `Equals` methods from the C++ library.
+expect_equal <- function(object, expected, ignore_attr = FALSE, ..., info = NULL, label = NULL) {
+ if (inherits(object, "ArrowObject") && inherits(expected, "ArrowObject")) {
+ mc <- match.call()
+ expect_true(
+ all.equal(object, expected, check.attributes = !ignore_attr),
+ info = info,
+ label = paste(rlang::as_label(mc[["object"]]), "==", rlang::as_label(mc[["expected"]]))
+ )
+ } else {
+ testthat::expect_equal(object, expected, ignore_attr = ignore_attr, ..., info = info, label = label)
+ }
+}
+
+expect_type_equal <- function(object, expected, ...) {
+ if (is.Array(object)) {
+ object <- object$type
+ }
+ if (is.Array(expected)) {
+ expected <- expected$type
+ }
+ expect_equal(object, expected, ...)
+}
+
+expect_match_arg_error <- function(object, values = c()) {
+ expect_error(object, paste0("'arg' .*", paste(dQuote(values), collapse = ", ")))
+}
+
+expect_deprecated <- expect_warning
+
+verify_output <- function(...) {
+ if (isTRUE(grepl("conda", R.Version()$platform))) {
+ skip("On conda")
+ }
+ testthat::verify_output(...)
+}
+
+#' Ensure that dplyr methods on Arrow objects return the same as for data frames
+#'
+#' This function compares the output of running a dplyr expression on a tibble
+#' or data.frame object against the output of the same expression run on
+#' Arrow Table and RecordBatch objects.
+#'
+#'
+#' @param expr A dplyr pipeline which must have `.input` as its start
+#' @param tbl A tibble or data.frame which will be substituted for `.input`
+#' @param skip_record_batch The skip message to show (if you should skip the
+#' RecordBatch test)
+#' @param skip_table The skip message to show (if you should skip the Table test)
+#' @param warning The expected warning from the RecordBatch and Table comparison
+#' paths, passed to `expect_warning()`. Special values:
+#' * `NA` (the default) for ensuring no warning message
+#' * `TRUE` is a special case to mean to check for the
+#' "not supported in Arrow; pulling data into R" message.
+#' @param ... additional arguments, passed to `expect_equal()`
+compare_dplyr_binding <- function(expr,
+ tbl,
+ skip_record_batch = NULL,
+ skip_table = NULL,
+ warning = NA,
+ ...) {
+
+ # Quote the contents of `expr` so that we can evaluate it a few different ways
+ expr <- rlang::enquo(expr)
+ # Get the expected output by evaluating expr on the .input data.frame using regular dplyr
+ expected <- rlang::eval_tidy(expr, rlang::new_data_mask(rlang::env(.input = tbl)))
+
+ if (isTRUE(warning)) {
+ # Special-case the simple warning:
+ # TODO: ARROW-13362 pick one of in or by and use it everywhere
+ warning <- "not supported (in|by) Arrow; pulling data into R"
+ }
+
+ skip_msg <- NULL
+
+ # Evaluate `expr` on a RecordBatch object and compare with `expected`
+ if (is.null(skip_record_batch)) {
+ expect_warning(
+ via_batch <- rlang::eval_tidy(
+ expr,
+ rlang::new_data_mask(rlang::env(.input = record_batch(tbl)))
+ ),
+ warning
+ )
+ expect_equal(via_batch, expected, ...)
+ } else {
+ skip_msg <- c(skip_msg, skip_record_batch)
+ }
+
+ # Evaluate `expr` on a Table object and compare with `expected`
+ if (is.null(skip_table)) {
+ expect_warning(
+ via_table <- rlang::eval_tidy(
+ expr,
+ rlang::new_data_mask(rlang::env(.input = arrow_table(tbl)))
+ ),
+ warning
+ )
+ expect_equal(via_table, expected, ...)
+ } else {
+ skip_msg <- c(skip_msg, skip_table)
+ }
+
+ if (!is.null(skip_msg)) {
+ skip(paste(skip_msg, collapse = "\n"))
+ }
+}
+
+#' Assert that Arrow dplyr methods error in the same way as methods on data.frame
+#'
+#' Compares the error message generated when running an expression on R objects
+#' against the error message generated by running the same expression on Arrow
+#' Tables and RecordBatches.
+#'
+#' @param expr A dplyr pipeline which must have `.input` as its start
+#' @param tbl A tibble or data.frame which will be substituted for `.input`
+#' @param ... additional arguments, passed to `expect_error()`
+compare_dplyr_error <- function(expr,
+ tbl,
+ ...) {
+ # ensure we have supplied tbl
+ force(tbl)
+
+ expr <- rlang::enquo(expr)
+ msg <- tryCatch(
+ rlang::eval_tidy(expr, rlang::new_data_mask(rlang::env(.input = tbl))),
+ error = function(e) {
+ msg <- conditionMessage(e)
+
+ # The error here is of the form:
+ #
+ # Problem with `filter()` .input `..1`.
+ # x object 'b_var' not found
+ # ℹ Input `..1` is `chr == b_var`.
+ #
+ # but what we really care about is the `x` block
+ # so (temporarily) let's pull those blocks out when we find them
+ pattern <- i18ize_error_messages()
+
+ if (grepl(pattern, msg)) {
+ msg <- sub(paste0("^.*(", pattern, ").*$"), "\\1", msg)
+ }
+ msg
+ }
+ )
+ # make sure msg is a character object (i.e. there has been an error)
+ # If it did not error, we would get a data.frame or whatever
+ # This expectation will tell us "dplyr on data.frame errored is not TRUE"
+ expect_true(identical(typeof(msg), "character"), label = "dplyr on data.frame errored")
+
+ expect_error(
+ rlang::eval_tidy(
+ expr,
+ rlang::new_data_mask(rlang::env(.input = record_batch(tbl)))
+ ),
+ msg,
+ ...
+ )
+ expect_error(
+ rlang::eval_tidy(
+ expr,
+ rlang::new_data_mask(rlang::env(.input = arrow_table(tbl)))
+ ),
+ msg,
+ ...
+ )
+}
+
+#' Compares the output of running an expression on R vectors against the output
+#' of the same expression run on Arrow Arrays and ChunkedArrays.
+#'
+#' @param expr A vectorized R expression which must have `.input` as its start
+#' @param vec A vector which will be substituted for `.input`
+#' @param skip_array The skip message to show (if you should skip the Array test)
+#' @param skip_chunked_array The skip message to show (if you should skip the ChunkedArray test)
+#' @param ignore_attr Ignore differences in specified attributes?
+#' @param ... additional arguments, passed to `expect_as_vector()`
+compare_expression <- function(expr,
+ vec,
+ skip_array = NULL,
+ skip_chunked_array = NULL,
+ ignore_attr = FALSE,
+ ...) {
+ expr <- rlang::enquo(expr)
+ expected <- rlang::eval_tidy(expr, rlang::new_data_mask(rlang::env(.input = vec)))
+ skip_msg <- NULL
+
+ if (is.null(skip_array)) {
+ via_array <- rlang::eval_tidy(
+ expr,
+ rlang::new_data_mask(rlang::env(.input = Array$create(vec)))
+ )
+ expect_as_vector(via_array, expected, ignore_attr, ...)
+ } else {
+ skip_msg <- c(skip_msg, skip_array)
+ }
+
+ if (is.null(skip_chunked_array)) {
+ # split input vector into two to exercise ChunkedArray with >1 chunk
+ split_vector <- split_vector_as_list(vec)
+
+ via_chunked <- rlang::eval_tidy(
+ expr,
+ rlang::new_data_mask(rlang::env(.input = ChunkedArray$create(split_vector[[1]], split_vector[[2]])))
+ )
+ expect_as_vector(via_chunked, expected, ignore_attr, ...)
+ } else {
+ skip_msg <- c(skip_msg, skip_chunked_array)
+ }
+
+ if (!is.null(skip_msg)) {
+ skip(paste(skip_msg, collapse = "\n"))
+ }
+}
+
+#' Compares the error message generated when running an expression on R objects
+#' against the error message generated by running the same expression on Arrow
+#' Arrays and ChunkedArrays.
+#'
+#' @param expr An R expression which must have `.input` as its start
+#' @param vec A vector which will be substituted for `.input`
+#' @param skip_array The skip message to show (if you should skip the Array test)
+#' @param skip_chunked_array The skip message to show (if you should skip the ChunkedArray test)
+#' @param ... additional arguments, passed to `expect_error()`
+compare_expression_error <- function(expr,
+ vec,
+ skip_array = NULL,
+ skip_chunked_array = NULL,
+ ...) {
+ expr <- rlang::enquo(expr)
+
+ msg <- tryCatch(
+ rlang::eval_tidy(expr, rlang::new_data_mask(rlang::env(.input = vec))),
+ error = function(e) {
+ msg <- conditionMessage(e)
+
+ pattern <- i18ize_error_messages()
+
+ if (grepl(pattern, msg)) {
+ msg <- sub(paste0("^.*(", pattern, ").*$"), "\\1", msg)
+ }
+ msg
+ }
+ )
+
+ expect_true(identical(typeof(msg), "character"), label = "vector errored")
+
+ skip_msg <- NULL
+
+ if (is.null(skip_array)) {
+ expect_error(
+ rlang::eval_tidy(
+ expr,
+ rlang::new_data_mask(rlang::env(.input = Array$create(vec)))
+ ),
+ msg,
+ ...
+ )
+ } else {
+ skip_msg <- c(skip_msg, skip_array)
+ }
+
+ if (is.null(skip_chunked_array)) {
+ # split input vector into two to exercise ChunkedArray with >1 chunk
+ split_vector <- split_vector_as_list(vec)
+
+ expect_error(
+ rlang::eval_tidy(
+ expr,
+ rlang::new_data_mask(rlang::env(.input = ChunkedArray$create(split_vector[[1]], split_vector[[2]])))
+ ),
+ msg,
+ ...
+ )
+ } else {
+ skip_msg <- c(skip_msg, skip_chunked_array)
+ }
+
+ if (!is.null(skip_msg)) {
+ skip(paste(skip_msg, collapse = "\n"))
+ }
+}
+
+split_vector_as_list <- function(vec) {
+ vec_split <- length(vec) %/% 2
+ vec1 <- vec[seq(from = min(1, length(vec) - 1), to = min(length(vec) - 1, vec_split), by = 1)]
+ vec2 <- vec[seq(from = min(length(vec), vec_split + 1), to = length(vec), by = 1)]
+ list(vec1, vec2)
+}
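
Usage sketch for the comparison helpers above (hypothetical tests; assumes dplyr is attached and `example_data` from helper-data.R):

    test_that("mutate() matches dplyr on RecordBatches and Tables", {
      compare_dplyr_binding(
        .input %>% mutate(twice = int * 2L) %>% collect(),
        example_data
      )
    })

    test_that("arithmetic matches base R on Arrays and ChunkedArrays", {
      compare_expression(.input * 2L, c(1L, 5L, NA_integer_))
    })

    test_that("errors surface identically on Arrow objects", {
      compare_dplyr_error(.input %>% filter(chr == no_such_var), example_data)
    })
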
diff --git a/src/arrow/r/tests/testthat/helper-parquet.R b/src/arrow/r/tests/testthat/helper-parquet.R
new file mode 100644
index 000000000..a0dd445bb
--- /dev/null
+++ b/src/arrow/r/tests/testthat/helper-parquet.R
@@ -0,0 +1,29 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+expect_parquet_roundtrip <- function(tab, ...) {
+ expect_equal(parquet_roundtrip(tab, ...), tab)
+}
+
+parquet_roundtrip <- function(x, ..., as_data_frame = FALSE) {
+ # write/read parquet, returns Table
+ tf <- tempfile()
+ on.exit(unlink(tf))
+
+ write_parquet(x, tf, ...)
+ read_parquet(tf, as_data_frame = as_data_frame)
+}
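
Usage sketch (hypothetical; assumes an Arrow build with Parquet and Snappy support, gated with the skip helpers from helper-skip.R below):

    test_that("Tables survive a Parquet roundtrip", {
      skip_if_not_available("parquet")
      skip_if_not_available("snappy")
      tab <- arrow_table(x = 1:3, y = c("a", "b", "c"))
      expect_parquet_roundtrip(tab, compression = "snappy")
    })
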
diff --git a/src/arrow/r/tests/testthat/helper-roundtrip.R b/src/arrow/r/tests/testthat/helper-roundtrip.R
new file mode 100644
index 000000000..80bcb42f1
--- /dev/null
+++ b/src/arrow/r/tests/testthat/helper-roundtrip.R
@@ -0,0 +1,44 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+expect_array_roundtrip <- function(x, type, as = NULL) {
+ a <- Array$create(x, type = as)
+ expect_equal(a$type, type)
+ expect_identical(length(a), length(x))
+ if (!inherits(type, c("ListType", "LargeListType", "FixedSizeListType"))) {
+ # TODO: revisit how missingness works with ListArrays
+ # R list objects don't handle missingness the same way as other vectors.
+ # Is there some vctrs thing we should do on the roundtrip back to R?
+ expect_as_vector(is.na(a), is.na(x))
+ }
+ roundtrip <- as.vector(a)
+ expect_equal(roundtrip, x, ignore_attr = TRUE)
+ # Make sure the storage mode is the same on roundtrip (esp. integer vs. numeric)
+ expect_identical(typeof(roundtrip), typeof(x))
+
+ if (length(x)) {
+ a_sliced <- a$Slice(1)
+ x_sliced <- x[-1]
+ expect_equal(a_sliced$type, type)
+ expect_identical(length(a_sliced), length(x_sliced))
+ if (!inherits(type, c("ListType", "LargeListType", "FixedSizeListType"))) {
+ expect_as_vector(is.na(a_sliced), is.na(x_sliced))
+ }
+ expect_as_vector(a_sliced, x_sliced, ignore_attr = TRUE)
+ }
+ invisible(a)
+}
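
Usage sketch (hypothetical test):

    test_that("vectors roundtrip through Arrays", {
      expect_array_roundtrip(c(1L, 2L, NA_integer_), int32())
      # pass `as` to request a non-default Arrow type for the same R vector
      expect_array_roundtrip(c("x", "y"), large_utf8(), as = large_utf8())
    })
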
diff --git a/src/arrow/r/tests/testthat/helper-skip.R b/src/arrow/r/tests/testthat/helper-skip.R
new file mode 100644
index 000000000..4256ec4ab
--- /dev/null
+++ b/src/arrow/r/tests/testthat/helper-skip.R
@@ -0,0 +1,81 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+build_features <- c(
+ arrow_info()$capabilities,
+ # Special handling for "uncompressed", for tests that iterate over compressions
+ uncompressed = TRUE
+)
+
+skip_if_not_available <- function(feature) {
+ if (feature == "re2") {
+ # RE2 does not support valgrind (on purpose): https://github.com/google/re2/issues/177
+ skip_on_valgrind()
+ }
+
+ yes <- feature %in% names(build_features) && build_features[feature]
+ if (!yes) {
+ skip(paste("Arrow C++ not built with", feature))
+ }
+}
+
+skip_if_no_pyarrow <- function() {
+ skip_on_valgrind()
+ skip_on_os("windows")
+
+ skip_if_not_installed("reticulate")
+ if (!reticulate::py_module_available("pyarrow")) {
+ skip("pyarrow not available for testing")
+ }
+}
+
+skip_if_not_dev_mode <- function() {
+ skip_if_not(
+ identical(tolower(Sys.getenv("ARROW_R_DEV")), "true"),
+ "environment variable ARROW_R_DEV"
+ )
+}
+
+skip_if_not_running_large_memory_tests <- function() {
+ skip_if_not(
+ identical(tolower(Sys.getenv("ARROW_LARGE_MEMORY_TESTS")), "true"),
+ "environment variable ARROW_LARGE_MEMORY_TESTS"
+ )
+}
+
+skip_on_valgrind <- function() {
+  # This does not actually skip on valgrind because we can't reliably detect it.
+  # Instead, it skips on CRAN when the OS is Linux and the R version is a
+  # development version (which is where valgrind is run as of this code).
+ linux_dev <- identical(tolower(Sys.info()[["sysname"]]), "linux") &&
+ grepl("devel", R.version.string)
+
+ if (linux_dev) {
+ skip_on_cran()
+ }
+}
+
+skip_if_r_version <- function(r_version) {
+ if (getRversion() <= r_version) {
+ skip(paste("R version:", getRversion()))
+ }
+}
+
+process_is_running <- function(x) {
+ cmd <- sprintf("ps aux | grep '%s' | grep -v grep", x)
+ tryCatch(system(cmd, ignore.stdout = TRUE) == 0, error = function(e) FALSE)
+}
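
Usage sketch tying the skip helpers to a compression-dependent test (hypothetical; `example_data` comes from helper-data.R):

    test_that("zstd-compressed Feather files roundtrip", {
      skip_if_not_available("zstd")
      tf <- tempfile(fileext = ".feather")
      on.exit(unlink(tf))
      write_feather(example_data, tf, compression = "zstd")
      expect_equal(read_feather(tf), example_data)
    })
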
diff --git a/src/arrow/r/tests/testthat/latin1.R b/src/arrow/r/tests/testthat/latin1.R
new file mode 100644
index 000000000..150192d31
--- /dev/null
+++ b/src/arrow/r/tests/testthat/latin1.R
@@ -0,0 +1,76 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+x <- iconv("Veitingasta\u00f0ir", to = "latin1")
+df <- tibble::tibble(
+ chr = x,
+ fct = as.factor(x)
+)
+names(df) <- iconv(paste(x, names(df), sep = "_"), to = "latin1")
+df_struct <- tibble::tibble(a = df)
+
+raw_schema <- list(utf8(), dictionary(int8(), utf8()))
+names(raw_schema) <- names(df)
+
+# Confirm setup
+expect_identical(Encoding(x), "latin1")
+expect_identical(Encoding(names(df)), c("latin1", "latin1"))
+expect_identical(Encoding(df[[1]]), "latin1")
+expect_identical(Encoding(levels(df[[2]])), "latin1")
+
+# Array
+expect_identical(as.vector(Array$create(x)), x)
+# struct
+expect_identical(as.vector(Array$create(df)), df)
+
+# ChunkedArray
+expect_identical(as.vector(ChunkedArray$create(x)), x)
+# struct
+expect_identical(as.vector(ChunkedArray$create(df)), df)
+
+# Table (including field name)
+expect_identical(as.data.frame(Table$create(df)), df)
+expect_identical(as.data.frame(Table$create(df_struct)), df_struct)
+
+# RecordBatch
+expect_identical(as.data.frame(record_batch(df)), df)
+expect_identical(as.data.frame(record_batch(df_struct)), df_struct)
+
+# Schema field name
+df_schema <- do.call(schema, raw_schema)
+expect_identical(names(df_schema), names(df))
+
+df_struct_schema <- schema(a = do.call(struct, raw_schema))
+# StructType doesn't expose names (in C++)
+# expect_identical(names(df_struct_schema$a), names(df))
+
+# Create table/batch with schema
+expect_identical(as.data.frame(Table$create(df, schema = df_schema)), df)
+expect_identical(as.data.frame(Table$create(df_struct, schema = df_struct_schema)), df_struct)
+expect_identical(as.data.frame(record_batch(df, schema = df_schema)), df)
+expect_identical(as.data.frame(record_batch(df_struct, schema = df_struct_schema)), df_struct)
+
+# Serialization
+feather_file <- tempfile()
+write_feather(df_struct, feather_file)
+expect_identical(read_feather(feather_file), df_struct)
+
+if (arrow_with_parquet()) {
+ parquet_file <- tempfile()
+ write_parquet(df, parquet_file) # Parquet doesn't yet support nested types
+ expect_identical(read_parquet(parquet_file), df)
+}
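
The script above depends on R's per-string encoding marks; a minimal reproduction of the setup, using base R only ("\u00f0" is the latin1-representable character ð):

    x <- iconv("Veitingasta\u00f0ir", from = "UTF-8", to = "latin1")
    stopifnot(identical(Encoding(x), "latin1"))
    # re-encoding to UTF-8 keeps the same characters but changes the mark
    y <- enc2utf8(x)
    stopifnot(x == y, identical(Encoding(y), "UTF-8"))
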
diff --git a/src/arrow/r/tests/testthat/test-Array.R b/src/arrow/r/tests/testthat/test-Array.R
new file mode 100644
index 000000000..ce23c2609
--- /dev/null
+++ b/src/arrow/r/tests/testthat/test-Array.R
@@ -0,0 +1,963 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+test_that("Integer Array", {
+ ints <- c(1:10, 1:10, 1:5)
+ x <- expect_array_roundtrip(ints, int32())
+})
+
+test_that("binary Array", {
+ # if the type is given, we just need a list of raw vectors
+ bin <- list(as.raw(1:10), as.raw(1:10))
+ expect_array_roundtrip(bin, binary(), as = binary())
+ expect_array_roundtrip(bin, large_binary(), as = large_binary())
+ expect_array_roundtrip(bin, fixed_size_binary(10), as = fixed_size_binary(10))
+
+ bin[[1L]] <- as.raw(1:20)
+ expect_error(Array$create(bin, fixed_size_binary(10)))
+
+ # otherwise the arrow type is deduced from the R classes
+ bin <- vctrs::new_vctr(
+ list(as.raw(1:10), as.raw(11:20)),
+ class = "arrow_binary"
+ )
+ expect_array_roundtrip(bin, binary())
+
+ bin <- vctrs::new_vctr(
+ list(as.raw(1:10), as.raw(11:20)),
+ class = "arrow_large_binary"
+ )
+ expect_array_roundtrip(bin, large_binary())
+
+ bin <- vctrs::new_vctr(
+ list(as.raw(1:10), as.raw(11:20)),
+ class = "arrow_fixed_size_binary",
+ byte_width = 10L
+ )
+ expect_array_roundtrip(bin, fixed_size_binary(byte_width = 10))
+
+ # degenerate cases
+ skip_on_valgrind() # valgrind errors on these tests ARROW-12638
+ bin <- vctrs::new_vctr(
+ list(1:10),
+ class = "arrow_binary"
+ )
+ expect_error(Array$create(bin))
+
+ bin <- vctrs::new_vctr(
+ list(1:10),
+ ptype = raw(),
+ class = "arrow_large_binary"
+ )
+ expect_error(Array$create(bin))
+
+ bin <- vctrs::new_vctr(
+ list(1:10),
+ class = "arrow_fixed_size_binary",
+ byte_width = 10
+ )
+ expect_error(Array$create(bin))
+
+ bin <- vctrs::new_vctr(
+ list(as.raw(1:5)),
+ class = "arrow_fixed_size_binary",
+ byte_width = 10
+ )
+ expect_error(Array$create(bin))
+
+ bin <- vctrs::new_vctr(
+ list(as.raw(1:5)),
+ class = "arrow_fixed_size_binary"
+ )
+ expect_error(Array$create(bin))
+})
+
+test_that("Slice() and RangeEquals()", {
+ ints <- c(1:10, 101:110, 201:205)
+ x <- Array$create(ints)
+
+ y <- x$Slice(10)
+ expect_equal(y$type, int32())
+ expect_equal(length(y), 15L)
+ expect_as_vector(y, c(101:110, 201:205))
+ expect_true(x$RangeEquals(y, 10, 24))
+ expect_false(x$RangeEquals(y, 9, 23))
+ expect_false(x$RangeEquals(y, 11, 24))
+
+ z <- x$Slice(10, 5)
+ expect_as_vector(z, c(101:105))
+ expect_true(x$RangeEquals(z, 10, 15, 0))
+
+ # Input validation
+ expect_error(x$Slice("ten"))
+ expect_error(x$Slice(NA_integer_), "Slice 'offset' cannot be NA")
+ expect_error(x$Slice(NA), "Slice 'offset' cannot be NA")
+ expect_error(x$Slice(10, "ten"))
+ expect_error(x$Slice(10, NA_integer_), "Slice 'length' cannot be NA")
+ expect_error(x$Slice(NA_integer_, NA_integer_), "Slice 'offset' cannot be NA")
+ expect_error(x$Slice(c(10, 10)))
+ expect_error(x$Slice(10, c(10, 10)))
+ expect_error(x$Slice(1000), "Slice 'offset' greater than array length")
+ expect_error(x$Slice(-1), "Slice 'offset' cannot be negative")
+ expect_error(z$Slice(10, 10), "Slice 'offset' greater than array length")
+ expect_error(x$Slice(10, -1), "Slice 'length' cannot be negative")
+ expect_error(x$Slice(-1, 10), "Slice 'offset' cannot be negative")
+
+ expect_warning(x$Slice(10, 15), NA)
+ expect_warning(
+ overslice <- x$Slice(10, 16),
+ "Slice 'length' greater than available length"
+ )
+ expect_equal(length(overslice), 15)
+ expect_warning(z$Slice(2, 10), "Slice 'length' greater than available length")
+
+ expect_error(x$RangeEquals(10, 24, 0), 'other must be a "Array"')
+ expect_error(x$RangeEquals(y, NA, 24), "'start_idx' cannot be NA")
+ expect_error(x$RangeEquals(y, 10, NA), "'end_idx' cannot be NA")
+ expect_error(x$RangeEquals(y, 10, 24, NA), "'other_start_idx' cannot be NA")
+ expect_error(x$RangeEquals(y, "ten", 24))
+
+ skip("TODO: (if anyone uses RangeEquals)")
+ expect_error(x$RangeEquals(y, 10, 2400, 0)) # does not error
+ expect_error(x$RangeEquals(y, 1000, 24, 0)) # does not error
+ expect_error(x$RangeEquals(y, 10, 24, 1000)) # does not error
+})
+
+test_that("Double Array", {
+ dbls <- c(1, 2, 3, 4, 5, 6)
+ x_dbl <- expect_array_roundtrip(dbls, float64())
+})
+
+test_that("Array print method includes type", {
+ x <- Array$create(c(1:10, 1:10, 1:5))
+ expect_output(print(x), "Array\n<int32>\n[\n", fixed = TRUE)
+})
+
+test_that("Array supports NA", {
+ x_int <- Array$create(as.integer(c(1:10, NA)))
+ x_dbl <- Array$create(as.numeric(c(1:10, NA)))
+ expect_true(x_int$IsValid(0))
+ expect_true(x_dbl$IsValid(0L))
+ expect_true(x_int$IsNull(10L))
+ expect_true(x_dbl$IsNull(10))
+
+ expect_as_vector(is.na(x_int), c(rep(FALSE, 10), TRUE))
+ expect_as_vector(is.na(x_dbl), c(rep(FALSE, 10), TRUE))
+
+ # Input validation
+ expect_error(x_int$IsValid("ten"))
+ expect_error(x_int$IsNull("ten"))
+ expect_error(x_int$IsValid(c(10, 10)))
+ expect_error(x_int$IsNull(c(10, 10)))
+ expect_error(x_int$IsValid(NA), "'i' cannot be NA")
+ expect_error(x_int$IsNull(NA), "'i' cannot be NA")
+ expect_error(x_int$IsValid(1000), "subscript out of bounds")
+ expect_error(x_int$IsValid(-1), "subscript out of bounds")
+ expect_error(x_int$IsNull(1000), "subscript out of bounds")
+ expect_error(x_int$IsNull(-1), "subscript out of bounds")
+})
+
+test_that("Array support null type (ARROW-7064)", {
+ expect_array_roundtrip(vctrs::unspecified(10), null())
+})
+
+test_that("Array supports logical vectors (ARROW-3341)", {
+ # with NA
+ x <- sample(c(TRUE, FALSE, NA), 1000, replace = TRUE)
+ expect_array_roundtrip(x, bool())
+
+ # without NA
+ x <- sample(c(TRUE, FALSE), 1000, replace = TRUE)
+ expect_array_roundtrip(x, bool())
+})
+
+test_that("Array supports character vectors (ARROW-3339)", {
+ # without NA
+ expect_array_roundtrip(c("itsy", "bitsy", "spider"), utf8())
+ expect_array_roundtrip(c("itsy", "bitsy", "spider"), large_utf8(), as = large_utf8())
+
+ # with NA
+ expect_array_roundtrip(c("itsy", NA, "spider"), utf8())
+ expect_array_roundtrip(c("itsy", NA, "spider"), large_utf8(), as = large_utf8())
+})
+
+test_that("Character vectors > 2GB become large_utf8", {
+ skip_on_cran()
+ skip_if_not_running_large_memory_tests()
+ big <- make_big_string()
+ expect_array_roundtrip(big, large_utf8())
+})
+
+test_that("empty arrays are supported", {
+ expect_array_roundtrip(character(), utf8())
+ expect_array_roundtrip(character(), large_utf8(), as = large_utf8())
+ expect_array_roundtrip(integer(), int32())
+ expect_array_roundtrip(numeric(), float64())
+ expect_array_roundtrip(factor(character()), dictionary(int8(), utf8()))
+ expect_array_roundtrip(logical(), bool())
+})
+
+test_that("array with all nulls are supported", {
+ nas <- c(NA, NA)
+ expect_array_roundtrip(as.character(nas), utf8())
+ expect_array_roundtrip(as.integer(nas), int32())
+ expect_array_roundtrip(as.numeric(nas), float64())
+ expect_array_roundtrip(as.factor(nas), dictionary(int8(), utf8()))
+ expect_array_roundtrip(as.logical(nas), bool())
+})
+
+test_that("Array supports unordered factors (ARROW-3355)", {
+ # without NA
+ f <- factor(c("itsy", "bitsy", "spider", "spider"))
+ expect_array_roundtrip(f, dictionary(int8(), utf8()))
+
+ # with NA
+ f <- factor(c("itsy", "bitsy", NA, "spider", "spider"))
+ expect_array_roundtrip(f, dictionary(int8(), utf8()))
+})
+
+test_that("Array supports ordered factors (ARROW-3355)", {
+ # without NA
+ f <- ordered(c("itsy", "bitsy", "spider", "spider"))
+ arr_fac <- expect_array_roundtrip(f, dictionary(int8(), utf8(), ordered = TRUE))
+ expect_true(arr_fac$ordered)
+
+ # with NA
+ f <- ordered(c("itsy", "bitsy", NA, "spider", "spider"))
+ expect_array_roundtrip(f, dictionary(int8(), utf8(), ordered = TRUE))
+})
+
+test_that("array supports Date (ARROW-3340)", {
+ d <- Sys.Date() + 1:10
+ expect_array_roundtrip(d, date32())
+
+ d[5] <- NA
+ expect_array_roundtrip(d, date32())
+})
+
+test_that("array supports POSIXct (ARROW-3340)", {
+ times <- lubridate::ymd_hms("2018-10-07 19:04:05") + 1:10
+ expect_array_roundtrip(times, timestamp("us", "UTC"))
+
+ times[5] <- NA
+ expect_array_roundtrip(times, timestamp("us", "UTC"))
+
+ times2 <- lubridate::ymd_hms("2018-10-07 19:04:05", tz = "US/Eastern") + 1:10
+ expect_array_roundtrip(times2, timestamp("us", "US/Eastern"))
+})
+
+test_that("array supports POSIXct without timezone", {
+ # Make sure timezone is not set
+ withr::with_envvar(c(TZ = ""), {
+ times <- strptime("2019-02-03 12:34:56", format = "%Y-%m-%d %H:%M:%S") + 1:10
+ expect_array_roundtrip(times, timestamp("us", ""))
+
+ # Also test the INTSXP code path
+ skip("Ingest_POSIXct only implemented for REALSXP")
+ times_int <- as.integer(times)
+ attributes(times_int) <- attributes(times)
+ expect_array_roundtrip(times_int, timestamp("us", ""))
+ })
+})
+
+test_that("Timezone handling in Arrow roundtrip (ARROW-3543)", {
+ # Write a feather file as that's what the initial bug report used
+ df <- tibble::tibble(
+ no_tz = lubridate::ymd_hms("2018-10-07 19:04:05") + 1:10,
+ yes_tz = lubridate::ymd_hms("2018-10-07 19:04:05", tz = "Pacific/Marquesas") + 1:10
+ )
+ if (!identical(Sys.timezone(), "Pacific/Marquesas")) {
+ # Confirming that the columns are in fact different
+ expect_false(any(df$no_tz == df$yes_tz))
+ }
+ feather_file <- tempfile()
+ on.exit(unlink(feather_file))
+ write_feather(df, feather_file)
+ expect_identical(read_feather(feather_file), df)
+})
+
+test_that("array supports integer64", {
+ x <- bit64::as.integer64(1:10) + MAX_INT
+ expect_array_roundtrip(x, int64())
+
+ x[4] <- NA
+ expect_array_roundtrip(x, int64())
+
+ # all NA int64 (ARROW-3795)
+ all_na <- Array$create(bit64::as.integer64(NA))
+ expect_type_equal(all_na, int64())
+ expect_true(as.vector(is.na(all_na)))
+})
+
+test_that("array supports difftime", {
+ time <- hms::hms(56, 34, 12)
+ expect_array_roundtrip(c(time, time), time32("s"))
+ expect_array_roundtrip(vctrs::vec_c(NA, time), time32("s"))
+})
+
+test_that("support for NaN (ARROW-3615)", {
+ x <- c(1, NA, NaN, -1)
+ y <- Array$create(x)
+ expect_true(y$IsValid(2))
+ expect_equal(y$null_count, 1L)
+})
+
+test_that("is.nan() evalutes to FALSE on NA (for consistency with base R)", {
+ x <- c(1.0, NA, NaN, -1.0)
+ compare_expression(is.nan(.input), x)
+})
+
+test_that("is.nan() evalutes to FALSE on non-floats (for consistency with base R)", {
+ x <- c(1L, 2L, 3L)
+ y <- c("foo", "bar")
+ compare_expression(is.nan(.input), x)
+ compare_expression(is.nan(.input), y)
+})
+
+test_that("is.na() evalutes to TRUE on NaN (for consistency with base R)", {
+ x <- c(1, NA, NaN, -1)
+ compare_expression(is.na(.input), x)
+})
+
+test_that("integer types casts (ARROW-3741)", {
+ # Defining some type groups for use here and in the following tests
+ int_types <- c(int8(), int16(), int32(), int64())
+ uint_types <- c(uint8(), uint16(), uint32(), uint64())
+ float_types <- c(float32(), float64()) # float16() not really supported in C++ yet
+
+ a <- Array$create(c(1:10, NA))
+ for (type in c(int_types, uint_types)) {
+ casted <- a$cast(type)
+ expect_equal(casted$type, type)
+ expect_identical(as.vector(is.na(casted)), c(rep(FALSE, 10), TRUE))
+ }
+})
+
+test_that("integer types cast safety (ARROW-3741, ARROW-5541)", {
+ a <- Array$create(-(1:10))
+ for (type in uint_types) {
+ expect_error(a$cast(type), regexp = "Integer value -1 not in range")
+ expect_error(a$cast(type, safe = FALSE), NA)
+ }
+})
+
+test_that("float types casts (ARROW-3741)", {
+ x <- c(1, 2, 3, NA)
+ a <- Array$create(x)
+ for (type in float_types) {
+ casted <- a$cast(type)
+ expect_equal(casted$type, type)
+ expect_identical(as.vector(is.na(casted)), c(rep(FALSE, 3), TRUE))
+ expect_identical(as.vector(casted), x)
+ }
+})
+
+test_that("cast to half float works", {
+ skip("Need halffloat support: https://issues.apache.org/jira/browse/ARROW-3802")
+ a <- Array$create(1:4)
+ a_f16 <- a$cast(float16())
+  expect_type_equal(a_f16$type, float16())
+})
+
+test_that("cast input validation", {
+ a <- Array$create(1:4)
+ expect_error(a$cast("not a type"), "type must be a DataType, not character")
+})
+
+test_that("Array$create() supports the type= argument. conversion from INTSXP and int64 to all int types", {
+ num_int32 <- 12L
+ num_int64 <- bit64::as.integer64(10)
+
+ types <- c(
+ int_types,
+ uint_types,
+ float_types,
+    double() # not actually a type but a base R function; treated as an alias for float64
+ )
+ for (type in types) {
+ expect_type_equal(Array$create(num_int32, type = type)$type, as_type(type))
+ expect_type_equal(Array$create(num_int64, type = type)$type, as_type(type))
+ }
+
+ # Input validation
+ expect_error(
+ Array$create(5, type = "not a type"),
+ "type must be a DataType, not character"
+ )
+})
+
+test_that("Array$create() aborts on overflow", {
+ expect_error(Array$create(128L, type = int8()))
+ expect_error(Array$create(-129L, type = int8()))
+
+ expect_error(Array$create(256L, type = uint8()))
+ expect_error(Array$create(-1L, type = uint8()))
+
+ expect_error(Array$create(32768L, type = int16()))
+ expect_error(Array$create(-32769L, type = int16()))
+
+ expect_error(Array$create(65536L, type = uint16()))
+ expect_error(Array$create(-1L, type = uint16()))
+
+ expect_error(Array$create(65536L, type = uint16()))
+ expect_error(Array$create(-1L, type = uint16()))
+
+ expect_error(Array$create(bit64::as.integer64(2^31), type = int32()))
+ expect_error(Array$create(bit64::as.integer64(2^32), type = uint32()))
+})
+
+test_that("Array$create() does not convert doubles to integer", {
+ for (type in c(int_types, uint_types)) {
+ a <- Array$create(10, type = type)
+ expect_type_equal(a$type, type)
+ expect_true(as.vector(a) == 10L)
+ }
+})
+
+test_that("Array$create() converts raw vectors to uint8 arrays (ARROW-3794)", {
+ expect_type_equal(Array$create(as.raw(1:10))$type, uint8())
+})
+
+test_that("Array<int8>$as_vector() converts to integer (ARROW-3794)", {
+ i8 <- (-128):127
+ a <- Array$create(i8)$cast(int8())
+ expect_type_equal(a, int8())
+ expect_as_vector(a, i8)
+
+ u8 <- 0:255
+ a <- Array$create(u8)$cast(uint8())
+ expect_type_equal(a, uint8())
+ expect_as_vector(a, u8)
+})
+
+test_that("Arrays of {,u}int{32,64} convert to integer if they can fit", {
+ u32 <- Array$create(1L)$cast(uint32())
+ expect_identical(as.vector(u32), 1L)
+
+ u64 <- Array$create(1L)$cast(uint64())
+ expect_identical(as.vector(u64), 1L)
+
+ i64 <- Array$create(bit64::as.integer64(1:10))
+ expect_identical(as.vector(i64), 1:10)
+})
+
+test_that("Arrays of uint{32,64} convert to numeric if they can't fit integer", {
+ u32 <- Array$create(bit64::as.integer64(1) + MAX_INT)$cast(uint32())
+ expect_identical(as.vector(u32), 1 + MAX_INT)
+
+ u64 <- Array$create(bit64::as.integer64(1) + MAX_INT)$cast(uint64())
+ expect_identical(as.vector(u64), 1 + MAX_INT)
+})
+
+test_that("Array$create() recognise arrow::Array (ARROW-3815)", {
+ a <- Array$create(1:10)
+ expect_equal(a, Array$create(a))
+})
+
+test_that("Array$create() handles data frame -> struct arrays (ARROW-3811)", {
+ df <- tibble::tibble(x = 1:10, y = x / 2, z = letters[1:10])
+ a <- Array$create(df)
+ expect_type_equal(a$type, struct(x = int32(), y = float64(), z = utf8()))
+ expect_as_vector(a, df)
+
+ df <- structure(
+ list(col = structure(list(structure(list(list(structure(1))), class = "inner")), class = "outer")),
+ class = "data.frame", row.names = c(NA, -1L)
+ )
+ a <- Array$create(df)
+ expect_type_equal(a$type, struct(col = list_of(list_of(list_of(float64())))))
+ expect_as_vector(a, df, ignore_attr = TRUE)
+})
+
+test_that("StructArray methods", {
+ df <- tibble::tibble(x = 1:10, y = x / 2, z = letters[1:10])
+ a <- Array$create(df)
+ expect_equal(a$x, Array$create(df$x))
+ expect_equal(a[["x"]], Array$create(df$x))
+ expect_equal(a[[1]], Array$create(df$x))
+ expect_identical(names(a), c("x", "y", "z"))
+ expect_identical(dim(a), c(10L, 3L))
+})
+
+test_that("Array$create() can handle data frame with custom struct type (not inferred)", {
+ df <- tibble::tibble(x = 1:10, y = 1:10)
+ type <- struct(x = float64(), y = int16())
+ a <- Array$create(df, type = type)
+ expect_type_equal(a$type, type)
+
+ type <- struct(x = float64(), y = int16(), z = int32())
+ expect_error(
+ Array$create(df, type = type),
+ regexp = "Number of fields in struct.* incompatible with number of columns in the data frame"
+ )
+
+ type <- struct(y = int16(), x = float64())
+ expect_error(
+ Array$create(df, type = type),
+ regexp = "Field name in position.*does not match the name of the column of the data frame"
+ )
+
+ type <- struct(x = float64(), y = utf8())
+ expect_error(Array$create(df, type = type), regexp = "Invalid")
+})
+
+test_that("Array$create() supports tibble with no columns (ARROW-8354)", {
+ df <- tibble::tibble()
+ expect_equal(Array$create(df)$as_vector(), df)
+})
+
+test_that("Array$create() handles vector -> list arrays (ARROW-7662)", {
+ # Should be able to create an empty list with a type hint.
+ expect_r6_class(Array$create(list(), list_of(bool())), "ListArray")
+
+ # logical
+ expect_array_roundtrip(list(NA), list_of(bool()))
+ expect_array_roundtrip(list(logical(0)), list_of(bool()))
+ expect_array_roundtrip(list(c(TRUE), c(FALSE), c(FALSE, TRUE)), list_of(bool()))
+ expect_array_roundtrip(list(c(TRUE), c(FALSE), NA, logical(0), c(FALSE, NA, TRUE)), list_of(bool()))
+
+ # integer
+ expect_array_roundtrip(list(NA_integer_), list_of(int32()))
+ expect_array_roundtrip(list(integer(0)), list_of(int32()))
+ expect_array_roundtrip(list(1:2, 3:4, 12:18), list_of(int32()))
+ expect_array_roundtrip(list(c(1:2), NA_integer_, integer(0), c(12:18, NA_integer_)), list_of(int32()))
+
+ # numeric
+ expect_array_roundtrip(list(NA_real_), list_of(float64()))
+ expect_array_roundtrip(list(numeric(0)), list_of(float64()))
+ expect_array_roundtrip(list(1, c(2, 3), 4), list_of(float64()))
+ expect_array_roundtrip(list(1, numeric(0), c(2, 3, NA_real_), 4), list_of(float64()))
+
+ # character
+ expect_array_roundtrip(list(NA_character_), list_of(utf8()))
+ expect_array_roundtrip(list(character(0)), list_of(utf8()))
+ expect_array_roundtrip(list("itsy", c("bitsy", "spider"), c("is")), list_of(utf8()))
+ expect_array_roundtrip(list("itsy", character(0), c("bitsy", "spider", NA_character_), c("is")), list_of(utf8()))
+
+ # factor
+ expect_array_roundtrip(list(factor(c("b", "a"), levels = c("a", "b"))), list_of(dictionary(int8(), utf8())))
+ expect_array_roundtrip(list(factor(NA, levels = c("a", "b"))), list_of(dictionary(int8(), utf8())))
+
+ # struct
+ expect_array_roundtrip(
+ list(tibble::tibble(a = integer(0), b = integer(0), c = character(0), d = logical(0))),
+ list_of(struct(a = int32(), b = int32(), c = utf8(), d = bool()))
+ )
+ expect_array_roundtrip(
+ list(tibble::tibble(a = list(integer()))),
+ list_of(struct(a = list_of(int32())))
+ )
+ # degenerate data frame: column y is length 1 but the data frame claims 2 rows
+ df <- structure(list(x = 1:2, y = 1), class = "data.frame", row.names = 1:2)
+ expect_error(Array$create(list(df)))
+})
+
+test_that("Array$create() handles vector -> large list arrays", {
+ # Should be able to create an empty list with a type hint.
+ expect_r6_class(Array$create(list(), type = large_list_of(bool())), "LargeListArray")
+
+ # logical
+ expect_array_roundtrip(list(NA), large_list_of(bool()), as = large_list_of(bool()))
+ expect_array_roundtrip(list(logical(0)), large_list_of(bool()), as = large_list_of(bool()))
+ expect_array_roundtrip(list(c(TRUE), c(FALSE), c(FALSE, TRUE)), large_list_of(bool()), as = large_list_of(bool()))
+ expect_array_roundtrip(
+ list(c(TRUE), c(FALSE), NA, logical(0), c(FALSE, NA, TRUE)),
+ large_list_of(bool()),
+ as = large_list_of(bool())
+ )
+
+ # integer
+ expect_array_roundtrip(list(NA_integer_), large_list_of(int32()), as = large_list_of(int32()))
+ expect_array_roundtrip(list(integer(0)), large_list_of(int32()), as = large_list_of(int32()))
+ expect_array_roundtrip(list(1:2, 3:4, 12:18), large_list_of(int32()), as = large_list_of(int32()))
+ expect_array_roundtrip(
+ list(c(1:2), NA_integer_, integer(0), c(12:18, NA_integer_)),
+ large_list_of(int32()),
+ as = large_list_of(int32())
+ )
+
+ # numeric
+ expect_array_roundtrip(list(NA_real_), large_list_of(float64()), as = large_list_of(float64()))
+ expect_array_roundtrip(list(numeric(0)), large_list_of(float64()), as = large_list_of(float64()))
+ expect_array_roundtrip(list(1, c(2, 3), 4), large_list_of(float64()), as = large_list_of(float64()))
+ expect_array_roundtrip(
+ list(1, numeric(0), c(2, 3, NA_real_), 4),
+ large_list_of(float64()),
+ as = large_list_of(float64())
+ )
+
+ # character
+ expect_array_roundtrip(list(NA_character_), large_list_of(utf8()), as = large_list_of(utf8()))
+ expect_array_roundtrip(list(character(0)), large_list_of(utf8()), as = large_list_of(utf8()))
+ expect_array_roundtrip(
+ list("itsy", c("bitsy", "spider"), c("is")),
+ large_list_of(utf8()),
+ as = large_list_of(utf8())
+ )
+ expect_array_roundtrip(
+ list("itsy", character(0), c("bitsy", "spider", NA_character_), c("is")),
+ large_list_of(utf8()),
+ as = large_list_of(utf8())
+ )
+
+ # factor
+ expect_array_roundtrip(
+ list(factor(c("b", "a"), levels = c("a", "b"))),
+ large_list_of(dictionary(int8(), utf8())),
+ as = large_list_of(dictionary(int8(), utf8()))
+ )
+ expect_array_roundtrip(
+ list(factor(NA, levels = c("a", "b"))),
+ large_list_of(dictionary(int8(), utf8())),
+ as = large_list_of(dictionary(int8(), utf8()))
+ )
+
+ # struct
+ expect_array_roundtrip(
+ list(tibble::tibble(a = integer(0), b = integer(0), c = character(0), d = logical(0))),
+ large_list_of(struct(a = int32(), b = int32(), c = utf8(), d = bool())),
+ as = large_list_of(struct(a = int32(), b = int32(), c = utf8(), d = bool()))
+ )
+ expect_array_roundtrip(
+ list(tibble::tibble(a = list(integer()))),
+ large_list_of(struct(a = list_of(int32()))),
+ as = large_list_of(struct(a = list_of(int32())))
+ )
+})
+
+test_that("Array$create() handles vector -> fixed size list arrays", {
+ # Should be able to create an empty list with a type hint.
+ expect_r6_class(Array$create(list(), type = fixed_size_list_of(bool(), 20)), "FixedSizeListArray")
+
+ # logical
+ expect_array_roundtrip(list(NA), fixed_size_list_of(bool(), 1L), as = fixed_size_list_of(bool(), 1L))
+ expect_array_roundtrip(
+ list(c(TRUE, FALSE), c(FALSE, TRUE)),
+ fixed_size_list_of(bool(), 2L),
+ as = fixed_size_list_of(bool(), 2L)
+ )
+ expect_array_roundtrip(
+ list(c(TRUE), c(FALSE), NA),
+ fixed_size_list_of(bool(), 1L),
+ as = fixed_size_list_of(bool(), 1L)
+ )
+
+ # integer
+ expect_array_roundtrip(list(NA_integer_), fixed_size_list_of(int32(), 1L), as = fixed_size_list_of(int32(), 1L))
+ expect_array_roundtrip(list(1:2, 3:4, 11:12), fixed_size_list_of(int32(), 2L), as = fixed_size_list_of(int32(), 2L))
+ expect_array_roundtrip(
+ list(c(1:2), c(NA_integer_, 3L)),
+ fixed_size_list_of(int32(), 2L),
+ as = fixed_size_list_of(int32(), 2L)
+ )
+
+ # numeric
+ expect_array_roundtrip(list(NA_real_), fixed_size_list_of(float64(), 1L), as = fixed_size_list_of(float64(), 1L))
+ expect_array_roundtrip(
+ list(c(1, 2), c(2, 3)),
+ fixed_size_list_of(float64(), 2L),
+ as = fixed_size_list_of(float64(), 2L)
+ )
+ expect_array_roundtrip(
+ list(c(1, 2), c(NA_real_, 4)),
+ fixed_size_list_of(float64(), 2L),
+ as = fixed_size_list_of(float64(), 2L)
+ )
+
+ # character
+ expect_array_roundtrip(list(NA_character_), fixed_size_list_of(utf8(), 1L), as = fixed_size_list_of(utf8(), 1L))
+ expect_array_roundtrip(
+ list(c("itsy", "bitsy"), c("spider", "is"), c(NA_character_, NA_character_), c("", "")),
+ fixed_size_list_of(utf8(), 2L),
+ as = fixed_size_list_of(utf8(), 2L)
+ )
+
+ # factor
+ expect_array_roundtrip(
+ list(factor(c("b", "a"), levels = c("a", "b"))),
+ fixed_size_list_of(dictionary(int8(), utf8()), 2L),
+ as = fixed_size_list_of(dictionary(int8(), utf8()), 2L)
+ )
+
+ # struct
+ expect_array_roundtrip(
+ list(tibble::tibble(a = 1L, b = 1L, c = "", d = TRUE)),
+ fixed_size_list_of(struct(a = int32(), b = int32(), c = utf8(), d = bool()), 1L),
+ as = fixed_size_list_of(struct(a = int32(), b = int32(), c = utf8(), d = bool()), 1L)
+ )
+ expect_array_roundtrip(
+ list(tibble::tibble(a = list(1L))),
+ fixed_size_list_of(struct(a = list_of(int32())), 1L),
+ as = fixed_size_list_of(struct(a = list_of(int32())), 1L)
+ )
+ expect_array_roundtrip(
+ list(tibble::tibble(a = list(1L))),
+ list_of(struct(a = fixed_size_list_of(int32(), 1L))),
+ as = list_of(struct(a = fixed_size_list_of(int32(), 1L)))
+ )
+})
+
+test_that("Handling string data with embedded nuls", {
+ raws <- structure(list(
+ as.raw(c(0x70, 0x65, 0x72, 0x73, 0x6f, 0x6e)),
+ as.raw(c(0x77, 0x6f, 0x6d, 0x61, 0x6e)),
+ as.raw(c(0x6d, 0x61, 0x00, 0x6e)), # <-- there's your nul, 0x00
+ as.raw(c(0x66, 0x00, 0x00, 0x61, 0x00, 0x6e)), # multiple nuls
+ as.raw(c(0x63, 0x61, 0x6d, 0x65, 0x72, 0x61)),
+ as.raw(c(0x74, 0x76))
+ ),
+ class = c("arrow_binary", "vctrs_vctr", "list")
+ )
+ expect_error(
+ rawToChar(raws[[3]]),
+ "embedded nul in string: 'ma\\0n'", # See?
+ fixed = TRUE
+ )
+ array_with_nul <- Array$create(raws)$cast(utf8())
+
+ # The behavior of the warnings/errors is slightly different with and without
+ # altrep. Without it (i.e. on R 3.5.0 and below), the error triggers
+ # immediately on `as.vector()`, whereas with it, the error only happens on
+ # materialization.
+ skip_if_r_version("3.5.0")
+
+ # no error on conversion, because of altrep laziness
+ v <- expect_error(as.vector(array_with_nul), NA)
+
+ # attempting materialization -> error
+
+ expect_error(v[],
+ paste0(
+ "embedded nul in string: 'ma\\0n'; to strip nuls when converting from Arrow ",
+ "to R, set options(arrow.skip_nul = TRUE)"
+ ),
+ fixed = TRUE
+ )
+
+ # also error on materializing v[3]
+ expect_error(v[3],
+ paste0(
+ "embedded nul in string: 'ma\\0n'; to strip nuls when converting from Arrow ",
+ "to R, set options(arrow.skip_nul = TRUE)"
+ ),
+ fixed = TRUE
+ )
+
+ withr::with_options(list(arrow.skip_nul = TRUE), {
+ # no warning yet, because of altrep laziness
+ v <- as.vector(array_with_nul)
+
+ expect_warning(
+ expect_identical(
+ v[],
+ c("person", "woman", "man", "fan", "camera", "tv")
+ ),
+ "Stripping '\\0' (nul) from character vector",
+ fixed = TRUE
+ )
+
+ v <- as.vector(array_with_nul)
+ expect_warning(
+ expect_identical(v[3], "man"),
+ "Stripping '\\0' (nul) from character vector",
+ fixed = TRUE
+ )
+
+ v <- as.vector(array_with_nul)
+ expect_warning(
+ expect_identical(v[4], "fan"),
+ "Stripping '\\0' (nul) from character vector",
+ fixed = TRUE
+ )
+ })
+})
+
+test_that("Array$create() should have helpful error", {
+ expect_error(Array$create(list(numeric(0)), list_of(bool())), "Expecting a logical vector")
+
+ lgl <- logical(0)
+ int <- integer(0)
+ num <- numeric(0)
+ char <- character(0)
+ expect_error(Array$create(list()), "Requires at least one element to infer")
+ expect_error(Array$create(list(lgl, lgl, int)), "Expecting a logical vector")
+ expect_error(Array$create(list(char, num, char)), "Expecting a character vector")
+})
+
+test_that("Array$View() (ARROW-6542)", {
+ a <- Array$create(1:3)
+ b <- a$View(float32())
+ expect_equal(b$type, float32())
+ expect_equal(length(b), 3L)
+
+ # Input validation
+ expect_error(a$View("not a type"), "type must be a DataType, not character")
+})
+
+test_that("Array$Validate()", {
+ a <- Array$create(1:10)
+ expect_error(a$Validate(), NA)
+})
+
+test_that("is.Array", {
+ a <- Array$create(1, type = int32())
+ expect_true(is.Array(a))
+ expect_true(is.Array(a, "int32"))
+ expect_true(is.Array(a, c("int32", "int16")))
+ expect_false(is.Array(a, "utf8"))
+ expect_true(is.Array(a$View(float32())), "float32")
+ expect_false(is.Array(1))
+ expect_true(is.Array(ChunkedArray$create(1, 2)))
+})
+
+test_that("Array$Take()", {
+ a <- Array$create(10:20)
+ expect_as_vector(a$Take(c(4, 2)), c(14, 12))
+})
+
+test_that("[ method on Array", {
+ vec <- 11:20
+ a <- Array$create(vec)
+ expect_as_vector(a[5:9], vec[5:9])
+ expect_as_vector(a[c(9, 3, 5)], vec[c(9, 3, 5)])
+ expect_as_vector(a[rep(c(TRUE, FALSE), 5)], vec[c(1, 3, 5, 7, 9)])
+ expect_as_vector(a[rep(c(TRUE, FALSE, NA, FALSE, TRUE), 2)], c(11, NA, 15, 16, NA, 20))
+ expect_as_vector(a[-4], vec[-4])
+ expect_as_vector(a[-1], vec[-1])
+})
+
+test_that("[ accepts Arrays and otherwise handles bad input", {
+ vec <- 11:20
+ a <- Array$create(vec)
+ ind <- c(9, 3, 5)
+ expect_error(
+ a[Array$create(ind)],
+ "Cannot extract rows with an Array of type double"
+ )
+ expect_as_vector(a[Array$create(ind - 1, type = int8())], vec[ind])
+ expect_as_vector(a[Array$create(ind - 1, type = uint8())], vec[ind])
+ expect_as_vector(a[ChunkedArray$create(8, 2, 4, type = uint8())], vec[ind])
+
+ filt <- seq_along(vec) %in% ind
+ expect_as_vector(a[Array$create(filt)], vec[filt])
+
+ expect_error(
+ a["string"],
+ "Cannot extract rows with an object of class character"
+ )
+})
+
+test_that("%in% works on dictionary arrays", {
+ a1 <- Array$create(as.factor(c("A", "B", "C")))
+ a2 <- DictionaryArray$create(c(0L, 1L, 2L), c(4.5, 3.2, 1.1))
+ c1 <- Array$create(c(FALSE, TRUE, FALSE))
+ c2 <- Array$create(c(FALSE, FALSE, FALSE))
+ b1 <- Array$create("B")
+ b2 <- Array$create(5.4)
+
+ expect_equal(is_in(a1, b1), c1)
+ expect_equal(is_in(a2, b2), c2)
+ expect_error(is_in(a1, b2))
+})
+
+test_that("[ accepts Expressions", {
+ vec <- 11:20
+ a <- Array$create(vec)
+ b <- Array$create(1:10)
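+ # `b > 4` builds an Expression rather than eagerly computing a logical
+ # vector; `[` evaluates it as a filter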
+ expect_as_vector(a[b > 4], vec[5:10])
+})
+
+test_that("Array head/tail", {
+ vec <- 11:20
+ a <- Array$create(vec)
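+ # negative n follows base R semantics: head() drops the last |n| values
+ # and tail() drops the first |n|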
+ expect_as_vector(head(a), head(vec))
+ expect_as_vector(head(a, 4), head(vec, 4))
+ expect_as_vector(head(a, 40), head(vec, 40))
+ expect_as_vector(head(a, -4), head(vec, -4))
+ expect_as_vector(head(a, -40), head(vec, -40))
+ expect_as_vector(tail(a), tail(vec))
+ expect_as_vector(tail(a, 4), tail(vec, 4))
+ expect_as_vector(tail(a, 40), tail(vec, 40))
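+ expect_as_vector(tail(a, -4), tail(vec, -4))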
+ expect_as_vector(tail(a, -40), tail(vec, -40))
+})
+
+test_that("Dictionary array: create from arrays, not factor", {
+ a <- DictionaryArray$create(c(2L, 1L, 1L, 2L, 0L), c(4.5, 3.2, 1.1))
+ expect_equal(a$type, dictionary(int32(), float64()))
+})
+
+test_that("Dictionary array: translate to R when dict isn't string", {
+ a <- DictionaryArray$create(c(2L, 1L, 1L, 2L, 0L), c(4.5, 3.2, 1.1))
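+ # R factor levels must be strings, so the numeric dictionary values are
+ # coerced to labels, with a warning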
+ expect_warning(
+ expect_identical(
+ as.vector(a),
+ factor(c(3, 2, 2, 3, 1), labels = c("4.5", "3.2", "1.1"))
+ )
+ )
+})
+
+test_that("Array$Equals", {
+ vec <- 11:20
+ a <- Array$create(vec)
+ b <- Array$create(vec)
+ d <- Array$create(3:4)
+ expect_equal(a, b)
+ expect_true(a$Equals(b))
+ expect_false(a$Equals(vec))
+ expect_false(a$Equals(d))
+})
+
+test_that("Array$ApproxEquals", {
+ vec <- c(1.0000000000001, 2.400000000000001)
+ a <- Array$create(vec)
+ b <- Array$create(round(vec, 1))
+ expect_false(a$Equals(b))
+ expect_true(a$ApproxEquals(b))
+ expect_false(a$ApproxEquals(vec))
+})
+
+test_that("auto int64 conversion to int can be disabled (ARROW-10093)", {
+ withr::with_options(list(arrow.int64_downcast = FALSE), {
+ a <- Array$create(1:10, int64())
+ expect_true(inherits(a$as_vector(), "integer64"))
+
+ batch <- RecordBatch$create(x = a)
+ expect_true(inherits(as.data.frame(batch)$x, "integer64"))
+
+ tab <- Table$create(x = a)
+ expect_true(inherits(as.data.frame(batch)$x, "integer64"))
+ })
+})
+
+
+test_that("Array to C-interface", {
+ # create a struct array since that's one of the more complicated array types
+ df <- tibble::tibble(x = 1:10, y = x / 2, z = letters[1:10])
+ arr <- Array$create(df)
+
+ # export the array via the C-interface
+ schema_ptr <- allocate_arrow_schema()
+ array_ptr <- allocate_arrow_array()
+ arr$export_to_c(array_ptr, schema_ptr)
+
+ # then import it and check that the roundtripped value is the same
+ circle <- Array$import_from_c(array_ptr, schema_ptr)
+ expect_equal(arr, circle)
+
+ # must clean up the pointers or we leak
+ delete_arrow_schema(schema_ptr)
+ delete_arrow_array(array_ptr)
+})
diff --git a/src/arrow/r/tests/testthat/test-RecordBatch.R b/src/arrow/r/tests/testthat/test-RecordBatch.R
new file mode 100644
index 000000000..d280754a3
--- /dev/null
+++ b/src/arrow/r/tests/testthat/test-RecordBatch.R
@@ -0,0 +1,690 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+test_that("RecordBatch", {
+ # Note that we're reusing `tbl` and `batch` throughout the tests in this file
+ tbl <- tibble::tibble(
+ int = 1:10,
+ dbl = as.numeric(1:10),
+ lgl = sample(c(TRUE, FALSE, NA), 10, replace = TRUE),
+ chr = letters[1:10],
+ fct = factor(letters[1:10])
+ )
+ batch <- record_batch(tbl)
+
+ expect_equal(batch, batch)
+ expect_equal(
+ batch$schema,
+ schema(
+ int = int32(), dbl = float64(),
+ lgl = boolean(), chr = utf8(),
+ fct = dictionary(int8(), utf8())
+ )
+ )
+ expect_equal(batch$num_columns, 5L)
+ expect_equal(batch$num_rows, 10L)
+ expect_equal(batch$column_name(0), "int")
+ expect_equal(batch$column_name(1), "dbl")
+ expect_equal(batch$column_name(2), "lgl")
+ expect_equal(batch$column_name(3), "chr")
+ expect_equal(batch$column_name(4), "fct")
+ expect_equal(names(batch), c("int", "dbl", "lgl", "chr", "fct"))
+
+ # input validation
+ expect_error(batch$column_name(NA), "'i' cannot be NA")
+ expect_error(batch$column_name(-1), "subscript out of bounds")
+ expect_error(batch$column_name(1000), "subscript out of bounds")
+ expect_error(batch$column_name(1:2))
+ expect_error(batch$column_name("one"))
+
+ col_int <- batch$column(0)
+ expect_true(inherits(col_int, "Array"))
+ expect_equal(col_int$as_vector(), tbl$int)
+ expect_equal(col_int$type, int32())
+
+ col_dbl <- batch$column(1)
+ expect_true(inherits(col_dbl, "Array"))
+ expect_equal(col_dbl$as_vector(), tbl$dbl)
+ expect_equal(col_dbl$type, float64())
+
+ col_lgl <- batch$column(2)
+ expect_true(inherits(col_dbl, "Array"))
+ expect_equal(col_lgl$as_vector(), tbl$lgl)
+ expect_equal(col_lgl$type, boolean())
+
+ col_chr <- batch$column(3)
+ expect_true(inherits(col_chr, "Array"))
+ expect_equal(col_chr$as_vector(), tbl$chr)
+ expect_equal(col_chr$type, utf8())
+
+ col_fct <- batch$column(4)
+ expect_true(inherits(col_fct, "Array"))
+ expect_equal(col_fct$as_vector(), tbl$fct)
+ expect_equal(col_fct$type, dictionary(int8(), utf8()))
+
+ # input validation
+ expect_error(batch$column(NA), "'i' cannot be NA")
+ expect_error(batch$column(-1), "subscript out of bounds")
+ expect_error(batch$column(1000), "subscript out of bounds")
+ expect_error(batch$column(1:2))
+ expect_error(batch$column("one"))
+
+ batch2 <- batch$RemoveColumn(0)
+ expect_equal(
+ batch2$schema,
+ schema(dbl = float64(), lgl = boolean(), chr = utf8(), fct = dictionary(int8(), utf8()))
+ )
+ expect_equal(batch2$column(0), batch$column(1))
+ expect_data_frame(batch2, tbl[, -1])
+
+ # input validation
+ expect_error(batch$RemoveColumn(NA), "'i' cannot be NA")
+ expect_error(batch$RemoveColumn(-1), "subscript out of bounds")
+ expect_error(batch$RemoveColumn(1000), "subscript out of bounds")
+ expect_error(batch$RemoveColumn(1:2))
+ expect_error(batch$RemoveColumn("one"))
+})
+
+test_that("RecordBatch S3 methods", {
+ tab <- RecordBatch$create(example_data)
+ for (f in c("dim", "nrow", "ncol", "dimnames", "colnames", "row.names", "as.list")) {
+ fun <- get(f)
+ expect_identical(fun(tab), fun(example_data), info = f)
+ }
+})
+
+test_that("RecordBatch$Slice", {
+ batch3 <- batch$Slice(5)
+ expect_data_frame(batch3, tbl[6:10, ])
+
+ batch4 <- batch$Slice(5, 2)
+ expect_data_frame(batch4, tbl[6:7, ])
+
+ # Input validation
+ expect_error(batch$Slice("ten"))
+ expect_error(batch$Slice(NA_integer_), "Slice 'offset' cannot be NA")
+ expect_error(batch$Slice(NA), "Slice 'offset' cannot be NA")
+ expect_error(batch$Slice(10, "ten"))
+ expect_error(batch$Slice(10, NA_integer_), "Slice 'length' cannot be NA")
+ expect_error(batch$Slice(NA_integer_, NA_integer_), "Slice 'offset' cannot be NA")
+ expect_error(batch$Slice(c(10, 10)))
+ expect_error(batch$Slice(10, c(10, 10)))
+ expect_error(batch$Slice(1000), "Slice 'offset' greater than array length")
+ expect_error(batch$Slice(-1), "Slice 'offset' cannot be negative")
+ expect_error(batch4$Slice(10, 10), "Slice 'offset' greater than array length")
+ expect_error(batch$Slice(10, -1), "Slice 'length' cannot be negative")
+ expect_error(batch$Slice(-1, 10), "Slice 'offset' cannot be negative")
+})
+
+test_that("[ on RecordBatch", {
+ expect_data_frame(batch[6:7, ], tbl[6:7, ])
+ expect_data_frame(batch[c(6, 7), ], tbl[6:7, ])
+ expect_data_frame(batch[6:7, 2:4], tbl[6:7, 2:4])
+ expect_data_frame(batch[, c("dbl", "fct")], tbl[, c(2, 5)])
+ expect_identical(as.vector(batch[, "chr", drop = TRUE]), tbl$chr)
+ expect_data_frame(batch[c(7, 3, 5), 2:4], tbl[c(7, 3, 5), 2:4])
+ expect_data_frame(
+ batch[rep(c(FALSE, TRUE), 5), ],
+ tbl[c(2, 4, 6, 8, 10), ]
+ )
+ # bool Array
+ expect_data_frame(batch[batch$lgl, ], tbl[tbl$lgl, ])
+ # int Array
+ expect_data_frame(batch[Array$create(5:6), 2:4], tbl[6:7, 2:4])
+
+ # input validation
+ expect_error(batch[, c("dbl", "NOTACOLUMN")], 'Column not found: "NOTACOLUMN"')
+ expect_error(batch[, c(6, NA)], "Column indices cannot be NA")
+ expect_error(batch[, c(2, -2)], "Invalid column index")
+})
+
+test_that("[[ and $ on RecordBatch", {
+ expect_as_vector(batch[["int"]], tbl$int)
+ expect_as_vector(batch$int, tbl$int)
+ expect_as_vector(batch[[4]], tbl$chr)
+ expect_null(batch$qwerty)
+ expect_null(batch[["asdf"]])
+ expect_error(batch[[c(4, 3)]])
+ expect_error(batch[[NA]], "'i' must be character or numeric, not logical")
+ expect_error(batch[[NULL]], "'i' must be character or numeric, not NULL")
+ expect_error(batch[[c("asdf", "jkl;")]], "name is not a string", fixed = TRUE)
+})
+
+test_that("[[<- assignment", {
+ tbl <- tibble::tibble(
+ int = 1:10,
+ dbl = as.numeric(1:10),
+ lgl = sample(c(TRUE, FALSE, NA), 10, replace = TRUE),
+ chr = letters[1:10],
+ fct = factor(letters[1:10])
+ )
+ batch <- RecordBatch$create(tbl)
+
+ # can remove a column
+ batch[["chr"]] <- NULL
+ expect_data_frame(batch, tbl[-4])
+
+ # can remove a column by index
+ batch[[4]] <- NULL
+ expect_data_frame(batch, tbl[1:3])
+
+ # can add a named column
+ batch[["new"]] <- letters[10:1]
+ expect_data_frame(batch, dplyr::bind_cols(tbl[1:3], new = letters[10:1]))
+
+ # can replace a column by index
+ batch[[2]] <- as.numeric(10:1)
+ expect_as_vector(batch[[2]], as.numeric(10:1))
+
+ # can add a column by index
+ batch[[5]] <- as.numeric(10:1)
+ expect_as_vector(batch[[5]], as.numeric(10:1))
+ expect_as_vector(batch[["5"]], as.numeric(10:1))
+
+ # can replace a column
+ batch[["int"]] <- 10:1
+ expect_as_vector(batch[["int"]], 10:1)
+
+ # can use $
+ batch$new <- NULL
+ expect_null(as.vector(batch$new))
+ expect_identical(dim(batch), c(10L, 4L))
+
+ batch$int <- 1:10
+ expect_as_vector(batch$int, 1:10)
+
+ # recycling
+ batch[["atom"]] <- 1L
+ expect_as_vector(batch[["atom"]], rep(1L, 10))
+
+ expect_error(
+ batch[["atom"]] <- 1:6,
+ "Can't recycle input of size 6 to size 10."
+ )
+
+ # assign Arrow array
+ array <- Array$create(c(10:1))
+ batch$array <- array
+ expect_as_vector(batch$array, 10:1)
+
+ # nonsense indexes
+ expect_error(batch[[NA]] <- letters[10:1], "'i' must be character or numeric, not logical")
+ expect_error(batch[[NULL]] <- letters[10:1], "'i' must be character or numeric, not NULL")
+ expect_error(batch[[NA_integer_]] <- letters[10:1], "!is.na(i) is not TRUE", fixed = TRUE)
+ expect_error(batch[[NA_real_]] <- letters[10:1], "!is.na(i) is not TRUE", fixed = TRUE)
+ expect_error(batch[[NA_character_]] <- letters[10:1], "!is.na(i) is not TRUE", fixed = TRUE)
+ expect_error(batch[[c(1, 4)]] <- letters[10:1], "length(i) not equal to 1", fixed = TRUE)
+})
+
+test_that("head and tail on RecordBatch", {
+ tbl <- tibble::tibble(
+ int = 1:10,
+ dbl = as.numeric(1:10),
+ lgl = sample(c(TRUE, FALSE, NA), 10, replace = TRUE),
+ chr = letters[1:10],
+ fct = factor(letters[1:10])
+ )
+ batch <- RecordBatch$create(tbl)
+ expect_data_frame(head(batch), head(tbl))
+ expect_data_frame(head(batch, 4), head(tbl, 4))
+ expect_data_frame(head(batch, 40), head(tbl, 40))
+ expect_data_frame(head(batch, -4), head(tbl, -4))
+ expect_data_frame(head(batch, -40), head(tbl, -40))
+ expect_data_frame(tail(batch), tail(tbl))
+ expect_data_frame(tail(batch, 4), tail(tbl, 4))
+ expect_data_frame(tail(batch, 40), tail(tbl, 40))
+ expect_data_frame(tail(batch, -4), tail(tbl, -4))
+ expect_data_frame(tail(batch, -40), tail(tbl, -40))
+})
+
+test_that("RecordBatch print method", {
+ expect_output(
+ print(batch),
+ paste(
+ "RecordBatch",
+ "10 rows x 5 columns",
+ "$int <int32>",
+ "$dbl <double>",
+ "$lgl <bool>",
+ "$chr <string>",
+ "$fct <dictionary<values=string, indices=int8>>",
+ sep = "\n"
+ ),
+ fixed = TRUE
+ )
+})
+
+test_that("RecordBatch with 0 rows are supported", {
+ tbl <- tibble::tibble(
+ int = integer(),
+ dbl = numeric(),
+ lgl = logical(),
+ chr = character(),
+ fct = factor(character(), levels = c("a", "b"))
+ )
+
+ batch <- record_batch(tbl)
+ expect_equal(batch$num_columns, 5L)
+ expect_equal(batch$num_rows, 0L)
+ expect_equal(
+ batch$schema,
+ schema(
+ int = int32(),
+ dbl = float64(),
+ lgl = boolean(),
+ chr = utf8(),
+ fct = dictionary(int8(), utf8())
+ )
+ )
+})
+
+test_that("RecordBatch cast (ARROW-3741)", {
+ batch <- record_batch(x = 1:10, y = 1:10)
+
+ expect_error(batch$cast(schema(x = int32())))
+ expect_error(batch$cast(schema(x = int32(), z = int32())))
+
+ s2 <- schema(x = int16(), y = int64())
+ batch2 <- batch$cast(s2)
+ expect_equal(batch2$schema, s2)
+ expect_equal(batch2$column(0L)$type, int16())
+ expect_equal(batch2$column(1L)$type, int64())
+})
+
+test_that("record_batch() handles schema= argument", {
+ s <- schema(x = int32(), y = int32())
+ batch <- record_batch(x = 1:10, y = 1:10, schema = s)
+ expect_equal(s, batch$schema)
+
+ s <- schema(x = int32(), y = float64())
+ batch <- record_batch(x = 1:10, y = 1:10, schema = s)
+ expect_equal(s, batch$schema)
+
+ s <- schema(x = int32(), y = utf8())
+ expect_error(record_batch(x = 1:10, y = 1:10, schema = s))
+})
+
+test_that("record_batch(schema=) does some basic consistency checking of the schema", {
+ s <- schema(x = int32())
+ expect_error(record_batch(x = 1:10, y = 1:10, schema = s))
+ expect_error(record_batch(z = 1:10, schema = s))
+})
+
+test_that("RecordBatch dim() and nrow() (ARROW-3816)", {
+ batch <- record_batch(x = 1:10, y = 1:10)
+ expect_equal(dim(batch), c(10L, 2L))
+ expect_equal(nrow(batch), 10L)
+})
+
+test_that("record_batch() handles Array", {
+ batch <- record_batch(x = 1:10, y = Array$create(1:10))
+ expect_equal(batch$schema, schema(x = int32(), y = int32()))
+})
+
+test_that("record_batch() handles data frame columns", {
+ tib <- tibble::tibble(x = 1:10, y = 1:10)
+ # because tib is named here, this becomes a struct array
+ batch <- record_batch(a = 1:10, b = tib)
+ expect_equal(
+ batch$schema,
+ schema(
+ a = int32(),
+ b = struct(x = int32(), y = int32())
+ )
+ )
+ out <- as.data.frame(batch)
+ expect_equal(out, tibble::tibble(a = 1:10, b = tib))
+
+ # if not named, columns from tib are auto spliced
+ batch2 <- record_batch(a = 1:10, tib)
+ expect_equal(
+ batch2$schema,
+ schema(a = int32(), x = int32(), y = int32())
+ )
+ out <- as.data.frame(batch2)
+ expect_equal(out, tibble::tibble(a = 1:10, !!!tib))
+})
+
+test_that("record_batch() handles data frame columns with schema spec", {
+ tib <- tibble::tibble(x = 1:10, y = 1:10)
+ tib_float <- tib
+ tib_float$y <- as.numeric(tib_float$y)
+ schema <- schema(a = int32(), b = struct(x = int16(), y = float64()))
+ batch <- record_batch(a = 1:10, b = tib, schema = schema)
+ expect_equal(batch$schema, schema)
+ out <- as.data.frame(batch)
+ expect_equal(out, tibble::tibble(a = 1:10, b = tib_float))
+
+ schema <- schema(a = int32(), b = struct(x = int16(), y = utf8()))
+ expect_error(record_batch(a = 1:10, b = tib, schema = schema))
+})
+
+test_that("record_batch() auto splices (ARROW-5718)", {
+ df <- tibble::tibble(x = 1:10, y = letters[1:10])
+ batch1 <- record_batch(df)
+ batch2 <- record_batch(!!!df)
+ expect_equal(batch1, batch2)
+ expect_equal(batch1$schema, schema(x = int32(), y = utf8()))
+ expect_data_frame(batch1, df)
+
+ batch3 <- record_batch(df, z = 1:10)
+ batch4 <- record_batch(!!!df, z = 1:10)
+ expect_equal(batch3, batch4)
+ expect_equal(batch3$schema, schema(x = int32(), y = utf8(), z = int32()))
+ expect_equal(
+ as.data.frame(batch3),
+ tibble::as_tibble(cbind(df, data.frame(z = 1:10)))
+ )
+
+ s <- schema(x = float64(), y = utf8())
+ batch5 <- record_batch(df, schema = s)
+ batch6 <- record_batch(!!!df, schema = s)
+ expect_equal(batch5, batch6)
+ expect_equal(batch5$schema, s)
+ expect_equal(as.data.frame(batch5), df)
+
+ s2 <- schema(x = float64(), y = utf8(), z = int16())
+ batch7 <- record_batch(df, z = 1:10, schema = s2)
+ batch8 <- record_batch(!!!df, z = 1:10, schema = s2)
+ expect_equal(batch7, batch8)
+ expect_equal(batch7$schema, s2)
+ expect_equal(
+ as.data.frame(batch7),
+ tibble::as_tibble(cbind(df, data.frame(z = 1:10)))
+ )
+})
+
+test_that("record_batch() only auto splice data frames", {
+ expect_error(
+ record_batch(1:10),
+ regexp = "only data frames are allowed as unnamed arguments to be auto spliced"
+ )
+})
+
+test_that("record_batch() handles null type (ARROW-7064)", {
+ batch <- record_batch(a = 1:10, n = vctrs::unspecified(10))
+ expect_equal(
+ batch$schema,
+ schema(a = int32(), n = null()),
+ ignore_attr = TRUE
+ )
+})
+
+test_that("record_batch() scalar recycling with vectors", {
+ expect_data_frame(
+ record_batch(a = 1:10, b = 5),
+ tibble::tibble(a = 1:10, b = 5)
+ )
+})
+
+test_that("record_batch() scalar recycling with Scalars, Arrays, and ChunkedArrays", {
+ expect_data_frame(
+ record_batch(a = Array$create(1:10), b = Scalar$create(5)),
+ tibble::tibble(a = 1:10, b = 5)
+ )
+
+ expect_data_frame(
+ record_batch(a = Array$create(1:10), b = Array$create(5)),
+ tibble::tibble(a = 1:10, b = 5)
+ )
+
+ expect_data_frame(
+ record_batch(a = Array$create(1:10), b = ChunkedArray$create(5)),
+ tibble::tibble(a = 1:10, b = 5)
+ )
+})
+
+test_that("record_batch() no recycling with tibbles", {
+ expect_error(
+ record_batch(
+ tibble::tibble(a = 1:10),
+ tibble::tibble(a = 1, b = 5)
+ ),
+ regexp = "All input tibbles or data.frames must have the same number of rows"
+ )
+
+ expect_error(
+ record_batch(
+ tibble::tibble(a = 1:10),
+ tibble::tibble(a = 1)
+ ),
+ regexp = "All input tibbles or data.frames must have the same number of rows"
+ )
+})
+
+test_that("RecordBatch$Equals", {
+ df <- tibble::tibble(x = 1:10, y = letters[1:10])
+ a <- record_batch(df)
+ b <- record_batch(df)
+ expect_equal(a, b)
+ expect_true(a$Equals(b))
+ expect_false(a$Equals(df))
+})
+
+test_that("RecordBatch$Equals(check_metadata)", {
+ df <- tibble::tibble(x = 1:2, y = c("a", "b"))
+ rb1 <- record_batch(df)
+ rb2 <- record_batch(df, schema = rb1$schema$WithMetadata(list(some = "metadata")))
+
+ expect_r6_class(rb1, "RecordBatch")
+ expect_r6_class(rb2, "RecordBatch")
+ expect_false(rb1$schema$HasMetadata)
+ expect_true(rb2$schema$HasMetadata)
+ expect_identical(rb2$schema$metadata, list(some = "metadata"))
+
+ expect_true(rb1 == rb2)
+ expect_true(rb1$Equals(rb2))
+ expect_false(rb1$Equals(rb2, check_metadata = TRUE))
+
+ expect_failure(expect_equal(rb1, rb2)) # expect_equal has check_metadata=TRUE
+ expect_equal(rb1, rb2, ignore_attr = TRUE) # this passes check_metadata=FALSE
+
+ expect_false(rb1$Equals(24)) # Not a RecordBatch
+})
+
+test_that("RecordBatch name assignment", {
+ rb <- record_batch(x = 1:10, y = 1:10)
+ expect_identical(names(rb), c("x", "y"))
+ names(rb) <- c("a", "b")
+ expect_identical(names(rb), c("a", "b"))
+ expect_error(names(rb) <- "f")
+ expect_error(names(rb) <- letters)
+ expect_error(names(rb) <- character(0))
+ expect_error(names(rb) <- NULL)
+ expect_error(names(rb) <- c(TRUE, FALSE))
+})
+
+test_that("record_batch() with different length arrays", {
+ msg <- "All arrays must have the same length"
+ expect_error(record_batch(a = 1:5, b = 1:6), msg)
+})
+
+test_that("Handling string data with embedded nuls", {
+ raws <- Array$create(structure(list(
+ as.raw(c(0x70, 0x65, 0x72, 0x73, 0x6f, 0x6e)),
+ as.raw(c(0x77, 0x6f, 0x6d, 0x61, 0x6e)),
+ as.raw(c(0x6d, 0x61, 0x00, 0x6e)), # <-- there's your nul, 0x00
+ as.raw(c(0x63, 0x61, 0x6d, 0x65, 0x72, 0x61)),
+ as.raw(c(0x74, 0x76))
+ ),
+ class = c("arrow_binary", "vctrs_vctr", "list")
+ ))
+ batch_with_nul <- record_batch(a = 1:5, b = raws)
+ batch_with_nul$b <- batch_with_nul$b$cast(utf8())
+
+ # The behavior of the warnings/errors is slightly different with and without
+ # altrep. Without it (i.e. on R 3.5.0 and below), the error triggers
+ # immediately on `as.vector()`, whereas with it, the error only happens on
+ # materialization.
+ skip_if_r_version("3.5.0")
+ df <- as.data.frame(batch_with_nul)
+
+ expect_error(
+ df$b[],
+ paste0(
+ "embedded nul in string: 'ma\\0n'; to strip nuls when converting from Arrow to R, ",
+ "set options(arrow.skip_nul = TRUE)"
+ ),
+ fixed = TRUE
+ )
+
+ batch_with_nul <- record_batch(a = 1:5, b = raws)
+ batch_with_nul$b <- batch_with_nul$b$cast(utf8())
+
+ withr::with_options(list(arrow.skip_nul = TRUE), {
+ expect_warning(
+ expect_equal(
+ as.data.frame(batch_with_nul)$b,
+ c("person", "woman", "man", "camera", "tv"),
+ ignore_attr = TRUE
+ ),
+ "Stripping '\\0' (nul) from character vector",
+ fixed = TRUE
+ )
+ })
+})
+
+test_that("ARROW-11769/ARROW-13860 - grouping preserved in record batch creation", {
+ skip_if_not_available("dataset")
+ library(dplyr, warn.conflicts = FALSE)
+
+ tbl <- tibble::tibble(
+ int = 1:10,
+ fct = factor(rep(c("A", "B"), 5)),
+ fct2 = factor(rep(c("C", "D"), each = 5)),
+ )
+
+ expect_r6_class(
+ tbl %>%
+ group_by(fct, fct2) %>%
+ record_batch(),
+ "RecordBatch"
+ )
+ expect_identical(
+ tbl %>%
+ group_by(fct, fct2) %>%
+ record_batch() %>%
+ group_vars(),
+ c("fct", "fct2")
+ )
+ expect_identical(
+ tbl %>%
+ group_by(fct, fct2) %>%
+ record_batch() %>%
+ ungroup() %>%
+ group_vars(),
+ NULL
+ )
+ expect_identical(
+ tbl %>%
+ group_by(fct, fct2) %>%
+ record_batch() %>%
+ select(-int) %>%
+ group_vars(),
+ c("fct", "fct2")
+ )
+})
+
+test_that("ARROW-12729 - length returns number of columns in RecordBatch", {
+ tbl <- tibble::tibble(
+ int = 1:10,
+ fct = factor(rep(c("A", "B"), 5)),
+ fct2 = factor(rep(c("C", "D"), each = 5)),
+ )
+
+ rb <- record_batch(!!!tbl)
+
+ expect_identical(length(rb), 3L)
+})
+
+test_that("RecordBatchReader to C-interface", {
+ skip_if_not_available("dataset")
+
+ tab <- Table$create(example_data)
+
+ # export the RecordBatchReader via the C-interface
+ stream_ptr <- allocate_arrow_array_stream()
+ scan <- Scanner$create(tab)
+ reader <- scan$ToRecordBatchReader()
+ reader$export_to_c(stream_ptr)
+
+ # then import it and check that the roundtripped value is the same
+ circle <- RecordBatchStreamReader$import_from_c(stream_ptr)
+ tab_from_c_new <- circle$read_table()
+ expect_equal(tab, tab_from_c_new)
+
+ # must clean up the pointer or we leak
+ delete_arrow_array_stream(stream_ptr)
+
+ # export the RecordBatchStreamReader via the C-interface
+ stream_ptr_new <- allocate_arrow_array_stream()
+ bytes <- write_to_raw(example_data)
+ expect_type(bytes, "raw")
+ reader_new <- RecordBatchStreamReader$create(bytes)
+ reader_new$export_to_c(stream_ptr_new)
+
+ # then import it and check that the roundtripped value is the same
+ circle_new <- RecordBatchStreamReader$import_from_c(stream_ptr_new)
+ tab_from_c_new <- circle_new$read_table()
+ expect_equal(tab, tab_from_c_new)
+
+ # must clean up the pointer or we leak
+ delete_arrow_array_stream(stream_ptr_new)
+})
+
+test_that("RecordBatch to C-interface", {
+ batch <- RecordBatch$create(example_data)
+
+ # export the RecordBatch via the C-interface
+ schema_ptr <- allocate_arrow_schema()
+ array_ptr <- allocate_arrow_array()
+ batch$export_to_c(array_ptr, schema_ptr)
+
+ # then import it and check that the roundtripped value is the same
+ circle <- RecordBatch$import_from_c(array_ptr, schema_ptr)
+ expect_equal(batch, circle)
+
+ # must clean up the pointers or we leak
+ delete_arrow_schema(schema_ptr)
+ delete_arrow_array(array_ptr)
+})
+
+
+
+test_that("RecordBatchReader to C-interface to arrow_dplyr_query", {
+ skip_if_not_available("dataset")
+
+ tab <- Table$create(example_data)
+
+ # export the RecordBatchReader via the C-interface
+ stream_ptr <- allocate_arrow_array_stream()
+ scan <- Scanner$create(tab)
+ reader <- scan$ToRecordBatchReader()
+ reader$export_to_c(stream_ptr)
+
+ # then import it and check that the roundtripped value is the same
+ circle <- RecordBatchStreamReader$import_from_c(stream_ptr)
+
+ # create an arrow_dplyr_query() from the recordbatch reader
+ reader_adq <- arrow_dplyr_query(circle)
+
+ tab_from_c_new <- reader_adq %>%
+ dplyr::compute()
+ expect_equal(tab_from_c_new, tab)
+
+ # must clean up the pointer or we leak
+ delete_arrow_array_stream(stream_ptr)
+})
diff --git a/src/arrow/r/tests/testthat/test-Table.R b/src/arrow/r/tests/testthat/test-Table.R
new file mode 100644
index 000000000..44144c00b
--- /dev/null
+++ b/src/arrow/r/tests/testthat/test-Table.R
@@ -0,0 +1,549 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+test_that("read_table handles various input streams (ARROW-3450, ARROW-3505)", {
+ tbl <- tibble::tibble(
+ int = 1:10, dbl = as.numeric(1:10),
+ lgl = sample(c(TRUE, FALSE, NA), 10, replace = TRUE),
+ chr = letters[1:10]
+ )
+ tab <- Table$create(!!!tbl)
+
+ tf <- tempfile()
+ on.exit(unlink(tf))
+ expect_deprecated(
+ write_arrow(tab, tf),
+ "write_feather"
+ )
+
+ tab1 <- read_feather(tf, as_data_frame = FALSE)
+ tab2 <- read_feather(normalizePath(tf), as_data_frame = FALSE)
+
+ readable_file <- ReadableFile$create(tf)
+ expect_deprecated(
+ tab3 <- read_arrow(readable_file, as_data_frame = FALSE),
+ "read_feather"
+ )
+ readable_file$close()
+
+ mmap_file <- mmap_open(tf)
+ mmap_file$close()
+
+ expect_equal(tab, tab1)
+ expect_equal(tab, tab2)
+ expect_equal(tab, tab3)
+})
+
+test_that("Table cast (ARROW-3741)", {
+ tab <- Table$create(x = 1:10, y = 1:10)
+
+ expect_error(tab$cast(schema(x = int32())))
+ expect_error(tab$cast(schema(x = int32(), z = int32())))
+
+ s2 <- schema(x = int16(), y = int64())
+ tab2 <- tab$cast(s2)
+ expect_equal(tab2$schema, s2)
+ expect_equal(tab2$column(0L)$type, int16())
+ expect_equal(tab2$column(1L)$type, int64())
+})
+
+test_that("Table S3 methods", {
+ tab <- Table$create(example_data)
+ for (f in c("dim", "nrow", "ncol", "dimnames", "colnames", "row.names", "as.list")) {
+ fun <- get(f)
+ expect_identical(fun(tab), fun(example_data), info = f)
+ }
+})
+
+test_that("Table $column and $field", {
+ tab <- Table$create(x = 1:10, y = 1:10)
+
+ expect_equal(tab$field(0), field("x", int32()))
+
+ # input validation
+ expect_error(tab$column(NA), "'i' cannot be NA")
+ expect_error(tab$column(-1), "subscript out of bounds")
+ expect_error(tab$column(1000), "subscript out of bounds")
+ expect_error(tab$column(1:2))
+ expect_error(tab$column("one"))
+
+ expect_error(tab$field(NA), "'i' cannot be NA")
+ expect_error(tab$field(-1), "subscript out of bounds")
+ expect_error(tab$field(1000), "subscript out of bounds")
+ expect_error(tab$field(1:2))
+ expect_error(tab$field("one"))
+})
+
+test_that("[, [[, $ for Table", {
+ tbl <- tibble::tibble(
+ int = 1:10,
+ dbl = as.numeric(1:10),
+ lgl = sample(c(TRUE, FALSE, NA), 10, replace = TRUE),
+ chr = letters[1:10],
+ fct = factor(letters[1:10])
+ )
+ tab <- Table$create(tbl)
+
+ expect_identical(names(tab), names(tbl))
+
+ expect_data_frame(tab[6:7, ], tbl[6:7, ])
+ expect_data_frame(tab[6:7, 2:4], tbl[6:7, 2:4])
+ expect_data_frame(tab[, c("dbl", "fct")], tbl[, c(2, 5)])
+ expect_as_vector(tab[, "chr", drop = TRUE], tbl$chr)
+ # Take within a single chunk
+ expect_data_frame(tab[c(7, 3, 5), 2:4], tbl[c(7, 3, 5), 2:4])
+ expect_data_frame(tab[rep(c(FALSE, TRUE), 5), ], tbl[c(2, 4, 6, 8, 10), ])
+ # bool ChunkedArray (with one chunk)
+ expect_data_frame(tab[tab$lgl, ], tbl[tbl$lgl, ])
+ # ChunkedArray with multiple chunks
+ c1 <- c(TRUE, FALSE, TRUE, TRUE, FALSE)
+ c2 <- c(FALSE, FALSE, TRUE, TRUE, FALSE)
+ ca <- ChunkedArray$create(c1, c2)
+ expect_data_frame(tab[ca, ], tbl[c(1, 3, 4, 8, 9), ])
+ # int Array
+ expect_data_frame(tab[Array$create(5:6), 2:4], tbl[6:7, 2:4])
+ # ChunkedArray
+ expect_data_frame(tab[ChunkedArray$create(5L, 6L), 2:4], tbl[6:7, 2:4])
+ # Expression
+ expect_data_frame(tab[tab$int > 6, ], tbl[tbl$int > 6, ])
+
+ expect_as_vector(tab[["int"]], tbl$int)
+ expect_as_vector(tab$int, tbl$int)
+ expect_as_vector(tab[[4]], tbl$chr)
+ expect_null(tab$qwerty)
+ expect_null(tab[["asdf"]])
+ # List-like column slicing
+ expect_data_frame(tab[2:4], tbl[2:4])
+ expect_data_frame(tab[c(2, 1)], tbl[c(2, 1)])
+ expect_data_frame(tab[-3], tbl[-3])
+
+ expect_error(tab[[c(4, 3)]])
+ expect_error(tab[[NA]], "'i' must be character or numeric, not logical")
+ expect_error(tab[[NULL]], "'i' must be character or numeric, not NULL")
+ expect_error(tab[[c("asdf", "jkl;")]], "length(name) not equal to 1", fixed = TRUE)
+ expect_error(tab[-3:3], "Invalid column index")
+ expect_error(tab[1000], "Invalid column index")
+ expect_error(tab[1:1000], "Invalid column index")
+
+ # input validation
+ expect_error(tab[, c("dbl", "NOTACOLUMN")], 'Column not found: "NOTACOLUMN"')
+ expect_error(tab[, c(6, NA)], "Column indices cannot be NA")
+
+ skip("Table with 0 cols doesn't know how many rows it should have")
+ expect_data_frame(tab[0], tbl[0])
+})
+
+test_that("[[<- assignment", {
+ tbl <- tibble::tibble(
+ int = 1:10,
+ dbl = as.numeric(1:10),
+ lgl = sample(c(TRUE, FALSE, NA), 10, replace = TRUE),
+ chr = letters[1:10],
+ fct = factor(letters[1:10])
+ )
+ tab <- Table$create(tbl)
+
+ # can remove a column
+ tab[["chr"]] <- NULL
+ expect_data_frame(tab, tbl[-4])
+
+ # can remove a column by index
+ tab[[4]] <- NULL
+ expect_data_frame(tab, tbl[1:3])
+
+ # can add a named column
+ tab[["new"]] <- letters[10:1]
+ expect_data_frame(tab, dplyr::bind_cols(tbl[1:3], new = letters[10:1]))
+
+ # can replace a column by index
+ tab[[2]] <- as.numeric(10:1)
+ expect_as_vector(tab[[2]], as.numeric(10:1))
+
+ # can add a column by index
+ tab[[5]] <- as.numeric(10:1)
+ expect_as_vector(tab[[5]], as.numeric(10:1))
+ expect_as_vector(tab[["5"]], as.numeric(10:1))
+
+ # can replace a column
+ tab[["int"]] <- 10:1
+ expect_as_vector(tab[["int"]], 10:1)
+
+ # can use $
+ tab$new <- NULL
+ expect_null(as.vector(tab$new))
+ expect_identical(dim(tab), c(10L, 4L))
+
+ tab$int <- 1:10
+ expect_as_vector(tab$int, 1:10)
+
+ # recycling
+ tab[["atom"]] <- 1L
+ expect_as_vector(tab[["atom"]], rep(1L, 10))
+
+ expect_error(
+ tab[["atom"]] <- 1:6,
+ "Can't recycle input of size 6 to size 10."
+ )
+
+ # assign Arrow array and chunked_array
+ array <- Array$create(c(10:1))
+ tab$array <- array
+ expect_as_vector(tab$array, 10:1)
+
+ tab$chunked <- chunked_array(1:10)
+ expect_as_vector(tab$chunked, 1:10)
+
+ # nonsense indexes
+ expect_error(tab[[NA]] <- letters[10:1], "'i' must be character or numeric, not logical")
+ expect_error(tab[[NULL]] <- letters[10:1], "'i' must be character or numeric, not NULL")
+ expect_error(tab[[NA_integer_]] <- letters[10:1], "!is.na(i) is not TRUE", fixed = TRUE)
+ expect_error(tab[[NA_real_]] <- letters[10:1], "!is.na(i) is not TRUE", fixed = TRUE)
+ expect_error(tab[[NA_character_]] <- letters[10:1], "!is.na(i) is not TRUE", fixed = TRUE)
+ expect_error(tab[[c(1, 4)]] <- letters[10:1], "length(i) not equal to 1", fixed = TRUE)
+})
+
+test_that("Table$Slice", {
+ tbl <- tibble::tibble(
+ int = 1:10,
+ dbl = as.numeric(1:10),
+ lgl = sample(c(TRUE, FALSE, NA), 10, replace = TRUE),
+ chr = letters[1:10],
+ fct = factor(letters[1:10])
+ )
+ tab <- Table$create(tbl)
+ tab2 <- tab$Slice(5)
+ expect_data_frame(tab2, tbl[6:10, ])
+
+ tab3 <- tab$Slice(5, 2)
+ expect_data_frame(tab3, tbl[6:7, ])
+
+ # Input validation
+ expect_error(tab$Slice("ten"))
+ expect_error(tab$Slice(NA_integer_), "Slice 'offset' cannot be NA")
+ expect_error(tab$Slice(NA), "Slice 'offset' cannot be NA")
+ expect_error(tab$Slice(10, "ten"))
+ expect_error(tab$Slice(10, NA_integer_), "Slice 'length' cannot be NA")
+ expect_error(tab$Slice(NA_integer_, NA_integer_), "Slice 'offset' cannot be NA")
+ expect_error(tab$Slice(c(10, 10)))
+ expect_error(tab$Slice(10, c(10, 10)))
+ expect_error(tab$Slice(1000), "Slice 'offset' greater than array length")
+ expect_error(tab$Slice(-1), "Slice 'offset' cannot be negative")
+ expect_error(tab3$Slice(10, 10), "Slice 'offset' greater than array length")
+ expect_error(tab$Slice(10, -1), "Slice 'length' cannot be negative")
+ expect_error(tab$Slice(-1, 10), "Slice 'offset' cannot be negative")
+})
+
+test_that("head and tail on Table", {
+ tbl <- tibble::tibble(
+ int = 1:10,
+ dbl = as.numeric(1:10),
+ lgl = sample(c(TRUE, FALSE, NA), 10, replace = TRUE),
+ chr = letters[1:10],
+ fct = factor(letters[1:10])
+ )
+ tab <- Table$create(tbl)
+
+ expect_data_frame(head(tab), head(tbl))
+ expect_data_frame(head(tab, 4), head(tbl, 4))
+ expect_data_frame(head(tab, 40), head(tbl, 40))
+ expect_data_frame(head(tab, -4), head(tbl, -4))
+ expect_data_frame(head(tab, -40), head(tbl, -40))
+ expect_data_frame(tail(tab), tail(tbl))
+ expect_data_frame(tail(tab, 4), tail(tbl, 4))
+ expect_data_frame(tail(tab, 40), tail(tbl, 40))
+ expect_data_frame(tail(tab, -4), tail(tbl, -4))
+ expect_data_frame(tail(tab, -40), tail(tbl, -40))
+})
+
+test_that("Table print method", {
+ expect_output(
+ print(tab),
+ paste(
+ "Table",
+ "10 rows x 5 columns",
+ "$int <int32>",
+ "$dbl <double>",
+ "$lgl <bool>",
+ "$chr <string>",
+ "$fct <dictionary<values=string, indices=int8>>",
+ sep = "\n"
+ ),
+ fixed = TRUE
+ )
+})
+
+test_that("table active bindings", {
+ tbl <- tibble::tibble(
+ int = 1:10,
+ dbl = as.numeric(1:10),
+ lgl = sample(c(TRUE, FALSE, NA), 10, replace = TRUE),
+ chr = letters[1:10],
+ fct = factor(letters[1:10])
+ )
+ tab <- Table$create(tbl)
+
+ expect_identical(dim(tbl), dim(tab))
+ expect_type(tab$columns, "list")
+ expect_equal(tab$columns[[1]], tab[[1]])
+})
+
+test_that("table() handles record batches with splicing", {
+ batch <- record_batch(x = 1:2, y = letters[1:2])
+ tab <- Table$create(batch, batch, batch)
+ expect_equal(tab$schema, batch$schema)
+ expect_equal(tab$num_rows, 6L)
+ expect_equal(
+ as.data.frame(tab),
+ vctrs::vec_rbind(as.data.frame(batch), as.data.frame(batch), as.data.frame(batch))
+ )
+
+ batches <- list(batch, batch, batch)
+ tab <- Table$create(!!!batches)
+ expect_equal(tab$schema, batch$schema)
+ expect_equal(tab$num_rows, 6L)
+ expect_equal(
+ as.data.frame(tab),
+ vctrs::vec_rbind(!!!purrr::map(batches, as.data.frame))
+ )
+})
+
+test_that("table() handles ... of arrays, chunked arrays, vectors", {
+ a <- Array$create(1:10)
+ ca <- chunked_array(1:5, 6:10)
+ v <- rnorm(10)
+ tbl <- tibble::tibble(x = 1:10, y = letters[1:10])
+
+ tab <- Table$create(a = a, b = ca, c = v, !!!tbl)
+ expect_equal(
+ tab$schema,
+ schema(a = int32(), b = int32(), c = float64(), x = int32(), y = utf8())
+ )
+ res <- as.data.frame(tab)
+ expect_equal(names(res), c("a", "b", "c", "x", "y"))
+ expect_equal(
+ res,
+ tibble::tibble(a = 1:10, b = 1:10, c = v, x = 1:10, y = letters[1:10])
+ )
+})
+
+test_that("table() auto splices (ARROW-5718)", {
+ df <- tibble::tibble(x = 1:10, y = letters[1:10])
+
+ tab1 <- Table$create(df)
+ tab2 <- Table$create(!!!df)
+ expect_equal(tab1, tab2)
+ expect_equal(tab1$schema, schema(x = int32(), y = utf8()))
+ expect_equal(as.data.frame(tab1), df)
+
+ s <- schema(x = float64(), y = utf8())
+ tab3 <- Table$create(df, schema = s)
+ tab4 <- Table$create(!!!df, schema = s)
+ expect_equal(tab3, tab4)
+ expect_equal(tab3$schema, s)
+ expect_equal(as.data.frame(tab3), df)
+})
+
+test_that("Validation when creating table with schema (ARROW-10953)", {
+ expect_error(
+ Table$create(data.frame(), schema = schema(a = int32())),
+ "incompatible. schema has 1 fields, and 0 columns are supplied",
+ fixed = TRUE
+ )
+ expect_error(
+ Table$create(data.frame(b = 1), schema = schema(a = int32())),
+ "field at index 1 has name 'a' != 'b'",
+ fixed = TRUE
+ )
+ expect_error(
+ Table$create(data.frame(b = 2, c = 3), schema = schema(a = int32())),
+ "incompatible. schema has 1 fields, and 2 columns are supplied",
+ fixed = TRUE
+ )
+})
+
+test_that("==.Table", {
+ tab1 <- Table$create(x = 1:2, y = c("a", "b"))
+ tab2 <- Table$create(x = 1:2, y = c("a", "b"))
+ tab3 <- Table$create(x = 1:2)
+ tab4 <- Table$create(x = 1:2, y = c("a", "b"), z = 3:4)
+
+ expect_true(tab1 == tab2)
+ expect_true(tab2 == tab1)
+
+ expect_false(tab1 == tab3)
+ expect_false(tab3 == tab1)
+
+ expect_false(tab1 == tab4)
+ expect_false(tab4 == tab1)
+
+ expect_true(all.equal(tab1, tab2))
+ expect_equal(tab1, tab2)
+})
+
+test_that("Table$Equals(check_metadata)", {
+ tab1 <- Table$create(x = 1:2, y = c("a", "b"))
+ tab2 <- Table$create(
+ x = 1:2, y = c("a", "b"),
+ schema = tab1$schema$WithMetadata(list(some = "metadata"))
+ )
+
+ expect_r6_class(tab1, "Table")
+ expect_r6_class(tab2, "Table")
+ expect_false(tab1$schema$HasMetadata)
+ expect_true(tab2$schema$HasMetadata)
+ expect_identical(tab2$schema$metadata, list(some = "metadata"))
+
+ expect_true(tab1 == tab2)
+ expect_true(tab1$Equals(tab2))
+ expect_false(tab1$Equals(tab2, check_metadata = TRUE))
+
+ expect_failure(expect_equal(tab1, tab2)) # expect_equal has check_metadata=TRUE
+ expect_equal(tab1, tab2, ignore_attr = TRUE) # this sets check_metadata=FALSE
+
+ expect_false(tab1$Equals(24)) # Not a Table
+})
+
+test_that("Table handles null type (ARROW-7064)", {
+ tab <- Table$create(a = 1:10, n = vctrs::unspecified(10))
+ expect_equal(tab$schema, schema(a = int32(), n = null()), ignore_attr = TRUE)
+})
+
+test_that("Can create table with specific dictionary types", {
+ fact <- example_data[, "fct"]
+ int_types <- c(int8(), int16(), int32(), int64())
+ # TODO: test uint types when format allows
+ # uint_types <- c(uint8(), uint16(), uint32(), uint64()) # nolint
+ for (i in int_types) {
+ sch <- schema(fct = dictionary(i, utf8()))
+ tab <- Table$create(fact, schema = sch)
+ expect_equal(sch, tab$schema)
+ if (i != int64()) {
+ # TODO: apply the same downcast to int32 as we do for the int64() type elsewhere
+ expect_identical(as.data.frame(tab), fact)
+ }
+ }
+})
+
+test_that("Table unifies dictionary on conversion back to R (ARROW-8374)", {
+ b1 <- record_batch(f = factor(c("a"), levels = c("a", "b")))
+ b2 <- record_batch(f = factor(c("c"), levels = c("c", "d")))
+ b3 <- record_batch(f = factor(NA, levels = "a"))
+ b4 <- record_batch(f = factor())
+
+ res <- tibble::tibble(f = factor(c("a", "c", NA), levels = c("a", "b", "c", "d")))
+ tab <- Table$create(b1, b2, b3, b4)
+
+ expect_identical(as.data.frame(tab), res)
+})
+
+test_that("Table$SelectColumns()", {
+ tab <- Table$create(x = 1:10, y = 1:10)
+
+ expect_equal(tab$SelectColumns(0L), Table$create(x = 1:10))
+
+ expect_error(tab$SelectColumns(2:4))
+ expect_error(tab$SelectColumns(""))
+})
+
+test_that("Table name assignment", {
+ tab <- Table$create(x = 1:10, y = 1:10)
+ expect_identical(names(tab), c("x", "y"))
+ names(tab) <- c("a", "b")
+ expect_identical(names(tab), c("a", "b"))
+ expect_error(names(tab) <- "f")
+ expect_error(names(tab) <- letters)
+ expect_error(names(tab) <- character(0))
+ expect_error(names(tab) <- NULL)
+ expect_error(names(tab) <- c(TRUE, FALSE))
+})
+
+test_that("Table$create() with different length columns", {
+ msg <- "All columns must have the same length"
+ expect_error(Table$create(a = 1:5, b = 1:6), msg)
+})
+
+test_that("Table$create() scalar recycling with vectors", {
+ expect_data_frame(
+ Table$create(a = 1:10, b = 5),
+ tibble::tibble(a = 1:10, b = 5)
+ )
+})
+
+test_that("Table$create() scalar recycling with Scalars, Arrays, and ChunkedArrays", {
+ expect_data_frame(
+ Table$create(a = Array$create(1:10), b = Scalar$create(5)),
+ tibble::tibble(a = 1:10, b = 5)
+ )
+
+ expect_data_frame(
+ Table$create(a = Array$create(1:10), b = Array$create(5)),
+ tibble::tibble(a = 1:10, b = 5)
+ )
+
+ expect_data_frame(
+ Table$create(a = Array$create(1:10), b = ChunkedArray$create(5)),
+ tibble::tibble(a = 1:10, b = 5)
+ )
+})
+
+test_that("Table$create() no recycling with tibbles", {
+ expect_error(
+ Table$create(
+ tibble::tibble(a = 1:10, b = 5),
+ tibble::tibble(a = 1, b = 5)
+ ),
+ regexp = "All input tibbles or data.frames must have the same number of rows"
+ )
+
+ expect_error(
+ Table$create(
+ tibble::tibble(a = 1:10, b = 5),
+ tibble::tibble(a = 1)
+ ),
+ regexp = "All input tibbles or data.frames must have the same number of rows"
+ )
+})
+
+test_that("ARROW-11769 - grouping preserved in table creation", {
+ skip_if_not_available("dataset")
+
+ tbl <- tibble::tibble(
+ int = 1:10,
+ fct = factor(rep(c("A", "B"), 5)),
+ fct2 = factor(rep(c("C", "D"), each = 5)),
+ )
+
+ expect_identical(
+ tbl %>%
+ dplyr::group_by(fct, fct2) %>%
+ Table$create() %>%
+ dplyr::group_vars(),
+ c("fct", "fct2")
+ )
+})
+
+test_that("ARROW-12729 - length returns number of columns in Table", {
+ tbl <- tibble::tibble(
+ int = 1:10,
+ fct = factor(rep(c("A", "B"), 5)),
+ fct2 = factor(rep(c("C", "D"), each = 5)),
+ )
+
+ tab <- Table$create(!!!tbl)
+
+ expect_identical(length(tab), 3L)
+})
diff --git a/src/arrow/r/tests/testthat/test-altrep.R b/src/arrow/r/tests/testthat/test-altrep.R
new file mode 100644
index 000000000..dff369438
--- /dev/null
+++ b/src/arrow/r/tests/testthat/test-altrep.R
@@ -0,0 +1,243 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+skip_if_r_version("3.5.0")
+
+test_that("is_arrow_altrep() does not include base altrep", {
+ expect_false(is_arrow_altrep(1:10))
+})
+
+test_that("altrep vectors from int32 and dbl arrays with no nulls", {
+ withr::local_options(list(arrow.use_altrep = TRUE))
+ v_int <- Array$create(1:1000)
+ v_dbl <- Array$create(as.numeric(1:1000))
+ c_int <- ChunkedArray$create(1:1000)
+ c_dbl <- ChunkedArray$create(as.numeric(1:1000))
+
+ expect_true(is_arrow_altrep(as.vector(v_int)))
+ expect_true(is_arrow_altrep(as.vector(v_int$Slice(1))))
+ expect_true(is_arrow_altrep(as.vector(v_dbl)))
+ expect_true(is_arrow_altrep(as.vector(v_dbl$Slice(1))))
+
+ expect_equal(c_int$num_chunks, 1L)
+ expect_true(is_arrow_altrep(as.vector(c_int)))
+ expect_true(is_arrow_altrep(as.vector(c_int$Slice(1))))
+
+ expect_equal(c_dbl$num_chunks, 1L)
+ expect_true(is_arrow_altrep(as.vector(c_dbl)))
+ expect_true(is_arrow_altrep(as.vector(c_dbl$Slice(1))))
+
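+ # leaving the option unset should behave like the default (altrep enabled)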
+ withr::local_options(list(arrow.use_altrep = NULL))
+ expect_true(is_arrow_altrep(as.vector(v_int)))
+ expect_true(is_arrow_altrep(as.vector(v_int$Slice(1))))
+ expect_true(is_arrow_altrep(as.vector(v_dbl)))
+ expect_true(is_arrow_altrep(as.vector(v_dbl$Slice(1))))
+
+ withr::local_options(list(arrow.use_altrep = FALSE))
+ expect_false(is_arrow_altrep(as.vector(v_int)))
+ expect_false(is_arrow_altrep(as.vector(v_int$Slice(1))))
+ expect_false(is_arrow_altrep(as.vector(v_dbl)))
+ expect_false(is_arrow_altrep(as.vector(v_dbl$Slice(1))))
+})
+
+test_that("altrep vectors from int32 and dbl arrays with nulls", {
+ withr::local_options(list(arrow.use_altrep = TRUE))
+ v_int <- Array$create(c(1L, NA, 3L))
+ v_dbl <- Array$create(c(1, NA, 3))
+ c_int <- ChunkedArray$create(c(1L, NA, 3L))
+ c_dbl <- ChunkedArray$create(c(1, NA, 3))
+
+ expect_true(is_arrow_altrep(as.vector(v_int)))
+ expect_true(is_arrow_altrep(as.vector(v_int$Slice(1))))
+ expect_true(is_arrow_altrep(as.vector(v_dbl)))
+ expect_true(is_arrow_altrep(as.vector(v_dbl$Slice(1))))
+ expect_true(is_arrow_altrep(as.vector(c_int)))
+ expect_true(is_arrow_altrep(as.vector(c_int$Slice(1))))
+ expect_true(is_arrow_altrep(as.vector(c_dbl)))
+ expect_true(is_arrow_altrep(as.vector(c_dbl$Slice(1))))
+
+ expect_true(is_arrow_altrep(as.vector(v_int$Slice(2))))
+ expect_true(is_arrow_altrep(as.vector(v_dbl$Slice(2))))
+ expect_true(is_arrow_altrep(as.vector(c_int$Slice(2))))
+ expect_true(is_arrow_altrep(as.vector(c_dbl$Slice(2))))
+
+ # chunked array with 2 chunks cannot be altrep
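+  # (an ALTREP vector wraps a single Arrow array, so multiple chunks have to
+  # be materialized into an ordinary R vector)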
+ c_int <- ChunkedArray$create(0L, c(1L, NA, 3L))
+ c_dbl <- ChunkedArray$create(0, c(1, NA, 3))
+ expect_equal(c_int$num_chunks, 2L)
+ expect_equal(c_dbl$num_chunks, 2L)
+
+ expect_false(is_arrow_altrep(as.vector(c_int)))
+ expect_false(is_arrow_altrep(as.vector(c_dbl)))
+ expect_true(is_arrow_altrep(as.vector(c_int$Slice(3))))
+ expect_true(is_arrow_altrep(as.vector(c_dbl$Slice(3))))
+})
+
+test_that("empty vectors are not altrep", {
+ withr::local_options(list(arrow.use_altrep = TRUE))
+ v_int <- Array$create(integer())
+ v_dbl <- Array$create(numeric())
+
+ expect_false(is_arrow_altrep(as.vector(v_int)))
+ expect_false(is_arrow_altrep(as.vector(v_dbl)))
+})
+
+test_that("as.data.frame(<Table>, <RecordBatch>) can create altrep vectors", {
+ withr::local_options(list(arrow.use_altrep = TRUE))
+
+ table <- Table$create(int = c(1L, 2L, 3L), dbl = c(1, 2, 3), str = c("un", "deux", "trois"))
+ df_table <- as.data.frame(table)
+ expect_true(is_arrow_altrep(df_table$int))
+ expect_true(is_arrow_altrep(df_table$dbl))
+ expect_true(is_arrow_altrep(df_table$str))
+
+ batch <- RecordBatch$create(int = c(1L, 2L, 3L), dbl = c(1, 2, 3), str = c("un", "deux", "trois"))
+ df_batch <- as.data.frame(batch)
+ expect_true(is_arrow_altrep(df_batch$int))
+ expect_true(is_arrow_altrep(df_batch$dbl))
+ expect_true(is_arrow_altrep(df_batch$str))
+})
+
+expect_altrep_roundtrip <- function(x, fn, ...) {
+  alt <- Array$create(x)$as_vector()
+
+  expect_true(is_arrow_altrep(alt))
+  expect_identical(fn(x, ...), fn(alt, ...))
+  expect_true(is_arrow_altrep(alt))
+}
+
+test_that("altrep min/max/sum identical to R versions for double", {
+ x <- c(1, 2, 3)
+ expect_altrep_rountrip(x, min, na.rm = TRUE)
+ expect_altrep_rountrip(x, max, na.rm = TRUE)
+ expect_altrep_rountrip(x, sum, na.rm = TRUE)
+
+ expect_altrep_rountrip(x, min)
+ expect_altrep_rountrip(x, max)
+ expect_altrep_rountrip(x, sum)
+
+ x <- c(1, 2, NA_real_)
+ expect_altrep_rountrip(x, min, na.rm = TRUE)
+ expect_altrep_rountrip(x, max, na.rm = TRUE)
+ expect_altrep_rountrip(x, sum, na.rm = TRUE)
+
+ expect_altrep_rountrip(x, min)
+ expect_altrep_rountrip(x, max)
+ expect_altrep_rountrip(x, sum)
+
+ x <- rep(NA_real_, 3)
+ expect_warning(
+ expect_altrep_rountrip(x, min, na.rm = TRUE),
+ "no non-missing arguments to min"
+ )
+ expect_warning(
+ expect_altrep_rountrip(x, max, na.rm = TRUE),
+ "no non-missing arguments to max"
+ )
+ expect_altrep_rountrip(x, sum, na.rm = TRUE)
+
+ expect_altrep_rountrip(x, min)
+ expect_altrep_rountrip(x, max)
+ expect_altrep_rountrip(x, sum)
+})
+
+test_that("altrep min/max/sum identical to R versions for int", {
+ x <- c(1L, 2L, 3L)
+ expect_altrep_rountrip(x, min, na.rm = TRUE)
+ expect_altrep_rountrip(x, max, na.rm = TRUE)
+ expect_altrep_rountrip(x, sum, na.rm = TRUE)
+
+ expect_altrep_rountrip(x, min)
+ expect_altrep_rountrip(x, max)
+ expect_altrep_rountrip(x, sum)
+
+ x <- c(1L, 2L, NA_integer_)
+ expect_altrep_rountrip(x, min, na.rm = TRUE)
+ expect_altrep_rountrip(x, max, na.rm = TRUE)
+ expect_altrep_rountrip(x, sum, na.rm = TRUE)
+
+ expect_altrep_rountrip(x, min)
+ expect_altrep_rountrip(x, max)
+ expect_altrep_rountrip(x, sum)
+
+ x <- rep(NA_integer_, 3)
+ expect_warning(
+ expect_altrep_rountrip(x, min, na.rm = TRUE),
+ "no non-missing arguments to min"
+ )
+ expect_warning(
+ expect_altrep_rountrip(x, max, na.rm = TRUE),
+ "no non-missing arguments to max"
+ )
+ expect_altrep_rountrip(x, sum, na.rm = TRUE)
+
+ expect_altrep_rountrip(x, min)
+ expect_altrep_rountrip(x, max)
+ expect_altrep_rountrip(x, sum)
+
+ # sum(x) is INT_MIN -> convert to double.
+ x <- as.integer(c(-2^31 + 1L, -1L))
+ expect_altrep_rountrip(x, sum)
+})
+
+test_that("altrep vectors handle serialization", {
+ ints <- c(1L, 2L, NA_integer_)
+ dbls <- c(1, 2, NA_real_)
+ strs <- c("un", "deux", NA_character_)
+
+ expect_identical(ints, unserialize(serialize(Array$create(ints)$as_vector(), NULL)))
+ expect_identical(dbls, unserialize(serialize(Array$create(dbls)$as_vector(), NULL)))
+ expect_identical(strs, unserialize(serialize(Array$create(strs)$as_vector(), NULL)))
+ expect_identical(strs, unserialize(serialize(Array$create(strs, large_utf8())$as_vector(), NULL)))
+})
+
+test_that("altrep vectors handle coercion", {
+ ints <- c(1L, 2L, NA_integer_)
+ dbls <- c(1, 2, NA_real_)
+ strs <- c("1", "2", NA_character_)
+
+ expect_identical(ints, as.integer(Array$create(dbls)$as_vector()))
+ expect_identical(ints, as.integer(Array$create(strs)$as_vector()))
+
+ expect_identical(dbls, as.numeric(Array$create(ints)$as_vector()))
+ expect_identical(dbls, as.numeric(Array$create(strs)$as_vector()))
+
+ expect_identical(strs, as.character(Array$create(ints)$as_vector()))
+ expect_identical(strs, as.character(Array$create(dbls)$as_vector()))
+})
+
+test_that("columns of struct types may be altrep", {
+ st <- Array$create(data.frame(x = 1:10, y = runif(10)))
+ df <- st$as_vector()
+
+ expect_true(is_arrow_altrep(df$x))
+ expect_true(is_arrow_altrep(df$y))
+})
+
+test_that("Conversion from altrep R vector to Array uses the existing Array", {
+ a_int <- Array$create(c(1L, 2L, 3L))
+ b_int <- Array$create(a_int$as_vector())
+ expect_true(test_same_Array(a_int$pointer(), b_int$pointer()))
+
+ a_dbl <- Array$create(c(1, 2, 3))
+ b_dbl <- Array$create(a_dbl$as_vector())
+ expect_true(test_same_Array(a_dbl$pointer(), b_dbl$pointer()))
+
+ a_str <- Array$create(c("un", "deux", "trois"))
+ b_str <- Array$create(a_str$as_vector())
+ expect_true(test_same_Array(a_str$pointer(), b_str$pointer()))
+})
diff --git a/src/arrow/r/tests/testthat/test-array-data.R b/src/arrow/r/tests/testthat/test-array-data.R
new file mode 100644
index 000000000..05d070d8a
--- /dev/null
+++ b/src/arrow/r/tests/testthat/test-array-data.R
@@ -0,0 +1,33 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+test_that("string vectors with only empty strings and nulls don't allocate a data buffer (ARROW-3693)", {
+ a <- Array$create("")
+ expect_equal(a$length(), 1L)
+
+ buffers <- a$data()$buffers
+
+ # No nulls
+ expect_equal(buffers[[1]], NULL)
+
+ # Offsets has 2 elements
+ expect_equal(buffers[[2]]$size, 8L)
+
+  # As per ARROW-2744, the values buffer should preferably be non-null.
+ expect_equal(buffers[[3]]$size, 0L)
+ expect_equal(buffers[[3]]$capacity, 0L)
+})
diff --git a/src/arrow/r/tests/testthat/test-arrow-info.R b/src/arrow/r/tests/testthat/test-arrow-info.R
new file mode 100644
index 000000000..9eac60814
--- /dev/null
+++ b/src/arrow/r/tests/testthat/test-arrow-info.R
@@ -0,0 +1,23 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+test_that("arrow_info()", {
+ expect_s3_class(arrow_info(), "arrow_info")
+ expect_output(print(arrow_info()), "Arrow package version")
+ options(arrow.foo = FALSE)
+ expect_output(print(arrow_info()), "arrow.foo")
+})
diff --git a/src/arrow/r/tests/testthat/test-arrow.R b/src/arrow/r/tests/testthat/test-arrow.R
new file mode 100644
index 000000000..48970ab89
--- /dev/null
+++ b/src/arrow/r/tests/testthat/test-arrow.R
@@ -0,0 +1,78 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+if (!identical(tolower(Sys.getenv("TEST_R_WITHOUT_LIBARROW")), "true")) {
+ testthat::test_that("Arrow C++ is available", {
+ skip_on_cran()
+ expect_true(arrow_available())
+ })
+}
+
+test_that("Can't $new() an object with anything other than a pointer", {
+ expect_error(
+ Array$new(1:5),
+ "Array$new() requires a pointer as input: did you mean $create() instead?",
+ fixed = TRUE
+ )
+})
+
+r_only({
+ test_that("assert_is", {
+ x <- 42
+ expect_true(assert_is(x, "numeric"))
+ expect_true(assert_is(x, c("numeric", "character")))
+ expect_error(assert_is(x, "factor"), 'x must be a "factor"')
+ expect_error(
+ assert_is(x, c("factor", "list")),
+ 'x must be a "factor" or "list"'
+ )
+ expect_error(
+ assert_is(x, c("factor", "character", "list")),
+ 'x must be a "factor", "character", or "list"'
+ )
+ })
+})
+
+test_that("arrow gracefully fails to load objects from other sessions (ARROW-10071)", {
+ a <- Array$create(1:10)
+ tf <- tempfile()
+ on.exit(unlink(tf))
+ saveRDS(a, tf)
+
+ b <- readRDS(tf)
+ expect_error(b$length(), "Invalid <Array>")
+})
+
+test_that("check for an ArrowObject in functions use std::shared_ptr", {
+ expect_error(Array__length(1), "Invalid R object")
+})
+
+test_that("MemoryPool calls gc() to free memory when allocation fails (ARROW-10080)", {
+  # This test produces a valgrind error because the allocation fails, which
+  # is exactly what the test is checking, so we skip it under valgrind
+ skip_on_valgrind()
+
+ env <- new.env()
+ suppressMessages(trace(gc, print = FALSE, tracer = function() {
+ env$gc_was_called <- TRUE
+ }))
+ on.exit(suppressMessages(untrace(gc)))
+ # We expect this should fail because we don't have this much memory,
+ # but it should gc() and retry (and fail again)
+ expect_error(BufferOutputStream$create(2**60))
+ expect_true(env$gc_was_called)
+})
diff --git a/src/arrow/r/tests/testthat/test-backwards-compatibility.R b/src/arrow/r/tests/testthat/test-backwards-compatibility.R
new file mode 100644
index 000000000..32e86d5f6
--- /dev/null
+++ b/src/arrow/r/tests/testthat/test-backwards-compatibility.R
@@ -0,0 +1,121 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# nolint start
+# To write a new version of a test file for a current version:
+# write_parquet(example_with_metadata, test_path("golden-files/data-arrow_2.0.0.parquet"))
+
+# To write a new version of a test file for an old version, use docker(-compose)
+# to set up a Linux distribution and use RStudio's public package manager binary
+# repo to install the old version. The following commands should be run at the
+# root of the arrow repo directory and might need slight adjustments.
+# R_ORG=rstudio R_IMAGE=r-base R_TAG=4.0-focal docker-compose build --no-cache r
+# R_ORG=rstudio R_IMAGE=r-base R_TAG=4.0-focal docker-compose run r /bin/bash
+# R
+# options(repos = "https://packagemanager.rstudio.com/all/__linux__/focal/latest")
+# remotes::install_version("arrow", version = "1.0.1")
+# # get example data into the global env
+# write_parquet(example_with_metadata, "arrow/r/tests/testthat/golden-files/data-arrow_1.0.1.parquet")
+# quit()/exit
+# nolint end
+
+skip_if(getRversion() < "3.5.0", "The serialization format changed in 3.5")
+
+expect_identical_with_metadata <- function(object, expected, ..., top_level = TRUE) {
+ attrs_to_keep <- c("names", "class", "row.names")
+ if (!top_level) {
+    # remove attributes other than the standard tbl/data.frame ones
+ for (attribute in names(attributes(expected))) {
+ if (attribute %in% attrs_to_keep) next
+ attributes(expected)[[attribute]] <- NULL
+ }
+ }
+ expect_identical(object, expected, ...)
+}
+
+test_that("reading a known Parquet file to dataframe with 3.0.0", {
+ skip_if_not_available("parquet")
+ skip_if_not_available("snappy")
+ pq_file <- test_path("golden-files/data-arrow-extra-meta_3.0.0.parquet")
+
+ df <- read_parquet(pq_file)
+ # this is equivalent to `expect_identical()`
+ expect_identical_with_metadata(df, example_with_extra_metadata)
+})
+
+test_that("reading a known Parquet file to dataframe with 2.0.0", {
+ skip_if_not_available("parquet")
+ skip_if_not_available("snappy")
+ pq_file <- test_path("golden-files/data-arrow_2.0.0.parquet")
+
+ df <- read_parquet(pq_file)
+ # this is equivalent to `expect_identical()`
+ expect_identical_with_metadata(df, example_with_metadata)
+})
+
+test_that("reading a known Parquet file to dataframe with 1.0.1", {
+ skip_if_not_available("parquet")
+ skip_if_not_available("snappy")
+ pq_file <- test_path("golden-files/data-arrow_1.0.1.parquet")
+
+ df <- read_parquet(pq_file)
+ # 1.0.1 didn't save top-level metadata, so we need to remove it.
+ expect_identical_with_metadata(df, example_with_metadata, top_level = FALSE)
+})
+
+for (comp in c("lz4", "uncompressed", "zstd")) {
+ # nolint start
+ # write_feather(example_with_metadata, test_path("golden-files/data-arrow_2.0.0_lz4.feather"), compression = "lz4")
+ # write_feather(example_with_metadata, test_path("golden-files/data-arrow_2.0.0_uncompressed.feather"), compression = "uncompressed")
+ # write_feather(example_with_metadata, test_path("golden-files/data-arrow_2.0.0_zstd.feather"), compression = "zstd")
+ # nolint end
+ test_that("reading a known Feather file to dataframe with 2.0.0", {
+ skip_if_not_available("parquet")
+ skip_if_not_available(comp)
+ feather_file <- test_path(paste0("golden-files/data-arrow_2.0.0_", comp, ".feather"))
+
+ df <- read_feather(feather_file)
+ expect_identical_with_metadata(df, example_with_metadata)
+ })
+
+ test_that("reading a known Feather file to dataframe with 1.0.1", {
+ skip_if_not_available("parquet")
+ skip_if_not_available(comp)
+ feather_file <- test_path(paste0("golden-files/data-arrow_1.0.1_", comp, ".feather"))
+
+ df <- read_feather(feather_file)
+ # 1.0.1 didn't save top-level metadata, so we need to remove it.
+ expect_identical_with_metadata(df, example_with_metadata, top_level = FALSE)
+ })
+
+ test_that("reading a known Feather file to dataframe with 0.17.0", {
+ skip_if_not_available("parquet")
+ skip_if_not_available(comp)
+ feather_file <- test_path(paste0("golden-files/data-arrow_0.17.0_", comp, ".feather"))
+
+ df <- read_feather(feather_file)
+    # The metadata from 0.17.0 lacks the top level, the special class is not
+    # maintained, and the embedded tibble's attributes are read in the wrong
+    # order. Since this predates 1.0.0 we punt on checking the attributes;
+    # classes are always checked, so the special class must be removed first.
+ example_with_metadata_sans_special_class <- example_with_metadata
+ example_with_metadata_sans_special_class$a <- unclass(example_with_metadata_sans_special_class$a)
+ expect_equal(df, example_with_metadata_sans_special_class, ignore_attr = TRUE)
+ })
+}
+
+# TODO: streams(?)
diff --git a/src/arrow/r/tests/testthat/test-buffer-reader.R b/src/arrow/r/tests/testthat/test-buffer-reader.R
new file mode 100644
index 000000000..b790ed0da
--- /dev/null
+++ b/src/arrow/r/tests/testthat/test-buffer-reader.R
@@ -0,0 +1,38 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+test_that("BufferReader can be created from R objects", {
+ num <- BufferReader$create(numeric(13))
+ int <- BufferReader$create(integer(13))
+ raw <- BufferReader$create(raw(16))
+
+ expect_r6_class(num, "BufferReader")
+ expect_r6_class(int, "BufferReader")
+ expect_r6_class(raw, "BufferReader")
+
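+  # GetSize() is in bytes: 8 bytes per double, 4 per integer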
+ expect_equal(num$GetSize(), 13 * 8)
+ expect_equal(int$GetSize(), 13 * 4)
+ expect_equal(raw$GetSize(), 16)
+})
+
+test_that("BufferReader can be created from Buffer", {
+ buf <- buffer(raw(76))
+ reader <- BufferReader$create(buf)
+
+ expect_r6_class(reader, "BufferReader")
+ expect_equal(reader$GetSize(), 76)
+})
diff --git a/src/arrow/r/tests/testthat/test-buffer.R b/src/arrow/r/tests/testthat/test-buffer.R
new file mode 100644
index 000000000..9b3ebc6de
--- /dev/null
+++ b/src/arrow/r/tests/testthat/test-buffer.R
@@ -0,0 +1,97 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+test_that("Buffer can be created from raw vector", {
+ vec <- raw(123)
+ buf <- buffer(vec)
+ expect_r6_class(buf, "Buffer")
+ expect_equal(buf$size, 123)
+})
+
+test_that("Buffer can be created from integer vector", {
+ vec <- integer(17)
+ buf <- buffer(vec)
+ expect_r6_class(buf, "Buffer")
+ expect_equal(buf$size, 17 * 4)
+})
+
+test_that("Buffer can be created from numeric vector", {
+ vec <- numeric(17)
+ buf <- buffer(vec)
+ expect_r6_class(buf, "Buffer")
+ expect_equal(buf$size, 17 * 8)
+})
+
+test_that("Buffer can be created from complex vector", {
+ vec <- complex(3)
+ buf <- buffer(vec)
+ expect_r6_class(buf, "Buffer")
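+  # a complex value holds two doubles, so 16 bytes each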
+ expect_equal(buf$size, 3 * 16)
+})
+
+test_that("buffer buffer buffers buffers", {
+ expect_r6_class(buffer(buffer(42)), "Buffer")
+})
+
+test_that("Other types can't be converted to Buffers", {
+ expect_error(
+ buffer(data.frame(a = "asdf")),
+ "Cannot convert object of class data.frame to arrow::Buffer"
+ )
+})
+
+test_that("can convert Buffer to raw", {
+ buf <- buffer(rnorm(10))
+ expect_equal(buf$data(), as.raw(buf))
+})
+
+test_that("can read remaining bytes of a RandomAccessFile", {
+ tbl <- tibble::tibble(
+ int = 1:10, dbl = as.numeric(1:10),
+ lgl = sample(c(TRUE, FALSE, NA), 10, replace = TRUE),
+ chr = letters[1:10]
+ )
+ tab <- Table$create(!!!tbl)
+
+ tf <- tempfile()
+ all_bytes <- write_feather(tab, tf)
+
+ file <- ReadableFile$create(tf)
+ expect_equal(file$tell(), 0)
+ x <- file$Read(20)$data()
+ expect_equal(file$tell(), 20)
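+  # Read() with no size argument reads all remaining bytes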
+ y <- file$Read()$data()
+
+ file <- ReadableFile$create(tf)
+ z <- file$Read()$data()
+
+ file <- ReadableFile$create(tf)
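+  # ReadAt(20) reads from byte offset 20 through the end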
+ a <- file$ReadAt(20)$data()
+
+ expect_equal(file$GetSize(), length(x) + length(y))
+ expect_equal(z, c(x, y))
+ expect_equal(a, y)
+})
+
+test_that("Buffer$Equals", {
+ vec <- integer(17)
+ buf1 <- buffer(vec)
+ buf2 <- buffer(vec)
+ expect_equal(buf1, buf2)
+ expect_true(buf1$Equals(buf2))
+ expect_false(buf1$Equals(vec))
+})
diff --git a/src/arrow/r/tests/testthat/test-chunked-array.R b/src/arrow/r/tests/testthat/test-chunked-array.R
new file mode 100644
index 000000000..c931ddec5
--- /dev/null
+++ b/src/arrow/r/tests/testthat/test-chunked-array.R
@@ -0,0 +1,468 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+expect_chunked_roundtrip <- function(x, type) {
+ a <- ChunkedArray$create(!!!x)
+ flat_x <- unlist(x, recursive = FALSE)
+ attributes(flat_x) <- attributes(x[[1]])
+ expect_type_equal(a$type, type)
+ expect_identical(a$num_chunks, length(x))
+ expect_identical(length(a), length(flat_x))
+ if (!inherits(type, "ListType")) {
+ # TODO: revisit how missingness works with ListArrays
+ # R list objects don't handle missingness the same way as other vectors.
+ # Is there some vctrs thing we should do on the roundtrip back to R?
+ expect_identical(as.vector(is.na(a)), is.na(flat_x))
+ }
+ expect_as_vector(a, flat_x)
+ expect_as_vector(a$chunk(0), x[[1]])
+
+ if (length(flat_x)) {
+ a_sliced <- a$Slice(1)
+ x_sliced <- flat_x[-1]
+ expect_type_equal(a_sliced$type, type)
+ expect_identical(length(a_sliced), length(x_sliced))
+ if (!inherits(type, "ListType")) {
+ expect_identical(as.vector(is.na(a_sliced)), is.na(x_sliced))
+ }
+ expect_as_vector(a_sliced, x_sliced)
+ }
+ invisible(a)
+}
+
+test_that("ChunkedArray", {
+ x <- expect_chunked_roundtrip(list(1:10, 1:10, 1:5), int32())
+
+ y <- x$Slice(8)
+ expect_equal(y$type, int32())
+ expect_equal(y$num_chunks, 3L)
+ expect_equal(length(y), 17L)
+ expect_as_vector(y, c(9:10, 1:10, 1:5))
+
+ z <- x$Slice(8, 5)
+ expect_equal(z$type, int32())
+ expect_equal(z$num_chunks, 2L)
+ expect_equal(z$length(), 5L)
+ expect_equal(z$as_vector(), c(9:10, 1:3))
+
+ expect_chunked_roundtrip(list(c(1, 2, 3), c(4, 5, 6)), float64())
+
+ # input validation
+ expect_error(x$chunk(14), "subscript out of bounds")
+ expect_error(x$chunk("one"))
+ expect_error(x$chunk(NA_integer_), "'i' cannot be NA")
+ expect_error(x$chunk(-1), "subscript out of bounds")
+
+ expect_error(x$Slice("ten"))
+ expect_error(x$Slice(NA_integer_), "Slice 'offset' cannot be NA")
+ expect_error(x$Slice(NA), "Slice 'offset' cannot be NA")
+ expect_error(x$Slice(10, "ten"))
+ expect_error(x$Slice(10, NA_integer_), "Slice 'length' cannot be NA")
+ expect_error(x$Slice(NA_integer_, NA_integer_), "Slice 'offset' cannot be NA")
+ expect_error(x$Slice(c(10, 10)))
+ expect_error(x$Slice(10, c(10, 10)))
+ expect_error(x$Slice(1000), "Slice 'offset' greater than array length")
+ expect_error(x$Slice(-1), "Slice 'offset' cannot be negative")
+ expect_error(z$Slice(10, 10), "Slice 'offset' greater than array length")
+ expect_error(x$Slice(10, -1), "Slice 'length' cannot be negative")
+ expect_error(x$Slice(-1, 10), "Slice 'offset' cannot be negative")
+
+ expect_warning(x$Slice(10, 15), NA)
+ expect_warning(
+ overslice <- x$Slice(10, 16),
+ "Slice 'length' greater than available length"
+ )
+ expect_equal(length(overslice), 15)
+ expect_warning(z$Slice(2, 10), "Slice 'length' greater than available length")
+})
+
+test_that("print ChunkedArray", {
+ verify_output(test_path("test-chunked-array.txt"), {
+ chunked_array(c(1, 2, 3), c(4, 5, 6))
+ chunked_array(1:30, c(4, 5, 6))
+ chunked_array(1:30)
+ chunked_array(factor(c("a", "b")), factor(c("c", "d")))
+ })
+})
+
+test_that("ChunkedArray handles !!! splicing", {
+ data <- list(1, 2, 3)
+ x <- chunked_array(!!!data)
+ expect_equal(x$type, float64())
+ expect_equal(x$num_chunks, 3L)
+})
+
+test_that("ChunkedArray handles Inf", {
+ data <- list(c(Inf, 2:10), c(1:3, Inf, 5L), 1:10)
+ x <- chunked_array(!!!data)
+ expect_equal(x$type, float64())
+ expect_equal(x$num_chunks, 3L)
+ expect_equal(length(x), 25L)
+ expect_as_vector(x, c(c(Inf, 2:10), c(1:3, Inf, 5), 1:10))
+
+ chunks <- x$chunks
+ expect_as_vector(is.infinite(chunks[[2]]), is.infinite(data[[2]]))
+ expect_equal(
+ as.vector(is.infinite(x)),
+ c(is.infinite(data[[1]]), is.infinite(data[[2]]), is.infinite(data[[3]]))
+ )
+})
+
+test_that("ChunkedArray handles NA", {
+ data <- list(1:10, c(NA, 2:10), c(1:3, NA, 5L))
+ x <- chunked_array(!!!data)
+ expect_equal(x$type, int32())
+ expect_equal(x$num_chunks, 3L)
+ expect_equal(length(x), 25L)
+ expect_as_vector(x, c(1:10, c(NA, 2:10), c(1:3, NA, 5)))
+
+ chunks <- x$chunks
+ expect_as_vector(is.na(chunks[[2]]), is.na(data[[2]]))
+ expect_as_vector(is.na(x), c(is.na(data[[1]]), is.na(data[[2]]), is.na(data[[3]])))
+})
+
+test_that("ChunkedArray handles NaN", {
+ data <- list(as.numeric(1:10), c(NaN, 2:10), c(1:3, NaN, 5L))
+ x <- chunked_array(!!!data)
+
+ expect_equal(x$type, float64())
+ expect_equal(x$num_chunks, 3L)
+ expect_equal(length(x), 25L)
+ expect_as_vector(x, c(1:10, c(NaN, 2:10), c(1:3, NaN, 5)))
+
+ chunks <- x$chunks
+ expect_as_vector(is.nan(chunks[[2]]), is.nan(data[[2]]))
+ expect_as_vector(is.nan(x), c(is.nan(data[[1]]), is.nan(data[[2]]), is.nan(data[[3]])))
+})
+
+test_that("ChunkedArray supports logical vectors (ARROW-3341)", {
+ # with NA
+ data <- purrr::rerun(3, sample(c(TRUE, FALSE, NA), 100, replace = TRUE))
+ expect_chunked_roundtrip(data, bool())
+ # without NA
+ data <- purrr::rerun(3, sample(c(TRUE, FALSE), 100, replace = TRUE))
+ expect_chunked_roundtrip(data, bool())
+})
+
+test_that("ChunkedArray supports character vectors (ARROW-3339)", {
+ data <- list(
+ c("itsy", NA, "spider"),
+ c("Climbed", "up", "the", "water", "spout"),
+ c("Down", "came", "the", "rain"),
+ "And washed the spider out. "
+ )
+ expect_chunked_roundtrip(data, utf8())
+})
+
+test_that("ChunkedArray supports factors (ARROW-3716)", {
+ f <- factor(c("itsy", "bitsy", "spider", "spider"))
+ expect_chunked_roundtrip(list(f, f, f), dictionary(int8()))
+})
+
+test_that("ChunkedArray supports dates (ARROW-3716)", {
+ d <- Sys.Date() + 1:10
+ expect_chunked_roundtrip(list(d, d), date32())
+})
+
+test_that("ChunkedArray supports POSIXct (ARROW-3716)", {
+ times <- lubridate::ymd_hms("2018-10-07 19:04:05") + 1:10
+ expect_chunked_roundtrip(list(times, times), timestamp("us", "UTC"))
+})
+
+test_that("ChunkedArray supports integer64 (ARROW-3716)", {
+ x <- bit64::as.integer64(1:10) + MAX_INT
+ expect_chunked_roundtrip(list(x, x), int64())
+ # Also with a first chunk that would downcast
+ zero <- Array$create(0L)$cast(int64())
+ expect_type_equal(zero, int64())
+ ca <- ChunkedArray$create(zero, x)
+ expect_type_equal(ca, int64())
+ expect_s3_class(as.vector(ca), "integer64")
+ expect_identical(as.vector(ca), c(bit64::as.integer64(0L), x))
+})
+
+test_that("ChunkedArray supports difftime", {
+ time <- hms::hms(56, 34, 12)
+ expect_chunked_roundtrip(list(time, time), time32("s"))
+})
+
+test_that("ChunkedArray supports empty arrays (ARROW-13761)", {
+ types <- c(
+ int8(), int16(), int32(), int64(), uint8(), uint16(), uint32(),
+ uint64(), float32(), float64(), timestamp("ns"), binary(),
+ large_binary(), fixed_size_binary(32), date32(), date64(),
+ decimal(4, 2), dictionary(), struct(x = int32())
+ )
+
+ empty_filter <- ChunkedArray$create(type = bool())
+ for (type in types) {
+ one_empty_chunk <- ChunkedArray$create(type = type)
+ expect_type_equal(one_empty_chunk$type, type)
+ if (type != struct(x = int32())) {
+ expect_identical(length(one_empty_chunk), length(as.vector(one_empty_chunk)))
+ } else {
+ # struct -> tbl and length(tbl) is num_columns instead of num_rows
+ expect_identical(length(as.vector(one_empty_chunk)), 1L)
+ }
+ zero_empty_chunks <- one_empty_chunk$Filter(empty_filter)
+ expect_equal(zero_empty_chunks$num_chunks, 0)
+ expect_type_equal(zero_empty_chunks$type, type)
+ if (type != struct(x = int32())) {
+ expect_identical(length(zero_empty_chunks), length(as.vector(zero_empty_chunks)))
+ } else {
+ expect_identical(length(as.vector(zero_empty_chunks)), 1L)
+ }
+ }
+})
+
+test_that("integer types casts for ChunkedArray (ARROW-3741)", {
+ int_types <- c(int8(), int16(), int32(), int64())
+ uint_types <- c(uint8(), uint16(), uint32(), uint64())
+ float_types <- c(float32(), float64()) # float16() not really supported in C++ yet
+ all_types <- c(
+ int_types,
+ uint_types,
+ float_types
+ )
+
+ a <- chunked_array(1:10, 1:10)
+ for (type in c(int_types, uint_types)) {
+ casted <- a$cast(type)
+ expect_r6_class(casted, "ChunkedArray")
+ expect_type_equal(casted$type, type)
+ }
+  # Also test casting with double(): not actually an Arrow type but a base R
+  # function, which should be an alias for float64
+ dbl <- a$cast(double())
+ expect_r6_class(dbl, "ChunkedArray")
+ expect_type_equal(dbl$type, float64())
+})
+
+test_that("chunked_array() supports the type= argument. conversion from INTSXP and int64 to all int types", {
+ num_int32 <- 12L
+ num_int64 <- bit64::as.integer64(10)
+ for (type in all_types) {
+ expect_type_equal(chunked_array(num_int32, type = type)$type, type)
+ expect_type_equal(chunked_array(num_int64, type = type)$type, type)
+ }
+ # also test creating with double() "type"
+ expect_type_equal(chunked_array(num_int32, type = double())$type, float64())
+})
+
+test_that("ChunkedArray$create() aborts on overflow", {
+ expect_error(chunked_array(128L, type = int8())$type)
+ expect_error(chunked_array(-129L, type = int8())$type)
+
+ expect_error(chunked_array(256L, type = uint8())$type)
+ expect_error(chunked_array(-1L, type = uint8())$type)
+
+ expect_error(chunked_array(32768L, type = int16())$type)
+ expect_error(chunked_array(-32769L, type = int16())$type)
+
+ expect_error(chunked_array(65536L, type = uint16())$type)
+ expect_error(chunked_array(-1L, type = uint16())$type)
+
+ expect_error(chunked_array(bit64::as.integer64(2^31), type = int32()))
+ expect_error(chunked_array(bit64::as.integer64(2^32), type = uint32()))
+})
+
+test_that("chunked_array() convert doubles to integers", {
+ for (type in c(int_types, uint_types)) {
+ a <- chunked_array(10, type = type)
+ expect_type_equal(a$type, type)
+ if (type != uint64()) {
+      # exception for unsigned integer 64, which
+      # we cannot handle yet
+ expect_true(as.vector(a) == 10)
+ }
+ }
+})
+
+test_that("chunked_array() uses the first ... to infer type", {
+ a <- chunked_array(10, 10L)
+ expect_type_equal(a$type, float64())
+})
+
+test_that("chunked_array() handles downcasting", {
+ a <- chunked_array(10L, 10)
+ expect_type_equal(a$type, int32())
+ expect_as_vector(a, c(10L, 10L))
+})
+
+test_that("chunked_array() makes chunks of the same type", {
+ a <- chunked_array(10L, bit64::as.integer64(13), type = int64())
+ for (chunk in a$chunks) {
+ expect_type_equal(chunk$type, int64())
+ }
+})
+
+test_that("chunked_array() handles 0 chunks if given a type", {
+ for (type in all_types) {
+ a <- chunked_array(type = type)
+ expect_type_equal(a$type, as_type(type))
+ expect_equal(length(a), 0L)
+ }
+})
+
+test_that("chunked_array() can ingest arrays (ARROW-3815)", {
+ expect_equal(
+ as.vector(chunked_array(1:5, Array$create(6:10))),
+ 1:10
+ )
+})
+
+test_that("chunked_array() handles data frame -> struct arrays (ARROW-3811)", {
+ df <- tibble::tibble(x = 1:10, y = x / 2, z = letters[1:10])
+ a <- chunked_array(df, df)
+ expect_type_equal(a$type, struct(x = int32(), y = float64(), z = utf8()))
+ expect_equal(a$as_vector(), rbind(df, df), ignore_attr = TRUE)
+})
+
+test_that("ChunkedArray$View() (ARROW-6542)", {
+ a <- ChunkedArray$create(1:3, 1:4)
+ b <- a$View(float32())
+ expect_equal(b$type, float32())
+ expect_equal(length(b), 7L)
+ expect_true(all(
+ sapply(b$chunks, function(.x) .x$type == float32())
+ ))
+ # Input validation
+ expect_error(a$View("not a type"), "type must be a DataType, not character")
+})
+
+test_that("ChunkedArray$Validate()", {
+ a <- ChunkedArray$create(1:10)
+ expect_error(a$Validate(), NA)
+})
+
+test_that("[ ChunkedArray", {
+ one_chunk <- chunked_array(2:11)
+ x <- chunked_array(1:10, 31:40, 51:55)
+ # Slice
+ expect_as_vector(x[8:12], c(8:10, 31:32))
+ # Take from same chunk
+ expect_as_vector(x[c(11, 15, 12)], c(31, 35, 32))
+ # Take from multiple chunks (calls Concatenate)
+ expect_as_vector(x[c(2, 11, 15, 12, 3)], c(2, 31, 35, 32, 3))
+ # Take with Array (note these are 0-based)
+ take1 <- Array$create(c(10L, 14L, 11L))
+ expect_as_vector(x[take1], c(31, 35, 32))
+ # Take with ChunkedArray
+ take2 <- ChunkedArray$create(c(10L, 14L), 11L)
+ expect_as_vector(x[take2], c(31, 35, 32))
+
+ # Filter (with recycling)
+ expect_as_vector(
+ one_chunk[c(FALSE, TRUE, FALSE, FALSE, TRUE)],
+ c(3, 6, 8, 11)
+ )
+ # Filter where both are 1-chunk
+ expect_as_vector(
+ one_chunk[ChunkedArray$create(rep(c(FALSE, TRUE, FALSE, FALSE, TRUE), 2))],
+ c(3, 6, 8, 11)
+ )
+ # Filter multi-chunk with logical (-> Array)
+ expect_as_vector(
+ x[c(FALSE, TRUE, FALSE, FALSE, TRUE)],
+ c(2, 5, 7, 10, 32, 35, 37, 40, 52, 55)
+ )
+ # Filter with a chunked array with different sized chunks
+ p1 <- c(FALSE, TRUE, FALSE, FALSE, TRUE)
+ p2 <- c(TRUE, FALSE, TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE)
+ filt <- ChunkedArray$create(p1, p2, p2)
+ expect_as_vector(
+ x[filt],
+ c(2, 5, 6, 8, 9, 35, 36, 38, 39, 55)
+ )
+})
+
+test_that("ChunkedArray head/tail", {
+ vec <- 11:20
+ a <- ChunkedArray$create(11:15, 16:20)
+ expect_as_vector(head(a), head(vec))
+ expect_as_vector(head(a, 4), head(vec, 4))
+ expect_as_vector(head(a, 40), head(vec, 40))
+ expect_as_vector(head(a, -4), head(vec, -4))
+ expect_as_vector(head(a, -40), head(vec, -40))
+ expect_as_vector(tail(a), tail(vec))
+ expect_as_vector(tail(a, 4), tail(vec, 4))
+ expect_as_vector(tail(a, 40), tail(vec, 40))
+ expect_as_vector(tail(a, -40), tail(vec, -40))
+})
+
+test_that("ChunkedArray$Equals", {
+ vec <- 11:20
+ a <- ChunkedArray$create(vec[1:5], vec[6:10])
+ b <- ChunkedArray$create(vec[1:5], vec[6:10])
+ expect_equal(a, b)
+ expect_true(a$Equals(b))
+ expect_false(a$Equals(vec))
+})
+
+test_that("Converting a chunked array unifies factors (ARROW-8374)", {
+ f1 <- factor(c("a"), levels = c("a", "b"))
+ f2 <- factor(c("c"), levels = c("c", "d"))
+ f3 <- factor(NA, levels = "a")
+ f4 <- factor()
+
+ res <- factor(c("a", "c", NA), levels = c("a", "b", "c", "d"))
+ ca <- ChunkedArray$create(f1, f2, f3, f4)
+
+ expect_identical(ca$as_vector(), res)
+})
+
+test_that("Handling string data with embedded nuls", {
+ raws <- structure(list(
+ as.raw(c(0x70, 0x65, 0x72, 0x73, 0x6f, 0x6e)),
+ as.raw(c(0x77, 0x6f, 0x6d, 0x61, 0x6e)),
+ as.raw(c(0x6d, 0x61, 0x00, 0x6e)), # <-- there's your nul, 0x00
+ as.raw(c(0x66, 0x00, 0x00, 0x61, 0x00, 0x6e)), # multiple nuls
+ as.raw(c(0x63, 0x61, 0x6d, 0x65, 0x72, 0x61)),
+ as.raw(c(0x74, 0x76))
+ ),
+ class = c("arrow_binary", "vctrs_vctr", "list")
+ )
+ chunked_array_with_nul <- ChunkedArray$create(raws)$cast(utf8())
+
+  # The behavior of the warnings/errors is slightly different with and without
+  # altrep. Without it (i.e. R 3.5.0 and below), the error triggers immediately
+  # on `as.vector()`, whereas with it the error only happens on materialization.
+ skip_if_r_version("3.5.0")
+
+ v <- expect_error(as.vector(chunked_array_with_nul), NA)
+
+ expect_error(
+ v[],
+ paste0(
+ "embedded nul in string: 'ma\\0n'; to strip nuls when converting from Arrow to R, ",
+ "set options(arrow.skip_nul = TRUE)"
+ ),
+ fixed = TRUE
+ )
+
+ withr::with_options(list(arrow.skip_nul = TRUE), {
+ v <- expect_warning(as.vector(chunked_array_with_nul), NA)
+ expect_warning(
+ expect_identical(v[3], "man"),
+ "Stripping '\\0' (nul) from character vector",
+ fixed = TRUE
+ )
+ })
+})
diff --git a/src/arrow/r/tests/testthat/test-chunked-array.txt b/src/arrow/r/tests/testthat/test-chunked-array.txt
new file mode 100644
index 000000000..c7101359d
--- /dev/null
+++ b/src/arrow/r/tests/testthat/test-chunked-array.txt
@@ -0,0 +1,103 @@
+> chunked_array(c(1, 2, 3), c(4, 5, 6))
+ChunkedArray
+[
+ [
+ 1,
+ 2,
+ 3
+ ],
+ [
+ 4,
+ 5,
+ 6
+ ]
+]
+
+> chunked_array(1:30, c(4, 5, 6))
+ChunkedArray
+[
+ [
+ 1,
+ 2,
+ 3,
+ 4,
+ 5,
+ 6,
+ 7,
+ 8,
+ 9,
+ 10,
+ ...
+ 21,
+ 22,
+ 23,
+ 24,
+ 25,
+ 26,
+ 27,
+ 28,
+ 29,
+ 30
+ ],
+ [
+ 4,
+ 5,
+ 6
+ ]
+]
+
+> chunked_array(1:30)
+ChunkedArray
+[
+ [
+ 1,
+ 2,
+ 3,
+ 4,
+ 5,
+ 6,
+ 7,
+ 8,
+ 9,
+ 10,
+ ...
+ 21,
+ 22,
+ 23,
+ 24,
+ 25,
+ 26,
+ 27,
+ 28,
+ 29,
+ 30
+ ]
+]
+
+> chunked_array(factor(c("a", "b")), factor(c("c", "d")))
+ChunkedArray
+[
+
+ -- dictionary:
+ [
+ "a",
+ "b"
+ ]
+ -- indices:
+ [
+ 0,
+ 1
+ ],
+
+ -- dictionary:
+ [
+ "c",
+ "d"
+ ]
+ -- indices:
+ [
+ 0,
+ 1
+ ]
+]
+
diff --git a/src/arrow/r/tests/testthat/test-compressed.R b/src/arrow/r/tests/testthat/test-compressed.R
new file mode 100644
index 000000000..d796e3e75
--- /dev/null
+++ b/src/arrow/r/tests/testthat/test-compressed.R
@@ -0,0 +1,73 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+test_that("codec_is_available", {
+ expect_true(codec_is_available("uncompressed")) # Always true
+ expect_match_arg_error(codec_is_available("sdfasdf"))
+ skip_if_not_available("gzip")
+ expect_true(codec_is_available("gzip"))
+ expect_true(codec_is_available("GZIP"))
+})
+
+if (identical(Sys.getenv("APPVEYOR"), "True")) {
+ test_that("Compression codecs are included in the Windows build", {
+ expect_true(codec_is_available("lz4"))
+ expect_true(codec_is_available("zstd"))
+ })
+}
+
+test_that("Codec attributes", {
+ skip_if_not_available("gzip")
+ cod <- Codec$create("gzip")
+ expect_equal(cod$name, "gzip")
+ # TODO: implement $level
+ expect_error(cod$level)
+})
+
+test_that("can write Buffer to CompressedOutputStream and read back in CompressedInputStream", {
+ skip_if_not_available("gzip")
+ buf <- buffer(as.raw(sample(0:255, size = 1024, replace = TRUE)))
+
+ tf1 <- tempfile()
+ stream1 <- CompressedOutputStream$create(tf1)
+ expect_equal(stream1$tell(), 0)
+ stream1$write(buf)
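+  # tell() reports the number of uncompressed bytes written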
+ expect_equal(stream1$tell(), buf$size)
+ stream1$close()
+
+ tf2 <- tempfile()
+ sink2 <- FileOutputStream$create(tf2)
+ stream2 <- CompressedOutputStream$create(sink2)
+ expect_equal(stream2$tell(), 0)
+ stream2$write(buf)
+ expect_equal(stream2$tell(), buf$size)
+ stream2$close()
+ sink2$close()
+
+ input1 <- CompressedInputStream$create(tf1)
+ buf1 <- input1$Read(1024L)
+
+ file2 <- ReadableFile$create(tf2)
+ input2 <- CompressedInputStream$create(file2)
+ buf2 <- input2$Read(1024L)
+
+ expect_equal(buf, buf1)
+ expect_equal(buf, buf2)
+
+ unlink(tf1)
+ unlink(tf2)
+})
diff --git a/src/arrow/r/tests/testthat/test-compute-aggregate.R b/src/arrow/r/tests/testthat/test-compute-aggregate.R
new file mode 100644
index 000000000..018279d4b
--- /dev/null
+++ b/src/arrow/r/tests/testthat/test-compute-aggregate.R
@@ -0,0 +1,434 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+test_that("list_compute_functions", {
+ allfuncs <- list_compute_functions()
+ expect_false(all(grepl("min", allfuncs)))
+ justmins <- list_compute_functions("^min")
+ expect_true(length(justmins) > 0)
+ expect_true(all(grepl("min", justmins)))
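+  # grouped aggregate ("hash_") kernels are internal, so none should be listed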
+ no_hash_funcs <- list_compute_functions("^hash")
+ expect_true(length(no_hash_funcs) == 0)
+})
+
+test_that("sum.Array", {
+ ints <- 1:5
+ a <- Array$create(ints)
+ expect_r6_class(sum(a), "Scalar")
+ expect_identical(as.integer(sum(a)), sum(ints))
+
+ floats <- c(1.3, 2.4, 3)
+ f <- Array$create(floats)
+ expect_identical(as.numeric(sum(f)), sum(floats))
+
+ floats <- c(floats, NA)
+ na <- Array$create(floats)
+ if (!grepl("devel", R.version.string)) {
+ # Valgrind on R-devel confuses NaN and NA_real_
+ # https://r.789695.n4.nabble.com/Difference-in-NA-behavior-in-R-devel-running-under-valgrind-td4768731.html
+ expect_identical(as.numeric(sum(na)), sum(floats))
+ }
+ expect_r6_class(sum(na, na.rm = TRUE), "Scalar")
+ expect_identical(as.numeric(sum(na, na.rm = TRUE)), sum(floats, na.rm = TRUE))
+
+ bools <- c(TRUE, NA, TRUE, FALSE)
+ b <- Array$create(bools)
+ expect_identical(as.integer(sum(b)), sum(bools))
+ expect_identical(as.integer(sum(b, na.rm = TRUE)), sum(bools, na.rm = TRUE))
+})
+
+test_that("sum.ChunkedArray", {
+ a <- ChunkedArray$create(1:4, c(1:4, NA), 1:5)
+ expect_r6_class(sum(a), "Scalar")
+ expect_true(is.na(as.vector(sum(a))))
+ expect_identical(as.numeric(sum(a, na.rm = TRUE)), 35)
+})
+
+test_that("sum dots", {
+ a1 <- Array$create(1:4)
+ a2 <- ChunkedArray$create(1:4, c(1:4, NA), 1:5)
+ expect_identical(as.numeric(sum(a1, a2, na.rm = TRUE)), 45)
+})
+
+test_that("sum.Scalar", {
+ s <- Scalar$create(4)
+ expect_identical(as.numeric(s), as.numeric(sum(s)))
+})
+
+test_that("mean.Array", {
+ ints <- 1:4
+ a <- Array$create(ints)
+ expect_r6_class(mean(a), "Scalar")
+ expect_identical(as.vector(mean(a)), mean(ints))
+
+ floats <- c(1.3, 2.4, 3)
+ f <- Array$create(floats)
+ expect_identical(as.vector(mean(f)), mean(floats))
+
+ floats <- c(floats, NA)
+ na <- Array$create(floats)
+ if (!grepl("devel", R.version.string)) {
+ # Valgrind on R-devel confuses NaN and NA_real_
+ # https://r.789695.n4.nabble.com/Difference-in-NA-behavior-in-R-devel-running-under-valgrind-td4768731.html
+ expect_identical(as.vector(mean(na)), mean(floats))
+ }
+ expect_r6_class(mean(na, na.rm = TRUE), "Scalar")
+ expect_identical(as.vector(mean(na, na.rm = TRUE)), mean(floats, na.rm = TRUE))
+
+ bools <- c(TRUE, NA, TRUE, FALSE)
+ b <- Array$create(bools)
+ expect_identical(as.vector(mean(b)), mean(bools))
+ expect_identical(as.integer(sum(b, na.rm = TRUE)), sum(bools, na.rm = TRUE))
+})
+
+test_that("mean.ChunkedArray", {
+ a <- ChunkedArray$create(1:4, c(1:4, NA), 1:5)
+ expect_r6_class(mean(a), "Scalar")
+ expect_true(is.na(as.vector(mean(a))))
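+  # the 13 non-missing values sum to 35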
+ expect_identical(as.vector(mean(a, na.rm = TRUE)), 35 / 13)
+})
+
+test_that("mean.Scalar", {
+ s <- Scalar$create(4)
+ expect_equal(s, mean(s))
+})
+
+test_that("Bad input handling of call_function", {
+ expect_error(
+ call_function("sum", 2, 3),
+ 'Argument 1 is of class numeric but it must be one of "Array", "ChunkedArray", "RecordBatch", "Table", or "Scalar"'
+ )
+})
+
+test_that("min.Array", {
+ ints <- 1:4
+ a <- Array$create(ints)
+ expect_r6_class(min(a), "Scalar")
+ expect_identical(as.vector(min(a)), min(ints))
+
+ floats <- c(1.3, 3, 2.4)
+ f <- Array$create(floats)
+ expect_identical(as.vector(min(f)), min(floats))
+
+ floats <- c(floats, NA)
+ na <- Array$create(floats)
+ expect_identical(as.vector(min(na)), min(floats))
+ expect_r6_class(min(na, na.rm = TRUE), "Scalar")
+ expect_identical(as.vector(min(na, na.rm = TRUE)), min(floats, na.rm = TRUE))
+
+ bools <- c(TRUE, TRUE, FALSE)
+ b <- Array$create(bools)
+ # R is inconsistent here: typeof(min(NA)) == "integer", not "logical"
+ expect_identical(as.vector(min(b)), as.logical(min(bools)))
+})
+
+test_that("max.Array", {
+ ints <- 1:4
+ a <- Array$create(ints)
+ expect_r6_class(max(a), "Scalar")
+ expect_identical(as.vector(max(a)), max(ints))
+
+ floats <- c(1.3, 3, 2.4)
+ f <- Array$create(floats)
+ expect_identical(as.vector(max(f)), max(floats))
+
+ floats <- c(floats, NA)
+ na <- Array$create(floats)
+ expect_identical(as.vector(max(na)), max(floats))
+ expect_r6_class(max(na, na.rm = TRUE), "Scalar")
+ expect_identical(as.vector(max(na, na.rm = TRUE)), max(floats, na.rm = TRUE))
+
+ bools <- c(TRUE, TRUE, FALSE)
+ b <- Array$create(bools)
+ # R is inconsistent here: typeof(max(NA)) == "integer", not "logical"
+ expect_identical(as.vector(max(b)), as.logical(max(bools)))
+})
+
+test_that("min.ChunkedArray", {
+ ints <- 1:4
+ a <- ChunkedArray$create(ints)
+ expect_r6_class(min(a), "Scalar")
+ expect_identical(as.vector(min(a)), min(ints))
+
+ floats <- c(1.3, 3, 2.4)
+ f <- ChunkedArray$create(floats)
+ expect_identical(as.vector(min(f)), min(floats))
+
+ floats <- c(floats, NA)
+ na <- ChunkedArray$create(floats)
+ expect_identical(as.vector(min(na)), min(floats))
+ expect_r6_class(min(na, na.rm = TRUE), "Scalar")
+ expect_identical(as.vector(min(na, na.rm = TRUE)), min(floats, na.rm = TRUE))
+
+ bools <- c(TRUE, TRUE, FALSE)
+ b <- ChunkedArray$create(bools)
+ # R is inconsistent here: typeof(min(NA)) == "integer", not "logical"
+ expect_identical(as.vector(min(b)), as.logical(min(bools)))
+})
+
+test_that("max.ChunkedArray", {
+ ints <- 1:4
+ a <- ChunkedArray$create(ints)
+ expect_r6_class(max(a), "Scalar")
+ expect_identical(as.vector(max(a)), max(ints))
+
+ floats <- c(1.3, 3, 2.4)
+ f <- ChunkedArray$create(floats)
+ expect_identical(as.vector(max(f)), max(floats))
+
+ floats <- c(floats, NA)
+ na <- ChunkedArray$create(floats)
+ expect_identical(as.vector(max(na)), max(floats))
+ expect_r6_class(max(na, na.rm = TRUE), "Scalar")
+ expect_identical(as.vector(max(na, na.rm = TRUE)), max(floats, na.rm = TRUE))
+
+ bools <- c(TRUE, TRUE, FALSE)
+ b <- ChunkedArray$create(bools)
+ # R is inconsistent here: typeof(max(NA)) == "integer", not "logical"
+ expect_identical(as.vector(max(b)), as.logical(max(bools)))
+})
+
+test_that("Edge cases", {
+ a <- Array$create(NA)
+ for (type in c(int32(), float64(), bool())) {
+ expect_as_vector(sum(a$cast(type), na.rm = TRUE), sum(NA, na.rm = TRUE))
+ expect_as_vector(mean(a$cast(type), na.rm = TRUE), mean(NA, na.rm = TRUE))
+ expect_as_vector(
+ min(a$cast(type), na.rm = TRUE),
+ # Suppress the base R warning about no non-missing arguments
+ suppressWarnings(min(NA, na.rm = TRUE))
+ )
+ expect_as_vector(
+ max(a$cast(type), na.rm = TRUE),
+ suppressWarnings(max(NA, na.rm = TRUE))
+ )
+ }
+})
+
+test_that("quantile.Array and quantile.ChunkedArray", {
+ a <- Array$create(c(0, 1, 2, 3))
+ ca <- ChunkedArray$create(c(0, 1), c(2, 3))
+ probs <- c(0.49, 0.51)
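+  # with values 0:3, the linear quantile at probability p is 3 * p (1.47, 1.53)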
+ for (ad in list(a, ca)) {
+ for (type in c(int32(), uint64(), float64())) {
+ expect_equal(
+ quantile(ad$cast(type), probs = probs, interpolation = "linear"),
+ Array$create(c(1.47, 1.53))
+ )
+ expect_equal(
+ quantile(ad$cast(type), probs = probs, interpolation = "lower"),
+ Array$create(c(1, 1))$cast(type)
+ )
+ expect_equal(
+ quantile(ad$cast(type), probs = probs, interpolation = "higher"),
+ Array$create(c(2, 2))$cast(type)
+ )
+ expect_equal(
+ quantile(ad$cast(type), probs = probs, interpolation = "nearest"),
+ Array$create(c(1, 2))$cast(type)
+ )
+ expect_equal(
+ quantile(ad$cast(type), probs = probs, interpolation = "midpoint"),
+ Array$create(c(1.5, 1.5))
+ )
+ }
+ }
+})
+
+test_that("quantile and median NAs, edge cases, and exceptions", {
+ expect_equal(
+ quantile(Array$create(c(1, 2)), probs = c(0, 1)),
+ Array$create(c(1, 2))
+ )
+ expect_error(
+ quantile(Array$create(c(1, 2, NA))),
+ "Missing values not allowed if 'na.rm' is FALSE"
+ )
+ expect_equal(
+ quantile(Array$create(numeric(0))),
+ Array$create(rep(NA_real_, 5))
+ )
+ expect_equal(
+ quantile(Array$create(rep(NA_integer_, 3)), na.rm = TRUE),
+ Array$create(rep(NA_real_, 5))
+ )
+ expect_equal(
+ quantile(Scalar$create(0L)),
+ Array$create(rep(0, 5))
+ )
+ expect_equal(
+ median(Scalar$create(1L)),
+ Scalar$create(1)
+ )
+ expect_error(
+ quantile(Array$create(1:3), type = 9),
+ "not supported"
+ )
+})
+
+test_that("median passes ... args to quantile", {
+ skip_if(
+ !"..." %in% names(formals(median)),
+ "The median generic lacks dots in R 3.3.0 and earlier"
+ )
+ expect_equal(
+ median(Array$create(c(1, 2)), interpolation = "higher"),
+ Scalar$create(2)
+ )
+ expect_error(
+ median(Array$create(c(1, 2)), probs = c(.25, .75))
+ )
+})
+
+test_that("median.Array and median.ChunkedArray", {
+ compare_expression(
+ median(.input),
+ 1:4
+ )
+ compare_expression(
+ median(.input),
+ 1:5
+ )
+ compare_expression(
+ median(.input),
+ numeric(0)
+ )
+ compare_expression(
+ median(.input, na.rm = FALSE),
+ c(1, 2, NA)
+ )
+ compare_expression(
+ median(.input, na.rm = TRUE),
+ c(1, 2, NA)
+ )
+ compare_expression(
+ median(.input, na.rm = TRUE),
+ NA_real_
+ )
+ compare_expression(
+ median(.input, na.rm = FALSE),
+ c(1, 2, NA)
+ )
+ compare_expression(
+ median(.input, na.rm = TRUE),
+ c(1, 2, NA)
+ )
+ compare_expression(
+ median(.input, na.rm = TRUE),
+ NA_real_
+ )
+})
+
+test_that("unique.Array", {
+ a <- Array$create(c(1, 4, 3, 1, 1, 3, 4))
+ expect_equal(unique(a), Array$create(c(1, 4, 3)))
+ ca <- ChunkedArray$create(a, a)
+ expect_equal(unique(ca), Array$create(c(1, 4, 3)))
+})
+
+test_that("match_arrow", {
+ a <- Array$create(c(1, 4, 3, 1, 1, 3, 4))
+ tab <- c(4, 3, 2, 1)
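+  # unlike base::match(), the returned positions are 0-based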
+ expect_equal(match_arrow(a, tab), Array$create(c(3L, 0L, 1L, 3L, 3L, 1L, 0L)))
+
+ ca <- ChunkedArray$create(c(1, 4, 3, 1, 1, 3, 4))
+ expect_equal(match_arrow(ca, tab), ChunkedArray$create(c(3L, 0L, 1L, 3L, 3L, 1L, 0L)))
+
+ sc <- Scalar$create(3)
+ expect_equal(match_arrow(sc, tab), Scalar$create(1L))
+
+ vec <- c(1, 2)
+ expect_equal(match_arrow(vec, tab), Array$create(c(3L, 2L)))
+})
+
+test_that("is_in", {
+ a <- Array$create(c(9, 4, 3))
+ tab <- c(4, 3, 2, 1)
+ expect_equal(is_in(a, tab), Array$create(c(FALSE, TRUE, TRUE)))
+
+ ca <- ChunkedArray$create(c(9, 4, 3))
+ expect_equal(is_in(ca, tab), ChunkedArray$create(c(FALSE, TRUE, TRUE)))
+
+ sc <- Scalar$create(3)
+ expect_equal(is_in(sc, tab), Scalar$create(TRUE))
+
+ vec <- c(1, 9)
+ expect_equal(is_in(vec, tab), Array$create(c(TRUE, FALSE)))
+})
+
+test_that("value_counts", {
+ a <- Array$create(c(1, 4, 3, 1, 1, 3, 4))
+ result_df <- tibble::tibble(
+ values = c(1, 4, 3),
+ counts = c(3L, 2L, 2L)
+ )
+ result <- Array$create(
+ result_df,
+ type = struct(values = float64(), counts = int64())
+ )
+ expect_equal(value_counts(a), result)
+ expect_identical(as.data.frame(value_counts(a)), result_df)
+ expect_identical(as.vector(value_counts(a)$counts), result_df$counts)
+})
+
+test_that("any.Array and any.ChunkedArray", {
+ data <- c(1:10, NA, NA)
+
+ compare_expression(any(.input > 5), data)
+ compare_expression(any(.input > 5, na.rm = TRUE), data)
+ compare_expression(any(.input < 1), data)
+ compare_expression(any(.input < 1, na.rm = TRUE), data)
+
+ data_logical <- c(TRUE, FALSE, TRUE, NA, FALSE)
+
+ compare_expression(any(.input), data_logical)
+ compare_expression(any(.input, na.rm = FALSE), data_logical)
+ compare_expression(any(.input, na.rm = TRUE), data_logical)
+})
+
+test_that("all.Array and all.ChunkedArray", {
+ data <- c(1:10, NA, NA)
+
+ compare_expression(all(.input > 5), data)
+ compare_expression(all(.input > 5, na.rm = TRUE), data)
+
+ compare_expression(all(.input < 11), data)
+ compare_expression(all(.input < 11, na.rm = TRUE), data)
+
+ data_logical <- c(TRUE, TRUE, NA)
+
+ compare_expression(all(.input), data_logical)
+ compare_expression(all(.input, na.rm = TRUE), data_logical)
+})
+
+test_that("variance", {
+ data <- c(-37, 267, 88, -120, 9, 101, -65, -23, NA)
+ arr <- Array$create(data)
+ chunked_arr <- ChunkedArray$create(data)
+
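+  # ddof is the delta degrees of freedom: the 8 non-missing values have
+  # squared deviations summing to 103788, and 103788 / (8 - 5) == 34596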
+ expect_equal(call_function("variance", arr, options = list(ddof = 5)), Scalar$create(34596))
+ expect_equal(call_function("variance", chunked_arr, options = list(ddof = 5)), Scalar$create(34596))
+})
+
+test_that("stddev", {
+ data <- c(-37, 267, 88, -120, 9, 101, -65, -23, NA)
+ arr <- Array$create(data)
+ chunked_arr <- ChunkedArray$create(data)
+
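+  # the square root of the ddof = 5 variance above: sqrt(34596) == 186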
+ expect_equal(call_function("stddev", arr, options = list(ddof = 5)), Scalar$create(186))
+ expect_equal(call_function("stddev", chunked_arr, options = list(ddof = 5)), Scalar$create(186))
+})
diff --git a/src/arrow/r/tests/testthat/test-compute-arith.R b/src/arrow/r/tests/testthat/test-compute-arith.R
new file mode 100644
index 000000000..e8674e315
--- /dev/null
+++ b/src/arrow/r/tests/testthat/test-compute-arith.R
@@ -0,0 +1,129 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+test_that("Addition", {
+ a <- Array$create(c(1:4, NA_integer_))
+ expect_type_equal(a, int32())
+ expect_type_equal(a + 4L, int32())
+ expect_type_equal(a + 4, float64())
+ expect_equal(a + 4L, Array$create(c(5:8, NA_integer_)))
+ expect_identical(as.vector(a + 4L), c(5:8, NA_integer_))
+ expect_equal(a + 4L, Array$create(c(5:8, NA_integer_)))
+ expect_as_vector(a + 4L, c(5:8, NA_integer_))
+ expect_equal(a + NA_integer_, Array$create(rep(NA_integer_, 5)))
+
+ a8 <- a$cast(int8())
+ expect_type_equal(a8 + Scalar$create(1, int8()), int8())
+
+ # int8 will be promoted to int32 when added to int32
+ expect_type_equal(a8 + 127L, int32())
+ expect_equal(a8 + 127L, Array$create(c(128:131, NA_integer_)))
+
+ b <- Array$create(c(4:1, NA_integer_))
+ expect_type_equal(a8 + b, int32())
+ expect_equal(a8 + b, Array$create(c(5L, 5L, 5L, 5L, NA_integer_)))
+
+ expect_type_equal(a + 4.1, float64())
+ expect_equal(a + 4.1, Array$create(c(5.1, 6.1, 7.1, 8.1, NA_real_)))
+})
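+
+# Illustrative companion (assumption: the checked addition kernel raises on
+# overflow rather than wrapping): with both operands int8 there is no
+# promotion, and 1:4 + 127 does not fit in int8.
+test_that("Addition overflow on int8 (illustrative)", {
+ a8 <- Array$create(c(1:4, NA_integer_))$cast(int8())
+ expect_error(a8 + Scalar$create(127L, int8()))
+})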
+
+test_that("Subtraction", {
+ a <- Array$create(c(1:4, NA_integer_))
+ expect_equal(a - 3L, Array$create(c(-2:1, NA_integer_)))
+
+ expect_equal(
+ Array$create(c(5.1, 6.1, 7.1, 8.1, NA_real_)) - a,
+ Array$create(c(4.1, 4.1, 4.1, 4.1, NA_real_))
+ )
+})
+
+test_that("Multiplication", {
+ a <- Array$create(c(1:4, NA_integer_))
+ expect_equal(a * 2L, Array$create(c(1:4 * 2L, NA_integer_)))
+
+ expect_equal(
+ (a * 0.5) * 3L,
+ Array$create(c(1.5, 3, 4.5, 6, NA_real_))
+ )
+})
+
+test_that("Division", {
+ a <- Array$create(c(1:4, NA_integer_))
+ expect_equal(a / 2, Array$create(c(1:4 / 2, NA_real_)))
+ expect_equal(a %/% 2, Array$create(c(0L, 1L, 1L, 2L, NA_integer_)))
+ expect_equal(a / 2 / 2, Array$create(c(1:4 / 2 / 2, NA_real_)))
+ expect_equal(a %/% 2 %/% 2, Array$create(c(0L, 0L, 0L, 1L, NA_integer_)))
+ expect_equal(a / 0, Array$create(c(Inf, Inf, Inf, Inf, NA_real_)))
+ # TODO add tests for integer division %/% by 0
+ # see https://issues.apache.org/jira/browse/ARROW-14297
+
+ b <- a$cast(float64())
+ expect_equal(b / 2, Array$create(c(1:4 / 2, NA_real_)))
+ expect_equal(b %/% 2, Array$create(c(0L, 1L, 1L, 2L, NA_integer_)))
+ expect_equal(b / 0, Array$create(c(Inf, Inf, Inf, Inf, NA_real_)))
+ # TODO add tests for integer division %/% by 0
+ # see https://issues.apache.org/jira/browse/ARROW-14297
+
+ # the behavior of %/% matches R's (i.e. it floors the quotient of the
+ # original values, rather than truncating the divisor to an integer first)
+ expect_equal(b / 2.2, Array$create(c(1:4 / 2.2, NA_real_)))
+ # nolint start
+ # c(1:4) %/% 2.2 != c(1:4) %/% as.integer(2.2)
+ # c(1:4) %/% 2.2 == c(0L, 0L, 1L, 1L)
+ # c(1:4) %/% as.integer(2.2) == c(0L, 1L, 1L, 2L)
+ # nolint end
+ expect_equal(b %/% 2.2, Array$create(c(0L, 0L, 1L, 1L, NA_integer_)))
+
+ expect_equal(a %% 2, Array$create(c(1L, 0L, 1L, 0L, NA_integer_)))
+
+ expect_equal(b %% 2, Array$create(c(1:4 %% 2, NA_real_)))
+})
+
+test_that("Power", {
+ a <- Array$create(c(1:4, NA_integer_))
+ b <- a$cast(float64())
+ c <- a$cast(int64())
+ d <- a$cast(uint64())
+
+ expect_equal(a^0, Array$create(c(1, 1, 1, 1, NA_real_)))
+ expect_equal(a^2, Array$create(c(1, 4, 9, 16, NA_real_)))
+ expect_equal(a^(-1), Array$create(c(1, 1 / 2, 1 / 3, 1 / 4, NA_real_)))
+ expect_equal(a^(.5), Array$create(c(1, sqrt(2), sqrt(3), sqrt(4), NA_real_)))
+
+ expect_equal(b^0, Array$create(c(1, 1, 1, 1, NA_real_)))
+ expect_equal(b^2, Array$create(c(1, 4, 9, 16, NA_real_)))
+ expect_equal(b^(-1), Array$create(c(1, 1 / 2, 1 / 3, 1 / 4, NA_real_)))
+ expect_equal(b^(.5), Array$create(c(1, sqrt(2), sqrt(3), sqrt(4), NA_real_)))
+
+ expect_equal(c^0, Array$create(c(1, 1, 1, 1, NA_real_)))
+ expect_equal(c^2, Array$create(c(1, 4, 9, 16, NA_real_)))
+ expect_equal(c^(-1), Array$create(c(1, 1 / 2, 1 / 3, 1 / 4, NA_real_)))
+ expect_equal(c^(.5), Array$create(c(1, sqrt(2), sqrt(3), sqrt(4), NA_real_)))
+
+ expect_equal(d^0, Array$create(c(1, 1, 1, 1, NA_real_)))
+ expect_equal(d^2, Array$create(c(1, 4, 9, 16, NA_real_)))
+ expect_equal(d^(-1), Array$create(c(1, 1 / 2, 1 / 3, 1 / 4, NA_real_)))
+ expect_equal(d^(.5), Array$create(c(1, sqrt(2), sqrt(3), sqrt(4), NA_real_)))
+})
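+
+# Note that ^ returns float64 even for integer inputs, matching R, where
+# exponentiation always yields double.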
+
+test_that("Dates casting", {
+ a <- Array$create(c(Sys.Date() + 1:4, NA_integer_))
+
+ skip("ARROW-11090 (date/datetime arithmetic)")
+ # Error: NotImplemented: Function add_checked has no kernel matching input types (array[date32[day]], scalar[double])
+ expect_equal(a + 2, Array$create(c((Sys.Date() + 1:4) + 2, NA_integer_)))
+})
diff --git a/src/arrow/r/tests/testthat/test-compute-no-bindings.R b/src/arrow/r/tests/testthat/test-compute-no-bindings.R
new file mode 100644
index 000000000..a51d797a4
--- /dev/null
+++ b/src/arrow/r/tests/testthat/test-compute-no-bindings.R
@@ -0,0 +1,201 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+test_that("non-bound compute kernels using TrimOptions", {
+ skip_if_not_available("utf8proc")
+ expect_equal(
+ call_function(
+ "utf8_trim",
+ Scalar$create("abracadabra"),
+ options = list(characters = "ab")
+ ),
+ Scalar$create("racadabr")
+ )
+
+ expect_equal(
+ call_function(
+ "utf8_ltrim",
+ Scalar$create("abracadabra"),
+ options = list(characters = "ab")
+ ),
+ Scalar$create("racadabra")
+ )
+
+ expect_equal(
+ call_function(
+ "utf8_rtrim",
+ Scalar$create("abracadabra"),
+ options = list(characters = "ab")
+ ),
+ Scalar$create("abracadabr")
+ )
+
+ expect_equal(
+ call_function(
+ "ascii_ltrim",
+ Scalar$create("abracadabra"),
+ options = list(characters = "ab")
+ ),
+ Scalar$create("racadabra")
+ )
+
+ expect_equal(
+ call_function(
+ "ascii_rtrim",
+ Scalar$create("abracadabra"),
+ options = list(characters = "ab")
+ ),
+ Scalar$create("abracadabr")
+ )
+})
+
+test_that("non-bound compute kernels using ReplaceSliceOptions", {
+ skip_if_not_available("utf8proc")
+
+ expect_equal(
+ call_function(
+ "binary_replace_slice",
+ Array$create("I need to fix this string"),
+ options = list(start = 1, stop = 1, replacement = " don't")
+ ),
+ Array$create("I don't need to fix this string")
+ )
+
+ expect_equal(
+ call_function(
+ "utf8_replace_slice",
+ Array$create("I need to fix this string"),
+ options = list(start = 1, stop = 1, replacement = " don't")
+ ),
+ Array$create("I don't need to fix this string")
+ )
+})
+
+test_that("non-bound compute kernels using ModeOptions", {
+ # in c(1:10, 10, 9, NA): 9 and 10 each appear twice and everything else
+ # once, so the top-3 modes are 9 and 10 (ties ordered by value), then 1
+ expect_equal(
+ as.vector(
+ call_function("mode", Array$create(c(1:10, 10, 9, NA)), options = list(n = 3))
+ ),
+ tibble::tibble("mode" = c(9, 10, 1), "count" = c(2L, 2L, 1L))
+ )
+
+ # with skip_nulls = FALSE, the presence of the NA yields an empty result
+ expect_equal(
+ as.vector(
+ call_function("mode", Array$create(c(1:10, 10, 9, NA)), options = list(n = 3, skip_nulls = FALSE))
+ ),
+ tibble::tibble("mode" = numeric(), "count" = integer())
+ )
+})
+
+test_that("non-bound compute kernels using PartitionNthOptions", {
+ result <- call_function(
+ "partition_nth_indices",
+ Array$create(c(11:20)),
+ options = list(pivot = 3)
+ )
+ # The input is already sorted ascending, so the 3 smallest values sit at
+ # 0-based indices 0-2 and everything from the pivot on has index >= 3.
+ # Order of indices on either side of the pivot is not deterministic
+ # (depends on C++ standard library implementation)
+ expect_true(all(as.vector(result[1:3]) < 3))
+ expect_true(all(as.vector(result[4:10]) >= 3))
+})
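+
+# A sketch with unsorted input (illustrative, not part of the upstream
+# suite): the first 3 returned 0-based indices identify the 3 smallest
+# values; compare as sets since order within each side is unspecified.
+test_that("partition_nth_indices on unsorted input (illustrative)", {
+ x <- c(15, 11, 20, 13, 18, 12, 19, 14, 17, 16)
+ result <- call_function(
+ "partition_nth_indices",
+ Array$create(x),
+ options = list(pivot = 3)
+ )
+ idx <- as.integer(as.vector(result[1:3])) + 1L
+ expect_setequal(idx, order(x)[1:3])
+})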
+
+
+test_that("non-bound compute kernels using MatchSubstringOptions", {
+ skip_if_not_available("utf8proc")
+
+ # Remove this test when ARROW-13924 has been completed
+ expect_equal(
+ call_function(
+ "starts_with",
+ Array$create(c("abracadabra", "abacus", "abdicate", "abrasive")),
+ options = list(pattern = "abr")
+ ),
+ Array$create(c(TRUE, FALSE, FALSE, TRUE))
+ )
+
+ # Remove this test when ARROW-13924 has been completed
+ expect_equal(
+ call_function(
+ "ends_with",
+ Array$create(c("abracadabra", "abacus", "abdicate", "abrasive")),
+ options = list(pattern = "e")
+ ),
+ Array$create(c(FALSE, FALSE, TRUE, TRUE))
+ )
+
+ # Remove this test when ARROW-13156 has been completed
+ expect_equal(
+ as.vector(
+ call_function(
+ "count_substring",
+ Array$create(c("abracadabra", "abacus", "abdicate", "abrasive")),
+ options = list(pattern = "e")
+ )
+ ),
+ c(0, 0, 1, 1)
+ )
+
+ skip_if_not_available("re2")
+
+ # Remove this test when ARROW-13156 has been completed
+ expect_equal(
+ as.vector(
+ call_function(
+ "count_substring_regex",
+ Array$create(c("abracadabra", "abacus", "abdicate", "abrasive")),
+ options = list(pattern = "e")
+ )
+ ),
+ c(0, 0, 1, 1)
+ )
+})
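+
+# Base R cross-check of the two expectations above (illustrative only)
+test_that("starts_with/ends_with agree with base R (illustrative)", {
+ x <- c("abracadabra", "abacus", "abdicate", "abrasive")
+ expect_identical(startsWith(x, "abr"), c(TRUE, FALSE, FALSE, TRUE))
+ expect_identical(endsWith(x, "e"), c(FALSE, FALSE, TRUE, TRUE))
+})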
+
+test_that("non-bound compute kernels using ExtractRegexOptions", {
+ skip_if_not_available("re2")
+ expect_equal(
+ call_function("extract_regex", Scalar$create("abracadabra"), options = list(pattern = "(?P<letter>[a])")),
+ Scalar$create(tibble::tibble(letter = "a"))
+ )
+})
+
+test_that("non-bound compute kernels using IndexOptions", {
+ # "index" returns the 0-based position of the first match: 40 is the
+ # fourth element, hence 3
+ expect_equal(
+ as.vector(
+ call_function("index", Array$create(c(10, 20, 30, 40)), options = list(value = Scalar$create(40)))
+ ),
+ 3
+ )
+})
diff --git a/src/arrow/r/tests/testthat/test-compute-sort.R b/src/arrow/r/tests/testthat/test-compute-sort.R
new file mode 100644
index 000000000..e3574d86f
--- /dev/null
+++ b/src/arrow/r/tests/testthat/test-compute-sort.R
@@ -0,0 +1,155 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+library(dplyr, warn.conflicts = FALSE)
+
+# randomize order of rows in test data
+tbl <- slice_sample(example_data_for_sorting, prop = 1L)
+
+test_that("sort(Scalar) is identity function", {
+ int <- Scalar$create(42L)
+ expect_equal(sort(int), int)
+ dbl <- Scalar$create(3.14)
+ expect_equal(sort(dbl), dbl)
+ chr <- Scalar$create("foo")
+ expect_equal(sort(chr), chr)
+})
+
+test_that("Array$SortIndices()", {
+ int <- tbl$int
+ # Remove ties because they could give non-deterministic sort indices, and this
+ # test compares sort indices. Other tests compare sorted values, which are
+ # deterministic in the case of ties.
+ int <- int[!duplicated(int)]
+ expect_equal(
+ Array$create(int)$SortIndices(),
+ Array$create(order(int) - 1L, type = uint64())
+ )
+ # Need to remove NAs because ARROW-12063
+ int <- na.omit(int)
+ expect_equal(
+ Array$create(int)$SortIndices(descending = TRUE),
+ Array$create(rev(order(int)) - 1, type = uint64())
+ )
+})
+
+test_that("ChunkedArray$SortIndices()", {
+ int <- tbl$int
+ # Remove ties because they could give non-deterministic sort indices, and this
+ # test compares sort indices. Other tests compare sorted values, which are
+ # deterministic in the case of ties.
+ int <- int[!duplicated(int)]
+ expect_equal(
+ ChunkedArray$create(int[1:4], int[5:length(int)])$SortIndices(),
+ Array$create(order(int) - 1L, type = uint64())
+ )
+ # Need to remove NAs because ARROW-12063
+ int <- na.omit(int)
+ expect_equal(
+ ChunkedArray$create(int[1:4], int[5:length(int)])$SortIndices(descending = TRUE),
+ Array$create(rev(order(int)) - 1, type = uint64())
+ )
+})
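+
+# In both tests above, Arrow sort indices are 0-based, hence the
+# order(int) - 1L translation from R's 1-based order().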
+
+test_that("sort(vector), sort(Array), sort(ChunkedArray) give equivalent results on integers", {
+ compare_expression(
+ sort(.input),
+ tbl$int
+ )
+ compare_expression(
+ sort(.input, na.last = NA),
+ tbl$int
+ )
+ compare_expression(
+ sort(.input, na.last = TRUE),
+ tbl$int
+ )
+ compare_expression(
+ sort(.input, na.last = FALSE),
+ tbl$int
+ )
+ compare_expression(
+ sort(.input, decreasing = TRUE),
+ tbl$int,
+ )
+ compare_expression(
+ sort(.input, decreasing = TRUE, na.last = TRUE),
+ tbl$int,
+ )
+ compare_expression(
+ sort(.input, decreasing = TRUE, na.last = FALSE),
+ tbl$int,
+ )
+})
+
+test_that("sort(vector), sort(Array), sort(ChunkedArray) give equivalent results on strings", {
+ compare_expression(
+ sort(.input, decreasing = TRUE, na.last = FALSE),
+ tbl$chr
+ )
+ compare_expression(
+ sort(.input, decreasing = FALSE, na.last = FALSE),
+ tbl$chr
+ )
+})
+
+test_that("sort(vector), sort(Array), sort(ChunkedArray) give equivalent results on floats", {
+ compare_expression(
+ sort(.input, decreasing = TRUE, na.last = TRUE),
+ tbl$dbl
+ )
+ compare_expression(
+ sort(.input, decreasing = FALSE, na.last = TRUE),
+ tbl$dbl
+ )
+ compare_expression(
+ sort(.input, decreasing = TRUE, na.last = NA),
+ tbl$dbl
+ )
+ compare_expression(
+ sort(.input, decreasing = TRUE, na.last = FALSE),
+ tbl$dbl,
+ )
+ compare_expression(
+ sort(.input, decreasing = FALSE, na.last = NA),
+ tbl$dbl
+ )
+ compare_expression(
+ sort(.input, decreasing = FALSE, na.last = FALSE),
+ tbl$dbl,
+ )
+})
+
+test_that("Table$SortIndices()", {
+ x <- Table$create(tbl)
+ expect_identical(
+ as.vector(x$Take(x$SortIndices("chr"))$chr),
+ sort(tbl$chr, na.last = TRUE)
+ )
+ expect_identical(
+ as.data.frame(x$Take(x$SortIndices(c("int", "dbl"), c(FALSE, FALSE)))),
+ tbl %>% arrange(int, dbl)
+ )
+})
+
+test_that("RecordBatch$SortIndices()", {
+ x <- record_batch(tbl)
+ # the second argument gives per-key descending flags; a single TRUE is
+ # recycled across all three sort keys
+ expect_identical(
+ as.data.frame(x$Take(x$SortIndices(c("chr", "int", "dbl"), TRUE))),
+ tbl %>% arrange(desc(chr), desc(int), desc(dbl))
+ )
+})
diff --git a/src/arrow/r/tests/testthat/test-compute-vector.R b/src/arrow/r/tests/testthat/test-compute-vector.R
new file mode 100644
index 000000000..345da5656
--- /dev/null
+++ b/src/arrow/r/tests/testthat/test-compute-vector.R
@@ -0,0 +1,133 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+expect_bool_function_equal <- function(array_exp, r_exp) {
+ # Assert that the Array operation returns a boolean array
+ # and that its contents are equal to expected
+ expect_r6_class(array_exp, "ArrowDatum")
+ expect_type_equal(array_exp, bool())
+ expect_identical(as.vector(array_exp), r_exp)
+}
+
+expect_array_compares <- function(x, compared_to) {
+ r_values <- as.vector(x)
+ r_compared_to <- as.vector(compared_to)
+ # Iterate over all comparison functions
+ expect_bool_function_equal(x == compared_to, r_values == r_compared_to)
+ expect_bool_function_equal(x != compared_to, r_values != r_compared_to)
+ expect_bool_function_equal(x > compared_to, r_values > r_compared_to)
+ expect_bool_function_equal(x >= compared_to, r_values >= r_compared_to)
+ expect_bool_function_equal(x < compared_to, r_values < r_compared_to)
+ expect_bool_function_equal(x <= compared_to, r_values <= r_compared_to)
+}
+
+test_that("compare ops with Array", {
+ a <- Array$create(1:5)
+ expect_array_compares(a, 4L)
+ expect_array_compares(a, 4) # implicit casting
+ expect_array_compares(a, Scalar$create(4))
+ expect_array_compares(Array$create(c(NA, 1:5)), 4)
+ expect_array_compares(Array$create(as.numeric(c(NA, 1:5))), 4)
+ expect_array_compares(Array$create(c(NA, 1:5)), Array$create(rev(c(NA, 1:5))))
+ expect_array_compares(Array$create(c(NA, 1:5)), Array$create(rev(c(NA, 1:5)), type = double()))
+})
+
+test_that("compare ops with ChunkedArray", {
+ expect_array_compares(ChunkedArray$create(1:3, 4:5), 4L)
+ expect_array_compares(ChunkedArray$create(1:3, 4:5), 4) # implicit casting
+ expect_array_compares(ChunkedArray$create(1:3, 4:5), Scalar$create(4))
+ expect_array_compares(ChunkedArray$create(c(NA, 1:3), 4:5), 4)
+ expect_array_compares(
+ ChunkedArray$create(c(NA, 1:3), 4:5),
+ ChunkedArray$create(4:5, c(NA, 1:3))
+ )
+ expect_array_compares(
+ ChunkedArray$create(c(NA, 1:3), 4:5),
+ Array$create(c(NA, 1:5))
+ )
+ expect_array_compares(
+ Array$create(c(NA, 1:5)),
+ ChunkedArray$create(c(NA, 1:3), 4:5)
+ )
+})
+
+test_that("logic ops with Array", {
+ truth <- expand.grid(left = c(TRUE, FALSE, NA), right = c(TRUE, FALSE, NA))
+ a_left <- Array$create(truth$left)
+ a_right <- Array$create(truth$right)
+ expect_bool_function_equal(a_left & a_right, truth$left & truth$right)
+ expect_bool_function_equal(a_left | a_right, truth$left | truth$right)
+ expect_bool_function_equal(a_left == a_right, truth$left == truth$right)
+ expect_bool_function_equal(a_left != a_right, truth$left != truth$right)
+ expect_bool_function_equal(!a_left, !truth$left)
+
+ # More complexity
+ isEqualTo <- function(x, y) x == y & !is.na(x)
+ expect_bool_function_equal(
+ isEqualTo(a_left, a_right),
+ isEqualTo(truth$left, truth$right)
+ )
+})
+
+test_that("logic ops with ChunkedArray", {
+ truth <- expand.grid(left = c(TRUE, FALSE, NA), right = c(TRUE, FALSE, NA))
+ a_left <- ChunkedArray$create(truth$left)
+ a_right <- ChunkedArray$create(truth$right)
+ expect_bool_function_equal(a_left & a_right, truth$left & truth$right)
+ expect_bool_function_equal(a_left | a_right, truth$left | truth$right)
+ expect_bool_function_equal(a_left == a_right, truth$left == truth$right)
+ expect_bool_function_equal(a_left != a_right, truth$left != truth$right)
+ expect_bool_function_equal(!a_left, !truth$left)
+
+ # More complexity
+ isEqualTo <- function(x, y) x == y & !is.na(x)
+ expect_bool_function_equal(
+ isEqualTo(a_left, a_right),
+ isEqualTo(truth$left, truth$right)
+ )
+})
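+
+# In isEqualTo() above, rows where x is NA give x == y of NA but !is.na(x)
+# of FALSE, and NA & FALSE is FALSE in both R and Arrow; an NA only in y
+# still propagates as NA.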
+
+test_that("call_function validation", {
+ expect_error(
+ call_function("filter", 4),
+ 'Argument 1 is of class numeric but it must be one of "Array", "ChunkedArray", "RecordBatch", "Table", or "Scalar"'
+ )
+ expect_error(
+ call_function("filter", Array$create(1:4), 3),
+ "Argument 2 is of class numeric"
+ )
+ expect_error(
+ call_function("filter",
+ Array$create(1:4),
+ Array$create(c(TRUE, FALSE, TRUE)),
+ options = list(keep_na = TRUE)
+ ),
+ "Array arguments must all be the same length"
+ )
+ expect_error(
+ call_function("filter",
+ record_batch(a = 1:3),
+ Array$create(c(TRUE, FALSE, TRUE)),
+ options = list(keep_na = TRUE)
+ ),
+ NA
+ )
+ expect_error(
+ call_function("filter", options = list(keep_na = TRUE)),
+ "accepts 2 arguments"
+ )
+})
diff --git a/src/arrow/r/tests/testthat/test-csv.R b/src/arrow/r/tests/testthat/test-csv.R
new file mode 100644
index 000000000..023eee92e
--- /dev/null
+++ b/src/arrow/r/tests/testthat/test-csv.R
@@ -0,0 +1,357 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Not all types round-trip via CSV 100% identically by default
+tbl <- example_data[, c("dbl", "lgl", "false", "chr")]
+tbl_no_dates <- tbl
+# Add a date to test its parsing
+tbl$date <- Sys.Date() + 1:10
+
+csv_file <- tempfile()
+
+test_that("Can read csv file", {
+ tf <- tempfile()
+ on.exit(unlink(tf))
+
+ write.csv(tbl, tf, row.names = FALSE)
+
+ tab0 <- Table$create(tbl)
+ tab1 <- read_csv_arrow(tf, as_data_frame = FALSE)
+ expect_equal(tab0, tab1)
+ tab2 <- read_csv_arrow(mmap_open(tf), as_data_frame = FALSE)
+ expect_equal(tab0, tab2)
+ tab3 <- read_csv_arrow(ReadableFile$create(tf), as_data_frame = FALSE)
+ expect_equal(tab0, tab3)
+})
+
+test_that("read_csv_arrow(as_data_frame=TRUE)", {
+ tf <- tempfile()
+ on.exit(unlink(tf))
+
+ write.csv(tbl, tf, row.names = FALSE)
+ tab1 <- read_csv_arrow(tf, as_data_frame = TRUE)
+ expect_equal(tbl, tab1)
+})
+
+test_that("read_delim_arrow parsing options: delim", {
+ tf <- tempfile()
+ on.exit(unlink(tf))
+
+ write.table(tbl, tf, sep = "\t", row.names = FALSE)
+ tab1 <- read_tsv_arrow(tf)
+ tab2 <- read_delim_arrow(tf, delim = "\t")
+ expect_equal(tab1, tab2)
+ expect_equal(tbl, tab1)
+})
+
+test_that("read_delim_arrow parsing options: quote", {
+ tf <- tempfile()
+ on.exit(unlink(tf))
+
+ df <- data.frame(a = c(1, 2), b = c("'abc'", "'def'"))
+ write.table(df, sep = ";", tf, row.names = FALSE, quote = FALSE)
+ tab1 <- read_delim_arrow(tf, delim = ";", quote = "'")
+
+ # column a comes back as int64, so coerce it to numeric before comparing;
+ # otherwise the comparison reports:
+ # Component “a”: target is integer64, current is numeric
+ tab1$a <- as.numeric(tab1$a)
+ expect_equal(
+ tab1,
+ tibble::tibble(a = c(1, 2), b = c("abc", "def"))
+ )
+})
+
+test_that("read_csv_arrow parsing options: col_names", {
+ tf <- tempfile()
+ on.exit(unlink(tf))
+
+ # Writing the CSV without the header
+ write.table(tbl, tf, sep = ",", row.names = FALSE, col.names = FALSE)
+
+ # Reading with col_names = FALSE autogenerates names
+ no_names <- read_csv_arrow(tf, col_names = FALSE)
+ expect_equal(no_names$f0, tbl[[1]])
+
+ tab1 <- read_csv_arrow(tf, col_names = names(tbl))
+
+ expect_identical(names(tab1), names(tbl))
+ expect_equal(tbl, tab1)
+
+ # This errors (correctly) because I haven't given enough names,
+ # but the error message is "Invalid: Empty CSV file", which is not accurate
+ expect_error(
+ read_csv_arrow(tf, col_names = names(tbl)[1])
+ )
+ # Same here
+ expect_error(
+ read_csv_arrow(tf, col_names = c(names(tbl), names(tbl)))
+ )
+})
+
+test_that("read_csv_arrow parsing options: skip", {
+ tf <- tempfile()
+ on.exit(unlink(tf))
+
+ # Add two garbage lines at the start of the CSV
+ cat("asdf\nqwer\n", file = tf)
+ suppressWarnings(write.table(tbl, tf, sep = ",", row.names = FALSE, append = TRUE))
+
+ tab1 <- read_csv_arrow(tf, skip = 2)
+
+ expect_identical(names(tab1), names(tbl))
+ expect_equal(tbl, tab1)
+})
+
+test_that("read_csv_arrow parsing options: skip_empty_rows", {
+ tf <- tempfile()
+ on.exit(unlink(tf))
+
+ write.csv(tbl, tf, row.names = FALSE)
+ cat("\n\n", file = tf, append = TRUE)
+
+ tab1 <- read_csv_arrow(tf, skip_empty_rows = FALSE)
+
+ expect_equal(nrow(tab1), nrow(tbl) + 2)
+ expect_true(is.na(tail(tab1, 1)[[1]]))
+})
+
+test_that("read_csv_arrow parsing options: na strings", {
+ tf <- tempfile()
+ on.exit(unlink(tf))
+
+ df <- data.frame(
+ a = c(1.2, NA, NA, 3.4),
+ b = c(NA, "B", "C", NA),
+ stringsAsFactors = FALSE
+ )
+ write.csv(df, tf, row.names = FALSE)
+ expect_equal(grep("NA", readLines(tf)), 2:5)
+
+ tab1 <- read_csv_arrow(tf)
+ expect_equal(is.na(tab1$a), is.na(df$a))
+ expect_equal(is.na(tab1$b), is.na(df$b))
+
+ unlink(tf) # Delete and write to the same file name again
+
+ write.csv(df, tf, row.names = FALSE, na = "asdf")
+ expect_equal(grep("asdf", readLines(tf)), 2:5)
+
+ tab2 <- read_csv_arrow(tf, na = "asdf")
+ expect_equal(is.na(tab2$a), is.na(df$a))
+ expect_equal(is.na(tab2$b), is.na(df$b))
+})
+
+test_that("read_csv_arrow() respects col_select", {
+ tf <- tempfile()
+ on.exit(unlink(tf))
+
+ write.csv(tbl, tf, row.names = FALSE, quote = FALSE)
+
+ tab <- read_csv_arrow(tf, col_select = ends_with("l"), as_data_frame = FALSE)
+ expect_equal(tab, Table$create(example_data[, c("dbl", "lgl")]))
+
+ tib <- read_csv_arrow(tf, col_select = ends_with("l"), as_data_frame = TRUE)
+ expect_equal(tib, example_data[, c("dbl", "lgl")])
+})
+
+test_that("read_csv_arrow() can detect compression from file name", {
+ skip_if_not_available("gzip")
+ tf <- tempfile(fileext = ".csv.gz")
+ on.exit(unlink(tf))
+
+ write.csv(tbl, gzfile(tf), row.names = FALSE, quote = FALSE)
+ tab1 <- read_csv_arrow(tf)
+ expect_equal(tbl, tab1)
+})
+
+test_that("read_csv_arrow(schema=)", {
+ tbl <- example_data[, "int"]
+ tf <- tempfile()
+ on.exit(unlink(tf))
+ write.csv(tbl, tf, row.names = FALSE)
+
+ df <- read_csv_arrow(tf, schema = schema(int = float64()), skip = 1)
+ expect_identical(df, tibble::tibble(int = as.numeric(tbl$int)))
+})
+
+test_that("read_csv_arrow(col_types = <Schema>)", {
+ tbl <- example_data[, "int"]
+ tf <- tempfile()
+ on.exit(unlink(tf))
+ write.csv(tbl, tf, row.names = FALSE)
+
+ df <- read_csv_arrow(tf, col_types = schema(int = float64()))
+ expect_identical(df, tibble::tibble(int = as.numeric(tbl$int)))
+})
+
+test_that("read_csv_arrow(col_types=string, col_names)", {
+ tbl <- example_data[, "int"]
+ tf <- tempfile()
+ on.exit(unlink(tf))
+ write.csv(tbl, tf, row.names = FALSE)
+
+ df <- read_csv_arrow(tf, col_names = "int", col_types = "d", skip = 1)
+ expect_identical(df, tibble::tibble(int = as.numeric(tbl$int)))
+
+ expect_error(read_csv_arrow(tf, col_types = c("i", "d")))
+ expect_error(read_csv_arrow(tf, col_types = "d"))
+ expect_error(read_csv_arrow(tf, col_types = "i", col_names = c("a", "b")))
+ expect_error(read_csv_arrow(tf, col_types = "y", col_names = "a"))
+})
+
+test_that("read_csv_arrow() can read timestamps", {
+ tbl <- tibble::tibble(time = as.POSIXct("2020-07-20 16:20", tz = "UTC"))
+ tf <- tempfile()
+ on.exit(unlink(tf))
+ write.csv(tbl, tf, row.names = FALSE)
+
+ df <- read_csv_arrow(tf, col_types = schema(time = timestamp(timezone = "UTC")))
+ expect_equal(tbl, df)
+
+ # time zones are being read in as time zone-naive, hence ignore_attr = "tzone"
+ df <- read_csv_arrow(tf, col_types = "T", col_names = "time", skip = 1)
+ expect_equal(tbl, df, ignore_attr = "tzone")
+})
+
+test_that("read_csv_arrow(timestamp_parsers=)", {
+ tf <- tempfile()
+ on.exit(unlink(tf))
+ tbl <- tibble::tibble(time = "23/09/2020")
+ write.csv(tbl, tf, row.names = FALSE)
+
+ df <- read_csv_arrow(
+ tf,
+ col_types = schema(time = timestamp(timezone = "UTC")),
+ timestamp_parsers = "%d/%m/%Y"
+ )
+ expect_equal(df$time, as.POSIXct(tbl$time, format = "%d/%m/%Y", tz = "UTC"))
+})
+
+test_that("Skipping columns with null()", {
+ tf <- tempfile()
+ on.exit(unlink(tf))
+ cols <- c("dbl", "lgl", "false", "chr")
+ tbl <- example_data[, cols]
+ write.csv(tbl, tf, row.names = FALSE)
+
+ df <- read_csv_arrow(tf, col_types = "d-_c", col_names = cols, skip = 1)
+ expect_identical(df, tbl[, c("dbl", "chr")])
+})
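+
+# For reference, the readr-style col_types shorthand used in these tests:
+# "c" = character, "d" = double, "i" = integer, "T" = timestamp, "t" = time,
+# "?" = guess, and "-" or "_" = skip the column.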
+
+test_that("Mix of guessing and declaring types", {
+ tf <- tempfile()
+ on.exit(unlink(tf))
+ cols <- c("dbl", "lgl", "false", "chr")
+ tbl <- example_data[, cols]
+ write.csv(tbl, tf, row.names = FALSE)
+
+ tab <- read_csv_arrow(tf, col_types = schema(dbl = float32()), as_data_frame = FALSE)
+ expect_equal(tab$schema, schema(dbl = float32(), lgl = bool(), false = bool(), chr = utf8()))
+
+ df <- read_csv_arrow(tf, col_types = "d-?c", col_names = cols, skip = 1)
+ expect_identical(df, tbl[, c("dbl", "false", "chr")])
+})
+
+
+test_that("Write a CSV file with header", {
+ tbl_out <- write_csv_arrow(tbl_no_dates, csv_file)
+ expect_true(file.exists(csv_file))
+ expect_identical(tbl_out, tbl_no_dates)
+
+ tbl_in <- read_csv_arrow(csv_file)
+ expect_identical(tbl_in, tbl_no_dates)
+
+ tbl_out <- write_csv_arrow(tbl, csv_file)
+ expect_true(file.exists(csv_file))
+ expect_identical(tbl_out, tbl)
+
+ tbl_in <- read_csv_arrow(csv_file)
+ expect_identical(tbl_in, tbl)
+})
+
+
+test_that("Write a CSV file with no header", {
+ tbl_out <- write_csv_arrow(tbl_no_dates, csv_file, include_header = FALSE)
+ expect_true(file.exists(csv_file))
+ expect_identical(tbl_out, tbl_no_dates)
+ tbl_in <- read_csv_arrow(csv_file, col_names = FALSE)
+
+ tbl_expected <- tbl_no_dates
+ names(tbl_expected) <- c("f0", "f1", "f2", "f3")
+
+ expect_identical(tbl_in, tbl_expected)
+})
+
+test_that("Write a CSV file with different batch sizes", {
+ tbl_out1 <- write_csv_arrow(tbl_no_dates, csv_file, batch_size = 1)
+ expect_true(file.exists(csv_file))
+ expect_identical(tbl_out1, tbl_no_dates)
+ tbl_in1 <- read_csv_arrow(csv_file)
+ expect_identical(tbl_in1, tbl_no_dates)
+
+ tbl_out2 <- write_csv_arrow(tbl_no_dates, csv_file, batch_size = 2)
+ expect_true(file.exists(csv_file))
+ expect_identical(tbl_out2, tbl_no_dates)
+ tbl_in2 <- read_csv_arrow(csv_file)
+ expect_identical(tbl_in2, tbl_no_dates)
+
+ tbl_out3 <- write_csv_arrow(tbl_no_dates, csv_file, batch_size = 12)
+ expect_true(file.exists(csv_file))
+ expect_identical(tbl_out3, tbl_no_dates)
+ tbl_in3 <- read_csv_arrow(csv_file)
+ expect_identical(tbl_in3, tbl_no_dates)
+})
+
+test_that("Write a CSV file with invalid input type", {
+ bad_input <- Array$create(1:5)
+ expect_error(
+ write_csv_arrow(bad_input, csv_file),
+ regexp = "x must be an object of class 'data.frame', 'RecordBatch', or 'Table', not 'Array'."
+ )
+})
+
+test_that("Write a CSV file with invalid batch size", {
+ expect_error(
+ write_csv_arrow(tbl_no_dates, csv_file, batch_size = -1),
+ regexp = "batch_size not greater than 0"
+ )
+})
+
+test_that("time mapping work as expected (ARROW-13624)", {
+ tbl <- tibble::tibble(
+ dt = as.POSIXct(c("2020-07-20 16:20", NA), tz = "UTC"),
+ time = c(hms::as_hms("16:20:00"), NA)
+ )
+ tf <- tempfile()
+ on.exit(unlink(tf))
+ write.csv(tbl, tf, row.names = FALSE)
+
+ df <- read_csv_arrow(tf,
+ col_names = c("dt", "time"),
+ col_types = "Tt",
+ skip = 1
+ )
+
+ expect_error(
+ read_csv_arrow(tf,
+ col_names = c("dt", "time"),
+ col_types = "tT", skip = 1
+ )
+ )
+
+ expect_equal(df, tbl, ignore_attr = "tzone")
+})
diff --git a/src/arrow/r/tests/testthat/test-data-type.R b/src/arrow/r/tests/testthat/test-data-type.R
new file mode 100644
index 000000000..a9d0879b8
--- /dev/null
+++ b/src/arrow/r/tests/testthat/test-data-type.R
@@ -0,0 +1,429 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+test_that("null type works as expected", {
+ x <- null()
+ expect_equal(x$id, 0L)
+ expect_equal(x$name, "null")
+ expect_equal(x$ToString(), "null")
+ expect_true(x == x)
+ expect_false(x == int8())
+ expect_equal(x$num_fields, 0L)
+ expect_equal(x$fields(), list())
+})
+
+test_that("boolean type work as expected", {
+ x <- boolean()
+ expect_equal(x$id, Type$BOOL)
+ expect_equal(x$name, "bool")
+ expect_equal(x$ToString(), "bool")
+ expect_true(x == x)
+ expect_false(x == null())
+ expect_equal(x$num_fields, 0L)
+ expect_equal(x$fields(), list())
+ expect_equal(x$bit_width, 1L)
+})
+
+test_that("int types works as expected", {
+ x <- uint8()
+ expect_equal(x$id, Type$UINT8)
+ expect_equal(x$name, "uint8")
+ expect_equal(x$ToString(), "uint8")
+ expect_true(x == x)
+ expect_false(x == null())
+ expect_equal(x$num_fields, 0L)
+ expect_equal(x$fields(), list())
+ expect_equal(x$bit_width, 8L)
+
+ x <- int8()
+ expect_equal(x$id, Type$INT8)
+ expect_equal(x$name, "int8")
+ expect_equal(x$ToString(), "int8")
+ expect_true(x == x)
+ expect_false(x == null())
+ expect_equal(x$num_fields, 0L)
+ expect_equal(x$fields(), list())
+ expect_equal(x$bit_width, 8L)
+
+ x <- uint16()
+ expect_equal(x$id, Type$UINT16)
+ expect_equal(x$name, "uint16")
+ expect_equal(x$ToString(), "uint16")
+ expect_true(x == x)
+ expect_false(x == null())
+ expect_equal(x$num_fields, 0L)
+ expect_equal(x$fields(), list())
+ expect_equal(x$bit_width, 16L)
+
+ x <- int16()
+ expect_equal(x$id, Type$INT16)
+ expect_equal(x$name, "int16")
+ expect_equal(x$ToString(), "int16")
+ expect_true(x == x)
+ expect_false(x == null())
+ expect_equal(x$num_fields, 0L)
+ expect_equal(x$fields(), list())
+ expect_equal(x$bit_width, 16L)
+
+ x <- uint32()
+ expect_equal(x$id, Type$UINT32)
+ expect_equal(x$name, "uint32")
+ expect_equal(x$ToString(), "uint32")
+ expect_true(x == x)
+ expect_false(x == null())
+ expect_equal(x$num_fields, 0L)
+ expect_equal(x$fields(), list())
+ expect_equal(x$bit_width, 32L)
+
+ x <- int32()
+ expect_equal(x$id, Type$INT32)
+ expect_equal(x$name, "int32")
+ expect_equal(x$ToString(), "int32")
+ expect_true(x == x)
+ expect_false(x == null())
+ expect_equal(x$num_fields, 0L)
+ expect_equal(x$fields(), list())
+ expect_equal(x$bit_width, 32L)
+
+ x <- uint64()
+ expect_equal(x$id, Type$UINT64)
+ expect_equal(x$name, "uint64")
+ expect_equal(x$ToString(), "uint64")
+ expect_true(x == x)
+ expect_false(x == null())
+ expect_equal(x$num_fields, 0L)
+ expect_equal(x$fields(), list())
+ expect_equal(x$bit_width, 64L)
+
+ x <- int64()
+ expect_equal(x$id, Type$INT64)
+ expect_equal(x$name, "int64")
+ expect_equal(x$ToString(), "int64")
+ expect_true(x == x)
+ expect_false(x == null())
+ expect_equal(x$num_fields, 0L)
+ expect_equal(x$fields(), list())
+ expect_equal(x$bit_width, 64L)
+})
+
+test_that("float types work as expected", {
+ x <- float16()
+ expect_equal(x$id, Type$HALF_FLOAT)
+ expect_equal(x$name, "halffloat")
+ expect_equal(x$ToString(), "halffloat")
+ expect_true(x == x)
+ expect_false(x == null())
+ expect_equal(x$num_fields, 0L)
+ expect_equal(x$fields(), list())
+ expect_equal(x$bit_width, 16L)
+
+ x <- float32()
+ expect_equal(x$id, Type$FLOAT)
+ expect_equal(x$name, "float")
+ expect_equal(x$ToString(), "float")
+ expect_true(x == x)
+ expect_false(x == null())
+ expect_equal(x$num_fields, 0L)
+ expect_equal(x$fields(), list())
+ expect_equal(x$bit_width, 32L)
+
+ x <- float64()
+ expect_equal(x$id, Type$DOUBLE)
+ expect_equal(x$name, "double")
+ expect_equal(x$ToString(), "double")
+ expect_true(x == x)
+ expect_false(x == null())
+ expect_equal(x$num_fields, 0L)
+ expect_equal(x$fields(), list())
+ expect_equal(x$bit_width, 64L)
+})
+
+test_that("utf8 type works as expected", {
+ x <- utf8()
+ expect_equal(x$id, Type$STRING)
+ expect_equal(x$name, "utf8")
+ expect_equal(x$ToString(), "string")
+ expect_true(x == x)
+ expect_false(x == null())
+ expect_equal(x$num_fields, 0L)
+ expect_equal(x$fields(), list())
+})
+
+test_that("date types work as expected", {
+ x <- date32()
+ expect_equal(x$id, Type$DATE32)
+ expect_equal(x$name, "date32")
+ expect_equal(x$ToString(), "date32[day]")
+ expect_true(x == x)
+ expect_false(x == null())
+ expect_equal(x$num_fields, 0L)
+ expect_equal(x$fields(), list())
+ expect_equal(x$unit(), unclass(DateUnit$DAY))
+
+ x <- date64()
+ expect_equal(x$id, Type$DATE64)
+ expect_equal(x$name, "date64")
+ expect_equal(x$ToString(), "date64[ms]")
+ expect_true(x == x)
+ expect_false(x == null())
+ expect_equal(x$num_fields, 0L)
+ expect_equal(x$fields(), list())
+ expect_equal(x$unit(), unclass(DateUnit$MILLI))
+})
+
+test_that("timestamp type works as expected", {
+ x <- timestamp(TimeUnit$SECOND)
+ expect_equal(x$id, Type$TIMESTAMP)
+ expect_equal(x$name, "timestamp")
+ expect_equal(x$ToString(), "timestamp[s]")
+ expect_true(x == x)
+ expect_false(x == null())
+ expect_equal(x$num_fields, 0L)
+ expect_equal(x$fields(), list())
+ expect_equal(x$bit_width, 64L)
+ expect_equal(x$timezone(), "")
+ expect_equal(x$unit(), unclass(TimeUnit$SECOND))
+
+ x <- timestamp(TimeUnit$MILLI)
+ expect_equal(x$id, Type$TIMESTAMP)
+ expect_equal(x$name, "timestamp")
+ expect_equal(x$ToString(), "timestamp[ms]")
+ expect_true(x == x)
+ expect_false(x == null())
+ expect_equal(x$num_fields, 0L)
+ expect_equal(x$fields(), list())
+ expect_equal(x$bit_width, 64L)
+ expect_equal(x$timezone(), "")
+ expect_equal(x$unit(), unclass(TimeUnit$MILLI))
+
+ x <- timestamp(TimeUnit$MICRO)
+ expect_equal(x$id, Type$TIMESTAMP)
+ expect_equal(x$name, "timestamp")
+ expect_equal(x$ToString(), "timestamp[us]")
+ expect_true(x == x)
+ expect_false(x == null())
+ expect_equal(x$num_fields, 0L)
+ expect_equal(x$fields(), list())
+ expect_equal(x$bit_width, 64L)
+ expect_equal(x$timezone(), "")
+ expect_equal(x$unit(), unclass(TimeUnit$MICRO))
+
+ x <- timestamp(TimeUnit$NANO)
+ expect_equal(x$id, Type$TIMESTAMP)
+ expect_equal(x$name, "timestamp")
+ expect_equal(x$ToString(), "timestamp[ns]")
+ expect_true(x == x)
+ expect_false(x == null())
+ expect_equal(x$num_fields, 0L)
+ expect_equal(x$fields(), list())
+ expect_equal(x$bit_width, 64L)
+ expect_equal(x$timezone(), "")
+ expect_equal(x$unit(), unclass(TimeUnit$NANO))
+})
+
+test_that("timestamp with timezone", {
+ expect_equal(timestamp(timezone = "EST")$ToString(), "timestamp[s, tz=EST]")
+})
+
+test_that("time32 types work as expected", {
+ x <- time32(TimeUnit$SECOND)
+ expect_equal(x$id, Type$TIME32)
+ expect_equal(x$name, "time32")
+ expect_equal(x$ToString(), "time32[s]")
+ expect_true(x == x)
+ expect_false(x == null())
+ expect_equal(x$num_fields, 0L)
+ expect_equal(x$fields(), list())
+ expect_equal(x$bit_width, 32L)
+ expect_equal(x$unit(), unclass(TimeUnit$SECOND))
+
+ x <- time32(TimeUnit$MILLI)
+ expect_equal(x$id, Type$TIME32)
+ expect_equal(x$name, "time32")
+ expect_equal(x$ToString(), "time32[ms]")
+ expect_true(x == x)
+ expect_false(x == null())
+ expect_equal(x$num_fields, 0L)
+ expect_equal(x$fields(), list())
+ expect_equal(x$bit_width, 32L)
+ expect_equal(x$unit(), unclass(TimeUnit$MILLI))
+})
+
+test_that("time64 types work as expected", {
+ x <- time64(TimeUnit$MICRO)
+ expect_equal(x$id, Type$TIME64)
+ expect_equal(x$name, "time64")
+ expect_equal(x$ToString(), "time64[us]")
+ expect_true(x == x)
+ expect_false(x == null())
+ expect_equal(x$num_fields, 0L)
+ expect_equal(x$fields(), list())
+ expect_equal(x$bit_width, 64L)
+ expect_equal(x$unit(), unclass(TimeUnit$MICRO))
+
+ x <- time64(TimeUnit$NANO)
+ expect_equal(x$id, Type$TIME64)
+ expect_equal(x$name, "time64")
+ expect_equal(x$ToString(), "time64[ns]")
+ expect_true(x == x)
+ expect_false(x == null())
+ expect_equal(x$num_fields, 0L)
+ expect_equal(x$fields(), list())
+ expect_equal(x$bit_width, 64L)
+ expect_equal(x$unit(), unclass(TimeUnit$NANO))
+})
+
+test_that("time type unit validation", {
+ expect_equal(time32(TimeUnit$SECOND), time32("s"))
+ expect_equal(time32(TimeUnit$MILLI), time32("ms"))
+ expect_equal(time32(), time32(TimeUnit$MILLI))
+ expect_error(time32(4), '"unit" should be one of 1 or 0')
+ expect_error(time32(NULL), '"unit" should be one of "ms" or "s"')
+ expect_match_arg_error(time32("years"))
+
+ expect_equal(time64(TimeUnit$NANO), time64("n"))
+ expect_equal(time64(TimeUnit$MICRO), time64("us"))
+ expect_equal(time64(), time64(TimeUnit$NANO))
+ expect_error(time64(4), '"unit" should be one of 3 or 2')
+ expect_error(time64(NULL), '"unit" should be one of "ns" or "us"')
+ expect_match_arg_error(time64("years"))
+})
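+
+# The integers in the errors above are TimeUnit enum values
+# (SECOND = 0, MILLI = 1, MICRO = 2, NANO = 3): time32() accepts 0 or 1 and
+# time64() accepts 2 or 3.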
+
+test_that("timestamp type input validation", {
+ expect_equal(timestamp("ms"), timestamp(TimeUnit$MILLI))
+ expect_equal(timestamp(), timestamp(TimeUnit$SECOND))
+ expect_error(
+ timestamp(NULL),
+ '"unit" should be one of "ns", "us", "ms", or "s"'
+ )
+ expect_error(
+ timestamp(timezone = 1231231),
+ "timezone is not a string"
+ )
+ expect_error(
+ timestamp(timezone = c("not", "a", "timezone")),
+ "timezone is not a string"
+ )
+})
+
+test_that("list type works as expected", {
+ x <- list_of(int32())
+ expect_equal(x$id, Type$LIST)
+ expect_equal(x$name, "list")
+ expect_equal(x$ToString(), "list<item: int32>")
+ expect_true(x == x)
+ expect_false(x == null())
+ expect_equal(x$num_fields, 1L)
+ expect_equal(
+ x$fields()[[1]],
+ field("item", int32())
+ )
+ expect_equal(x$value_type, int32())
+ expect_equal(x$value_field, field("item", int32()))
+})
+
+test_that("struct type works as expected", {
+ x <- struct(x = int32(), y = boolean())
+ expect_equal(x$id, Type$STRUCT)
+ expect_equal(x$name, "struct")
+ expect_equal(x$ToString(), "struct<x: int32, y: bool>")
+ expect_true(x == x)
+ expect_false(x == null())
+ expect_equal(x$num_fields, 2L)
+ expect_equal(
+ x$fields()[[1]],
+ field("x", int32())
+ )
+ expect_equal(
+ x$fields()[[2]],
+ field("y", boolean())
+ )
+ expect_equal(x$GetFieldIndex("x"), 0L)
+ expect_equal(x$GetFieldIndex("y"), 1L)
+ expect_equal(x$GetFieldIndex("z"), -1L)
+
+ expect_equal(x$GetFieldByName("x"), field("x", int32()))
+ expect_equal(x$GetFieldByName("y"), field("y", boolean()))
+ expect_null(x$GetFieldByName("z"))
+})
+
+test_that("DictionaryType works as expected (ARROW-3355)", {
+ d <- dictionary(int32(), utf8())
+ expect_equal(d, d)
+ expect_true(d == d)
+ expect_false(d == int32())
+ expect_equal(d$id, Type$DICTIONARY)
+ expect_equal(d$bit_width, 32L)
+ expect_equal(d$ToString(), "dictionary<values=string, indices=int32>")
+ expect_equal(d$index_type, int32())
+ expect_equal(d$value_type, utf8())
+ ord <- dictionary(ordered = TRUE)
+ expect_equal(ord$ToString(), "dictionary<values=string, indices=int32, ordered>")
+})
+
+test_that("DictionaryType validation", {
+ expect_error(
+ dictionary(utf8(), int32()),
+ "Dictionary index type should be .*integer, got string"
+ )
+ expect_error(dictionary(4, utf8()), 'index_type must be a "DataType"')
+ expect_error(dictionary(int8(), "strings"), 'value_type must be a "DataType"')
+})
+
+test_that("decimal type and validation", {
+ expect_error(decimal())
+ expect_error(decimal("four"), '"precision" must be an integer')
+ expect_error(decimal(4))
+ expect_error(decimal(4, "two"), '"scale" must be an integer')
+ expect_error(decimal(NA, 2), '"precision" must be an integer')
+ expect_error(decimal(0, 2), "Invalid: Decimal precision out of range: 0")
+ expect_error(decimal(100, 2), "Invalid: Decimal precision out of range: 100")
+ expect_error(decimal(4, NA), '"scale" must be an integer')
+
+ expect_r6_class(decimal(4, 2), "Decimal128Type")
+})
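+
+# (The valid precision range for decimal128 is 1 to 38, which is why 0 and
+# 100 are rejected as out of range above.)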
+
+test_that("Binary", {
+ expect_r6_class(binary(), "Binary")
+ expect_equal(binary()$ToString(), "binary")
+})
+
+test_that("FixedSizeBinary", {
+ expect_r6_class(fixed_size_binary(4), "FixedSizeBinary")
+ expect_equal(fixed_size_binary(4)$ToString(), "fixed_size_binary[4]")
+
+ # input validation
+ expect_error(fixed_size_binary(NA), "'byte_width' cannot be NA")
+ expect_error(fixed_size_binary(-1), "'byte_width' must be > 0")
+ expect_error(fixed_size_binary("four"))
+ expect_error(fixed_size_binary(c(2, 4)))
+})
+
+test_that("DataType to C-interface", {
+ datatype <- timestamp("ms", timezone = "Pacific/Marquesas")
+
+ # export the datatype via the C-interface
+ ptr <- allocate_arrow_schema()
+ datatype$export_to_c(ptr)
+
+ # then import it and check that the roundtripped value is the same
+ circle <- DataType$import_from_c(ptr)
+ expect_equal(circle, datatype)
+
+ # must clean up the pointer or we leak
+ delete_arrow_schema(ptr)
+})
diff --git a/src/arrow/r/tests/testthat/test-dataset-csv.R b/src/arrow/r/tests/testthat/test-dataset-csv.R
new file mode 100644
index 000000000..ab6693148
--- /dev/null
+++ b/src/arrow/r/tests/testthat/test-dataset-csv.R
@@ -0,0 +1,290 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+skip_if_not_available("dataset")
+
+library(dplyr, warn.conflicts = FALSE)
+
+csv_dir <- make_temp_dir()
+tsv_dir <- make_temp_dir()
+
+test_that("Setup (putting data in the dirs)", {
+ dir.create(file.path(csv_dir, 5))
+ dir.create(file.path(csv_dir, 6))
+ write.csv(df1, file.path(csv_dir, 5, "file1.csv"), row.names = FALSE)
+ write.csv(df2, file.path(csv_dir, 6, "file2.csv"), row.names = FALSE)
+ expect_length(dir(csv_dir, recursive = TRUE), 2)
+
+ # Now, tab-delimited
+ dir.create(file.path(tsv_dir, 5))
+ dir.create(file.path(tsv_dir, 6))
+ write.table(df1, file.path(tsv_dir, 5, "file1.tsv"), row.names = FALSE, sep = "\t")
+ write.table(df2, file.path(tsv_dir, 6, "file2.tsv"), row.names = FALSE, sep = "\t")
+ expect_length(dir(tsv_dir, recursive = TRUE), 2)
+})
+
+test_that("CSV dataset", {
+ ds <- open_dataset(csv_dir, partitioning = "part", format = "csv")
+ expect_r6_class(ds$format, "CsvFileFormat")
+ expect_r6_class(ds$filesystem, "LocalFileSystem")
+ expect_identical(names(ds), c(names(df1), "part"))
+ if (getRversion() >= "4.0.0") {
+ # CountRows segfaults on RTools35/R 3.6, so don't test it there
+ expect_identical(dim(ds), c(20L, 7L))
+ }
+ expect_equal(
+ ds %>%
+ select(string = chr, integer = int, part) %>%
+ filter(integer > 6 & part == 5) %>%
+ collect() %>%
+ summarize(mean = mean(as.numeric(integer))), # as.numeric because they're being parsed as int64
+ df1 %>%
+ select(string = chr, integer = int) %>%
+ filter(integer > 6) %>%
+ summarize(mean = mean(integer))
+ )
+ # Collecting virtual partition column works
+ expect_equal(
+ collect(ds) %>% arrange(part) %>% pull(part),
+ c(rep(5, 10), rep(6, 10))
+ )
+})
+
+test_that("CSV scan options", {
+ options <- FragmentScanOptions$create("text")
+ expect_equal(options$type, "csv")
+ options <- FragmentScanOptions$create("csv",
+ null_values = c("mynull"),
+ strings_can_be_null = TRUE
+ )
+ expect_equal(options$type, "csv")
+
+ dst_dir <- make_temp_dir()
+ dst_file <- file.path(dst_dir, "data.csv")
+ df <- tibble(chr = c("foo", "mynull"))
+ write.csv(df, dst_file, row.names = FALSE, quote = FALSE)
+
+ ds <- open_dataset(dst_dir, format = "csv")
+ expect_equal(ds %>% collect(), df)
+
+ sb <- ds$NewScan()
+ sb$FragmentScanOptions(options)
+
+ tab <- sb$Finish()$ToTable()
+ expect_equal(as.data.frame(tab), tibble(chr = c("foo", NA)))
+
+ # Set default convert options in CsvFileFormat
+ csv_format <- CsvFileFormat$create(
+ null_values = c("mynull"),
+ strings_can_be_null = TRUE
+ )
+ ds <- open_dataset(dst_dir, format = csv_format)
+ expect_equal(ds %>% collect(), tibble(chr = c("foo", NA)))
+
+ # Set both parse and convert options
+ df <- tibble(chr = c("foo", "mynull"), chr2 = c("bar", "baz"))
+ write.table(df, dst_file, row.names = FALSE, quote = FALSE, sep = "\t")
+ ds <- open_dataset(dst_dir,
+ format = "csv",
+ delimiter = "\t",
+ null_values = c("mynull"),
+ strings_can_be_null = TRUE
+ )
+ expect_equal(ds %>% collect(), tibble(
+ chr = c("foo", NA),
+ chr2 = c("bar", "baz")
+ ))
+ expect_equal(
+ ds %>%
+ group_by(chr2) %>%
+ summarize(na = all(is.na(chr))) %>%
+ arrange(chr2) %>%
+ collect(),
+ tibble(
+ chr2 = c("bar", "baz"),
+ na = c(FALSE, TRUE)
+ )
+ )
+})
+
+test_that("compressed CSV dataset", {
+ skip_if_not_available("gzip")
+ dst_dir <- make_temp_dir()
+ dst_file <- file.path(dst_dir, "data.csv.gz")
+ write.csv(df1, gzfile(dst_file), row.names = FALSE, quote = FALSE)
+ format <- FileFormat$create("csv")
+ ds <- open_dataset(dst_dir, format = format)
+ expect_r6_class(ds$format, "CsvFileFormat")
+ expect_r6_class(ds$filesystem, "LocalFileSystem")
+
+ expect_equal(
+ ds %>%
+ select(string = chr, integer = int) %>%
+ filter(integer > 6 & integer < 11) %>%
+ collect() %>%
+ summarize(mean = mean(integer)),
+ df1 %>%
+ select(string = chr, integer = int) %>%
+ filter(integer > 6) %>%
+ summarize(mean = mean(integer))
+ )
+})
+
+test_that("CSV dataset options", {
+ dst_dir <- make_temp_dir()
+ dst_file <- file.path(dst_dir, "data.csv")
+ df <- tibble(chr = letters[1:10])
+ write.csv(df, dst_file, row.names = FALSE, quote = FALSE)
+
+ format <- FileFormat$create("csv", skip_rows = 1)
+ ds <- open_dataset(dst_dir, format = format)
+
+ # skip_rows = 1 drops the "chr" header line, so the first data row ("a")
+ # becomes the column name; df1$chr is also letters[1:10], which is why
+ # df1[-1, ] supplies the expected remaining values
+ expect_equal(
+ ds %>%
+ select(string = a) %>%
+ collect(),
+ df1[-1, ] %>%
+ select(string = chr)
+ )
+
+ ds <- open_dataset(dst_dir, format = "csv", column_names = c("foo"))
+
+ expect_equal(
+ ds %>%
+ select(string = foo) %>%
+ collect(),
+ tibble(string = c(c("chr"), letters[1:10]))
+ )
+})
+
+test_that("Other text delimited dataset", {
+ ds1 <- open_dataset(tsv_dir, partitioning = "part", format = "tsv")
+ expect_equal(
+ ds1 %>%
+ select(string = chr, integer = int, part) %>%
+ filter(integer > 6 & part == 5) %>%
+ collect() %>%
+ summarize(mean = mean(as.numeric(integer))), # as.numeric because they're being parsed as int64
+ df1 %>%
+ select(string = chr, integer = int) %>%
+ filter(integer > 6) %>%
+ summarize(mean = mean(integer))
+ )
+
+ ds2 <- open_dataset(tsv_dir, partitioning = "part", format = "text", delimiter = "\t")
+ expect_equal(
+ ds2 %>%
+ select(string = chr, integer = int, part) %>%
+ filter(integer > 6 & part == 5) %>%
+ collect() %>%
+ summarize(mean = mean(as.numeric(integer))), # as.numeric because they're being parsed as int64
+ df1 %>%
+ select(string = chr, integer = int) %>%
+ filter(integer > 6) %>%
+ summarize(mean = mean(integer))
+ )
+})
+
+test_that("readr parse options", {
+ arrow_opts <- names(formals(CsvParseOptions$create))
+ readr_opts <- names(formals(readr_to_csv_parse_options))
+
+ # Arrow and readr parse options must be mutually exclusive, or else the code
+ # in `csv_file_format_parse_options()` will error or behave incorrectly. A
+ # failure of this test indicates that these two sets of option names are not
+ # mutually exclusive.
+ expect_equal(
+ intersect(arrow_opts, readr_opts),
+ character(0)
+ )
+
+ # With not yet supported readr parse options (ARROW-8631)
+ expect_error(
+ open_dataset(tsv_dir, partitioning = "part", delim = "\t", na = "\\N"),
+ "supported"
+ )
+
+ # With unrecognized (garbage) parse options
+ expect_error(
+ open_dataset(
+ tsv_dir,
+ partitioning = "part",
+ format = "text",
+ asdfg = "\\"
+ ),
+ "Unrecognized"
+ )
+
+ # With both Arrow and readr parse options (disallowed)
+ expect_error(
+ open_dataset(
+ tsv_dir,
+ partitioning = "part",
+ format = "text",
+ quote = "\"",
+ quoting = TRUE
+ ),
+ "either"
+ )
+
+ # With ambiguous partial option names (disallowed)
+ expect_error(
+ open_dataset(
+ tsv_dir,
+ partitioning = "part",
+ format = "text",
+ quo = "\"",
+ ),
+ "Ambiguous"
+ )
+
+ # With only readr parse options (and omitting format = "text")
+ ds1 <- open_dataset(tsv_dir, partitioning = "part", delim = "\t")
+ expect_equal(
+ ds1 %>%
+ select(string = chr, integer = int, part) %>%
+ filter(integer > 6 & part == 5) %>%
+ collect() %>%
+ summarize(mean = mean(as.numeric(integer))), # as.numeric because they're being parsed as int64
+ df1 %>%
+ select(string = chr, integer = int) %>%
+ filter(integer > 6) %>%
+ summarize(mean = mean(integer))
+ )
+})
+
+# see https://issues.apache.org/jira/browse/ARROW-12791
+test_that("Error if no format specified and files are not parquet", {
+ expect_error(
+ open_dataset(csv_dir, partitioning = "part"),
+ "Did you mean to specify a 'format' other than the default (parquet)?",
+ fixed = TRUE
+ )
+ expect_error(
+ open_dataset(csv_dir, partitioning = "part", format = "parquet"),
+ "Parquet magic bytes not found"
+ )
+})
+
+test_that("Column names inferred from schema for headerless CSVs (ARROW-14063)", {
+ headerless_csv_dir <- make_temp_dir()
+ tbl <- df1[, c("int", "dbl")]
+ write.table(tbl, file.path(headerless_csv_dir, "file1.csv"), sep = ",", row.names = FALSE, col.names = FALSE)
+
+ ds <- open_dataset(headerless_csv_dir, format = "csv", schema = schema(int = int32(), dbl = float64()))
+ expect_equal(ds %>% collect(), tbl)
+})
diff --git a/src/arrow/r/tests/testthat/test-dataset-dplyr.R b/src/arrow/r/tests/testthat/test-dataset-dplyr.R
new file mode 100644
index 000000000..b4519377c
--- /dev/null
+++ b/src/arrow/r/tests/testthat/test-dataset-dplyr.R
@@ -0,0 +1,340 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+skip_if_not_available("dataset")
+skip_if_not_available("parquet")
+
+library(dplyr, warn.conflicts = FALSE)
+
+dataset_dir <- make_temp_dir()
+hive_dir <- make_temp_dir()
+
+test_that("Setup (putting data in the dir)", {
+ dir.create(file.path(dataset_dir, 1))
+ dir.create(file.path(dataset_dir, 2))
+ write_parquet(df1, file.path(dataset_dir, 1, "file1.parquet"))
+ write_parquet(df2, file.path(dataset_dir, 2, "file2.parquet"))
+ expect_length(dir(dataset_dir, recursive = TRUE), 2)
+
+ dir.create(file.path(hive_dir, "subdir", "group=1", "other=xxx"), recursive = TRUE)
+ dir.create(file.path(hive_dir, "subdir", "group=2", "other=yyy"), recursive = TRUE)
+ write_parquet(df1, file.path(hive_dir, "subdir", "group=1", "other=xxx", "file1.parquet"))
+ write_parquet(df2, file.path(hive_dir, "subdir", "group=2", "other=yyy", "file2.parquet"))
+ expect_length(dir(hive_dir, recursive = TRUE), 2)
+})
+
+test_that("filter() with is.nan()", {
+ ds <- open_dataset(dataset_dir, partitioning = schema(part = uint8()))
+ expect_equal(
+ ds %>%
+ select(part, dbl) %>%
+ filter(!is.nan(dbl), part == 2) %>%
+ collect(),
+ tibble(part = 2L, dbl = df2$dbl[!is.nan(df2$dbl)])
+ )
+})
+
+test_that("filter() with %in%", {
+ ds <- open_dataset(dataset_dir, partitioning = schema(part = uint8()))
+ expect_equal(
+ ds %>%
+ select(int, part) %>%
+ filter(int %in% c(6, 4, 3, 103, 107), part == 1) %>%
+ collect(),
+ tibble(int = df1$int[c(3, 4, 6)], part = 1)
+ )
+
+ # ARROW-9606: bug in %in% filter on partition column with >1 partition columns
+ ds <- open_dataset(hive_dir)
+ expect_equal(
+ ds %>%
+ filter(group %in% 2) %>%
+ select(names(df2)) %>%
+ collect(),
+ df2
+ )
+})
+
+test_that("filter() on timestamp columns", {
+ ds <- open_dataset(dataset_dir, partitioning = schema(part = uint8()))
+ expect_equal(
+ ds %>%
+ filter(ts >= lubridate::ymd_hms("2015-05-04 03:12:39")) %>%
+ filter(part == 1) %>%
+ select(ts) %>%
+ collect(),
+ df1[5:10, c("ts")],
+ )
+
+ # Now with Date
+ expect_equal(
+ ds %>%
+ filter(ts >= as.Date("2015-05-04")) %>%
+ filter(part == 1) %>%
+ select(ts) %>%
+ collect(),
+ df1[5:10, c("ts")],
+ )
+
+ # Now with bare string date
+ skip("Implement more aggressive implicit casting for scalars (ARROW-11402)")
+ expect_equal(
+ ds %>%
+ filter(ts >= "2015-05-04") %>%
+ filter(part == 1) %>%
+ select(ts) %>%
+ collect(),
+ df1[5:10, c("ts")],
+ )
+})
+
+test_that("filter() on date32 columns", {
+ tmp <- tempfile()
+ dir.create(tmp)
+ df <- data.frame(date = as.Date(c("2020-02-02", "2020-02-03")))
+ write_parquet(df, file.path(tmp, "file.parquet"))
+
+ expect_equal(
+ open_dataset(tmp) %>%
+ filter(date > as.Date("2020-02-02")) %>%
+ collect() %>%
+ nrow(),
+ 1L
+ )
+
+ # Also with timestamp scalar
+ expect_equal(
+ open_dataset(tmp) %>%
+ filter(date > lubridate::ymd_hms("2020-02-02 00:00:00")) %>%
+ collect() %>%
+ nrow(),
+ 1L
+ )
+})
+
+
+test_that("mutate()", {
+ ds <- open_dataset(dataset_dir, partitioning = schema(part = uint8()))
+ mutated <- ds %>%
+ select(chr, dbl, int) %>%
+ filter(dbl * 2 > 14 & dbl - 50 < 3L) %>%
+ mutate(twice = int * 2)
+ expect_output(
+ print(mutated),
+ "FileSystemDataset (query)
+chr: string
+dbl: double
+int: int32
+twice: double (multiply_checked(int, 2))
+
+* Filter: ((multiply_checked(dbl, 2) > 14) and (subtract_checked(dbl, 50) < 3))
+See $.data for the source Arrow object",
+ fixed = TRUE
+ )
+ expect_equal(
+ mutated %>%
+ collect() %>%
+ arrange(dbl),
+ rbind(
+ df1[8:10, c("chr", "dbl", "int")],
+ df2[1:2, c("chr", "dbl", "int")]
+ ) %>%
+ mutate(
+ twice = int * 2
+ )
+ )
+})
+
+test_that("mutate() features not yet implemented", {
+ expect_error(
+ ds %>%
+ group_by(int) %>%
+ mutate(avg = mean(int)),
+ "window functions not currently supported in Arrow\nCall collect() first to pull data into R.",
+ fixed = TRUE
+ )
+})
+
+test_that("filter scalar validation doesn't crash (ARROW-7772)", {
+ expect_error(
+ ds %>%
+ filter(int == "fff", part == 1) %>%
+ collect(),
+ "equal has no kernel matching input types .array.int32., scalar.string.."
+ )
+})
+
+test_that("collect() on Dataset works (if fits in memory)", {
+ expect_equal(
+ collect(open_dataset(dataset_dir)) %>% arrange(int),
+ rbind(df1, df2)
+ )
+})
+
+test_that("count()", {
+ ds <- open_dataset(dataset_dir)
+ df <- rbind(df1, df2)
+ expect_equal(
+ ds %>%
+ filter(int > 6, int < 108) %>%
+ count(chr) %>%
+ arrange(chr) %>%
+ collect(),
+ df %>%
+ filter(int > 6, int < 108) %>%
+ count(chr)
+ )
+})
+
+test_that("arrange()", {
+ ds <- open_dataset(dataset_dir, partitioning = schema(part = uint8()))
+ arranged <- ds %>%
+ select(chr, dbl, int) %>%
+ filter(dbl * 2 > 14 & dbl - 50 < 3L) %>%
+ mutate(twice = int * 2) %>%
+ arrange(chr, desc(twice), dbl + int)
+ expect_output(
+ print(arranged),
+ "FileSystemDataset (query)
+chr: string
+dbl: double
+int: int32
+twice: double (multiply_checked(int, 2))
+
+* Filter: ((multiply_checked(dbl, 2) > 14) and (subtract_checked(dbl, 50) < 3))
+* Sorted by chr [asc], multiply_checked(int, 2) [desc], add_checked(dbl, int) [asc]
+See $.data for the source Arrow object",
+ fixed = TRUE
+ )
+ expect_equal(
+ arranged %>%
+ collect(),
+ rbind(
+ df1[8, c("chr", "dbl", "int")],
+ df2[2, c("chr", "dbl", "int")],
+ df1[9, c("chr", "dbl", "int")],
+ df2[1, c("chr", "dbl", "int")],
+ df1[10, c("chr", "dbl", "int")]
+ ) %>%
+ mutate(
+ twice = int * 2
+ )
+ )
+})
+
+test_that("compute()/collect(as_data_frame=FALSE)", {
+ ds <- open_dataset(dataset_dir)
+
+ tab1 <- ds %>% compute()
+ expect_r6_class(tab1, "Table")
+
+ tab2 <- ds %>% collect(as_data_frame = FALSE)
+ expect_r6_class(tab2, "Table")
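+  # compute() and collect(as_data_frame = FALSE) are equivalent here: both
+  # evaluate the (trivial) query to an Arrow Table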
+
+ tab3 <- ds %>%
+ mutate(negint = -int) %>%
+ filter(negint > -100) %>%
+ arrange(chr) %>%
+ select(negint) %>%
+ compute()
+
+ expect_r6_class(tab3, "Table")
+
+ expect_equal(
+ tab3 %>% collect(),
+ tibble(negint = -1:-10)
+ )
+
+ tab4 <- ds %>%
+ mutate(negint = -int) %>%
+ filter(negint > -100) %>%
+ arrange(chr) %>%
+ select(negint) %>%
+ collect(as_data_frame = FALSE)
+
+  expect_r6_class(tab4, "Table")
+
+ expect_equal(
+ tab4 %>% collect(),
+ tibble(negint = -1:-10)
+ )
+
+ tab5 <- ds %>%
+ mutate(negint = -int) %>%
+ group_by(fct) %>%
+ compute()
+
+ # the group_by() prevents compute() from returning a Table...
+ expect_s3_class(tab5, "arrow_dplyr_query")
+
+  # ... but $.data holds the evaluated Table, wrapped as an InMemoryDataset...
+ expect_r6_class(tab5$.data, "InMemoryDataset")
+ # ... and the mutate() was evaluated
+ expect_true("negint" %in% names(tab5$.data))
+})
+
+test_that("head/tail on query on dataset", {
+ # head/tail on arrow_dplyr_query does not have deterministic order,
+ # so without sorting we can only assert the correct number of rows
+ ds <- open_dataset(dataset_dir)
+
+ expect_identical(
+ ds %>%
+ filter(int > 6) %>%
+ head(5) %>%
+ compute() %>%
+ nrow(),
+ 5L
+ )
+
+ expect_equal(
+ ds %>%
+ filter(int > 6) %>%
+ arrange(int) %>%
+ head() %>%
+ collect(),
+ rbind(df1[7:10, ], df2[1:2, ])
+ )
+
+ expect_equal(
+ ds %>%
+ filter(int < 105) %>%
+ tail(4) %>%
+ compute() %>%
+ nrow(),
+ 4L
+ )
+
+ expect_equal(
+ ds %>%
+ filter(int < 105) %>%
+ arrange(int) %>%
+ tail() %>%
+ collect(),
+ rbind(df1[9:10, ], df2[1:4, ])
+ )
+})
+
+test_that("dplyr method not implemented messages", {
+ ds <- open_dataset(dataset_dir)
+ # This one is more nuanced
+ expect_error(
+ ds %>% filter(int > 6, dbl > max(dbl)),
+ "Filter expression not supported for Arrow Datasets: dbl > max(dbl)\nCall collect() first to pull data into R.",
+ fixed = TRUE
+ )
+})
diff --git a/src/arrow/r/tests/testthat/test-dataset-uri.R b/src/arrow/r/tests/testthat/test-dataset-uri.R
new file mode 100644
index 000000000..bdcccf282
--- /dev/null
+++ b/src/arrow/r/tests/testthat/test-dataset-uri.R
@@ -0,0 +1,123 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+skip_on_os("windows")
+skip_if_not_available("parquet")
+skip_if_not_available("dataset")
+
+
+library(dplyr, warn.conflicts = FALSE)
+
+dataset_dir <- make_temp_dir()
+
+test_that("Setup (putting data in the dir)", {
+ dir.create(file.path(dataset_dir, 1))
+ dir.create(file.path(dataset_dir, 2))
+ write_parquet(df1, file.path(dataset_dir, 1, "file1.parquet"))
+ write_parquet(df2, file.path(dataset_dir, 2, "file2.parquet"))
+ expect_length(dir(dataset_dir, recursive = TRUE), 2)
+})
+
+files <- c(
+ file.path(dataset_dir, 1, "file1.parquet", fsep = "/"),
+ file.path(dataset_dir, 2, "file2.parquet", fsep = "/")
+)
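+# fsep = "/" keeps these paths ready to embed in file:// URIs below, which use
+# forward slashes regardless of platform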
+
+
+test_that("dataset from single local file path", {
+ ds <- open_dataset(files[1])
+ expect_r6_class(ds, "Dataset")
+ expect_equal(
+ ds %>%
+ select(chr, dbl) %>%
+ filter(dbl > 7) %>%
+ collect() %>%
+ arrange(dbl),
+ df1[8:10, c("chr", "dbl")]
+ )
+})
+
+test_that("dataset from vector of file paths", {
+ ds <- open_dataset(files)
+ expect_r6_class(ds, "Dataset")
+ expect_equal(
+ ds %>%
+ select(chr, dbl) %>%
+ filter(dbl > 7 & dbl < 53L) %>%
+ collect() %>%
+ arrange(dbl),
+ rbind(
+ df1[8:10, c("chr", "dbl")],
+ df2[1:2, c("chr", "dbl")]
+ )
+ )
+})
+
+test_that("dataset from directory URI", {
+ uri <- paste0("file://", dataset_dir)
+ ds <- open_dataset(uri, partitioning = schema(part = uint8()))
+ expect_r6_class(ds, "Dataset")
+ expect_equal(
+ ds %>%
+ select(chr, dbl) %>%
+ filter(dbl > 7 & dbl < 53L) %>%
+ collect() %>%
+ arrange(dbl),
+ rbind(
+ df1[8:10, c("chr", "dbl")],
+ df2[1:2, c("chr", "dbl")]
+ )
+ )
+})
+
+test_that("dataset from single file URI", {
+ uri <- paste0("file://", files[1])
+ ds <- open_dataset(uri)
+ expect_r6_class(ds, "Dataset")
+ expect_equal(
+ ds %>%
+ select(chr, dbl) %>%
+ filter(dbl > 7) %>%
+ collect() %>%
+ arrange(dbl),
+ df1[8:10, c("chr", "dbl")]
+ )
+})
+
+test_that("dataset from vector of file URIs", {
+ uris <- paste0("file://", files)
+ ds <- open_dataset(uris)
+ expect_r6_class(ds, "Dataset")
+ expect_equal(
+ ds %>%
+ select(chr, dbl) %>%
+ filter(dbl > 7 & dbl < 53L) %>%
+ collect() %>%
+ arrange(dbl),
+ rbind(
+ df1[8:10, c("chr", "dbl")],
+ df2[1:2, c("chr", "dbl")]
+ )
+ )
+})
+
+test_that("open_dataset errors on mixed paths and URIs", {
+ expect_error(
+ open_dataset(c(files[1], paste0("file://", files[2]))),
+ "Vectors of mixed paths and URIs are not supported"
+ )
+})
diff --git a/src/arrow/r/tests/testthat/test-dataset-write.R b/src/arrow/r/tests/testthat/test-dataset-write.R
new file mode 100644
index 000000000..8e7c077e6
--- /dev/null
+++ b/src/arrow/r/tests/testthat/test-dataset-write.R
@@ -0,0 +1,454 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+skip_if_not_available("dataset")
+
+library(dplyr, warn.conflicts = FALSE)
+
+
+hive_dir <- make_temp_dir()
+csv_dir <- make_temp_dir()
+
+test_that("Setup (putting data in the dirs)", {
+ if (arrow_with_parquet()) {
+ dir.create(file.path(hive_dir, "subdir", "group=1", "other=xxx"), recursive = TRUE)
+ dir.create(file.path(hive_dir, "subdir", "group=2", "other=yyy"), recursive = TRUE)
+ write_parquet(df1, file.path(hive_dir, "subdir", "group=1", "other=xxx", "file1.parquet"))
+ write_parquet(df2, file.path(hive_dir, "subdir", "group=2", "other=yyy", "file2.parquet"))
+ expect_length(dir(hive_dir, recursive = TRUE), 2)
+ }
+
+ # Now, CSV
+ dir.create(file.path(csv_dir, 5))
+ dir.create(file.path(csv_dir, 6))
+ write.csv(df1, file.path(csv_dir, 5, "file1.csv"), row.names = FALSE)
+ write.csv(df2, file.path(csv_dir, 6, "file2.csv"), row.names = FALSE)
+ expect_length(dir(csv_dir, recursive = TRUE), 2)
+})
+
+test_that("Writing a dataset: CSV->IPC", {
+ ds <- open_dataset(csv_dir, partitioning = "part", format = "csv")
+ dst_dir <- make_temp_dir()
+ write_dataset(ds, dst_dir, format = "feather", partitioning = "int")
+ expect_true(dir.exists(dst_dir))
+ expect_identical(dir(dst_dir), sort(paste("int", c(1:10, 101:110), sep = "=")))
+
+ new_ds <- open_dataset(dst_dir, format = "feather")
+
+ expect_equal(
+ new_ds %>%
+ select(string = chr, integer = int) %>%
+ filter(integer > 6 & integer < 11) %>%
+ collect() %>%
+ summarize(mean = mean(integer)),
+ df1 %>%
+ select(string = chr, integer = int) %>%
+ filter(integer > 6) %>%
+ summarize(mean = mean(integer))
+ )
+
+ # Check whether "int" is present in the files or just in the dirs
+ first <- read_feather(
+ dir(dst_dir, pattern = ".feather$", recursive = TRUE, full.names = TRUE)[1],
+ as_data_frame = FALSE
+ )
+ # It shouldn't be there
+ expect_false("int" %in% names(first))
+})
+
+test_that("Writing a dataset: Parquet->IPC", {
+ skip_if_not_available("parquet")
+ ds <- open_dataset(hive_dir)
+ dst_dir <- make_temp_dir()
+ write_dataset(ds, dst_dir, format = "feather", partitioning = "int")
+ expect_true(dir.exists(dst_dir))
+ expect_identical(dir(dst_dir), sort(paste("int", c(1:10, 101:110), sep = "=")))
+
+ new_ds <- open_dataset(dst_dir, format = "feather")
+
+ expect_equal(
+ new_ds %>%
+ select(string = chr, integer = int, group) %>%
+ filter(integer > 6 & group == 1) %>%
+ collect() %>%
+ summarize(mean = mean(integer)),
+ df1 %>%
+ select(string = chr, integer = int) %>%
+ filter(integer > 6) %>%
+ summarize(mean = mean(integer))
+ )
+})
+
+test_that("Writing a dataset: CSV->Parquet", {
+ skip_if_not_available("parquet")
+ ds <- open_dataset(csv_dir, partitioning = "part", format = "csv")
+ dst_dir <- make_temp_dir()
+ write_dataset(ds, dst_dir, format = "parquet", partitioning = "int")
+ expect_true(dir.exists(dst_dir))
+ expect_identical(dir(dst_dir), sort(paste("int", c(1:10, 101:110), sep = "=")))
+
+ new_ds <- open_dataset(dst_dir)
+
+ expect_equal(
+ new_ds %>%
+ select(string = chr, integer = int) %>%
+ filter(integer > 6 & integer < 11) %>%
+ collect() %>%
+ summarize(mean = mean(integer)),
+ df1 %>%
+ select(string = chr, integer = int) %>%
+ filter(integer > 6) %>%
+ summarize(mean = mean(integer))
+ )
+})
+
+test_that("Writing a dataset: Parquet->Parquet (default)", {
+ skip_if_not_available("parquet")
+ ds <- open_dataset(hive_dir)
+ dst_dir <- make_temp_dir()
+ write_dataset(ds, dst_dir, partitioning = "int")
+ expect_true(dir.exists(dst_dir))
+ expect_identical(dir(dst_dir), sort(paste("int", c(1:10, 101:110), sep = "=")))
+
+ new_ds <- open_dataset(dst_dir)
+
+ expect_equal(
+ new_ds %>%
+ select(string = chr, integer = int, group) %>%
+ filter(integer > 6 & group == 1) %>%
+ collect() %>%
+ summarize(mean = mean(integer)),
+ df1 %>%
+ select(string = chr, integer = int) %>%
+ filter(integer > 6) %>%
+ summarize(mean = mean(integer))
+ )
+})
+
+test_that("Writing a dataset: existing data behavior", {
+ # This test does not work on Windows because unlink does not immediately
+ # delete the data.
+ skip_on_os("windows")
+ ds <- open_dataset(csv_dir, partitioning = "part", format = "csv")
+ dst_dir <- make_temp_dir()
+ write_dataset(ds, dst_dir, format = "feather", partitioning = "int")
+ expect_true(dir.exists(dst_dir))
+
+ check_dataset <- function() {
+ new_ds <- open_dataset(dst_dir, format = "feather")
+
+ expect_equal(
+ new_ds %>%
+ select(string = chr, integer = int) %>%
+ filter(integer > 6 & integer < 11) %>%
+ collect() %>%
+ summarize(mean = mean(integer)),
+ df1 %>%
+ select(string = chr, integer = int) %>%
+ filter(integer > 6) %>%
+ summarize(mean = mean(integer))
+ )
+ }
+
+ check_dataset()
+ # By default we should overwrite
+ write_dataset(ds, dst_dir, format = "feather", partitioning = "int")
+ check_dataset()
+ write_dataset(ds, dst_dir, format = "feather", partitioning = "int", existing_data_behavior = "overwrite")
+ check_dataset()
+ expect_error(
+ write_dataset(ds, dst_dir, format = "feather", partitioning = "int", existing_data_behavior = "error"),
+ "directory is not empty"
+ )
+ unlink(dst_dir, recursive = TRUE)
+ write_dataset(ds, dst_dir, format = "feather", partitioning = "int", existing_data_behavior = "error")
+ check_dataset()
+})
+
+test_that("Writing a dataset: no format specified", {
+ dst_dir <- make_temp_dir()
+ write_dataset(example_data, dst_dir)
+ new_ds <- open_dataset(dst_dir)
+ expect_equal(
+ list.files(dst_dir, pattern = "parquet"),
+ "part-0.parquet"
+ )
+ expect_true(
+ inherits(new_ds$format, "ParquetFileFormat")
+ )
+ expect_equal(
+ new_ds %>% collect(),
+ example_data
+ )
+})
+
+test_that("Dataset writing: dplyr methods", {
+ skip_if_not_available("parquet")
+ ds <- open_dataset(hive_dir)
+ dst_dir <- tempfile()
+ # Specify partition vars by group_by
+ ds %>%
+ group_by(int) %>%
+ write_dataset(dst_dir, format = "feather")
+ expect_true(dir.exists(dst_dir))
+ expect_identical(dir(dst_dir), sort(paste("int", c(1:10, 101:110), sep = "=")))
+
+ # select to specify schema (and rename)
+ dst_dir2 <- tempfile()
+ ds %>%
+ group_by(int) %>%
+ select(chr, dubs = dbl) %>%
+ write_dataset(dst_dir2, format = "feather")
+ new_ds <- open_dataset(dst_dir2, format = "feather")
+
+ expect_equal(
+ collect(new_ds) %>% arrange(int),
+ rbind(df1[c("chr", "dbl", "int")], df2[c("chr", "dbl", "int")]) %>% rename(dubs = dbl)
+ )
+
+ # filter to restrict written rows
+ dst_dir3 <- tempfile()
+ ds %>%
+ filter(int == 4) %>%
+ write_dataset(dst_dir3, format = "feather")
+ new_ds <- open_dataset(dst_dir3, format = "feather")
+
+ expect_equal(
+ new_ds %>% select(names(df1)) %>% collect(),
+ df1 %>% filter(int == 4)
+ )
+
+ # mutate
+ dst_dir3 <- tempfile()
+ ds %>%
+ filter(int == 4) %>%
+ mutate(twice = int * 2) %>%
+ write_dataset(dst_dir3, format = "feather")
+ new_ds <- open_dataset(dst_dir3, format = "feather")
+
+ expect_equal(
+ new_ds %>% select(c(names(df1), "twice")) %>% collect(),
+ df1 %>% filter(int == 4) %>% mutate(twice = int * 2)
+ )
+})
+
+test_that("Dataset writing: non-hive", {
+ skip_if_not_available("parquet")
+ ds <- open_dataset(hive_dir)
+ dst_dir <- tempfile()
+ write_dataset(ds, dst_dir, format = "feather", partitioning = "int", hive_style = FALSE)
+ expect_true(dir.exists(dst_dir))
+ expect_identical(dir(dst_dir), sort(as.character(c(1:10, 101:110))))
+})
+
+test_that("Dataset writing: no partitioning", {
+ skip_if_not_available("parquet")
+ ds <- open_dataset(hive_dir)
+ dst_dir <- tempfile()
+ write_dataset(ds, dst_dir, format = "feather", partitioning = NULL)
+ expect_true(dir.exists(dst_dir))
+ expect_true(length(dir(dst_dir)) > 0)
+})
+
+test_that("Dataset writing: partition on null", {
+ ds <- open_dataset(hive_dir)
+
+ dst_dir <- tempfile()
+ partitioning <- hive_partition(lgl = boolean())
+ write_dataset(ds, dst_dir, partitioning = partitioning)
+ expect_true(dir.exists(dst_dir))
+ expect_identical(dir(dst_dir), c("lgl=__HIVE_DEFAULT_PARTITION__", "lgl=false", "lgl=true"))
+
+ dst_dir <- tempfile()
+ partitioning <- hive_partition(lgl = boolean(), null_fallback = "xyz")
+ write_dataset(ds, dst_dir, partitioning = partitioning)
+ expect_true(dir.exists(dst_dir))
+ expect_identical(dir(dst_dir), c("lgl=false", "lgl=true", "lgl=xyz"))
+
+ ds_readback <- open_dataset(dst_dir, partitioning = hive_partition(lgl = boolean(), null_fallback = "xyz"))
+
+ expect_identical(
+ ds %>%
+ select(int, lgl) %>%
+ collect() %>%
+ arrange(lgl, int),
+ ds_readback %>%
+ select(int, lgl) %>%
+ collect() %>%
+ arrange(lgl, int)
+ )
+})
+
+test_that("Dataset writing: from data.frame", {
+ dst_dir <- tempfile()
+ stacked <- rbind(df1, df2)
+ stacked %>%
+ group_by(int) %>%
+ write_dataset(dst_dir, format = "feather")
+ expect_true(dir.exists(dst_dir))
+ expect_identical(dir(dst_dir), sort(paste("int", c(1:10, 101:110), sep = "=")))
+
+ new_ds <- open_dataset(dst_dir, format = "feather")
+
+ expect_equal(
+ new_ds %>%
+ select(string = chr, integer = int) %>%
+ filter(integer > 6 & integer < 11) %>%
+ collect() %>%
+ summarize(mean = mean(integer)),
+ df1 %>%
+ select(string = chr, integer = int) %>%
+ filter(integer > 6) %>%
+ summarize(mean = mean(integer))
+ )
+})
+
+test_that("Dataset writing: from RecordBatch", {
+ dst_dir <- tempfile()
+ stacked <- record_batch(rbind(df1, df2))
+ stacked %>%
+ group_by(int) %>%
+ write_dataset(dst_dir, format = "feather")
+ expect_true(dir.exists(dst_dir))
+ expect_identical(dir(dst_dir), sort(paste("int", c(1:10, 101:110), sep = "=")))
+
+ new_ds <- open_dataset(dst_dir, format = "feather")
+
+ expect_equal(
+ new_ds %>%
+ select(string = chr, integer = int) %>%
+ filter(integer > 6 & integer < 11) %>%
+ collect() %>%
+ summarize(mean = mean(integer)),
+ df1 %>%
+ select(string = chr, integer = int) %>%
+ filter(integer > 6) %>%
+ summarize(mean = mean(integer))
+ )
+})
+
+test_that("Writing a dataset: Ipc format options & compression", {
+ ds <- open_dataset(csv_dir, partitioning = "part", format = "csv")
+ dst_dir <- make_temp_dir()
+
+ codec <- NULL
+ if (codec_is_available("zstd")) {
+ codec <- Codec$create("zstd")
+ }
+
+ write_dataset(ds, dst_dir, format = "feather", codec = codec)
+ expect_true(dir.exists(dst_dir))
+
+ new_ds <- open_dataset(dst_dir, format = "feather")
+ expect_equal(
+ new_ds %>%
+ select(string = chr, integer = int) %>%
+ filter(integer > 6 & integer < 11) %>%
+ collect() %>%
+ summarize(mean = mean(integer)),
+ df1 %>%
+ select(string = chr, integer = int) %>%
+ filter(integer > 6) %>%
+ summarize(mean = mean(integer))
+ )
+})
+
+test_that("Writing a dataset: Parquet format options", {
+ skip_if_not_available("parquet")
+ ds <- open_dataset(csv_dir, partitioning = "part", format = "csv")
+ dst_dir <- make_temp_dir()
+ dst_dir_no_truncated_timestamps <- make_temp_dir()
+
+ # Use trace() to confirm that options are passed in
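+  # (trace() temporarily injects a warning() into the low-level
+  # parquet___ArrowWriterProperties___create binding so expect_warning() can
+  # observe the argument values that write_dataset() passes down; untrace()
+  # below restores the binding)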
+ suppressMessages(trace(
+ "parquet___ArrowWriterProperties___create",
+ tracer = quote(warning("allow_truncated_timestamps == ", allow_truncated_timestamps)),
+ print = FALSE,
+ where = write_dataset
+ ))
+ expect_warning(
+ write_dataset(ds, dst_dir_no_truncated_timestamps, format = "parquet", partitioning = "int"),
+ "allow_truncated_timestamps == FALSE"
+ )
+ expect_warning(
+ write_dataset(ds, dst_dir, format = "parquet", partitioning = "int", allow_truncated_timestamps = TRUE),
+ "allow_truncated_timestamps == TRUE"
+ )
+ suppressMessages(untrace(
+ "parquet___ArrowWriterProperties___create",
+ where = write_dataset
+ ))
+
+ # Now confirm we can read back what we sent
+ expect_true(dir.exists(dst_dir))
+ expect_identical(dir(dst_dir), sort(paste("int", c(1:10, 101:110), sep = "=")))
+
+ new_ds <- open_dataset(dst_dir)
+
+ expect_equal(
+ new_ds %>%
+ select(string = chr, integer = int) %>%
+ filter(integer > 6 & integer < 11) %>%
+ collect() %>%
+ summarize(mean = mean(integer)),
+ df1 %>%
+ select(string = chr, integer = int) %>%
+ filter(integer > 6) %>%
+ summarize(mean = mean(integer))
+ )
+})
+
+test_that("Writing a dataset: CSV format options", {
+ df <- tibble(
+ int = 1:10,
+ dbl = as.numeric(1:10),
+ lgl = rep(c(TRUE, FALSE, NA, TRUE, FALSE), 2),
+ chr = letters[1:10],
+ )
+
+ dst_dir <- make_temp_dir()
+ write_dataset(df, dst_dir, format = "csv")
+ expect_true(dir.exists(dst_dir))
+ new_ds <- open_dataset(dst_dir, format = "csv")
+ expect_equal(new_ds %>% collect(), df)
+
+ dst_dir <- make_temp_dir()
+ write_dataset(df, dst_dir, format = "csv", include_header = FALSE)
+ expect_true(dir.exists(dst_dir))
+ new_ds <- open_dataset(dst_dir,
+ format = "csv",
+ column_names = c("int", "dbl", "lgl", "chr")
+ )
+ expect_equal(new_ds %>% collect(), df)
+})
+
+test_that("Dataset writing: unsupported features/input validation", {
+ skip_if_not_available("parquet")
+ expect_error(write_dataset(4), 'dataset must be a "Dataset"')
+
+ ds <- open_dataset(hive_dir)
+ expect_error(
+ write_dataset(ds, partitioning = c("int", "NOTACOLUMN"), format = "ipc"),
+ 'Invalid field name: "NOTACOLUMN"'
+ )
+ expect_error(
+ write_dataset(ds, tempfile(), basename_template = "something_without_i")
+ )
+ expect_error(
+ write_dataset(ds, tempfile(), basename_template = NULL)
+ )
+})
diff --git a/src/arrow/r/tests/testthat/test-dataset.R b/src/arrow/r/tests/testthat/test-dataset.R
new file mode 100644
index 000000000..4403b479a
--- /dev/null
+++ b/src/arrow/r/tests/testthat/test-dataset.R
@@ -0,0 +1,696 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+skip_if_not_available("dataset")
+
+library(dplyr, warn.conflicts = FALSE)
+
+dataset_dir <- make_temp_dir()
+hive_dir <- make_temp_dir()
+ipc_dir <- make_temp_dir()
+
+test_that("Setup (putting data in the dir)", {
+ if (arrow_with_parquet()) {
+ dir.create(file.path(dataset_dir, 1))
+ dir.create(file.path(dataset_dir, 2))
+ write_parquet(df1, file.path(dataset_dir, 1, "file1.parquet"))
+ write_parquet(df2, file.path(dataset_dir, 2, "file2.parquet"))
+ expect_length(dir(dataset_dir, recursive = TRUE), 2)
+
+ dir.create(file.path(hive_dir, "subdir", "group=1", "other=xxx"), recursive = TRUE)
+ dir.create(file.path(hive_dir, "subdir", "group=2", "other=yyy"), recursive = TRUE)
+ write_parquet(df1, file.path(hive_dir, "subdir", "group=1", "other=xxx", "file1.parquet"))
+ write_parquet(df2, file.path(hive_dir, "subdir", "group=2", "other=yyy", "file2.parquet"))
+ expect_length(dir(hive_dir, recursive = TRUE), 2)
+ }
+
+ # Now, an IPC format dataset
+ dir.create(file.path(ipc_dir, 3))
+ dir.create(file.path(ipc_dir, 4))
+ write_feather(df1, file.path(ipc_dir, 3, "file1.arrow"))
+ write_feather(df2, file.path(ipc_dir, 4, "file2.arrow"))
+ expect_length(dir(ipc_dir, recursive = TRUE), 2)
+})
+
+test_that("IPC/Feather format data", {
+ ds <- open_dataset(ipc_dir, partitioning = "part", format = "feather")
+ expect_r6_class(ds$format, "IpcFileFormat")
+ expect_r6_class(ds$filesystem, "LocalFileSystem")
+ expect_identical(names(ds), c(names(df1), "part"))
+ expect_identical(dim(ds), c(20L, 7L))
+
+ expect_equal(
+ ds %>%
+ select(string = chr, integer = int, part) %>%
+ filter(integer > 6 & part == 3) %>%
+ collect() %>%
+ summarize(mean = mean(integer)),
+ df1 %>%
+ select(string = chr, integer = int) %>%
+ filter(integer > 6) %>%
+ summarize(mean = mean(integer))
+ )
+
+ # Collecting virtual partition column works
+ expect_equal(
+ ds %>% arrange(part) %>% pull(part),
+ c(rep(3, 10), rep(4, 10))
+ )
+})
+
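+# Shared helper: drives the Scanner API by hand (project chr/lgl, filter on
+# dbl == 8) and checks that scanning `ds` yields the matching single row of df1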
+expect_scan_result <- function(ds, schm) {
+ sb <- ds$NewScan()
+ expect_r6_class(sb, "ScannerBuilder")
+ expect_equal(sb$schema, schm)
+
+ sb$Project(c("chr", "lgl"))
+ sb$Filter(Expression$field_ref("dbl") == 8)
+ scn <- sb$Finish()
+ expect_r6_class(scn, "Scanner")
+
+ tab <- scn$ToTable()
+ expect_r6_class(tab, "Table")
+
+ expect_equal(
+ as.data.frame(tab),
+ df1[8, c("chr", "lgl")]
+ )
+}
+
+test_that("URI-decoding with directory partitioning", {
+ root <- make_temp_dir()
+ fmt <- FileFormat$create("feather")
+ fs <- LocalFileSystem$create()
+ selector <- FileSelector$create(root, recursive = TRUE)
+ dir1 <- file.path(root, "2021-05-04 00%3A00%3A00", "%24")
+ dir.create(dir1, recursive = TRUE)
+ write_feather(df1, file.path(dir1, "data.feather"))
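+  # "%3A" is the URI escape for ":" and "%24" for "$": with the default
+  # segment_encoding ("uri") the factory decodes the segments, while
+  # segment_encoding = "none" must take them literally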
+
+ partitioning <- DirectoryPartitioning$create(
+ schema(date = timestamp(unit = "s"), string = utf8())
+ )
+ factory <- FileSystemDatasetFactory$create(
+ fs, selector, NULL, fmt,
+ partitioning = partitioning
+ )
+ schm <- factory$Inspect()
+ ds <- factory$Finish(schm)
+ expect_scan_result(ds, schm)
+
+ partitioning <- DirectoryPartitioning$create(
+ schema(date = timestamp(unit = "s"), string = utf8()),
+ segment_encoding = "none"
+ )
+ factory <- FileSystemDatasetFactory$create(
+ fs, selector, NULL, fmt,
+ partitioning = partitioning
+ )
+ schm <- factory$Inspect()
+ expect_error(factory$Finish(schm), "Invalid: error parsing")
+
+ partitioning_factory <- DirectoryPartitioningFactory$create(
+ c("date", "string")
+ )
+ factory <- FileSystemDatasetFactory$create(
+ fs, selector, NULL, fmt, partitioning_factory
+ )
+ schm <- factory$Inspect()
+ ds <- factory$Finish(schm)
+ # Can't directly inspect partition expressions, so do it implicitly via scan
+ expect_equal(
+ ds %>%
+ filter(date == "2021-05-04 00:00:00", string == "$") %>%
+ select(int) %>%
+ collect(),
+ df1 %>% select(int) %>% collect()
+ )
+
+ partitioning_factory <- DirectoryPartitioningFactory$create(
+ c("date", "string"),
+ segment_encoding = "none"
+ )
+ factory <- FileSystemDatasetFactory$create(
+ fs, selector, NULL, fmt, partitioning_factory
+ )
+ schm <- factory$Inspect()
+ ds <- factory$Finish(schm)
+ expect_equal(
+ ds %>%
+ filter(date == "2021-05-04 00%3A00%3A00", string == "%24") %>%
+ select(int) %>%
+ collect(),
+ df1 %>% select(int) %>% collect()
+ )
+})
+
+test_that("URI-decoding with hive partitioning", {
+ root <- make_temp_dir()
+ fmt <- FileFormat$create("feather")
+ fs <- LocalFileSystem$create()
+ selector <- FileSelector$create(root, recursive = TRUE)
+ dir1 <- file.path(root, "date=2021-05-04 00%3A00%3A00", "string=%24")
+ dir.create(dir1, recursive = TRUE)
+ write_feather(df1, file.path(dir1, "data.feather"))
+
+ partitioning <- hive_partition(
+ date = timestamp(unit = "s"), string = utf8()
+ )
+ factory <- FileSystemDatasetFactory$create(
+ fs, selector, NULL, fmt,
+ partitioning = partitioning
+ )
+  schm <- factory$Inspect()
+  ds <- factory$Finish(schm)
+ expect_scan_result(ds, schm)
+
+ partitioning <- hive_partition(
+ date = timestamp(unit = "s"), string = utf8(), segment_encoding = "none"
+ )
+ factory <- FileSystemDatasetFactory$create(
+ fs, selector, NULL, fmt,
+ partitioning = partitioning
+ )
+ expect_error(factory$Finish(schm), "Invalid: error parsing")
+
+ partitioning_factory <- hive_partition()
+ factory <- FileSystemDatasetFactory$create(
+ fs, selector, NULL, fmt, partitioning_factory
+ )
+ schm <- factory$Inspect()
+ ds <- factory$Finish(schm)
+ # Can't directly inspect partition expressions, so do it implicitly via scan
+ expect_equal(
+ ds %>%
+ filter(date == "2021-05-04 00:00:00", string == "$") %>%
+ select(int) %>%
+ collect(),
+ df1 %>% select(int) %>% collect()
+ )
+
+ partitioning_factory <- hive_partition(segment_encoding = "none")
+ factory <- FileSystemDatasetFactory$create(
+ fs, selector, NULL, fmt, partitioning_factory
+ )
+ schm <- factory$Inspect()
+ ds <- factory$Finish(schm)
+ expect_equal(
+ ds %>%
+ filter(date == "2021-05-04 00%3A00%3A00", string == "%24") %>%
+ select(int) %>%
+ collect(),
+ df1 %>% select(int) %>% collect()
+ )
+})
+
+# Everything else below here is using parquet files
+skip_if_not_available("parquet")
+
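+# Paths to the two Parquet files written by the Setup test, used below when
+# assembling datasets manually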
+files <- c(
+ file.path(dataset_dir, 1, "file1.parquet", fsep = "/"),
+ file.path(dataset_dir, 2, "file2.parquet", fsep = "/")
+)
+
+test_that("Simple interface for datasets", {
+ ds <- open_dataset(dataset_dir, partitioning = schema(part = uint8()))
+ expect_r6_class(ds$format, "ParquetFileFormat")
+ expect_r6_class(ds$filesystem, "LocalFileSystem")
+ expect_r6_class(ds, "Dataset")
+ expect_equal(
+ ds %>%
+ select(chr, dbl) %>%
+ filter(dbl > 7 & dbl < 53L) %>% # Testing the auto-casting of scalars
+ collect() %>%
+ arrange(dbl),
+ rbind(
+ df1[8:10, c("chr", "dbl")],
+ df2[1:2, c("chr", "dbl")]
+ )
+ )
+
+ expect_equal(
+ ds %>%
+ select(string = chr, integer = int, part) %>%
+ filter(integer > 6 & part == 1) %>% # 6 not 6L to test autocasting
+ collect() %>%
+ summarize(mean = mean(integer)),
+ df1 %>%
+ select(string = chr, integer = int) %>%
+ filter(integer > 6) %>%
+ summarize(mean = mean(integer))
+ )
+
+ # Collecting virtual partition column works
+ expect_equal(
+ ds %>% arrange(part) %>% pull(part),
+ c(rep(1, 10), rep(2, 10))
+ )
+})
+
+test_that("dim method returns the correct number of rows and columns", {
+ ds <- open_dataset(dataset_dir, partitioning = schema(part = uint8()))
+ expect_identical(dim(ds), c(20L, 7L))
+})
+
+
+test_that("dim() correctly determine numbers of rows and columns on arrow_dplyr_query object", {
+ ds <- open_dataset(dataset_dir, partitioning = schema(part = uint8()))
+
+ expect_identical(
+ ds %>%
+ filter(chr == "a") %>%
+ dim(),
+ c(2L, 7L)
+ )
+ expect_equal(
+ ds %>%
+ select(chr, fct, int) %>%
+ dim(),
+ c(20L, 3L)
+ )
+ expect_identical(
+ ds %>%
+ select(chr, fct, int) %>%
+ filter(chr == "a") %>%
+ dim(),
+ c(2L, 3L)
+ )
+})
+
+test_that("Simple interface for datasets (custom ParquetFileFormat)", {
+ ds <- open_dataset(dataset_dir,
+ partitioning = schema(part = uint8()),
+ format = FileFormat$create("parquet", dict_columns = c("chr"))
+ )
+ expect_type_equal(ds$schema$GetFieldByName("chr")$type, dictionary())
+})
+
+test_that("Hive partitioning", {
+ ds <- open_dataset(hive_dir, partitioning = hive_partition(other = utf8(), group = uint8()))
+ expect_r6_class(ds, "Dataset")
+ expect_equal(
+ ds %>%
+ filter(group == 2) %>%
+ select(chr, dbl) %>%
+ filter(dbl > 7 & dbl < 53) %>%
+ collect() %>%
+ arrange(dbl),
+ df2[1:2, c("chr", "dbl")]
+ )
+})
+
+test_that("input validation", {
+ expect_error(
+ open_dataset(hive_dir, hive_partition(other = utf8(), group = uint8()))
+ )
+})
+
+test_that("Partitioning inference", {
+ # These are the same tests as above, just using the *PartitioningFactory
+ ds1 <- open_dataset(dataset_dir, partitioning = "part")
+ expect_identical(names(ds1), c(names(df1), "part"))
+ expect_equal(
+ ds1 %>%
+ select(string = chr, integer = int, part) %>%
+ filter(integer > 6 & part == 1) %>%
+ collect() %>%
+ summarize(mean = mean(integer)),
+ df1 %>%
+ select(string = chr, integer = int) %>%
+ filter(integer > 6) %>%
+ summarize(mean = mean(integer))
+ )
+
+ ds2 <- open_dataset(hive_dir)
+ expect_identical(names(ds2), c(names(df1), "group", "other"))
+ expect_equal(
+ ds2 %>%
+ filter(group == 2) %>%
+ select(chr, dbl) %>%
+ filter(dbl > 7 & dbl < 53) %>%
+ collect() %>%
+ arrange(dbl),
+ df2[1:2, c("chr", "dbl")]
+ )
+})
+
+test_that("Dataset with multiple file formats", {
+ skip("https://issues.apache.org/jira/browse/ARROW-7653")
+ ds <- open_dataset(list(
+ open_dataset(dataset_dir, format = "parquet", partitioning = "part"),
+ open_dataset(ipc_dir, format = "arrow", partitioning = "part")
+ ))
+ expect_identical(names(ds), c(names(df1), "part"))
+ expect_equal(
+ ds %>%
+ filter(int > 6 & part %in% c(1, 3)) %>%
+ select(string = chr, integer = int) %>%
+ collect(),
+ df1 %>%
+ select(string = chr, integer = int) %>%
+ filter(integer > 6) %>%
+ rbind(., .) # Stack it twice
+ )
+})
+
+test_that("Creating UnionDataset", {
+ ds1 <- open_dataset(file.path(dataset_dir, 1))
+ ds2 <- open_dataset(file.path(dataset_dir, 2))
+ union1 <- open_dataset(list(ds1, ds2))
+ expect_r6_class(union1, "UnionDataset")
+ expect_equal(
+ union1 %>%
+ select(chr, dbl) %>%
+ filter(dbl > 7 & dbl < 53L) %>% # Testing the auto-casting of scalars
+ collect() %>%
+ arrange(dbl),
+ rbind(
+ df1[8:10, c("chr", "dbl")],
+ df2[1:2, c("chr", "dbl")]
+ )
+ )
+
+ # Now with the c() method
+ union2 <- c(ds1, ds2)
+ expect_r6_class(union2, "UnionDataset")
+ expect_equal(
+ union2 %>%
+ select(chr, dbl) %>%
+ filter(dbl > 7 & dbl < 53L) %>% # Testing the auto-casting of scalars
+ collect() %>%
+ arrange(dbl),
+ rbind(
+ df1[8:10, c("chr", "dbl")],
+ df2[1:2, c("chr", "dbl")]
+ )
+ )
+
+ # Confirm c() method error handling
+ expect_error(c(ds1, 42), "character")
+})
+
+test_that("map_batches", {
+ skip("map_batches() is broken (ARROW-14029)")
+ ds <- open_dataset(dataset_dir, partitioning = "part")
+ expect_equal(
+ ds %>%
+ filter(int > 5) %>%
+ select(int, lgl) %>%
+ map_batches(~ summarize(., min_int = min(int))),
+ tibble(min_int = c(6L, 101L))
+ )
+})
+
+test_that("partitioning = NULL to ignore partition information (but why?)", {
+ ds <- open_dataset(hive_dir, partitioning = NULL)
+ expect_identical(names(ds), names(df1)) # i.e. not c(names(df1), "group", "other")
+})
+
+test_that("head/tail", {
+ # head/tail with no query are still deterministic order
+ ds <- open_dataset(dataset_dir)
+ expect_equal(as.data.frame(head(ds)), head(df1))
+ expect_equal(
+ as.data.frame(head(ds, 12)),
+ rbind(df1, df2[1:2, ])
+ )
+
+ expect_equal(as.data.frame(tail(ds)), tail(df2))
+ expect_equal(
+ as.data.frame(tail(ds, 12)),
+ rbind(df1[9:10, ], df2)
+ )
+})
+
+test_that("Dataset [ (take by index)", {
+ ds <- open_dataset(dataset_dir)
+ # Taking only from one file
+ expect_equal(
+ as.data.frame(ds[c(4, 5, 9), 3:4]),
+ df1[c(4, 5, 9), 3:4]
+ )
+ # Taking from more than one
+ expect_equal(
+ as.data.frame(ds[c(4, 5, 9, 12, 13), 3:4]),
+ rbind(df1[c(4, 5, 9), 3:4], df2[2:3, 3:4])
+ )
+ # Taking out of order
+ expect_equal(
+ as.data.frame(ds[c(4, 13, 9, 12, 5), ]),
+ rbind(
+ df1[4, ],
+ df2[3, ],
+ df1[9, ],
+ df2[2, ],
+ df1[5, ]
+ )
+ )
+
+ # Take from a query
+ ds2 <- ds %>%
+ filter(int > 6) %>%
+ select(int, lgl)
+ expect_equal(
+ as.data.frame(ds2[c(2, 5), ]),
+ rbind(
+ df1[8, c("int", "lgl")],
+ df2[1, c("int", "lgl")]
+ )
+ )
+})
+
+test_that("Dataset and query print methods", {
+ ds <- open_dataset(hive_dir)
+ expect_output(
+ print(ds),
+ paste(
+ "FileSystemDataset with 2 Parquet files",
+ "int: int32",
+ "dbl: double",
+ "lgl: bool",
+ "chr: string",
+ "fct: dictionary<values=string, indices=int32>",
+ "ts: timestamp[us, tz=UTC]",
+ "group: int32",
+ "other: string",
+ sep = "\n"
+ ),
+ fixed = TRUE
+ )
+ expect_type(ds$metadata, "list")
+ q <- select(ds, string = chr, lgl, integer = int)
+ expect_output(
+ print(q),
+ paste(
+ "Dataset (query)",
+ "string: string",
+ "lgl: bool",
+ "integer: int32",
+ "",
+ "See $.data for the source Arrow object",
+ sep = "\n"
+ ),
+ fixed = TRUE
+ )
+ expect_output(
+ print(q %>% filter(integer == 6) %>% group_by(lgl)),
+ paste(
+ "Dataset (query)",
+ "string: string",
+ "lgl: bool",
+ "integer: int32",
+ "",
+ "* Filter: (int == 6)",
+ "* Grouped by lgl",
+ "See $.data for the source Arrow object",
+ sep = "\n"
+ ),
+ fixed = TRUE
+ )
+})
+
+test_that("Scanner$ScanBatches", {
+ ds <- open_dataset(ipc_dir, format = "feather")
+ batches <- ds$NewScan()$Finish()$ScanBatches()
+ table <- Table$create(!!!batches)
+ expect_equal(as.data.frame(table), rbind(df1, df2))
+
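+  # same scan through the asynchronous scanner; results must be identical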
+ batches <- ds$NewScan()$UseAsync(TRUE)$Finish()$ScanBatches()
+ table <- Table$create(!!!batches)
+ expect_equal(as.data.frame(table), rbind(df1, df2))
+})
+
+test_that("Scanner$ToRecordBatchReader()", {
+ ds <- open_dataset(dataset_dir, partitioning = "part")
+ scan <- ds %>%
+ filter(part == 1) %>%
+ select(int, lgl) %>%
+ filter(int > 6) %>%
+ Scanner$create()
+ reader <- scan$ToRecordBatchReader()
+ expect_r6_class(reader, "RecordBatchReader")
+ expect_identical(
+ as.data.frame(reader$read_table()),
+ df1[df1$int > 6, c("int", "lgl")]
+ )
+})
+
+test_that("Scanner$create() filter/projection pushdown", {
+ ds <- open_dataset(dataset_dir, partitioning = "part")
+
+ # the standard to compare all Scanner$create()s against
+ scan_one <- ds %>%
+ filter(int > 7 & dbl < 57) %>%
+ select(int, dbl, lgl) %>%
+ mutate(int_plus = int + 1, dbl_minus = dbl - 1) %>%
+ Scanner$create()
+
+ # select a column in projection
+ scan_two <- ds %>%
+ filter(int > 7 & dbl < 57) %>%
+    # select an extra column, since the explicit projection below will drop chr
+ select(int, dbl, lgl, chr) %>%
+ mutate(int_plus = int + 1, dbl_minus = dbl - 1) %>%
+ Scanner$create(projection = c("int", "dbl", "lgl", "int_plus", "dbl_minus"))
+ expect_identical(
+ as.data.frame(scan_one$ToRecordBatchReader()$read_table()),
+ as.data.frame(scan_two$ToRecordBatchReader()$read_table())
+ )
+
+ # adding filters to Scanner$create
+ scan_three <- ds %>%
+ filter(int > 7) %>%
+ select(int, dbl, lgl) %>%
+ mutate(int_plus = int + 1, dbl_minus = dbl - 1) %>%
+ Scanner$create(
+ filter = Expression$create("less", Expression$field_ref("dbl"), Expression$scalar(57))
+ )
+ expect_identical(
+ as.data.frame(scan_one$ToRecordBatchReader()$read_table()),
+ as.data.frame(scan_three$ToRecordBatchReader()$read_table())
+ )
+
+ expect_error(
+ ds %>%
+ select(int, dbl, lgl) %>%
+ Scanner$create(projection = "not_a_col"),
+ # Full message is "attempting to project with unknown columns" >= 4.0.0, but
+ # prior versions have a less nice "all(projection %in% names(proj)) is not TRUE"
+ "project"
+ )
+
+ expect_error(
+ ds %>%
+ select(int, dbl, lgl) %>%
+ Scanner$create(filter = list("foo", "bar")),
+ "filter expressions must be either an expression or a list of expressions"
+ )
+})
+
+test_that("Assembling a Dataset manually and getting a Table", {
+ fs <- LocalFileSystem$create()
+ selector <- FileSelector$create(dataset_dir, recursive = TRUE)
+ partitioning <- DirectoryPartitioning$create(schema(part = double()))
+
+ fmt <- FileFormat$create("parquet")
+ factory <- FileSystemDatasetFactory$create(fs, selector, NULL, fmt, partitioning = partitioning)
+ expect_r6_class(factory, "FileSystemDatasetFactory")
+
+ schm <- factory$Inspect()
+ expect_r6_class(schm, "Schema")
+
+ phys_schm <- ParquetFileReader$create(files[1])$GetSchema()
+ expect_equal(names(phys_schm), names(df1))
+ expect_equal(names(schm), c(names(phys_schm), "part"))
+
+ child <- factory$Finish(schm)
+ expect_r6_class(child, "FileSystemDataset")
+ expect_r6_class(child$schema, "Schema")
+ expect_r6_class(child$format, "ParquetFileFormat")
+ expect_equal(names(schm), names(child$schema))
+ expect_equal(child$files, files)
+
+ ds <- Dataset$create(list(child), schm)
+ expect_scan_result(ds, schm)
+})
+
+test_that("Assembling multiple DatasetFactories with DatasetFactory", {
+ factory1 <- dataset_factory(file.path(dataset_dir, 1), format = "parquet")
+ expect_r6_class(factory1, "FileSystemDatasetFactory")
+ factory2 <- dataset_factory(file.path(dataset_dir, 2), format = "parquet")
+ expect_r6_class(factory2, "FileSystemDatasetFactory")
+
+ factory <- DatasetFactory$create(list(factory1, factory2))
+ expect_r6_class(factory, "DatasetFactory")
+
+ schm <- factory$Inspect()
+ expect_r6_class(schm, "Schema")
+
+ phys_schm <- ParquetFileReader$create(files[1])$GetSchema()
+ expect_equal(names(phys_schm), names(df1))
+
+ ds <- factory$Finish(schm)
+ expect_r6_class(ds, "UnionDataset")
+ expect_r6_class(ds$schema, "Schema")
+ expect_equal(names(schm), names(ds$schema))
+ expect_equal(unlist(map(ds$children, ~ .$files)), files)
+
+ expect_scan_result(ds, schm)
+})
+
+# see https://issues.apache.org/jira/browse/ARROW-11328
+test_that("Collecting zero columns from a dataset doesn't return entire dataset", {
+ tmp <- tempfile()
+ write_dataset(mtcars, tmp, format = "parquet")
+ expect_equal(
+ open_dataset(tmp) %>% select() %>% collect() %>% dim(),
+ c(32, 0)
+ )
+})
+
+
+test_that("dataset RecordBatchReader to C-interface to arrow_dplyr_query", {
+ ds <- open_dataset(ipc_dir, partitioning = "part", format = "feather")
+
+ # export the RecordBatchReader via the C-interface
+ stream_ptr <- allocate_arrow_array_stream()
+ scan <- Scanner$create(ds)
+ reader <- scan$ToRecordBatchReader()
+ reader$export_to_c(stream_ptr)
+
+ # then import it and check that the roundtripped value is the same
+ circle <- RecordBatchStreamReader$import_from_c(stream_ptr)
+
+ # create an arrow_dplyr_query() from the recordbatch reader
+ reader_adq <- arrow_dplyr_query(circle)
+
+ # TODO: ARROW-14321 should be able to arrange then collect
+ tab_from_c_new <- reader_adq %>%
+ filter(int < 8, int > 55) %>%
+ mutate(part_plus = part + 6) %>%
+ collect()
+ expect_equal(
+ tab_from_c_new %>%
+ arrange(dbl),
+ ds %>%
+ filter(int < 8, int > 55) %>%
+ mutate(part_plus = part + 6) %>%
+ collect() %>%
+ arrange(dbl)
+ )
+
+ # must clean up the pointer or we leak
+ delete_arrow_array_stream(stream_ptr)
+})
diff --git a/src/arrow/r/tests/testthat/test-dplyr-arrange.R b/src/arrow/r/tests/testthat/test-dplyr-arrange.R
new file mode 100644
index 000000000..d22f64a7c
--- /dev/null
+++ b/src/arrow/r/tests/testthat/test-dplyr-arrange.R
@@ -0,0 +1,205 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+skip_if_not_available("dataset")
+
+library(dplyr, warn.conflicts = FALSE)
+
+# randomize order of rows in test data
+tbl <- slice_sample(example_data_for_sorting, prop = 1L)
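+# (prop = 1 shuffles every row, so arrange() has real reordering to do;
+# compare_dplyr_binding(), from the test helpers, runs the same pipeline on
+# the Arrow object and on a plain tibble and compares the results)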
+
+test_that("arrange() on integer, double, and character columns", {
+ compare_dplyr_binding(
+ .input %>%
+ arrange(int, chr) %>%
+ collect(),
+ tbl
+ )
+ compare_dplyr_binding(
+ .input %>%
+ arrange(int, desc(dbl)) %>%
+ collect(),
+ tbl
+ )
+ compare_dplyr_binding(
+ .input %>%
+ arrange(int, desc(desc(dbl))) %>%
+ collect(),
+ tbl
+ )
+ compare_dplyr_binding(
+ .input %>%
+ arrange(int) %>%
+ arrange(desc(dbl)) %>%
+ collect(),
+ tbl
+ )
+ compare_dplyr_binding(
+ .input %>%
+ arrange(int + dbl, chr) %>%
+ collect(),
+ tbl
+ )
+ compare_dplyr_binding(
+ .input %>%
+      mutate(zzz = int + dbl) %>%
+ arrange(zzz, chr) %>%
+ collect(),
+ tbl
+ )
+ compare_dplyr_binding(
+ .input %>%
+ mutate(zzz = int + dbl) %>%
+ arrange(int + dbl, chr) %>%
+ collect(),
+ tbl
+ )
+ compare_dplyr_binding(
+ .input %>%
+ mutate(int + dbl) %>%
+ arrange(int + dbl, chr) %>%
+ collect(),
+ tbl
+ )
+ compare_dplyr_binding(
+ .input %>%
+ group_by(grp) %>%
+ arrange(int, dbl) %>%
+ collect(),
+ tbl
+ )
+ compare_dplyr_binding(
+ .input %>%
+ group_by(grp) %>%
+ arrange(int, dbl, .by_group = TRUE) %>%
+ collect(),
+ tbl
+ )
+ compare_dplyr_binding(
+ .input %>%
+ group_by(grp, grp2) %>%
+ arrange(int, dbl, .by_group = TRUE) %>%
+ collect(),
+ tbl %>%
+ mutate(grp2 = ifelse(is.na(lgl), 1L, as.integer(lgl)))
+ )
+ compare_dplyr_binding(
+ .input %>%
+ group_by(grp) %>%
+ arrange(.by_group = TRUE) %>%
+ pull(grp),
+ tbl
+ )
+ compare_dplyr_binding(
+ .input %>%
+ arrange() %>%
+ collect(),
+ tbl %>%
+ group_by(grp)
+ )
+ compare_dplyr_binding(
+ .input %>%
+ group_by(grp) %>%
+ arrange() %>%
+ collect(),
+ tbl
+ )
+ compare_dplyr_binding(
+ .input %>%
+ arrange() %>%
+ collect(),
+ tbl
+ )
+ test_sort_col <- "chr"
+ compare_dplyr_binding(
+ .input %>%
+ arrange(!!sym(test_sort_col)) %>%
+ collect(),
+ tbl %>%
+ select(chr, lgl)
+ )
+ test_sort_cols <- c("int", "dbl")
+ compare_dplyr_binding(
+ .input %>%
+ arrange(!!!syms(test_sort_cols)) %>%
+ collect(),
+ tbl
+ )
+})
+
+test_that("arrange() on datetime columns", {
+ compare_dplyr_binding(
+ .input %>%
+ arrange(dttm, int) %>%
+ collect(),
+ tbl
+ )
+ compare_dplyr_binding(
+ .input %>%
+ arrange(dttm) %>%
+ collect(),
+ tbl %>%
+ select(dttm, grp)
+ )
+})
+
+test_that("arrange() on logical columns", {
+ compare_dplyr_binding(
+ .input %>%
+ arrange(lgl, int) %>%
+ collect(),
+ tbl
+ )
+})
+
+test_that("arrange() with bad inputs", {
+ expect_error(
+ tbl %>%
+ Table$create() %>%
+ arrange(1),
+ "does not contain any field names",
+ fixed = TRUE
+ )
+ expect_error(
+ tbl %>%
+ Table$create() %>%
+ arrange(2 + 2),
+ "does not contain any field names",
+ fixed = TRUE
+ )
+ expect_error(
+ tbl %>%
+ Table$create() %>%
+ arrange(aertidjfgjksertyj),
+ "not found",
+ fixed = TRUE
+ )
+ expect_error(
+ tbl %>%
+ Table$create() %>%
+ arrange(desc(aertidjfgjksertyj + iaermxiwerksxsdqq)),
+ "not found",
+ fixed = TRUE
+ )
+ expect_error(
+ tbl %>%
+ Table$create() %>%
+ arrange(desc(int, chr)),
+ "expects only one argument",
+ fixed = TRUE
+ )
+})
diff --git a/src/arrow/r/tests/testthat/test-dplyr-collapse.R b/src/arrow/r/tests/testthat/test-dplyr-collapse.R
new file mode 100644
index 000000000..c7281b62d
--- /dev/null
+++ b/src/arrow/r/tests/testthat/test-dplyr-collapse.R
@@ -0,0 +1,235 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+skip_if_not_available("dataset")
+
+withr::local_options(list(arrow.summarise.sort = TRUE))
+
+library(dplyr, warn.conflicts = FALSE)
+library(stringr)
+
+tbl <- example_data
+# Add some better string data
+tbl$verses <- verses[[1]]
+# c(" a ", " b ", " c ", ...) increasing padding
+# nchar = 3 5 7 9 11 13 15 17 19 21
+tbl$padded_strings <- stringr::str_pad(letters[1:10], width = 2 * (1:10) + 1, side = "both")
+tbl$some_grouping <- rep(c(1, 2), 5)
+
+tab <- Table$create(tbl)
+
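+# implicit_schema() is arrow's internal helper that derives the output schema
+# of a dplyr query without evaluating it; the tests below exercise it across
+# select, rename, mutate, and summarize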
+test_that("implicit_schema with select", {
+ expect_equal(
+ tab %>%
+ select(int, lgl) %>%
+ implicit_schema(),
+ schema(int = int32(), lgl = bool())
+ )
+})
+
+test_that("implicit_schema with rename", {
+ expect_equal(
+ tab %>%
+ select(numbers = int, lgl) %>%
+ implicit_schema(),
+ schema(numbers = int32(), lgl = bool())
+ )
+})
+
+test_that("implicit_schema with mutate", {
+ expect_equal(
+ tab %>%
+ transmute(
+ numbers = int * 4,
+ words = as.character(int)
+ ) %>%
+ implicit_schema(),
+ schema(numbers = float64(), words = utf8())
+ )
+})
+
+test_that("implicit_schema with summarize", {
+ expect_equal(
+ tab %>%
+ summarize(
+ avg = mean(int)
+ ) %>%
+ implicit_schema(),
+ schema(avg = float64())
+ )
+})
+
+test_that("implicit_schema with group_by summarize", {
+ expect_equal(
+ tab %>%
+ group_by(some_grouping) %>%
+ summarize(
+ avg = mean(int * 5L)
+ ) %>%
+ implicit_schema(),
+ schema(some_grouping = float64(), avg = float64())
+ )
+})
+
+test_that("collapse", {
+ q <- tab %>%
+ filter(dbl > 2, chr == "d" | chr == "f") %>%
+ select(chr, int, lgl) %>%
+ mutate(twice = int * 2L)
+ expect_false(is_collapsed(q))
+ expect_true(is_collapsed(collapse(q)))
+ expect_false(is_collapsed(collapse(q)$.data))
+
+ compare_dplyr_binding(
+ .input %>%
+ filter(dbl > 2, chr == "d" | chr == "f") %>%
+ select(chr, int, lgl) %>%
+ mutate(twice = int * 2L) %>%
+ collapse() %>%
+ filter(int < 5) %>%
+ select(int, twice) %>%
+ collect(),
+ tbl
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ filter(dbl > 2, chr == "d" | chr == "f") %>%
+ collapse() %>%
+ select(chr, int, lgl) %>%
+ collapse() %>%
+ filter(int < 5) %>%
+ select(int, chr) %>%
+ collect(),
+ tbl
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ filter(dbl > 2, chr == "d" | chr == "f") %>%
+ collapse() %>%
+ group_by(chr) %>%
+ select(chr, int, lgl) %>%
+ collapse() %>%
+ filter(int < 5) %>%
+ select(int, chr) %>%
+ collect(),
+ tbl
+ )
+})
+
+test_that("Properties of collapsed query", {
+ q <- tab %>%
+ filter(dbl > 2) %>%
+ select(chr, int, lgl) %>%
+ mutate(twice = int * 2L) %>%
+ group_by(lgl) %>%
+ summarize(total = sum(int, na.rm = TRUE)) %>%
+ mutate(extra = total * 5)
+
+ # print(tbl %>%
+ # filter(dbl > 2) %>%
+ # select(chr, int, lgl) %>%
+ # mutate(twice = int * 2L) %>%
+ # group_by(lgl) %>%
+ # summarize(total = sum(int, na.rm = TRUE)) %>%
+ # mutate(extra = total * 5))
+
+ # # A tibble: 3 × 3
+ # lgl total extra
+ # <lgl> <int> <dbl>
+ # 1 FALSE 8 40
+ # 2 TRUE 8 40
+ # 3 NA 25 125
+
+ # Avoid evaluating just for nrow
+ expect_identical(dim(q), c(NA_integer_, 3L))
+
+ expect_output(
+ print(q),
+ "InMemoryDataset (query)
+lgl: bool
+total: int32
+extra: double (multiply_checked(total, 5))
+
+See $.data for the source Arrow object",
+ fixed = TRUE
+ )
+ expect_output(
+ print(q$.data),
+ "InMemoryDataset (query)
+int: int32
+lgl: bool
+
+* Aggregations:
+total: sum(int)
+* Filter: (dbl > 2)
+* Grouped by lgl
+See $.data for the source Arrow object",
+ fixed = TRUE
+ )
+
+ skip_if(getRversion() < "3.6.0", "TODO investigate why these aren't equal")
+ # On older R versions:
+ # ── Failure (test-dplyr-collapse.R:172:3): Properties of collapsed query ────────
+ # head(q, 1) %>% collect() not equal to tibble::tibble(lgl = FALSE, total = 8L, extra = 40).
+ # Component "total": Mean relative difference: 0.3846154
+ # Component "extra": Mean relative difference: 0.3846154
+ # ── Failure (test-dplyr-collapse.R:176:3): Properties of collapsed query ────────
+ # tail(q, 1) %>% collect() not equal to tibble::tibble(lgl = NA, total = 25L, extra = 125).
+ # Component "total": Mean relative difference: 0.9230769
+ # Component "extra": Mean relative difference: 0.9230769
+ expect_equal(
+ q %>% head(1) %>% collect(),
+ tibble::tibble(lgl = FALSE, total = 8L, extra = 40)
+ )
+ skip("TODO (ARROW-1XXXX): implement sorting option about where NAs go")
+ expect_equal(
+ q %>% tail(1) %>% collect(),
+ tibble::tibble(lgl = NA, total = 25L, extra = 125)
+ )
+})
+
+test_that("query_on_dataset handles collapse()", {
+ expect_false(query_on_dataset(
+ tab %>%
+ select(int, chr)
+ ))
+ expect_false(query_on_dataset(
+ tab %>%
+ select(int, chr) %>%
+ collapse() %>%
+ select(int)
+ ))
+
+ ds_dir <- tempfile()
+ dir.create(ds_dir)
+  on.exit(unlink(ds_dir, recursive = TRUE))
+ write_parquet(tab, file.path(ds_dir, "file.parquet"))
+ ds <- open_dataset(ds_dir)
+
+ expect_true(query_on_dataset(
+ ds %>%
+ select(int, chr)
+ ))
+ expect_true(query_on_dataset(
+ ds %>%
+ select(int, chr) %>%
+ collapse() %>%
+ select(int)
+ ))
+})
diff --git a/src/arrow/r/tests/testthat/test-dplyr-count.R b/src/arrow/r/tests/testthat/test-dplyr-count.R
new file mode 100644
index 000000000..8af9b57aa
--- /dev/null
+++ b/src/arrow/r/tests/testthat/test-dplyr-count.R
@@ -0,0 +1,92 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+skip_if_not_available("dataset")
+
+library(dplyr, warn.conflicts = FALSE)
+
+tbl <- example_data
+tbl$some_grouping <- rep(c(1, 2), 5)
+
+test_that("count/tally", {
+ compare_dplyr_binding(
+ .input %>%
+ count() %>%
+ collect(),
+ tbl
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ tally() %>%
+ collect(),
+ tbl
+ )
+})
+
+test_that("count/tally with wt and grouped data", {
+ compare_dplyr_binding(
+ .input %>%
+ group_by(some_grouping) %>%
+ count(wt = int) %>%
+ collect(),
+ tbl
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ group_by(some_grouping) %>%
+ tally(wt = int) %>%
+ collect(),
+ tbl
+ )
+})
+
+test_that("count/tally with sort", {
+ compare_dplyr_binding(
+ .input %>%
+ group_by(some_grouping) %>%
+ count(wt = int, sort = TRUE) %>%
+ collect(),
+ tbl
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ group_by(some_grouping) %>%
+ tally(wt = int, sort = TRUE) %>%
+ collect(),
+ tbl
+ )
+})
+
+test_that("count/tally with name arg", {
+ compare_dplyr_binding(
+ .input %>%
+ count(name = "new_col") %>%
+ collect(),
+ tbl
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ tally(name = "new_col") %>%
+ collect(),
+ tbl
+ )
+})
diff --git a/src/arrow/r/tests/testthat/test-dplyr-distinct.R b/src/arrow/r/tests/testthat/test-dplyr-distinct.R
new file mode 100644
index 000000000..3a44c7372
--- /dev/null
+++ b/src/arrow/r/tests/testthat/test-dplyr-distinct.R
@@ -0,0 +1,104 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+skip_if_not_available("dataset")
+
+library(dplyr, warn.conflicts = FALSE)
+
+tbl <- example_data
+tbl$some_grouping <- rep(c(1, 2), 5)
+
+test_that("distinct()", {
+ compare_dplyr_binding(
+ .input %>%
+ distinct(some_grouping, lgl) %>%
+ collect() %>%
+ arrange(some_grouping, lgl),
+ tbl
+ )
+})
+
+test_that("distinct() works without any variables", {
+ compare_dplyr_binding(
+ .input %>%
+ distinct() %>%
+ arrange(int) %>%
+ collect(),
+ tbl
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ group_by(x = int + 1) %>%
+ distinct() %>%
+ # Even though we have group_by(x), all cols (including int) are kept
+ arrange(int) %>%
+ collect(),
+ tbl
+ )
+})
+
+test_that("distinct() can retain groups", {
+ compare_dplyr_binding(
+ .input %>%
+ group_by(some_grouping, int) %>%
+ distinct(lgl) %>%
+ collect() %>%
+ arrange(lgl, int),
+ tbl
+ )
+
+ # With expressions here
+ compare_dplyr_binding(
+ .input %>%
+ group_by(y = some_grouping, int) %>%
+ distinct(x = lgl) %>%
+ collect() %>%
+ arrange(int),
+ tbl
+ )
+})
+
+test_that("distinct() can contain expressions", {
+ compare_dplyr_binding(
+ .input %>%
+ distinct(lgl, x = some_grouping + 1) %>%
+ collect() %>%
+ arrange(lgl, x),
+ tbl
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ group_by(lgl, int) %>%
+ distinct(x = some_grouping + 1) %>%
+ collect() %>%
+ arrange(int),
+ tbl
+ )
+})
+
+test_that("distinct() can return all columns", {
+ skip("ARROW-13993 - need this to return correct rows from other cols")
+ compare_dplyr_binding(
+ .input %>%
+ distinct(lgl, .keep_all = TRUE) %>%
+ collect() %>%
+ arrange(int),
+ tbl
+ )
+})
diff --git a/src/arrow/r/tests/testthat/test-dplyr-filter.R b/src/arrow/r/tests/testthat/test-dplyr-filter.R
new file mode 100644
index 000000000..72a64229c
--- /dev/null
+++ b/src/arrow/r/tests/testthat/test-dplyr-filter.R
@@ -0,0 +1,412 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+skip_if_not_available("dataset")
+
+library(dplyr, warn.conflicts = FALSE)
+library(stringr)
+
+tbl <- example_data
+# Add some better string data
+tbl$verses <- verses[[1]]
+# c(" a ", " b ", " c ", ...) increasing padding
+# nchar = 3 5 7 9 11 13 15 17 19 21
+tbl$padded_strings <- stringr::str_pad(letters[1:10], width = 2 * (1:10) + 1, side = "both")
+tbl$some_negative <- tbl$int * (-1)^(1:nrow(tbl)) # nolint
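+# (alternates the sign of int: odd rows negative, even rows positive)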
+
+test_that("filter() on is.na()", {
+ compare_dplyr_binding(
+ .input %>%
+ filter(is.na(lgl)) %>%
+ select(chr, int, lgl) %>%
+ collect(),
+ tbl
+ )
+})
+
+test_that("filter() with NAs in selection", {
+ compare_dplyr_binding(
+ .input %>%
+ filter(lgl) %>%
+ select(chr, int, lgl) %>%
+ collect(),
+ tbl
+ )
+})
+
+test_that("Filter returning an empty Table should not segfault (ARROW-8354)", {
+ compare_dplyr_binding(
+ .input %>%
+ filter(false) %>%
+ select(chr, int, lgl) %>%
+ collect(),
+ tbl
+ )
+})
+
+test_that("filtering with expression", {
+ char_sym <- "b"
+ compare_dplyr_binding(
+ .input %>%
+ filter(chr == char_sym) %>%
+ select(string = chr, int) %>%
+ collect(),
+ tbl
+ )
+})
+
+test_that("filtering with arithmetic", {
+ compare_dplyr_binding(
+ .input %>%
+ filter(dbl + 1 > 3) %>%
+ select(string = chr, int, dbl) %>%
+ collect(),
+ tbl
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ filter(dbl / 2 > 3) %>%
+ select(string = chr, int, dbl) %>%
+ collect(),
+ tbl
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ filter(dbl / 2L > 3) %>%
+ select(string = chr, int, dbl) %>%
+ collect(),
+ tbl
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ filter(int / 2 > 3) %>%
+ select(string = chr, int, dbl) %>%
+ collect(),
+ tbl
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ filter(int / 2L > 3) %>%
+ select(string = chr, int, dbl) %>%
+ collect(),
+ tbl
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ filter(dbl %/% 2 > 3) %>%
+ select(string = chr, int, dbl) %>%
+ collect(),
+ tbl
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ filter(dbl^2 > 3) %>%
+ select(string = chr, int, dbl) %>%
+ collect(),
+ tbl
+ )
+})
+
+test_that("filtering with expression + autocasting", {
+ compare_dplyr_binding(
+ .input %>%
+ filter(dbl + 1 > 3L) %>% # test autocasting with comparison to 3L
+ select(string = chr, int, dbl) %>%
+ collect(),
+ tbl
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ filter(int + 1 > 3) %>%
+ select(string = chr, int, dbl) %>%
+ collect(),
+ tbl
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ filter(int^2 > 3) %>%
+ select(string = chr, int, dbl) %>%
+ collect(),
+ tbl
+ )
+})
+
+test_that("More complex select/filter", {
+ compare_dplyr_binding(
+ .input %>%
+ filter(dbl > 2, chr == "d" | chr == "f") %>%
+ select(chr, int, lgl) %>%
+ filter(int < 5) %>%
+ select(int, chr) %>%
+ collect(),
+ tbl
+ )
+})
+
+test_that("filter() with %in%", {
+ compare_dplyr_binding(
+ .input %>%
+ filter(dbl > 2, chr %in% c("d", "f")) %>%
+ collect(),
+ tbl
+ )
+})
+
+test_that("Negative scalar values", {
+ compare_dplyr_binding(
+ .input %>%
+ filter(some_negative > -2) %>%
+ collect(),
+ tbl
+ )
+ compare_dplyr_binding(
+ .input %>%
+ filter(some_negative %in% -1) %>%
+ collect(),
+ tbl
+ )
+ compare_dplyr_binding(
+ .input %>%
+ filter(int == -some_negative) %>%
+ collect(),
+ tbl
+ )
+})
+
+test_that("filter() with between()", {
+ compare_dplyr_binding(
+ .input %>%
+ filter(between(dbl, 1, 2)) %>%
+ collect(),
+ tbl
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ filter(between(dbl, 0.5, 2)) %>%
+ collect(),
+ tbl
+ )
+
+ expect_identical(
+ tbl %>%
+ record_batch() %>%
+ filter(between(dbl, int, dbl2)) %>%
+ collect(),
+ tbl %>%
+ filter(dbl >= int, dbl <= dbl2)
+ )
+
+ expect_error(
+ tbl %>%
+ record_batch() %>%
+ filter(between(dbl, 1, "2")) %>%
+ collect()
+ )
+
+ expect_error(
+ tbl %>%
+ record_batch() %>%
+ filter(between(dbl, 1, NA)) %>%
+ collect()
+ )
+
+ expect_error(
+ tbl %>%
+ record_batch() %>%
+ filter(between(chr, 1, 2)) %>%
+ collect()
+ )
+})
+
+test_that("filter() with string ops", {
+ skip_if_not_available("utf8proc")
+ compare_dplyr_binding(
+ .input %>%
+ filter(dbl > 2, str_length(verses) > 25) %>%
+ collect(),
+ tbl
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ filter(dbl > 2, str_length(str_trim(padded_strings, "left")) > 5) %>%
+ collect(),
+ tbl
+ )
+})
+
+test_that("filter environment scope", {
+ # "object 'b_var' not found"
+ compare_dplyr_error(.input %>% filter(chr == b_var), tbl)
+
+ b_var <- "b"
+ compare_dplyr_binding(
+ .input %>%
+ filter(chr == b_var) %>%
+ collect(),
+ tbl
+ )
+ # Also for functions
+ # 'could not find function "isEqualTo"' because we haven't defined it yet
+ compare_dplyr_error(.input %>% filter(isEqualTo(int, 4)), tbl)
+
+ # This works but only because there are S3 methods for those operations
+ isEqualTo <- function(x, y) x == y & !is.na(x)
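+  # (a sketch of why: inside filter(), `int` is an arrow Expression, so `==`,
+  # `&`, and is.na() dispatch to the Expression methods and build an
+  # expression tree instead of evaluating eagerly in R)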
+ compare_dplyr_binding(
+ .input %>%
+ select(-fct) %>% # factor levels aren't identical
+ filter(isEqualTo(int, 4)) %>%
+ collect(),
+ tbl
+ )
+ # Try something that needs to call another nse_func
+ compare_dplyr_binding(
+ .input %>%
+ select(-fct) %>%
+ filter(nchar(padded_strings) < 10) %>%
+ collect(),
+ tbl
+ )
+ isShortString <- function(x) nchar(x) < 10
+ skip("TODO: 14071")
+ compare_dplyr_binding(
+ .input %>%
+ select(-fct) %>%
+ filter(isShortString(padded_strings)) %>%
+ collect(),
+ tbl
+ )
+})
+
+test_that("Filtering on a column that doesn't exist errors correctly", {
+ with_language("fr", {
+    # expect_warning(., NA) asserts that no warning is raised: the usual
+    # behavior when it hits a filter that it can't evaluate is to raise a
+    # warning, collect() to R, and retry the filter. But we want this to error
+    # the first time because it's a user error, not solvable by retrying in R
+ expect_warning(
+ expect_error(
+ tbl %>% record_batch() %>% filter(not_a_col == 42) %>% collect(),
+ "objet 'not_a_col' introuvable"
+ ),
+ NA
+ )
+ })
+ with_language("en", {
+ expect_warning(
+ expect_error(
+ tbl %>% record_batch() %>% filter(not_a_col == 42) %>% collect(),
+ "object 'not_a_col' not found"
+ ),
+ NA
+ )
+ })
+})
+
+test_that("Filtering with unsupported functions", {
+ compare_dplyr_binding(
+ .input %>%
+ filter(int > 2, pnorm(dbl) > .99) %>%
+ collect(),
+ tbl,
+ warning = "Expression pnorm\\(dbl\\) > 0.99 not supported in Arrow; pulling data into R"
+ )
+ compare_dplyr_binding(
+ .input %>%
+ filter(
+ nchar(chr, type = "bytes", allowNA = TRUE) == 1, # bad, Arrow msg
+ int > 2, # good
+ pnorm(dbl) > .99 # bad, opaque
+ ) %>%
+ collect(),
+ tbl,
+ warning = '\\* In nchar\\(chr, type = "bytes", allowNA = TRUE\\) == 1, allowNA = TRUE not supported by Arrow
+\\* Expression pnorm\\(dbl\\) > 0.99 not supported in Arrow
+pulling data into R'
+ )
+})
+
+test_that("Calling Arrow compute functions 'directly'", {
+ expect_equal(
+ tbl %>%
+ record_batch() %>%
+ filter(arrow_add(dbl, 1) > 3L) %>%
+ select(string = chr, int, dbl) %>%
+ collect(),
+ tbl %>%
+ filter(dbl + 1 > 3L) %>%
+ select(string = chr, int, dbl)
+ )
+
+ compare_dplyr_binding(
+ tbl %>%
+ record_batch() %>%
+ filter(arrow_greater(arrow_add(dbl, 1), 3L)) %>%
+ select(string = chr, int, dbl) %>%
+ collect(),
+ tbl %>%
+ filter(dbl + 1 > 3L) %>%
+ select(string = chr, int, dbl)
+ )
+})
+
+test_that("filter() with .data pronoun", {
+ compare_dplyr_binding(
+ .input %>%
+ filter(.data$dbl > 4) %>%
+ select(.data$chr, .data$int, .data$lgl) %>%
+ collect(),
+ tbl
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ filter(is.na(.data$lgl)) %>%
+ select(.data$chr, .data$int, .data$lgl) %>%
+ collect(),
+ tbl
+ )
+
+ # and the .env pronoun too!
+ chr <- 4
+ compare_dplyr_binding(
+ .input %>%
+ filter(.data$dbl > .env$chr) %>%
+ select(.data$chr, .data$int, .data$lgl) %>%
+ collect(),
+ tbl
+ )
+
+ skip("test now faulty - code no longer gives error & outputs a empty tibble")
+ # but there is an error if we don't override the masking with `.env`
+ compare_dplyr_error(
+ .input %>%
+ filter(.data$dbl > chr) %>%
+ select(.data$chr, .data$int, .data$lgl) %>%
+ collect(),
+ tbl
+ )
+})
diff --git a/src/arrow/r/tests/testthat/test-dplyr-funcs-conditional.R b/src/arrow/r/tests/testthat/test-dplyr-funcs-conditional.R
new file mode 100644
index 000000000..4f2700795
--- /dev/null
+++ b/src/arrow/r/tests/testthat/test-dplyr-funcs-conditional.R
@@ -0,0 +1,409 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+skip_if_not_available("dataset")
+
+library(dplyr, warn.conflicts = FALSE)
+suppressPackageStartupMessages(library(bit64))
+
+
+tbl <- example_data
+tbl$verses <- verses[[1]]
+tbl$another_chr <- tail(letters, 10)
+
+test_that("if_else and ifelse", {
+ compare_dplyr_binding(
+ .input %>%
+ mutate(
+ y = if_else(int > 5, 1, 0)
+ ) %>%
+ collect(),
+ tbl
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ mutate(
+ y = if_else(int > 5, int, 0L)
+ ) %>%
+ collect(),
+ tbl
+ )
+
+ expect_error(
+ Table$create(tbl) %>%
+ mutate(
+ y = if_else(int > 5, 1, FALSE)
+ ) %>%
+ collect(),
+ "NotImplemented: Function if_else has no kernel matching input types"
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ mutate(
+ y = if_else(int > 5, 1, NA_real_)
+ ) %>%
+ collect(),
+ tbl
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ mutate(
+ y = ifelse(int > 5, 1, 0)
+ ) %>%
+ collect(),
+ tbl
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ mutate(
+ y = if_else(dbl > 5, TRUE, FALSE)
+ ) %>%
+ collect(),
+ tbl
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ mutate(
+ y = if_else(chr %in% letters[1:3], 1L, 3L)
+ ) %>%
+ collect(),
+ tbl
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ mutate(
+ y = if_else(int > 5, "one", "zero")
+ ) %>%
+ collect(),
+ tbl
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ mutate(
+ y = if_else(int > 5, chr, another_chr)
+ ) %>%
+ collect(),
+ tbl
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ mutate(
+ y = if_else(int > 5, "true", chr, missing = "MISSING")
+ ) %>%
+ collect(),
+ tbl
+ )
+
+ # TODO: remove the mutate + warning after ARROW-13358 is merged and Arrow
+ # supports factors in if(_)else
+ compare_dplyr_binding(
+ .input %>%
+ mutate(
+ y = if_else(int > 5, fct, factor("a"))
+ ) %>%
+ collect() %>%
+ # This is a no-op on the Arrow side, but necessary to make the results equal
+ mutate(y = as.character(y)),
+ tbl,
+ warning = "Dictionaries .* are currently converted to strings .* in if_else and ifelse"
+ )
+
+ # detecting NA and NaN works just fine
+ compare_dplyr_binding(
+ .input %>%
+ mutate(
+ y = if_else(is.na(dbl), chr, "false", missing = "MISSING")
+ ) %>%
+ collect(),
+ example_data_for_sorting
+ )
+
+ # However, currently comparisons with NaNs return false and not NaNs or NAs
+ skip("ARROW-13364")
+ compare_dplyr_binding(
+ .input %>%
+ mutate(
+ y = if_else(dbl > 5, chr, another_chr, missing = "MISSING")
+ ) %>%
+ collect(),
+ example_data_for_sorting
+ )
+
+ skip("TODO: could? should? we support the autocasting in ifelse")
+ compare_dplyr_binding(
+ .input %>%
+ mutate(y = ifelse(int > 5, 1, FALSE)) %>%
+ collect(),
+ tbl
+ )
+})
+
+test_that("case_when()", {
+ compare_dplyr_binding(
+ .input %>%
+ transmute(cw = case_when(lgl ~ dbl, !false ~ dbl + dbl2)) %>%
+ collect(),
+ tbl
+ )
+ compare_dplyr_binding(
+ .input %>%
+ mutate(cw = case_when(int > 5 ~ 1, TRUE ~ 0)) %>%
+ collect(),
+ tbl
+ )
+ compare_dplyr_binding(
+ .input %>%
+ transmute(cw = case_when(chr %in% letters[1:3] ~ 1L) + 41L) %>%
+ collect(),
+ tbl
+ )
+ compare_dplyr_binding(
+ .input %>%
+ filter(case_when(
+ dbl + int - 1.1 == dbl2 ~ TRUE,
+ NA ~ NA,
+ TRUE ~ FALSE
+ ) & !is.na(dbl2)) %>%
+ collect(),
+ tbl
+ )
+
+ # dplyr::case_when() errors if values on right side of formulas do not have
+ # exactly the same type, but the Arrow case_when kernel allows compatible types
+ expect_equal(
+ tbl %>%
+ mutate(i64 = as.integer64(1e10)) %>%
+ Table$create() %>%
+ transmute(cw = case_when(
+ is.na(fct) ~ int,
+ is.na(chr) ~ dbl,
+ TRUE ~ i64
+ )) %>%
+ collect(),
+ tbl %>%
+ transmute(
+ cw = ifelse(is.na(fct), int, ifelse(is.na(chr), dbl, 1e10))
+ )
+ )
+
+ # expected errors (which are caught by abandon_ship() and changed to warnings)
+ # TODO: Find a way to test these directly without abandon_ship() interfering
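+  # (hence the nested pattern below: the inner expect_warning() consumes the
+  # warning that abandon_ship() raises, and the outer expect_error() catches
+  # the error from retrying the bad case_when() call in R)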
+ expect_error(
+ # no cases
+ expect_warning(
+ tbl %>%
+ Table$create() %>%
+ transmute(cw = case_when()),
+ "case_when"
+ )
+ )
+ expect_error(
+ # argument not a formula
+ expect_warning(
+ tbl %>%
+ Table$create() %>%
+ transmute(cw = case_when(TRUE ~ FALSE, TRUE)),
+ "case_when"
+ )
+ )
+ expect_error(
+ # non-logical R scalar on left side of formula
+ expect_warning(
+ tbl %>%
+ Table$create() %>%
+ transmute(cw = case_when(0L ~ FALSE, TRUE ~ FALSE)),
+ "case_when"
+ )
+ )
+ expect_error(
+ # non-logical Arrow column reference on left side of formula
+ expect_warning(
+ tbl %>%
+ Table$create() %>%
+ transmute(cw = case_when(int ~ FALSE)),
+ "case_when"
+ )
+ )
+ expect_error(
+ # non-logical Arrow expression on left side of formula
+ expect_warning(
+ tbl %>%
+ Table$create() %>%
+ transmute(cw = case_when(dbl + 3.14159 ~ TRUE)),
+ "case_when"
+ )
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ transmute(cw = case_when(lgl ~ "abc")) %>%
+ collect(),
+ tbl
+ )
+ compare_dplyr_binding(
+ .input %>%
+ transmute(cw = case_when(lgl ~ verses, !false ~ paste(chr, chr))) %>%
+ collect(),
+ tbl
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ mutate(
+ cw = case_when(!(!(!(lgl))) ~ factor(chr), TRUE ~ fct)
+ ) %>%
+ collect(),
+ tbl,
+ warning = TRUE
+ )
+})
+
+test_that("coalesce()", {
+ # character
+ df <- tibble(
+ w = c(NA_character_, NA_character_, NA_character_),
+ x = c(NA_character_, NA_character_, "c"),
+ y = c(NA_character_, "b", "c"),
+ z = c("a", "b", "c")
+ )
+ compare_dplyr_binding(
+ .input %>%
+ mutate(
+ cw = coalesce(w),
+ cz = coalesce(z),
+ cwx = coalesce(w, x),
+ cwxy = coalesce(w, x, y),
+ cwxyz = coalesce(w, x, y, z)
+ ) %>%
+ collect(),
+ df
+ )
+
+ # integer
+ df <- tibble(
+ w = c(NA_integer_, NA_integer_, NA_integer_),
+ x = c(NA_integer_, NA_integer_, 3L),
+ y = c(NA_integer_, 2L, 3L),
+ z = 1:3
+ )
+ compare_dplyr_binding(
+ .input %>%
+ mutate(
+ cw = coalesce(w),
+ cz = coalesce(z),
+ cwx = coalesce(w, x),
+ cwxy = coalesce(w, x, y),
+ cwxyz = coalesce(w, x, y, z)
+ ) %>%
+ collect(),
+ df
+ )
+
+ # double with NaNs
+ df <- tibble(
+ w = c(NA_real_, NaN, NA_real_),
+ x = c(NA_real_, NaN, 3.3),
+ y = c(NA_real_, 2.2, 3.3),
+ z = c(1.1, 2.2, 3.3)
+ )
+ compare_dplyr_binding(
+ .input %>%
+ mutate(
+ cw = coalesce(w),
+ cz = coalesce(z),
+ cwx = coalesce(w, x),
+ cwxy = coalesce(w, x, y),
+ cwxyz = coalesce(w, x, y, z)
+ ) %>%
+ collect(),
+ df
+ )
+  # NaNs stay NaN and are not converted to NA in the results
+  # (testing this requires expect_identical(): all.equal() treats NaN and
+  # NA_real_ as equal, so expect_equal() would pass either way)
+ expect_identical(
+ df %>% Table$create() %>% mutate(cwx = coalesce(w, x)) %>% collect(),
+ df %>% mutate(cwx = coalesce(w, x))
+ )
+ expect_identical(
+ df %>% Table$create() %>% transmute(cw = coalesce(w)) %>% collect(),
+ df %>% transmute(cw = coalesce(w))
+ )
+ expect_identical(
+ df %>% Table$create() %>% transmute(cn = coalesce(NaN)) %>% collect(),
+ df %>% transmute(cn = coalesce(NaN))
+ )
+ # singles stay single
+ expect_equal(
+ (df %>%
+ Table$create(schema = schema(
+ w = float32(),
+ x = float32(),
+ y = float32(),
+ z = float32()
+ )) %>%
+ transmute(c = coalesce(w, x, y, z)) %>%
+ compute()
+ )$schema[[1]]$type,
+ float32()
+ )
+ # with R literal values
+ compare_dplyr_binding(
+ .input %>%
+ mutate(
+ c1 = coalesce(4.4),
+ c2 = coalesce(NA_real_),
+ c3 = coalesce(NaN),
+ c4 = coalesce(w, x, y, 5.5),
+ c5 = coalesce(w, x, y, NA_real_),
+ c6 = coalesce(w, x, y, NaN)
+ ) %>%
+ collect(),
+ df
+ )
+
+ # factors
+ # TODO: remove the mutate + warning after ARROW-14167 is merged and Arrow
+ # supports factors in coalesce
+ df <- tibble(
+ x = factor("a", levels = c("a", "z")),
+ y = factor("b", levels = c("a", "b", "c"))
+ )
+ compare_dplyr_binding(
+ .input %>%
+ mutate(c = coalesce(x, y)) %>%
+ collect() %>%
+ # This is a no-op on the Arrow side, but necessary to make the results equal
+ mutate(c = as.character(c)),
+ df,
+ warning = "Dictionaries .* are currently converted to strings .* in coalesce"
+ )
+
+ # no arguments
+ expect_error(
+ nse_funcs$coalesce(),
+ "At least one argument must be supplied to coalesce()",
+ fixed = TRUE
+ )
+})
diff --git a/src/arrow/r/tests/testthat/test-dplyr-funcs-datetime.R b/src/arrow/r/tests/testthat/test-dplyr-funcs-datetime.R
new file mode 100644
index 000000000..5cb515e69
--- /dev/null
+++ b/src/arrow/r/tests/testthat/test-dplyr-funcs-datetime.R
@@ -0,0 +1,304 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+skip_if_not_available("dataset")
+
+library(lubridate, warn.conflicts = FALSE)
+library(dplyr, warn.conflicts = FALSE)
+
+# base::strptime() defaults to local timezone
+# but arrow's strptime defaults to UTC.
+# So that tests are consistent, set the local timezone to UTC
+# TODO: consider reevaluating this workaround after ARROW-12980
+withr::local_timezone("UTC")
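+# (sanity check, a minimal sketch: withr sets the TZ environment variable,
+# so the session's local zone is now UTC and base::strptime() will agree
+# with arrow's kernel on the tzone of parsed timestamps)
+stopifnot(identical(Sys.timezone(), "UTC"))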
+
+# TODO: We should test on windows once ARROW-13168 is resolved.
+if (tolower(Sys.info()[["sysname"]]) == "windows") {
+ test_date <- as.POSIXct("2017-01-01 00:00:11.3456789", tz = "")
+} else {
+ test_date <- as.POSIXct("2017-01-01 00:00:11.3456789", tz = "Pacific/Marquesas")
+}
+
+
+test_df <- tibble::tibble(
+  # test_date + 1 turns tzone = "" into NULL, which is functionally equivalent
+ # so we can run some tests on Windows, but this skirts around
+ # https://issues.apache.org/jira/browse/ARROW-13588
+ # That issue is tough because in C++, "" is the "no timezone" value
+ # due to static typing, so we can't distinguish a literal "" from NULL
+ datetime = c(test_date, NA) + 1,
+ date = c(as.Date("2021-09-09"), NA)
+)
+
+# These tests test component extraction from timestamp objects
+
+test_that("extract year from timestamp", {
+ compare_dplyr_binding(
+ .input %>%
+ mutate(x = year(datetime)) %>%
+ collect(),
+ test_df
+ )
+})
+
+test_that("extract isoyear from timestamp", {
+ compare_dplyr_binding(
+ .input %>%
+ mutate(x = isoyear(datetime)) %>%
+ collect(),
+ test_df
+ )
+})
+
+test_that("extract quarter from timestamp", {
+ compare_dplyr_binding(
+ .input %>%
+ mutate(x = quarter(datetime)) %>%
+ collect(),
+ test_df
+ )
+})
+
+test_that("extract month from timestamp", {
+ compare_dplyr_binding(
+ .input %>%
+ mutate(x = month(datetime)) %>%
+ collect(),
+ test_df
+ )
+})
+
+test_that("extract isoweek from timestamp", {
+ compare_dplyr_binding(
+ .input %>%
+ mutate(x = isoweek(datetime)) %>%
+ collect(),
+ test_df
+ )
+})
+
+test_that("extract epiweek from timestamp", {
+ compare_dplyr_binding(
+ .input %>%
+ mutate(x = epiweek(datetime)) %>%
+ collect(),
+ test_df
+ )
+})
+
+test_that("extract day from timestamp", {
+ compare_dplyr_binding(
+ .input %>%
+ mutate(x = day(datetime)) %>%
+ collect(),
+ test_df
+ )
+})
+
+test_that("extract wday from timestamp", {
+ compare_dplyr_binding(
+ .input %>%
+ mutate(x = wday(datetime)) %>%
+ collect(),
+ test_df
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+      mutate(x = wday(datetime, week_start = 3)) %>%
+ collect(),
+ test_df
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+      mutate(x = wday(datetime, week_start = 1)) %>%
+ collect(),
+ test_df
+ )
+
+ skip_on_os("windows") # https://issues.apache.org/jira/browse/ARROW-13168
+
+ compare_dplyr_binding(
+ .input %>%
+      mutate(x = wday(datetime, label = TRUE)) %>%
+ mutate(x = as.character(x)) %>%
+ collect(),
+ test_df
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ mutate(x = wday(datetime, label = TRUE, abbr = TRUE)) %>%
+ mutate(x = as.character(x)) %>%
+ collect(),
+ test_df
+ )
+})
+
+test_that("extract yday from timestamp", {
+ compare_dplyr_binding(
+ .input %>%
+ mutate(x = yday(datetime)) %>%
+ collect(),
+ test_df
+ )
+})
+
+test_that("extract hour from timestamp", {
+ compare_dplyr_binding(
+ .input %>%
+ mutate(x = hour(datetime)) %>%
+ collect(),
+ test_df
+ )
+})
+
+test_that("extract minute from timestamp", {
+ compare_dplyr_binding(
+ .input %>%
+ mutate(x = minute(datetime)) %>%
+ collect(),
+ test_df
+ )
+})
+
+test_that("extract second from timestamp", {
+ compare_dplyr_binding(
+ .input %>%
+ mutate(x = second(datetime)) %>%
+ collect(),
+ test_df,
+ # arrow supports nanosecond resolution but lubridate does not
+ tolerance = 1e-6
+ )
+})
+
+# These tests test extraction of components from date32 objects
+
+test_that("extract year from date", {
+ compare_dplyr_binding(
+ .input %>%
+ mutate(x = year(date)) %>%
+ collect(),
+ test_df
+ )
+})
+
+test_that("extract isoyear from date", {
+ compare_dplyr_binding(
+ .input %>%
+ mutate(x = isoyear(date)) %>%
+ collect(),
+ test_df
+ )
+})
+
+test_that("extract quarter from date", {
+ compare_dplyr_binding(
+ .input %>%
+ mutate(x = quarter(date)) %>%
+ collect(),
+ test_df
+ )
+})
+
+test_that("extract month from date", {
+ compare_dplyr_binding(
+ .input %>%
+ mutate(x = month(date)) %>%
+ collect(),
+ test_df
+ )
+})
+
+test_that("extract isoweek from date", {
+ compare_dplyr_binding(
+ .input %>%
+ mutate(x = isoweek(date)) %>%
+ collect(),
+ test_df
+ )
+})
+
+test_that("extract epiweek from date", {
+ compare_dplyr_binding(
+ .input %>%
+ mutate(x = epiweek(date)) %>%
+ collect(),
+ test_df
+ )
+})
+
+test_that("extract day from date", {
+ compare_dplyr_binding(
+ .input %>%
+ mutate(x = day(date)) %>%
+ collect(),
+ test_df
+ )
+})
+
+test_that("extract wday from date", {
+ compare_dplyr_binding(
+ .input %>%
+ mutate(x = wday(date)) %>%
+ collect(),
+ test_df
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ mutate(x = wday(date, week_start = 3)) %>%
+ collect(),
+ test_df
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ mutate(x = wday(date, week_start = 1)) %>%
+ collect(),
+ test_df
+ )
+
+ skip_on_os("windows") # https://issues.apache.org/jira/browse/ARROW-13168
+
+ compare_dplyr_binding(
+ .input %>%
+ mutate(x = wday(date, label = TRUE, abbr = TRUE)) %>%
+ mutate(x = as.character(x)) %>%
+ collect(),
+ test_df
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ mutate(x = wday(date, label = TRUE)) %>%
+ mutate(x = as.character(x)) %>%
+ collect(),
+ test_df
+ )
+})
+
+test_that("extract yday from date", {
+ compare_dplyr_binding(
+ .input %>%
+ mutate(x = yday(date)) %>%
+ collect(),
+ test_df
+ )
+})
diff --git a/src/arrow/r/tests/testthat/test-dplyr-funcs-math.R b/src/arrow/r/tests/testthat/test-dplyr-funcs-math.R
new file mode 100644
index 000000000..b66630675
--- /dev/null
+++ b/src/arrow/r/tests/testthat/test-dplyr-funcs-math.R
@@ -0,0 +1,309 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+skip_if_not_available("dataset")
+
+library(dplyr, warn.conflicts = FALSE)
+
+
+test_that("abs()", {
+ df <- tibble(x = c(-127, -10, -1, -0, 0, 1, 10, 127, NA))
+
+ compare_dplyr_binding(
+ .input %>%
+ transmute(abs = abs(x)) %>%
+ collect(),
+ df
+ )
+})
+
+test_that("sign()", {
+ df <- tibble(x = c(-127, -10, -1, -0, 0, 1, 10, 127, NA))
+
+ compare_dplyr_binding(
+ .input %>%
+ transmute(sign = sign(x)) %>%
+ collect(),
+ df
+ )
+})
+
+test_that("ceiling(), floor(), trunc(), round()", {
+ df <- tibble(x = c(-1, -0.55, -0.5, -0.1, 0, 0.1, 0.5, 0.55, 1, NA, NaN))
+
+ compare_dplyr_binding(
+ .input %>%
+ mutate(
+ c = ceiling(x),
+ f = floor(x),
+ t = trunc(x),
+ r = round(x)
+ ) %>%
+ collect(),
+ df
+ )
+
+ # with digits set to 1
+ compare_dplyr_binding(
+ .input %>%
+ filter(x %% 0.5 == 0) %>% # filter out indeterminate cases (see below)
+ mutate(r = round(x, 1)) %>%
+ collect(),
+ df
+ )
+
+ # with digits set to -1
+ compare_dplyr_binding(
+ .input %>%
+ mutate(
+ rd = round(floor(x * 111), -1), # double
+ y = ifelse(is.nan(x), NA_integer_, x),
+ ri = round(as.integer(y * 111), -1) # integer (with the NaN removed)
+ ) %>%
+ collect(),
+ df
+ )
+
+ # round(x, -2) is equivalent to round_to_multiple(x, 100)
+ expect_equal(
+ Table$create(x = 1111.1) %>%
+ mutate(r = round(x, -2)) %>%
+ collect(),
+ Table$create(x = 1111.1) %>%
+ mutate(r = arrow_round_to_multiple(x, options = list(multiple = 100))) %>%
+ collect()
+ )
+
+ # For consistency with base R, the binding for round() uses the Arrow
+ # library's HALF_TO_EVEN round mode, but the expectations *above* would pass
+ # even if another round mode were used. The expectations *below* should fail
+ # with other round modes. However, some decimal numbers cannot be represented
+ # exactly as floating point numbers, and for the ones that also end in 5 (such
+ # as 0.55), R's rounding behavior is indeterminate: it will vary depending on
+ # the OS. In practice, this seems to affect Windows, so we skip these tests
+ # on Windows and on CRAN.
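+
+  # A minimal worked example of HALF_TO_EVEN, using only base R (whose round()
+  # documents IEC 60559 semantics): exact halves go to the nearest even digit
+  expect_identical(round(c(0.5, 1.5, 2.5)), c(0, 2, 2))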
+
+ skip_on_cran()
+ skip_on_os("windows")
+
+ compare_dplyr_binding(
+ .input %>%
+ mutate(r = round(x, 1)) %>%
+ collect(),
+ df
+ )
+
+ # Verify that round mode HALF_TO_EVEN, which is what the round() binding uses,
+ # yields results consistent with R...
+ expect_equal(
+ as.vector(
+ call_function(
+ "round",
+ Array$create(df$x),
+ options = list(ndigits = 1L, round_mode = RoundMode$HALF_TO_EVEN)
+ )
+ ),
+ round(df$x, 1)
+ )
+ # ...but that the round mode HALF_TOWARDS_ZERO does not. If the expectation
+ # below fails, it means that the expectation above is not effectively testing
+ # that Arrow is using the HALF_TO_EVEN mode.
+ expect_false(
+ isTRUE(all.equal(
+ as.vector(
+ call_function(
+ "round",
+ Array$create(df$x),
+ options = list(ndigits = 1L, round_mode = RoundMode$HALF_TOWARDS_ZERO)
+ )
+ ),
+ round(df$x, 1)
+ ))
+ )
+})
+
+test_that("log functions", {
+ df <- tibble(x = c(1:10, NA, NA))
+
+ compare_dplyr_binding(
+ .input %>%
+ mutate(y = log(x)) %>%
+ collect(),
+ df
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ mutate(y = log(x, base = exp(1))) %>%
+ collect(),
+ df
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ mutate(y = log(x, base = 2)) %>%
+ collect(),
+ df
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ mutate(y = log(x, base = 10)) %>%
+ collect(),
+ df
+ )
+
+  # test log(x, base = b) where base has length 1
+ compare_dplyr_binding(
+ .input %>%
+ mutate(y = log(x, base = 5)) %>%
+ collect(),
+ df
+ )
+
+  # test log(x, base = b) where base has length != 1
+ expect_error(
+ nse_funcs$log(10, base = 5:6),
+ "base must be a column or a length-1 numeric; other values not supported by Arrow",
+ fixed = TRUE
+ )
+
+  # test log(x) where x has length != 1
+ expect_error(
+ nse_funcs$log(10:11),
+ "x must be a column or a length-1 numeric; other values not supported by Arrow",
+ fixed = TRUE
+ )
+
+  # test log(x, base = b) where base is an Expression
+ compare_dplyr_binding(
+ .input %>%
+ # test cases where base = 1 below
+ filter(x != 1) %>%
+ mutate(
+ y = log(x, base = x),
+ z = log(2, base = x)
+ ) %>%
+ collect(),
+ df
+ )
+
+ # log(1, base = 1) is NaN in both R and Arrow
+ # suppress the R warning because R warns but Arrow does not
+ suppressWarnings(
+ compare_dplyr_binding(
+ .input %>%
+ mutate(y = log(x, base = y)) %>%
+ collect(),
+ tibble(x = 1, y = 1)
+ )
+ )
+
+ # log(n != 1, base = 1) is Inf in R and Arrow
+ compare_dplyr_binding(
+ .input %>%
+ mutate(y = log(x, base = y)) %>%
+ collect(),
+ tibble(x = 10, y = 1)
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ mutate(y = logb(x)) %>%
+ collect(),
+ df
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ mutate(y = log1p(x)) %>%
+ collect(),
+ df
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ mutate(y = log2(x)) %>%
+ collect(),
+ df
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ mutate(y = log10(x)) %>%
+ collect(),
+ df
+ )
+})
+
+test_that("trig functions", {
+ df <- tibble(x = c(seq(from = 0, to = 1, by = 0.1), NA))
+
+ compare_dplyr_binding(
+ .input %>%
+ mutate(y = sin(x)) %>%
+ collect(),
+ df
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ mutate(y = cos(x)) %>%
+ collect(),
+ df
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ mutate(y = tan(x)) %>%
+ collect(),
+ df
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ mutate(y = asin(x)) %>%
+ collect(),
+ df
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ mutate(y = acos(x)) %>%
+ collect(),
+ df
+ )
+})
+
+test_that("arith functions ", {
+ df <- tibble(x = c(1:5, NA))
+
+ compare_dplyr_binding(
+ .input %>%
+ transmute(
+ int_div = x %/% 2,
+ addition = x + 1,
+ multiplication = x * 3,
+ subtraction = x - 5,
+ division = x / 2,
+ power = x^3,
+ modulo = x %% 3
+ ) %>%
+ collect(),
+ df
+ )
+})
diff --git a/src/arrow/r/tests/testthat/test-dplyr-funcs-string.R b/src/arrow/r/tests/testthat/test-dplyr-funcs-string.R
new file mode 100644
index 000000000..5e092f4e3
--- /dev/null
+++ b/src/arrow/r/tests/testthat/test-dplyr-funcs-string.R
@@ -0,0 +1,1399 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+skip_if_not_available("dataset")
+skip_if_not_available("utf8proc")
+
+library(dplyr, warn.conflicts = FALSE)
+library(lubridate)
+library(stringr)
+library(stringi)
+
+test_that("paste, paste0, and str_c", {
+ df <- tibble(
+ v = c("A", "B", "C"),
+ w = c("a", "b", "c"),
+ x = c("d", NA_character_, "f"),
+ y = c(NA_character_, "h", "i"),
+ z = c(1.1, 2.2, NA)
+ )
+ x <- Expression$field_ref("x")
+ y <- Expression$field_ref("y")
+
+ # no NAs in data
+ compare_dplyr_binding(
+ .input %>%
+ transmute(paste(v, w)) %>%
+ collect(),
+ df
+ )
+ compare_dplyr_binding(
+ .input %>%
+ transmute(paste(v, w, sep = "-")) %>%
+ collect(),
+ df
+ )
+ compare_dplyr_binding(
+ .input %>%
+ transmute(paste0(v, w)) %>%
+ collect(),
+ df
+ )
+ compare_dplyr_binding(
+ .input %>%
+ transmute(str_c(v, w)) %>%
+ collect(),
+ df
+ )
+ compare_dplyr_binding(
+ .input %>%
+ transmute(str_c(v, w, sep = "+")) %>%
+ collect(),
+ df
+ )
+
+ # NAs in data
+ compare_dplyr_binding(
+ .input %>%
+ transmute(paste(x, y)) %>%
+ collect(),
+ df
+ )
+ compare_dplyr_binding(
+ .input %>%
+ transmute(paste(x, y, sep = "-")) %>%
+ collect(),
+ df
+ )
+ compare_dplyr_binding(
+ .input %>%
+ transmute(str_c(x, y)) %>%
+ collect(),
+ df
+ )
+
+ # non-character column in dots
+ compare_dplyr_binding(
+ .input %>%
+ transmute(paste0(x, y, z)) %>%
+ collect(),
+ df
+ )
+
+ # literal string in dots
+ compare_dplyr_binding(
+ .input %>%
+ transmute(paste(x, "foo", y)) %>%
+ collect(),
+ df
+ )
+
+ # literal NA in dots
+ compare_dplyr_binding(
+ .input %>%
+ transmute(paste(x, NA, y)) %>%
+ collect(),
+ df
+ )
+
+ # expressions in dots
+ compare_dplyr_binding(
+ .input %>%
+ transmute(paste0(x, toupper(y), as.character(z))) %>%
+ collect(),
+ df
+ )
+
+ # sep is literal NA
+ # errors in paste() (consistent with base::paste())
+ expect_error(
+ nse_funcs$paste(x, y, sep = NA_character_),
+ "Invalid separator"
+ )
+ # emits null in str_c() (consistent with stringr::str_c())
+ compare_dplyr_binding(
+ .input %>%
+ transmute(str_c(x, y, sep = NA_character_)) %>%
+ collect(),
+ df
+ )
+
+  # sep passed in dots to paste0 (which doesn't take a sep argument, so the
+  # "-" is concatenated as just another string, matching base R)
+ compare_dplyr_binding(
+ .input %>%
+ transmute(paste0(x, y, sep = "-")) %>%
+ collect(),
+ df
+ )
+
+ # known differences
+
+ # arrow allows the separator to be an array
+ expect_equal(
+ df %>%
+ Table$create() %>%
+ transmute(result = paste(x, y, sep = w)) %>%
+ collect(),
+ df %>%
+ transmute(result = paste(x, w, y, sep = ""))
+ )
+
+ # expected errors
+
+ # collapse argument not supported
+ expect_error(
+ nse_funcs$paste(x, y, collapse = ""),
+ "collapse"
+ )
+ expect_error(
+ nse_funcs$paste0(x, y, collapse = ""),
+ "collapse"
+ )
+ expect_error(
+ nse_funcs$str_c(x, y, collapse = ""),
+ "collapse"
+ )
+
+ # literal vectors of length != 1 not supported
+ expect_error(
+ nse_funcs$paste(x, character(0), y),
+ "Literal vectors of length != 1 not supported in string concatenation"
+ )
+ expect_error(
+ nse_funcs$paste(x, c(",", ";"), y),
+ "Literal vectors of length != 1 not supported in string concatenation"
+ )
+})
+
+test_that("grepl with ignore.case = FALSE and fixed = TRUE", {
+ df <- tibble(x = c("Foo", "bar"))
+ compare_dplyr_binding(
+ .input %>%
+ filter(grepl("o", x, fixed = TRUE)) %>%
+ collect(),
+ df
+ )
+})
+
+test_that("sub and gsub with ignore.case = FALSE and fixed = TRUE", {
+ df <- tibble(x = c("Foo", "bar"))
+ compare_dplyr_binding(
+ .input %>%
+ transmute(x = sub("Foo", "baz", x, fixed = TRUE)) %>%
+ collect(),
+ df
+ )
+ compare_dplyr_binding(
+ .input %>%
+ transmute(x = gsub("o", "u", x, fixed = TRUE)) %>%
+ collect(),
+ df
+ )
+})
+
+# many of the remaining tests require RE2
+skip_if_not_available("re2")
+
+test_that("grepl", {
+ df <- tibble(x = c("Foo", "bar"))
+
+ for (fixed in c(TRUE, FALSE)) {
+ compare_dplyr_binding(
+ .input %>%
+ filter(grepl("Foo", x, fixed = fixed)) %>%
+ collect(),
+ df
+ )
+ compare_dplyr_binding(
+ .input %>%
+ transmute(x = grepl("^B.+", x, ignore.case = FALSE, fixed = fixed)) %>%
+ collect(),
+ df
+ )
+ compare_dplyr_binding(
+ .input %>%
+ filter(grepl("Foo", x, ignore.case = FALSE, fixed = fixed)) %>%
+ collect(),
+ df
+ )
+ }
+})
+
+test_that("grepl with ignore.case = TRUE and fixed = TRUE", {
+ df <- tibble(x = c("Foo", "bar"))
+
+ # base::grepl() ignores ignore.case = TRUE with a warning when fixed = TRUE,
+ # so we can't use compare_dplyr_binding() for these tests
+ expect_equal(
+ df %>%
+ Table$create() %>%
+ filter(grepl("O", x, ignore.case = TRUE, fixed = TRUE)) %>%
+ collect(),
+ tibble(x = "Foo")
+ )
+ expect_equal(
+ df %>%
+ Table$create() %>%
+      filter(grepl("^B.+", x, ignore.case = TRUE, fixed = TRUE)) %>%
+ collect(),
+ tibble(x = character(0))
+ )
+})
+
+test_that("str_detect", {
+ df <- tibble(x = c("Foo", "bar"))
+
+ compare_dplyr_binding(
+ .input %>%
+ filter(str_detect(x, regex("^F"))) %>%
+ collect(),
+ df
+ )
+ compare_dplyr_binding(
+ .input %>%
+ transmute(x = str_detect(x, regex("^f[A-Z]{2}", ignore_case = TRUE))) %>%
+ collect(),
+ df
+ )
+ compare_dplyr_binding(
+ .input %>%
+ transmute(x = str_detect(x, regex("^f[A-Z]{2}", ignore_case = TRUE), negate = TRUE)) %>%
+ collect(),
+ df
+ )
+ compare_dplyr_binding(
+ .input %>%
+ filter(str_detect(x, fixed("o"))) %>%
+ collect(),
+ df
+ )
+ compare_dplyr_binding(
+ .input %>%
+ filter(str_detect(x, fixed("O"))) %>%
+ collect(),
+ df
+ )
+ compare_dplyr_binding(
+ .input %>%
+ filter(str_detect(x, fixed("O", ignore_case = TRUE))) %>%
+ collect(),
+ df
+ )
+ compare_dplyr_binding(
+ .input %>%
+ filter(str_detect(x, fixed("O", ignore_case = TRUE), negate = TRUE)) %>%
+ collect(),
+ df
+ )
+})
+
+test_that("sub and gsub", {
+ df <- tibble(x = c("Foo", "bar"))
+
+ for (fixed in c(TRUE, FALSE)) {
+ compare_dplyr_binding(
+ .input %>%
+ transmute(x = sub("Foo", "baz", x, fixed = fixed)) %>%
+ collect(),
+ df
+ )
+ compare_dplyr_binding(
+ .input %>%
+ transmute(x = sub("^B.+", "baz", x, ignore.case = FALSE, fixed = fixed)) %>%
+ collect(),
+ df
+ )
+ compare_dplyr_binding(
+ .input %>%
+ transmute(x = sub("Foo", "baz", x, ignore.case = FALSE, fixed = fixed)) %>%
+ collect(),
+ df
+ )
+ }
+})
+
+test_that("sub and gsub with ignore.case = TRUE and fixed = TRUE", {
+ df <- tibble(x = c("Foo", "bar"))
+
+ # base::sub() and base::gsub() ignore ignore.case = TRUE with a warning when
+ # fixed = TRUE, so we can't use compare_dplyr_binding() for these tests
+ expect_equal(
+ df %>%
+ Table$create() %>%
+ transmute(x = sub("O", "u", x, ignore.case = TRUE, fixed = TRUE)) %>%
+ collect(),
+ tibble(x = c("Fuo", "bar"))
+ )
+ expect_equal(
+ df %>%
+ Table$create() %>%
+ transmute(x = gsub("o", "u", x, ignore.case = TRUE, fixed = TRUE)) %>%
+ collect(),
+ tibble(x = c("Fuu", "bar"))
+ )
+ expect_equal(
+ df %>%
+ Table$create() %>%
+ transmute(x = sub("^B.+", "baz", x, ignore.case = TRUE, fixed = TRUE)) %>%
+ collect(),
+ df # unchanged
+ )
+})
+
+test_that("str_replace and str_replace_all", {
+ df <- tibble(x = c("Foo", "bar"))
+
+ compare_dplyr_binding(
+ .input %>%
+ transmute(x = str_replace_all(x, "^F", "baz")) %>%
+ collect(),
+ df
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ transmute(x = str_replace_all(x, regex("^F"), "baz")) %>%
+ collect(),
+ df
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ mutate(x = str_replace(x, "^F[a-z]{2}", "baz")) %>%
+ collect(),
+ df
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ transmute(x = str_replace(x, regex("^f[A-Z]{2}", ignore_case = TRUE), "baz")) %>%
+ collect(),
+ df
+ )
+ compare_dplyr_binding(
+ .input %>%
+ transmute(x = str_replace_all(x, fixed("o"), "u")) %>%
+ collect(),
+ df
+ )
+ compare_dplyr_binding(
+ .input %>%
+ transmute(x = str_replace(x, fixed("O"), "u")) %>%
+ collect(),
+ df
+ )
+ compare_dplyr_binding(
+ .input %>%
+ transmute(x = str_replace(x, fixed("O", ignore_case = TRUE), "u")) %>%
+ collect(),
+ df
+ )
+})
+
+test_that("strsplit and str_split", {
+ df <- tibble(x = c("Foo and bar", "baz and qux and quux"))
+
+ compare_dplyr_binding(
+ .input %>%
+ mutate(x = strsplit(x, "and")) %>%
+ collect(),
+ df,
+ # `ignore_attr = TRUE` because the vctr coming back from arrow (ListArray)
+ # has type information in it, but it's just a bare list from R/dplyr.
+ ignore_attr = TRUE
+ )
+ compare_dplyr_binding(
+ .input %>%
+ mutate(x = strsplit(x, "and.*", fixed = TRUE)) %>%
+ collect(),
+ df,
+ ignore_attr = TRUE
+ )
+ compare_dplyr_binding(
+ .input %>%
+ mutate(x = strsplit(x, " +and +")) %>%
+ collect(),
+ df,
+ ignore_attr = TRUE
+ )
+ compare_dplyr_binding(
+ .input %>%
+ mutate(x = str_split(x, "and")) %>%
+ collect(),
+ df,
+ ignore_attr = TRUE
+ )
+ compare_dplyr_binding(
+ .input %>%
+ mutate(x = str_split(x, "and", n = 2)) %>%
+ collect(),
+ df,
+ ignore_attr = TRUE
+ )
+ compare_dplyr_binding(
+ .input %>%
+ mutate(x = str_split(x, fixed("and"), n = 2)) %>%
+ collect(),
+ df,
+ ignore_attr = TRUE
+ )
+ compare_dplyr_binding(
+ .input %>%
+ mutate(x = str_split(x, regex("and"), n = 2)) %>%
+ collect(),
+ df,
+ ignore_attr = TRUE
+ )
+ compare_dplyr_binding(
+ .input %>%
+ mutate(x = str_split(x, "Foo|bar", n = 2)) %>%
+ collect(),
+ df,
+ ignore_attr = TRUE
+ )
+})
+
+test_that("str_to_lower, str_to_upper, and str_to_title", {
+ df <- tibble(x = c("foo1", " \tB a R\n", "!apACHe aRroW!"))
+ compare_dplyr_binding(
+ .input %>%
+ transmute(
+ x_lower = str_to_lower(x),
+ x_upper = str_to_upper(x),
+ x_title = str_to_title(x)
+ ) %>%
+ collect(),
+ df
+ )
+
+ # Error checking a single function because they all use the same code path.
+ expect_error(
+ nse_funcs$str_to_lower("Apache Arrow", locale = "sp"),
+ "Providing a value for 'locale' other than the default ('en') is not supported by Arrow",
+ fixed = TRUE
+ )
+})
+
+test_that("arrow_*_split_whitespace functions", {
+ # use only ASCII whitespace characters
+ df_ascii <- tibble(x = c("Foo\nand bar", "baz\tand qux and quux"))
+
+ # use only non-ASCII whitespace characters
+ df_utf8 <- tibble(x = c("Foo\u00A0and\u2000bar", "baz\u2006and\u1680qux\u3000and\u2008quux"))
+
+ df_split <- tibble(x = list(c("Foo", "and", "bar"), c("baz", "and", "qux", "and", "quux")))
+
+ # use default option values
+ expect_equal(
+ df_ascii %>%
+ Table$create() %>%
+ mutate(x = arrow_ascii_split_whitespace(x)) %>%
+ collect(),
+ df_split,
+ ignore_attr = TRUE
+ )
+ expect_equal(
+ df_utf8 %>%
+ Table$create() %>%
+ mutate(x = arrow_utf8_split_whitespace(x)) %>%
+ collect(),
+ df_split,
+ ignore_attr = TRUE
+ )
+
+ # specify non-default option values
+ expect_equal(
+ df_ascii %>%
+ Table$create() %>%
+ mutate(
+ x = arrow_ascii_split_whitespace(x, options = list(max_splits = 1, reverse = TRUE))
+ ) %>%
+ collect(),
+ tibble(x = list(c("Foo\nand", "bar"), c("baz\tand qux and", "quux"))),
+ ignore_attr = TRUE
+ )
+ expect_equal(
+ df_utf8 %>%
+ Table$create() %>%
+ mutate(
+ x = arrow_utf8_split_whitespace(x, options = list(max_splits = 1, reverse = TRUE))
+ ) %>%
+ collect(),
+ tibble(x = list(c("Foo\u00A0and", "bar"), c("baz\u2006and\u1680qux\u3000and", "quux"))),
+ ignore_attr = TRUE
+ )
+})
+
+test_that("errors and warnings in string splitting", {
+  # These conditions generate an error, but abandon_ship() catches the error,
+  # issues a warning, and pulls the data into R (if computing on an
+  # InMemoryDataset). Elsewhere we test that abandon_ship() works, so here we
+  # can just call the functions directly.
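+
+  # A hedged sketch of that fallback path on an in-memory Table (the warning
+  # text is matched loosely, since abandon_ship() prefixes the original
+  # message before pulling the data into R):
+  expect_warning(
+    tibble(x = "a and b") %>%
+      Table$create() %>%
+      mutate(y = str_split(x, coll("and"))) %>%
+      collect(),
+    "not supported"
+  )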
+
+ x <- Expression$field_ref("x")
+ expect_error(
+ nse_funcs$str_split(x, fixed("and", ignore_case = TRUE)),
+ "Case-insensitive string splitting not supported by Arrow"
+ )
+ expect_error(
+ nse_funcs$str_split(x, coll("and.?")),
+ "Pattern modifier `coll()` not supported by Arrow",
+ fixed = TRUE
+ )
+ expect_error(
+ nse_funcs$str_split(x, boundary(type = "word")),
+ "Pattern modifier `boundary()` not supported by Arrow",
+ fixed = TRUE
+ )
+ expect_error(
+ nse_funcs$str_split(x, "and", n = 0),
+ "Splitting strings into zero parts not supported by Arrow"
+ )
+
+ # This condition generates a warning
+ expect_warning(
+ nse_funcs$str_split(x, fixed("and"), simplify = TRUE),
+ "Argument 'simplify = TRUE' will be ignored"
+ )
+})
+
+test_that("errors and warnings in string detection and replacement", {
+ x <- Expression$field_ref("x")
+
+ expect_error(
+ nse_funcs$str_detect(x, boundary(type = "character")),
+ "Pattern modifier `boundary()` not supported by Arrow",
+ fixed = TRUE
+ )
+ expect_error(
+ nse_funcs$str_replace_all(x, coll("o", locale = "en"), "ó"),
+ "Pattern modifier `coll()` not supported by Arrow",
+ fixed = TRUE
+ )
+
+ # This condition generates a warning
+ expect_warning(
+ nse_funcs$str_replace_all(x, regex("o", multiline = TRUE), "u"),
+ "Ignoring pattern modifier argument not supported in Arrow: \"multiline\""
+ )
+})
+
+test_that("backreferences in pattern in string detection", {
+ skip("RE2 does not support backreferences in pattern (https://github.com/google/re2/issues/101)")
+ df <- tibble(x = c("Foo", "bar"))
+
+ compare_dplyr_binding(
+ .input %>%
+ filter(str_detect(x, regex("F([aeiou])\\1"))) %>%
+ collect(),
+ df
+ )
+})
+
+test_that("backreferences (substitutions) in string replacement", {
+ df <- tibble(x = c("Foo", "bar"))
+
+ compare_dplyr_binding(
+ .input %>%
+ transmute(desc = sub(
+ "(?:https?|ftp)://([^/\r\n]+)(/[^\r\n]*)?",
+ "path `\\2` on server `\\1`",
+ url
+ )) %>%
+ collect(),
+ tibble(url = "https://arrow.apache.org/docs/r/")
+ )
+ compare_dplyr_binding(
+ .input %>%
+ transmute(x = str_replace(x, "^(\\w)o(.*)", "\\1\\2p")) %>%
+ collect(),
+ df
+ )
+ compare_dplyr_binding(
+ .input %>%
+ transmute(x = str_replace(x, regex("^(\\w)o(.*)", ignore_case = TRUE), "\\1\\2p")) %>%
+ collect(),
+ df
+ )
+})
+
+test_that("edge cases in string detection and replacement", {
+ # in case-insensitive fixed match/replace, test that "\\E" in the search
+ # string and backslashes in the replacement string are interpreted literally.
+ # this test does not use compare_dplyr_binding() because base::sub() and
+ # base::grepl() do not support ignore.case = TRUE when fixed = TRUE.
+ expect_equal(
+ tibble(x = c("\\Q\\e\\D")) %>%
+ Table$create() %>%
+ filter(grepl("\\E", x, ignore.case = TRUE, fixed = TRUE)) %>%
+ collect(),
+ tibble(x = c("\\Q\\e\\D"))
+ )
+ expect_equal(
+ tibble(x = c("\\Q\\e\\D")) %>%
+ Table$create() %>%
+ transmute(x = sub("\\E", "\\L", x, ignore.case = TRUE, fixed = TRUE)) %>%
+ collect(),
+ tibble(x = c("\\Q\\L\\D"))
+ )
+
+ # test that a user's "(?i)" prefix does not break the "(?i)" prefix that's
+ # added in case-insensitive regex match/replace
+ compare_dplyr_binding(
+ .input %>%
+ filter(grepl("(?i)^[abc]{3}$", x, ignore.case = TRUE, fixed = FALSE)) %>%
+ collect(),
+ tibble(x = c("ABC"))
+ )
+ compare_dplyr_binding(
+ .input %>%
+ transmute(x = sub("(?i)^[abc]{3}$", "123", x, ignore.case = TRUE, fixed = FALSE)) %>%
+ collect(),
+ tibble(x = c("ABC"))
+ )
+})
+
+test_that("strptime", {
+ # base::strptime() defaults to local timezone
+ # but arrow's strptime defaults to UTC.
+ # So that tests are consistent, set the local timezone to UTC
+ # TODO: consider reevaluating this workaround after ARROW-12980
+ withr::local_timezone("UTC")
+
+ t_string <- tibble(x = c("2018-10-07 19:04:05", NA))
+ t_stamp <- tibble(x = c(lubridate::ymd_hms("2018-10-07 19:04:05"), NA))
+
+ expect_equal(
+ t_string %>%
+ Table$create() %>%
+ mutate(
+ x = strptime(x)
+ ) %>%
+ collect(),
+ t_stamp,
+ ignore_attr = "tzone"
+ )
+
+ expect_equal(
+ t_string %>%
+ Table$create() %>%
+ mutate(
+ x = strptime(x, format = "%Y-%m-%d %H:%M:%S")
+ ) %>%
+ collect(),
+ t_stamp,
+ ignore_attr = "tzone"
+ )
+
+ expect_equal(
+ t_string %>%
+ Table$create() %>%
+ mutate(
+ x = strptime(x, format = "%Y-%m-%d %H:%M:%S", unit = "ns")
+ ) %>%
+ collect(),
+ t_stamp,
+ ignore_attr = "tzone"
+ )
+
+ expect_equal(
+ t_string %>%
+ Table$create() %>%
+ mutate(
+ x = strptime(x, format = "%Y-%m-%d %H:%M:%S", unit = "s")
+ ) %>%
+ collect(),
+ t_stamp,
+ ignore_attr = "tzone"
+ )
+
+ tstring <- tibble(x = c("08-05-2008", NA))
+ tstamp <- strptime(c("08-05-2008", NA), format = "%m-%d-%Y")
+
+ expect_equal(
+ tstring %>%
+ Table$create() %>%
+ mutate(
+ x = strptime(x, format = "%m-%d-%Y")
+ ) %>%
+ pull(),
+ # R's strptime returns POSIXlt (list type)
+ as.POSIXct(tstamp),
+ ignore_attr = "tzone"
+ )
+})
+
+test_that("errors in strptime", {
+ # Error when tz is passed
+ x <- Expression$field_ref("x")
+ expect_error(
+ nse_funcs$strptime(x, tz = "PDT"),
+ "Time zone argument not supported by Arrow"
+ )
+})
+
+test_that("strftime", {
+ skip_on_os("windows") # https://issues.apache.org/jira/browse/ARROW-13168
+
+ times <- tibble(
+ datetime = c(lubridate::ymd_hms("2018-10-07 19:04:05", tz = "Etc/GMT+6"), NA),
+ date = c(as.Date("2021-01-01"), NA)
+ )
+ formats <- "%a %A %w %d %b %B %m %y %Y %H %I %p %M %z %Z %j %U %W %x %X %% %G %V %u"
+ formats_date <- "%a %A %w %d %b %B %m %y %Y %H %I %p %M %j %U %W %x %X %% %G %V %u"
+
+ compare_dplyr_binding(
+ .input %>%
+ mutate(x = strftime(datetime, format = formats)) %>%
+ collect(),
+ times
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ mutate(x = strftime(date, format = formats_date)) %>%
+ collect(),
+ times
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ mutate(x = strftime(datetime, format = formats, tz = "Pacific/Marquesas")) %>%
+ collect(),
+ times
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ mutate(x = strftime(datetime, format = formats, tz = "EST", usetz = TRUE)) %>%
+ collect(),
+ times
+ )
+
+ withr::with_timezone(
+ "Pacific/Marquesas",
+ {
+ compare_dplyr_binding(
+ .input %>%
+ mutate(
+ x = strftime(datetime, format = formats, tz = "EST"),
+ x_date = strftime(date, format = formats_date, tz = "EST")
+ ) %>%
+ collect(),
+ times
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ mutate(
+ x = strftime(datetime, format = formats),
+ x_date = strftime(date, format = formats_date)
+ ) %>%
+ collect(),
+ times
+ )
+ }
+ )
+
+ # This check is due to differences in the way %c currently works in Arrow and R's strftime.
+ # We can revisit after https://github.com/HowardHinnant/date/issues/704 is resolved.
+ expect_error(
+ times %>%
+ Table$create() %>%
+ mutate(x = strftime(datetime, format = "%c")) %>%
+ collect(),
+ "%c flag is not supported in non-C locales."
+ )
+
+  # The output precision of %S depends on the input timestamp precision.
+  # Timestamps with second precision are formatted as integers, while
+  # millisecond, microsecond, and nanosecond timestamps are formatted as
+  # fixed-point numbers with 3, 6, and 9 decimal places respectively
+  # (which is why the expectation below keeps only the integer part of %S).
+ compare_dplyr_binding(
+ .input %>%
+ mutate(x = strftime(datetime, format = "%S")) %>%
+ transmute(as.double(substr(x, 1, 2))) %>%
+ collect(),
+ times,
+ tolerance = 1e-6
+ )
+})
+
+test_that("format_ISO8601", {
+ skip_on_os("windows") # https://issues.apache.org/jira/browse/ARROW-13168
+ times <- tibble(x = c(lubridate::ymd_hms("2018-10-07 19:04:05", tz = "Etc/GMT+6"), NA))
+
+ compare_dplyr_binding(
+ .input %>%
+ mutate(x = format_ISO8601(x, precision = "ymd", usetz = FALSE)) %>%
+ collect(),
+ times
+ )
+
+ if (getRversion() < "3.5") {
+ # before 3.5, times$x will have no timezone attribute, so Arrow faithfully
+ # errors that there is no timezone to format:
+ expect_error(
+ times %>%
+ Table$create() %>%
+ mutate(x = format_ISO8601(x, precision = "ymd", usetz = TRUE)) %>%
+ collect(),
+ "Timezone not present, cannot convert to string with timezone: %Y-%m-%d%z"
+ )
+
+ # See comment regarding %S flag in strftime tests
+ expect_error(
+ times %>%
+ Table$create() %>%
+ mutate(x = format_ISO8601(x, precision = "ymdhms", usetz = TRUE)) %>%
+ mutate(x = gsub("\\.0*", "", x)) %>%
+ collect(),
+ "Timezone not present, cannot convert to string with timezone: %Y-%m-%dT%H:%M:%S%z"
+ )
+ } else {
+ compare_dplyr_binding(
+ .input %>%
+ mutate(x = format_ISO8601(x, precision = "ymd", usetz = TRUE)) %>%
+ collect(),
+ times
+ )
+
+ # See comment regarding %S flag in strftime tests
+ compare_dplyr_binding(
+ .input %>%
+ mutate(x = format_ISO8601(x, precision = "ymdhms", usetz = TRUE)) %>%
+ mutate(x = gsub("\\.0*", "", x)) %>%
+ collect(),
+ times
+ )
+ }
+
+ # See comment regarding %S flag in strftime tests
+ compare_dplyr_binding(
+ .input %>%
+ mutate(x = format_ISO8601(x, precision = "ymdhms", usetz = FALSE)) %>%
+ mutate(x = gsub("\\.0*", "", x)) %>%
+ collect(),
+ times
+ )
+})
+
+test_that("arrow_find_substring and arrow_find_substring_regex", {
+ df <- tibble(x = c("Foo and Bar", "baz and qux and quux"))
+
+ expect_equal(
+ df %>%
+ Table$create() %>%
+ mutate(x = arrow_find_substring(x, options = list(pattern = "b"))) %>%
+ collect(),
+ tibble(x = c(-1, 0))
+ )
+ expect_equal(
+ df %>%
+ Table$create() %>%
+ mutate(x = arrow_find_substring(
+ x,
+ options = list(pattern = "b", ignore_case = TRUE)
+ )) %>%
+ collect(),
+ tibble(x = c(8, 0))
+ )
+ expect_equal(
+ df %>%
+ Table$create() %>%
+ mutate(x = arrow_find_substring_regex(
+ x,
+ options = list(pattern = "^[fb]")
+ )) %>%
+ collect(),
+ tibble(x = c(-1, 0))
+ )
+ expect_equal(
+ df %>%
+ Table$create() %>%
+ mutate(x = arrow_find_substring_regex(
+ x,
+ options = list(pattern = "[AEIOU]", ignore_case = TRUE)
+ )) %>%
+ collect(),
+ tibble(x = c(1, 1))
+ )
+})
+
+test_that("stri_reverse and arrow_ascii_reverse functions", {
+ df_ascii <- tibble(x = c("Foo\nand bar", "baz\tand qux and quux"))
+
+ df_utf8 <- tibble(x = c("Foo\u00A0\u0061nd\u00A0bar", "\u0062az\u00A0and\u00A0qux\u3000and\u00A0quux"))
+
+ compare_dplyr_binding(
+ .input %>%
+ mutate(x = stri_reverse(x)) %>%
+ collect(),
+ df_utf8
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ mutate(x = stri_reverse(x)) %>%
+ collect(),
+ df_ascii
+ )
+
+ expect_equal(
+ df_ascii %>%
+ Table$create() %>%
+ mutate(x = arrow_ascii_reverse(x)) %>%
+ collect(),
+ tibble(x = c("rab dna\nooF", "xuuq dna xuq dna\tzab"))
+ )
+
+ expect_error(
+ df_utf8 %>%
+ Table$create() %>%
+ mutate(x = arrow_ascii_reverse(x)) %>%
+ collect(),
+ "Invalid: Non-ASCII sequence in input"
+ )
+})
+
+test_that("str_like", {
+ df <- tibble(x = c("Foo and bar", "baz and qux and quux"))
+
+  # TODO: after a new version of stringr with str_like has been released,
+  # update all these tests to use compare_dplyr_binding()
+
+ # No match - entire string
+ expect_equal(
+ df %>%
+ Table$create() %>%
+ mutate(x = str_like(x, "baz")) %>%
+ collect(),
+ tibble(x = c(FALSE, FALSE))
+ )
+
+ # Match - entire string
+ expect_equal(
+ df %>%
+ Table$create() %>%
+ mutate(x = str_like(x, "Foo and bar")) %>%
+ collect(),
+ tibble(x = c(TRUE, FALSE))
+ )
+
+ # Wildcard
+ expect_equal(
+ df %>%
+ Table$create() %>%
+ mutate(x = str_like(x, "f%", ignore_case = TRUE)) %>%
+ collect(),
+ tibble(x = c(TRUE, FALSE))
+ )
+
+  # Case-sensitive (ignore_case = FALSE), so the lowercase pattern does not match
+ expect_equal(
+ df %>%
+ Table$create() %>%
+ mutate(x = str_like(x, "f%", ignore_case = FALSE)) %>%
+ collect(),
+ tibble(x = c(FALSE, FALSE))
+ )
+
+ # Single character
+ expect_equal(
+ df %>%
+ Table$create() %>%
+ mutate(x = str_like(x, "_a%")) %>%
+ collect(),
+ tibble(x = c(FALSE, TRUE))
+ )
+
+ # This will give an error until a new version of stringr with str_like has been released
+ skip_if_not(packageVersion("stringr") > "1.4.0")
+ compare_dplyr_binding(
+ .input %>%
+ mutate(x = str_like(x, "%baz%")) %>%
+ collect(),
+ df
+ )
+})
+
+test_that("str_pad", {
+ df <- tibble(x = c("Foo and bar", "baz and qux and quux"))
+
+ compare_dplyr_binding(
+ .input %>%
+ mutate(x = str_pad(x, width = 31)) %>%
+ collect(),
+ df
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ mutate(x = str_pad(x, width = 30, side = "right")) %>%
+ collect(),
+ df
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ mutate(x = str_pad(x, width = 31, side = "left", pad = "+")) %>%
+ collect(),
+ df
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ mutate(x = str_pad(x, width = 10, side = "left", pad = "+")) %>%
+ collect(),
+ df
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ mutate(x = str_pad(x, width = 31, side = "both")) %>%
+ collect(),
+ df
+ )
+})
+
+test_that("substr", {
+ df <- tibble(x = "Apache Arrow")
+
+ compare_dplyr_binding(
+ .input %>%
+ mutate(y = substr(x, 1, 6)) %>%
+ collect(),
+ df
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ mutate(y = substr(x, 0, 6)) %>%
+ collect(),
+ df
+ )
+
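+  # the cases below exercise edge cases: base R substr() clamps start values
+  # below 1 to 1 and returns an empty string when start > stop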
+ compare_dplyr_binding(
+ .input %>%
+ mutate(y = substr(x, -1, 6)) %>%
+ collect(),
+ df
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ mutate(y = substr(x, 6, 1)) %>%
+ collect(),
+ df
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ mutate(y = substr(x, -1, -2)) %>%
+ collect(),
+ df
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ mutate(y = substr(x, 9, 6)) %>%
+ collect(),
+ df
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ mutate(y = substr(x, 8, 12)) %>%
+ collect(),
+ df
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ mutate(y = substr(x, -5, -1)) %>%
+ collect(),
+ df
+ )
+
+ expect_error(
+ nse_funcs$substr("Apache Arrow", c(1, 2), 3),
+ "`start` must be length 1 - other lengths are not supported in Arrow"
+ )
+
+ expect_error(
+ nse_funcs$substr("Apache Arrow", 1, c(2, 3)),
+ "`stop` must be length 1 - other lengths are not supported in Arrow"
+ )
+})
+
+test_that("substring", {
+ # nse_funcs$substring just calls nse_funcs$substr, tested extensively above
+ df <- tibble(x = "Apache Arrow")
+
+ compare_dplyr_binding(
+ .input %>%
+ mutate(y = substring(x, 1, 6)) %>%
+ collect(),
+ df
+ )
+})
+
+test_that("str_sub", {
+ df <- tibble(x = "Apache Arrow")
+
+ compare_dplyr_binding(
+ .input %>%
+ mutate(y = str_sub(x, 1, 6)) %>%
+ collect(),
+ df
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ mutate(y = str_sub(x, 0, 6)) %>%
+ collect(),
+ df
+ )
+
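+  # unlike substr(), str_sub() counts negative positions from the end of the
+  # string, so the cases below cover both negative starts and negative ends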
+ compare_dplyr_binding(
+ .input %>%
+ mutate(y = str_sub(x, -1, 6)) %>%
+ collect(),
+ df
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ mutate(y = str_sub(x, 6, 1)) %>%
+ collect(),
+ df
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ mutate(y = str_sub(x, -1, -2)) %>%
+ collect(),
+ df
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ mutate(y = str_sub(x, -1, 3)) %>%
+ collect(),
+ df
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ mutate(y = str_sub(x, 9, 6)) %>%
+ collect(),
+ df
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ mutate(y = str_sub(x, 8, 12)) %>%
+ collect(),
+ df
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ mutate(y = str_sub(x, -5, -1)) %>%
+ collect(),
+ df
+ )
+
+ expect_error(
+ nse_funcs$str_sub("Apache Arrow", c(1, 2), 3),
+ "`start` must be length 1 - other lengths are not supported in Arrow"
+ )
+
+ expect_error(
+ nse_funcs$str_sub("Apache Arrow", 1, c(2, 3)),
+ "`end` must be length 1 - other lengths are not supported in Arrow"
+ )
+})
+
+test_that("str_starts, str_ends, startsWith, endsWith", {
+ df <- tibble(x = c("Foo", "bar", "baz", "qux"))
+
+ compare_dplyr_binding(
+ .input %>%
+ filter(str_starts(x, "b.*")) %>%
+ collect(),
+ df
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ filter(str_starts(x, "b.*", negate = TRUE)) %>%
+ collect(),
+ df
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ filter(str_starts(x, fixed("b.*"))) %>%
+ collect(),
+ df
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ filter(str_starts(x, fixed("b"))) %>%
+ collect(),
+ df
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ filter(str_ends(x, "r")) %>%
+ collect(),
+ df
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ filter(str_ends(x, "r", negate = TRUE)) %>%
+ collect(),
+ df
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ filter(str_ends(x, fixed("r$"))) %>%
+ collect(),
+ df
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ filter(str_ends(x, fixed("r"))) %>%
+ collect(),
+ df
+ )
+
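+  # base R startsWith()/endsWith() match literally (no regex), so "b.*" and
+  # "r$" below are treated as plain strings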
+ compare_dplyr_binding(
+ .input %>%
+ filter(startsWith(x, "b")) %>%
+ collect(),
+ df
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ filter(endsWith(x, "r")) %>%
+ collect(),
+ df
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ filter(startsWith(x, "b.*")) %>%
+ collect(),
+ df
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ filter(endsWith(x, "r$")) %>%
+ collect(),
+ df
+ )
+})
+
+test_that("str_count", {
+ df <- tibble(
+ cities = c("Kolkata", "Dar es Salaam", "Tel Aviv", "San Antonio", "Cluj Napoca", "Bern", "Bogota"),
+ dots = c("a.", "...", ".a.a", "a..a.", "ab...", "dse....", ".f..d..")
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ mutate(a_count = str_count(cities, pattern = "a")) %>%
+ collect(),
+ df
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+      mutate(d_count = str_count(cities, pattern = "d")) %>%
+ collect(),
+ df
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+      mutate(d_count = str_count(cities,
+ pattern = regex("d", ignore_case = TRUE)
+ )) %>%
+ collect(),
+ df
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+      mutate(u_count = str_count(cities, pattern = "u")) %>%
+ collect(),
+ df
+ )
+
+ # nse_funcs$str_count() is not vectorised over pattern
+ compare_dplyr_binding(
+ .input %>%
+ mutate(let_count = str_count(cities, pattern = c("a", "b", "e", "g", "p", "n", "s"))) %>%
+ collect(),
+ df,
+ warning = TRUE
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ mutate(dots_count = str_count(dots, ".")) %>%
+ collect(),
+ df
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ mutate(dots_count = str_count(dots, fixed("."))) %>%
+ collect(),
+ df
+ )
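+
+  # hypothetical extra check (not in the original set): an escaped dot in the
+  # regex should count the same matches as fixed(".") above
+  compare_dplyr_binding(
+    .input %>%
+      mutate(dots_count = str_count(dots, "\\.")) %>%
+      collect(),
+    df
+  )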
+})
diff --git a/src/arrow/r/tests/testthat/test-dplyr-funcs-type.R b/src/arrow/r/tests/testthat/test-dplyr-funcs-type.R
new file mode 100644
index 000000000..859dc14b9
--- /dev/null
+++ b/src/arrow/r/tests/testthat/test-dplyr-funcs-type.R
@@ -0,0 +1,627 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+skip_if_not_available("dataset")
+
+library(dplyr, warn.conflicts = FALSE)
+suppressPackageStartupMessages(library(bit64))
+
+
+tbl <- example_data
+
+test_that("explicit type conversions with cast()", {
+ num_int32 <- 12L
+ num_int64 <- bit64::as.integer64(10)
+
+ int_types <- c(int8(), int16(), int32(), int64())
+ uint_types <- c(uint8(), uint16(), uint32(), uint64())
+ float_types <- c(float32(), float64())
+
+ types <- c(
+ int_types,
+ uint_types,
+ float_types,
+ double(), # not actually a type, a base R function but should be alias for float64
+ string()
+ )
+
+ for (type in types) {
+ expect_type_equal(
+ object = {
+ t1 <- Table$create(x = num_int32) %>%
+ transmute(x = cast(x, type)) %>%
+ compute()
+ t1$schema[[1]]$type
+ },
+ as_type(type)
+ )
+ expect_type_equal(
+ object = {
+ t1 <- Table$create(x = num_int64) %>%
+ transmute(x = cast(x, type)) %>%
+ compute()
+ t1$schema[[1]]$type
+ },
+ as_type(type)
+ )
+ }
+
+ # Arrow errors when truncating floats...
+ expect_error(
+ expect_type_equal(
+ object = {
+ t1 <- Table$create(pi = pi) %>%
+ transmute(three = cast(pi, int32())) %>%
+ compute()
+ t1$schema[[1]]$type
+ },
+ int32()
+ ),
+ "truncated"
+ )
+
+ # ... unless safe = FALSE (or allow_float_truncate = TRUE)
+ expect_type_equal(
+ object = {
+ t1 <- Table$create(pi = pi) %>%
+ transmute(three = cast(pi, int32(), safe = FALSE)) %>%
+ compute()
+ t1$schema[[1]]$type
+ },
+ int32()
+ )
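+
+  # a minimal sketch of the allow_float_truncate alternative mentioned above;
+  # assumes cast() forwards it to the underlying CastOptions like safe
+  expect_type_equal(
+    object = {
+      t1 <- Table$create(pi = pi) %>%
+        transmute(three = cast(pi, int32(), allow_float_truncate = TRUE)) %>%
+        compute()
+      t1$schema[[1]]$type
+    },
+    int32()
+  )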
+})
+
+test_that("explicit type conversions with as.*()", {
+ library(bit64)
+ compare_dplyr_binding(
+ .input %>%
+ transmute(
+ int2chr = as.character(int),
+ int2dbl = as.double(int),
+ int2int = as.integer(int),
+ int2num = as.numeric(int),
+ dbl2chr = as.character(dbl),
+ dbl2dbl = as.double(dbl),
+ dbl2int = as.integer(dbl),
+ dbl2num = as.numeric(dbl),
+ ) %>%
+ collect(),
+ tbl
+ )
+ compare_dplyr_binding(
+ .input %>%
+ transmute(
+ chr2chr = as.character(chr),
+ chr2dbl = as.double(chr),
+ chr2int = as.integer(chr),
+ chr2num = as.numeric(chr)
+ ) %>%
+ collect(),
+ tibble(chr = c("1", "2", "3"))
+ )
+ compare_dplyr_binding(
+ .input %>%
+ transmute(
+ chr2i64 = as.integer64(chr),
+ dbl2i64 = as.integer64(dbl),
+ i642i64 = as.integer64(i64),
+ ) %>%
+ collect(),
+ tibble(chr = "10000000000", dbl = 10000000000, i64 = as.integer64(1e10))
+ )
+ compare_dplyr_binding(
+ .input %>%
+ transmute(
+ chr2lgl = as.logical(chr),
+ dbl2lgl = as.logical(dbl),
+ int2lgl = as.logical(int)
+ ) %>%
+ collect(),
+ tibble(
+ chr = c("TRUE", "FALSE", "true", "false"),
+ dbl = c(1, 0, -99, 0),
+ int = c(1L, 0L, -99L, 0L)
+ )
+ )
+ compare_dplyr_binding(
+ .input %>%
+ transmute(
+ dbl2chr = as.character(dbl),
+ dbl2dbl = as.double(dbl),
+ dbl2int = as.integer(dbl),
+ dbl2lgl = as.logical(dbl),
+ int2chr = as.character(int),
+ int2dbl = as.double(int),
+ int2int = as.integer(int),
+ int2lgl = as.logical(int),
+ lgl2chr = as.character(lgl), # Arrow returns "true", "false" here ...
+ lgl2dbl = as.double(lgl),
+ lgl2int = as.integer(lgl),
+ lgl2lgl = as.logical(lgl)
+ ) %>%
+ collect() %>%
+      # toupper() must be applied *after* collect(); doing it in Arrow would
+      # require skipping this test when utf8proc is not available
+      mutate(lgl2chr = toupper(lgl2chr)), # ... but we need "TRUE", "FALSE"
+ tibble(
+ dbl = c(1, 0, NA_real_),
+ int = c(1L, 0L, NA_integer_),
+ lgl = c(TRUE, FALSE, NA)
+ )
+ )
+})
+
+test_that("is.finite(), is.infinite(), is.nan()", {
+ df <- tibble(x = c(
+ -4.94065645841246544e-324, 1.79769313486231570e+308, 0,
+ NA_real_, NaN, Inf, -Inf
+ ))
+ compare_dplyr_binding(
+ .input %>%
+ transmute(
+ is_fin = is.finite(x),
+ is_inf = is.infinite(x)
+ ) %>%
+ collect(),
+ df
+ )
+ # is.nan() evaluates to FALSE on NA_real_ (ARROW-12850)
+ compare_dplyr_binding(
+ .input %>%
+ transmute(
+ is_nan = is.nan(x)
+ ) %>%
+ collect(),
+ df
+ )
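+
+  # direct check of the base R semantics matched above: is.nan() is FALSE
+  # for NA_real_ but TRUE for NaN
+  expect_identical(is.nan(c(NA_real_, NaN)), c(FALSE, TRUE))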
+})
+
+test_that("is.na() evaluates to TRUE on NaN (ARROW-12055)", {
+ df <- tibble(x = c(1.1, 2.2, NA_real_, 4.4, NaN, 6.6, 7.7))
+ compare_dplyr_binding(
+ .input %>%
+ transmute(
+ is_na = is.na(x)
+ ) %>%
+ collect(),
+ df
+ )
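+
+  # direct check of the base R semantics matched above: is.na() is TRUE for
+  # both NA_real_ and NaN
+  expect_identical(is.na(c(NA_real_, NaN)), c(TRUE, TRUE))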
+})
+
+test_that("type checks with is() giving Arrow types", {
+ # with class2=DataType
+ expect_equal(
+ Table$create(
+ i32 = Array$create(1, int32()),
+ dec = Array$create(pi)$cast(decimal(3, 2)),
+ f64 = Array$create(1.1, float64()),
+ str = Array$create("a", arrow::string())
+ ) %>% transmute(
+ i32_is_i32 = is(i32, int32()),
+ i32_is_dec = is(i32, decimal(3, 2)),
+ i32_is_i64 = is(i32, float64()),
+ i32_is_str = is(i32, arrow::string()),
+ dec_is_i32 = is(dec, int32()),
+ dec_is_dec = is(dec, decimal(3, 2)),
+ dec_is_i64 = is(dec, float64()),
+ dec_is_str = is(dec, arrow::string()),
+ f64_is_i32 = is(f64, int32()),
+ f64_is_dec = is(f64, decimal(3, 2)),
+ f64_is_i64 = is(f64, float64()),
+ f64_is_str = is(f64, arrow::string()),
+ str_is_i32 = is(str, int32()),
+ str_is_dec = is(str, decimal(3, 2)),
+ str_is_i64 = is(str, float64()),
+ str_is_str = is(str, arrow::string())
+ ) %>%
+ collect() %>%
+ t() %>%
+ as.vector(),
+ c(
+ TRUE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, TRUE,
+ FALSE, FALSE, FALSE, FALSE, TRUE
+ )
+ )
+ # with class2=string
+ expect_equal(
+ Table$create(
+ i32 = Array$create(1, int32()),
+ f64 = Array$create(1.1, float64()),
+ str = Array$create("a", arrow::string())
+ ) %>% transmute(
+ i32_is_i32 = is(i32, "int32"),
+ i32_is_i64 = is(i32, "double"),
+ i32_is_str = is(i32, "string"),
+ f64_is_i32 = is(f64, "int32"),
+ f64_is_i64 = is(f64, "double"),
+ f64_is_str = is(f64, "string"),
+ str_is_i32 = is(str, "int32"),
+ str_is_i64 = is(str, "double"),
+ str_is_str = is(str, "string")
+ ) %>%
+ collect() %>%
+ t() %>%
+ as.vector(),
+ c(TRUE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, TRUE)
+ )
+ # with class2=string alias
+ expect_equal(
+ Table$create(
+ f16 = Array$create(NA_real_, halffloat()),
+ f32 = Array$create(1.1, float()),
+ f64 = Array$create(2.2, float64()),
+ lgl = Array$create(TRUE, bool()),
+ str = Array$create("a", arrow::string())
+ ) %>% transmute(
+ f16_is_f16 = is(f16, "float16"),
+ f16_is_f32 = is(f16, "float32"),
+ f16_is_f64 = is(f16, "float64"),
+ f16_is_lgl = is(f16, "boolean"),
+ f16_is_str = is(f16, "utf8"),
+ f32_is_f16 = is(f32, "float16"),
+ f32_is_f32 = is(f32, "float32"),
+ f32_is_f64 = is(f32, "float64"),
+ f32_is_lgl = is(f32, "boolean"),
+ f32_is_str = is(f32, "utf8"),
+ f64_is_f16 = is(f64, "float16"),
+ f64_is_f32 = is(f64, "float32"),
+ f64_is_f64 = is(f64, "float64"),
+ f64_is_lgl = is(f64, "boolean"),
+ f64_is_str = is(f64, "utf8"),
+ lgl_is_f16 = is(lgl, "float16"),
+ lgl_is_f32 = is(lgl, "float32"),
+ lgl_is_f64 = is(lgl, "float64"),
+ lgl_is_lgl = is(lgl, "boolean"),
+ lgl_is_str = is(lgl, "utf8"),
+ str_is_f16 = is(str, "float16"),
+ str_is_f32 = is(str, "float32"),
+ str_is_f64 = is(str, "float64"),
+ str_is_lgl = is(str, "boolean"),
+ str_is_str = is(str, "utf8")
+ ) %>%
+ collect() %>%
+ t() %>%
+ as.vector(),
+ c(
+ TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE,
+ FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE,
+ FALSE, FALSE, TRUE
+ )
+ )
+})
+
+test_that("type checks with is() giving R types", {
+ library(bit64)
+ compare_dplyr_binding(
+ .input %>%
+ transmute(
+ chr_is_chr = is(chr, "character"),
+ chr_is_fct = is(chr, "factor"),
+ chr_is_int = is(chr, "integer"),
+ chr_is_i64 = is(chr, "integer64"),
+ chr_is_lst = is(chr, "list"),
+ chr_is_lgl = is(chr, "logical"),
+ chr_is_num = is(chr, "numeric"),
+ dbl_is_chr = is(dbl, "character"),
+ dbl_is_fct = is(dbl, "factor"),
+ dbl_is_int = is(dbl, "integer"),
+ dbl_is_i64 = is(dbl, "integer64"),
+ dbl_is_lst = is(dbl, "list"),
+ dbl_is_lgl = is(dbl, "logical"),
+ dbl_is_num = is(dbl, "numeric"),
+ fct_is_chr = is(fct, "character"),
+ fct_is_fct = is(fct, "factor"),
+ fct_is_int = is(fct, "integer"),
+ fct_is_i64 = is(fct, "integer64"),
+ fct_is_lst = is(fct, "list"),
+ fct_is_lgl = is(fct, "logical"),
+ fct_is_num = is(fct, "numeric"),
+ int_is_chr = is(int, "character"),
+ int_is_fct = is(int, "factor"),
+ int_is_int = is(int, "integer"),
+ int_is_i64 = is(int, "integer64"),
+ int_is_lst = is(int, "list"),
+ int_is_lgl = is(int, "logical"),
+ int_is_num = is(int, "numeric"),
+ lgl_is_chr = is(lgl, "character"),
+ lgl_is_fct = is(lgl, "factor"),
+ lgl_is_int = is(lgl, "integer"),
+ lgl_is_i64 = is(lgl, "integer64"),
+ lgl_is_lst = is(lgl, "list"),
+ lgl_is_lgl = is(lgl, "logical"),
+ lgl_is_num = is(lgl, "numeric")
+ ) %>%
+ collect(),
+ tbl
+ )
+ compare_dplyr_binding(
+ .input %>%
+ transmute(
+ i64_is_chr = is(i64, "character"),
+ i64_is_fct = is(i64, "factor"),
+ # we want Arrow to return TRUE, but bit64 returns FALSE
+ # i64_is_int = is(i64, "integer"),
+ i64_is_i64 = is(i64, "integer64"),
+ i64_is_lst = is(i64, "list"),
+ i64_is_lgl = is(i64, "logical"),
+ # we want Arrow to return TRUE, but bit64 returns FALSE
+ # i64_is_num = is(i64, "numeric"),
+ lst_is_chr = is(lst, "character"),
+ lst_is_fct = is(lst, "factor"),
+ lst_is_int = is(lst, "integer"),
+ lst_is_i64 = is(lst, "integer64"),
+ lst_is_lst = is(lst, "list"),
+ lst_is_lgl = is(lst, "logical"),
+ lst_is_num = is(lst, "numeric")
+ ) %>%
+ collect(),
+ tibble(
+ i64 = as.integer64(1:3),
+ lst = list(c("a", "b"), c("d", "e"), c("f", "g"))
+ )
+ )
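+
+  # direct illustration of the bit64 behavior noted in the comments above:
+  # is() treats integer64 as neither integer nor numeric
+  expect_false(is(as.integer64(1), "integer"))
+  expect_false(is(as.integer64(1), "numeric"))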
+})
+
+test_that("type checks with is.*()", {
+ library(bit64)
+ compare_dplyr_binding(
+ .input %>%
+ transmute(
+ chr_is_chr = is.character(chr),
+ chr_is_dbl = is.double(chr),
+ chr_is_fct = is.factor(chr),
+ chr_is_int = is.integer(chr),
+ chr_is_i64 = is.integer64(chr),
+ chr_is_lst = is.list(chr),
+ chr_is_lgl = is.logical(chr),
+ chr_is_num = is.numeric(chr),
+ dbl_is_chr = is.character(dbl),
+ dbl_is_dbl = is.double(dbl),
+ dbl_is_fct = is.factor(dbl),
+ dbl_is_int = is.integer(dbl),
+ dbl_is_i64 = is.integer64(dbl),
+ dbl_is_lst = is.list(dbl),
+ dbl_is_lgl = is.logical(dbl),
+ dbl_is_num = is.numeric(dbl),
+ fct_is_chr = is.character(fct),
+ fct_is_dbl = is.double(fct),
+ fct_is_fct = is.factor(fct),
+ fct_is_int = is.integer(fct),
+ fct_is_i64 = is.integer64(fct),
+ fct_is_lst = is.list(fct),
+ fct_is_lgl = is.logical(fct),
+ fct_is_num = is.numeric(fct),
+ int_is_chr = is.character(int),
+ int_is_dbl = is.double(int),
+ int_is_fct = is.factor(int),
+ int_is_int = is.integer(int),
+ int_is_i64 = is.integer64(int),
+ int_is_lst = is.list(int),
+ int_is_lgl = is.logical(int),
+ int_is_num = is.numeric(int),
+ lgl_is_chr = is.character(lgl),
+ lgl_is_dbl = is.double(lgl),
+ lgl_is_fct = is.factor(lgl),
+ lgl_is_int = is.integer(lgl),
+ lgl_is_i64 = is.integer64(lgl),
+ lgl_is_lst = is.list(lgl),
+ lgl_is_lgl = is.logical(lgl),
+ lgl_is_num = is.numeric(lgl)
+ ) %>%
+ collect(),
+ tbl
+ )
+ compare_dplyr_binding(
+ .input %>%
+ transmute(
+ i64_is_chr = is.character(i64),
+ # TODO: investigate why this is not matching when testthat runs it
+ # i64_is_dbl = is.double(i64),
+ i64_is_fct = is.factor(i64),
+ # we want Arrow to return TRUE, but bit64 returns FALSE
+ # i64_is_int = is.integer(i64),
+ i64_is_i64 = is.integer64(i64),
+ i64_is_lst = is.list(i64),
+ i64_is_lgl = is.logical(i64),
+ i64_is_num = is.numeric(i64),
+ lst_is_chr = is.character(lst),
+ lst_is_dbl = is.double(lst),
+ lst_is_fct = is.factor(lst),
+ lst_is_int = is.integer(lst),
+ lst_is_i64 = is.integer64(lst),
+ lst_is_lst = is.list(lst),
+ lst_is_lgl = is.logical(lst),
+ lst_is_num = is.numeric(lst)
+ ) %>%
+ collect(),
+ tibble(
+ i64 = as.integer64(1:3),
+ lst = list(c("a", "b"), c("d", "e"), c("f", "g"))
+ )
+ )
+})
+
+test_that("type checks with is_*()", {
+ library(rlang, warn.conflicts = FALSE)
+ compare_dplyr_binding(
+ .input %>%
+ transmute(
+ chr_is_chr = is_character(chr),
+ chr_is_dbl = is_double(chr),
+ chr_is_int = is_integer(chr),
+ chr_is_lst = is_list(chr),
+ chr_is_lgl = is_logical(chr),
+ dbl_is_chr = is_character(dbl),
+ dbl_is_dbl = is_double(dbl),
+ dbl_is_int = is_integer(dbl),
+ dbl_is_lst = is_list(dbl),
+ dbl_is_lgl = is_logical(dbl),
+ int_is_chr = is_character(int),
+ int_is_dbl = is_double(int),
+ int_is_int = is_integer(int),
+ int_is_lst = is_list(int),
+ int_is_lgl = is_logical(int),
+ lgl_is_chr = is_character(lgl),
+ lgl_is_dbl = is_double(lgl),
+ lgl_is_int = is_integer(lgl),
+ lgl_is_lst = is_list(lgl),
+ lgl_is_lgl = is_logical(lgl)
+ ) %>%
+ collect(),
+ tbl
+ )
+})
+
+test_that("type checks on expressions", {
+ compare_dplyr_binding(
+ .input %>%
+ transmute(
+ a = is.character(as.character(int)),
+ b = is.integer(as.character(int)),
+ c = is.integer(int + int),
+ d = is.double(int + dbl),
+ e = is.logical(dbl > pi)
+ ) %>%
+ collect(),
+ tbl
+ )
+
+ # the code in the expectation below depends on RE2
+ skip_if_not_available("re2")
+
+ compare_dplyr_binding(
+ .input %>%
+ transmute(
+ a = is.logical(grepl("[def]", chr))
+ ) %>%
+ collect(),
+ tbl
+ )
+})
+
+test_that("type checks on R scalar literals", {
+ compare_dplyr_binding(
+ .input %>%
+ transmute(
+ chr_is_chr = is.character("foo"),
+ int_is_chr = is.character(42L),
+ int_is_int = is.integer(42L),
+ chr_is_int = is.integer("foo"),
+ dbl_is_num = is.numeric(3.14159),
+ int_is_num = is.numeric(42L),
+ chr_is_num = is.numeric("foo"),
+ dbl_is_dbl = is.double(3.14159),
+ chr_is_dbl = is.double("foo"),
+ lgl_is_lgl = is.logical(TRUE),
+ chr_is_lgl = is.logical("foo"),
+ fct_is_fct = is.factor(factor("foo", levels = c("foo", "bar", "baz"))),
+ chr_is_fct = is.factor("foo"),
+ lst_is_lst = is.list(list(c(a = "foo", b = "bar"))),
+ chr_is_lst = is.list("foo")
+ ) %>%
+ collect(),
+ tbl
+ )
+})
+
+test_that("as.factor()/dictionary_encode()", {
+ skip("ARROW-12632: ExecuteScalarExpression cannot Execute non-scalar expression")
+ df1 <- tibble(x = c("C", "D", "B", NA, "D", "B", "S", "A", "B", "Z", "B"))
+ df2 <- tibble(x = c(5, 5, 5, NA, 2, 3, 6, 8))
+
+ compare_dplyr_binding(
+ .input %>%
+ transmute(x = as.factor(x)) %>%
+ collect(),
+ df1
+ )
+
+ expect_warning(
+ compare_dplyr_binding(
+ .input %>%
+ transmute(x = as.factor(x)) %>%
+ collect(),
+ df2
+ ),
+ "Coercing dictionary values to R character factor levels"
+ )
+
+ # dictionary values with default null encoding behavior ("mask") omits
+ # nulls from the dictionary values
+ expect_equal(
+ object = {
+ rb1 <- df1 %>%
+ record_batch() %>%
+ transmute(x = dictionary_encode(x)) %>%
+ compute()
+ dict <- rb1$x$dictionary()
+ as.vector(dict$Take(dict$SortIndices()))
+ },
+ sort(unique(df1$x), na.last = NA)
+ )
+
+ # dictionary values with "encode" null encoding behavior includes nulls in
+ # the dictionary values
+ expect_equal(
+ object = {
+ rb1 <- df1 %>%
+ record_batch() %>%
+ transmute(x = dictionary_encode(x, null_encoding_behavior = "encode")) %>%
+ compute()
+ dict <- rb1$x$dictionary()
+ as.vector(dict$Take(dict$SortIndices()))
+ },
+ sort(unique(df1$x), na.last = TRUE)
+ )
+})
+
+test_that("bad explicit type conversions with as.*()", {
+ # Arrow returns lowercase "true", "false" (instead of "TRUE", "FALSE" like R)
+ expect_error(
+ compare_dplyr_binding(
+ .input %>%
+ transmute(lgl2chr = as.character(lgl)) %>%
+ collect(),
+ tibble(lgl = c(TRUE, FALSE, NA))
+ )
+ )
+
+ # Arrow fails to parse these strings as numbers (instead of returning NAs with
+ # a warning like R does)
+ expect_error(
+ expect_warning(
+ compare_dplyr_binding(
+ .input %>%
+ transmute(chr2num = as.numeric(chr)) %>%
+ collect(),
+ tibble(chr = c("l.O", "S.S", ""))
+ )
+ )
+ )
+
+ # Arrow fails to parse these strings as Booleans (instead of returning NAs
+ # like R does)
+ expect_error(
+ compare_dplyr_binding(
+ .input %>%
+ transmute(chr2lgl = as.logical(chr)) %>%
+ collect(),
+ tibble(chr = c("TRU", "FAX", ""))
+ )
+ )
+})
diff --git a/src/arrow/r/tests/testthat/test-dplyr-group-by.R b/src/arrow/r/tests/testthat/test-dplyr-group-by.R
new file mode 100644
index 000000000..7cfcfb5c9
--- /dev/null
+++ b/src/arrow/r/tests/testthat/test-dplyr-group-by.R
@@ -0,0 +1,158 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+skip_if_not_available("dataset")
+
+library(dplyr, warn.conflicts = FALSE)
+library(stringr)
+
+tbl <- example_data
+
+test_that("group_by groupings are recorded", {
+ compare_dplyr_binding(
+ .input %>%
+ group_by(chr) %>%
+ select(int, chr) %>%
+ filter(int > 5) %>%
+ collect(),
+ tbl
+ )
+})
+
+test_that("group_by supports creating/renaming", {
+ compare_dplyr_binding(
+ .input %>%
+ group_by(chr, numbers = int) %>%
+ collect(),
+ tbl
+ )
+ compare_dplyr_binding(
+ .input %>%
+ group_by(chr, numbers = int * 4) %>%
+ collect(),
+ tbl
+ )
+ compare_dplyr_binding(
+ .input %>%
+ group_by(int > 4, lgl, foo = int > 5) %>%
+ collect(),
+ tbl
+ )
+})
+
+test_that("ungroup", {
+ compare_dplyr_binding(
+ .input %>%
+ group_by(chr) %>%
+ select(int, chr) %>%
+ ungroup() %>%
+ filter(int > 5) %>%
+ collect(),
+ tbl
+ )
+
+ # to confirm that the above expectation is actually testing what we think it's
+ # testing, verify that compare_dplyr_binding() distinguishes between grouped and
+ # ungrouped tibbles
+ expect_error(
+ compare_dplyr_binding(
+ .input %>%
+ group_by(chr) %>%
+ select(int, chr) %>%
+ (function(x) if (inherits(x, "tbl_df")) ungroup(x) else x) %>%
+ filter(int > 5) %>%
+ collect(),
+ tbl
+ )
+ )
+})
+
+test_that("group_by then rename", {
+ compare_dplyr_binding(
+ .input %>%
+ group_by(chr) %>%
+ select(string = chr, int) %>%
+ collect(),
+ tbl
+ )
+})
+
+test_that("group_by with .drop", {
+ test_groups <- c("starting_a_fight", "consoling_a_child", "petting_a_dog")
+ compare_dplyr_binding(
+ .input %>%
+ group_by(!!!syms(test_groups), .drop = TRUE) %>%
+ collect(),
+ example_with_logical_factors
+ )
+ compare_dplyr_binding(
+ .input %>%
+ group_by(!!!syms(test_groups), .drop = FALSE) %>%
+ collect(),
+ example_with_logical_factors
+ )
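+  # .drop = TRUE keeps only observed combinations of the grouping factors,
+  # while .drop = FALSE keeps all level combinations, hence 4 vs 8 groups below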
+ expect_equal(
+ example_with_logical_factors %>%
+ group_by(!!!syms(test_groups), .drop = TRUE) %>%
+ collect() %>%
+ n_groups(),
+ 4L
+ )
+ expect_equal(
+ example_with_logical_factors %>%
+ group_by(!!!syms(test_groups), .drop = FALSE) %>%
+ collect() %>%
+ n_groups(),
+ 8L
+ )
+ expect_equal(
+ example_with_logical_factors %>%
+ group_by(!!!syms(test_groups), .drop = FALSE) %>%
+ group_by_drop_default(),
+ FALSE
+ )
+ expect_equal(
+ example_with_logical_factors %>%
+ group_by(!!!syms(test_groups), .drop = TRUE) %>%
+ group_by_drop_default(),
+ TRUE
+ )
+ compare_dplyr_binding(
+ .input %>%
+ group_by(.drop = FALSE) %>% # no group by vars
+ group_by_drop_default(),
+ example_with_logical_factors
+ )
+ compare_dplyr_binding(
+ .input %>%
+ group_by_drop_default(),
+ example_with_logical_factors
+ )
+ compare_dplyr_binding(
+ .input %>%
+ group_by(!!!syms(test_groups)) %>%
+ group_by_drop_default(),
+ example_with_logical_factors
+ )
+ compare_dplyr_binding(
+ .input %>%
+ group_by(!!!syms(test_groups), .drop = FALSE) %>%
+ ungroup() %>%
+ group_by_drop_default(),
+ example_with_logical_factors
+ )
+})
diff --git a/src/arrow/r/tests/testthat/test-dplyr-join.R b/src/arrow/r/tests/testthat/test-dplyr-join.R
new file mode 100644
index 000000000..d8239f810
--- /dev/null
+++ b/src/arrow/r/tests/testthat/test-dplyr-join.R
@@ -0,0 +1,175 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+skip_if_not_available("dataset")
+
+library(dplyr, warn.conflicts = FALSE)
+
+left <- example_data
+left$some_grouping <- rep(c(1, 2), 5)
+
+left_tab <- Table$create(left)
+
+to_join <- tibble::tibble(
+ some_grouping = c(1, 2),
+ capital_letters = c("A", "B"),
+ another_column = TRUE
+)
+to_join_tab <- Table$create(to_join)
+
+
+test_that("left_join", {
+ expect_message(
+ compare_dplyr_binding(
+ .input %>%
+ left_join(to_join) %>%
+ collect(),
+ left
+ ),
+ 'Joining, by = "some_grouping"'
+ )
+})
+
+test_that("left_join `by` args", {
+ compare_dplyr_binding(
+ .input %>%
+ left_join(to_join, by = "some_grouping") %>%
+ collect(),
+ left
+ )
+ compare_dplyr_binding(
+ .input %>%
+ left_join(
+ to_join %>%
+ rename(the_grouping = some_grouping),
+ by = c(some_grouping = "the_grouping")
+ ) %>%
+ collect(),
+ left
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ rename(the_grouping = some_grouping) %>%
+ left_join(
+ to_join,
+ by = c(the_grouping = "some_grouping")
+ ) %>%
+ collect(),
+ left
+ )
+})
+
+test_that("join two tables", {
+ expect_identical(
+ left_tab %>%
+ left_join(to_join_tab, by = "some_grouping") %>%
+ collect(),
+ left %>%
+ left_join(to_join, by = "some_grouping") %>%
+ collect()
+ )
+})
+
+test_that("Error handling", {
+ expect_error(
+ left_tab %>%
+ left_join(to_join, by = "not_a_col") %>%
+ collect(),
+ "all(names(by) %in% names(x)) is not TRUE",
+ fixed = TRUE
+ )
+})
+
+# TODO: test duplicate col names
+# TODO: casting: int and float columns?
+
+test_that("right_join", {
+ compare_dplyr_binding(
+ .input %>%
+ right_join(to_join, by = "some_grouping") %>%
+ collect(),
+ left
+ )
+})
+
+test_that("inner_join", {
+ compare_dplyr_binding(
+ .input %>%
+ inner_join(to_join, by = "some_grouping") %>%
+ collect(),
+ left
+ )
+})
+
+test_that("full_join", {
+ compare_dplyr_binding(
+ .input %>%
+ full_join(to_join, by = "some_grouping") %>%
+ collect(),
+ left
+ )
+})
+
+test_that("semi_join", {
+ compare_dplyr_binding(
+ .input %>%
+ semi_join(to_join, by = "some_grouping") %>%
+ collect(),
+ left
+ )
+})
+
+test_that("anti_join", {
+ compare_dplyr_binding(
+ .input %>%
+      # Factor levels don't match when there are no rows in the data
+ # TODO: use better anti_join test data
+ select(-fct) %>%
+ anti_join(to_join, by = "some_grouping") %>%
+ collect(),
+ left
+ )
+})
+
+test_that("mutate then join", {
+ left <- Table$create(
+ one = c("a", "b"),
+ two = 1:2
+ )
+ right <- Table$create(
+ three = TRUE,
+ dos = 2L
+ )
+
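+  # dos = 1 has no match in `right`, so the joined `three` is NA in that row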
+ expect_equal(
+ left %>%
+ rename(dos = two) %>%
+ mutate(one = toupper(one)) %>%
+ left_join(
+ right %>%
+ mutate(three = !three)
+ ) %>%
+ arrange(dos) %>%
+ collect(),
+ tibble(
+ one = c("A", "B"),
+ dos = 1:2,
+ three = c(NA, FALSE)
+ )
+ )
+})
diff --git a/src/arrow/r/tests/testthat/test-dplyr-mutate.R b/src/arrow/r/tests/testthat/test-dplyr-mutate.R
new file mode 100644
index 000000000..886ec9e42
--- /dev/null
+++ b/src/arrow/r/tests/testthat/test-dplyr-mutate.R
@@ -0,0 +1,522 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+skip_if_not_available("dataset")
+
+library(dplyr, warn.conflicts = FALSE)
+library(stringr)
+
+tbl <- example_data
+# Add some better string data
+tbl$verses <- verses[[1]]
+# c(" a ", " b ", " c ", ...) increasing padding
+# nchar = 3 5 7 9 11 13 15 17 19 21
+tbl$padded_strings <- stringr::str_pad(letters[1:10], width = 2 * (1:10) + 1, side = "both")
+
+test_that("mutate() is lazy", {
+ expect_s3_class(
+ tbl %>% record_batch() %>% mutate(int = int + 6L),
+ "arrow_dplyr_query"
+ )
+})
+
+test_that("basic mutate", {
+ compare_dplyr_binding(
+ .input %>%
+ select(int, chr) %>%
+ filter(int > 5) %>%
+ mutate(int = int + 6L) %>%
+ collect(),
+ tbl
+ )
+})
+
+test_that("mutate() with NULL inputs", {
+ compare_dplyr_binding(
+ .input %>%
+ mutate(int = NULL) %>%
+ collect(),
+ tbl
+ )
+})
+
+test_that("empty mutate()", {
+ compare_dplyr_binding(
+ .input %>%
+ mutate() %>%
+ collect(),
+ tbl
+ )
+})
+
+test_that("transmute", {
+ compare_dplyr_binding(
+ .input %>%
+ select(int, chr) %>%
+ filter(int > 5) %>%
+ transmute(int = int + 6L) %>%
+ collect(),
+ tbl
+ )
+})
+
+test_that("transmute() with NULL inputs", {
+ compare_dplyr_binding(
+ .input %>%
+ transmute(int = NULL) %>%
+ collect(),
+ tbl
+ )
+})
+
+test_that("empty transmute()", {
+ compare_dplyr_binding(
+ .input %>%
+ transmute() %>%
+ collect(),
+ tbl
+ )
+})
+
+test_that("transmute() with unsupported arguments", {
+ expect_error(
+ tbl %>%
+ Table$create() %>%
+ transmute(int = int + 42L, .keep = "all"),
+ "`transmute()` does not support the `.keep` argument",
+ fixed = TRUE
+ )
+ expect_error(
+ tbl %>%
+ Table$create() %>%
+ transmute(int = int + 42L, .before = lgl),
+ "`transmute()` does not support the `.before` argument",
+ fixed = TRUE
+ )
+ expect_error(
+ tbl %>%
+ Table$create() %>%
+ transmute(int = int + 42L, .after = chr),
+ "`transmute()` does not support the `.after` argument",
+ fixed = TRUE
+ )
+})
+
+test_that("transmute() defuses dots arguments (ARROW-13262)", {
+ expect_warning(
+ tbl %>%
+ Table$create() %>%
+ transmute(stringr::str_c(chr, chr)) %>%
+ collect(),
+ "Expression stringr::str_c(chr, chr) not supported in Arrow; pulling data into R",
+ fixed = TRUE
+ )
+})
+
+test_that("mutate and refer to previous mutants", {
+ compare_dplyr_binding(
+ .input %>%
+ select(int, verses) %>%
+ mutate(
+ line_lengths = nchar(verses),
+ longer = line_lengths * 10
+ ) %>%
+ filter(line_lengths > 15) %>%
+ collect(),
+ tbl
+ )
+})
+
+test_that("nchar() arguments", {
+ compare_dplyr_binding(
+ .input %>%
+ select(int, verses) %>%
+ mutate(
+ line_lengths = nchar(verses, type = "bytes"),
+ longer = line_lengths * 10
+ ) %>%
+ filter(line_lengths > 15) %>%
+ collect(),
+ tbl
+ )
+ # This tests the whole abandon_ship() machinery
+ compare_dplyr_binding(
+ .input %>%
+ select(int, verses) %>%
+ mutate(
+ line_lengths = nchar(verses, type = "bytes", allowNA = TRUE),
+ longer = line_lengths * 10
+ ) %>%
+ filter(line_lengths > 15) %>%
+ collect(),
+ tbl,
+ warning = paste0(
+ "In nchar\\(verses, type = \"bytes\", allowNA = TRUE\\), ",
+ "allowNA = TRUE not supported by Arrow; pulling data into R"
+ )
+ )
+})
+
+test_that("mutate with .data pronoun", {
+ compare_dplyr_binding(
+ .input %>%
+ select(int, verses) %>%
+ mutate(
+ line_lengths = str_length(verses),
+ longer = .data$line_lengths * 10
+ ) %>%
+ filter(line_lengths > 15) %>%
+ collect(),
+ tbl
+ )
+})
+
+test_that("mutate with unnamed expressions", {
+ compare_dplyr_binding(
+ .input %>%
+ select(int, padded_strings) %>%
+ mutate(
+ int, # bare column name
+ nchar(padded_strings) # expression
+ ) %>%
+ filter(int > 5) %>%
+ collect(),
+ tbl
+ )
+})
+
+test_that("mutate with reassigning same name", {
+ compare_dplyr_binding(
+ .input %>%
+ transmute(
+ new = lgl,
+ new = chr
+ ) %>%
+ collect(),
+ tbl
+ )
+})
+
+test_that("mutate with single value for recycling", {
+ compare_dplyr_binding(
+ .input %>%
+ select(int, padded_strings) %>%
+ mutate(
+ dr_bronner = 1 # ALL ONE!
+ ) %>%
+ collect(),
+ tbl
+ )
+})
+
+test_that("dplyr::mutate's examples", {
+ # Newly created variables are available immediately
+ compare_dplyr_binding(
+ .input %>%
+ select(name, mass) %>%
+ mutate(
+ mass2 = mass * 2,
+ mass2_squared = mass2 * mass2
+ ) %>%
+ collect(),
+ starwars # this is a test tibble that ships with dplyr
+ )
+
+ # As well as adding new variables, you can use mutate() to
+ # remove variables and modify existing variables.
+ compare_dplyr_binding(
+ .input %>%
+ select(name, height, mass, homeworld) %>%
+ mutate(
+ mass = NULL,
+ height = height * 0.0328084 # convert to feet
+ ) %>%
+ collect(),
+ starwars
+ )
+
+ # Examples we don't support should succeed
+ # but warn that they're pulling data into R to do so
+
+ # across and autosplicing: ARROW-11699
+ compare_dplyr_binding(
+ .input %>%
+ select(name, homeworld, species) %>%
+ mutate(across(!name, as.factor)) %>%
+ collect(),
+ starwars,
+ warning = "Expression across.*not supported in Arrow"
+ )
+
+ # group_by then mutate
+ compare_dplyr_binding(
+ .input %>%
+ select(name, mass, homeworld) %>%
+ group_by(homeworld) %>%
+ mutate(rank = min_rank(desc(mass))) %>%
+ collect(),
+ starwars,
+ warning = TRUE
+ )
+
+ # `.before` and `.after` experimental args: ARROW-11701
+ df <- tibble(x = 1, y = 2)
+ compare_dplyr_binding(
+ .input %>% mutate(z = x + y) %>% collect(),
+ df
+ )
+ #> # A tibble: 1 x 3
+ #> x y z
+ #> <dbl> <dbl> <dbl>
+ #> 1 1 2 3
+
+ compare_dplyr_binding(
+ .input %>% mutate(z = x + y, .before = 1) %>% collect(),
+ df
+ )
+ #> # A tibble: 1 x 3
+ #> z x y
+ #> <dbl> <dbl> <dbl>
+ #> 1 3 1 2
+ compare_dplyr_binding(
+ .input %>% mutate(z = x + y, .after = x) %>% collect(),
+ df
+ )
+ #> # A tibble: 1 x 3
+ #> x z y
+ #> <dbl> <dbl> <dbl>
+ #> 1 1 3 2
+
+ # By default, mutate() keeps all columns from the input data.
+ # Experimental: You can override with `.keep`
+ df <- tibble(x = 1, y = 2, a = "a", b = "b")
+ compare_dplyr_binding(
+ .input %>% mutate(z = x + y, .keep = "all") %>% collect(), # the default
+ df
+ )
+ #> # A tibble: 1 x 5
+ #> x y a b z
+ #> <dbl> <dbl> <chr> <chr> <dbl>
+ #> 1 1 2 a b 3
+ compare_dplyr_binding(
+ .input %>% mutate(z = x + y, .keep = "used") %>% collect(),
+ df
+ )
+ #> # A tibble: 1 x 3
+ #> x y z
+ #> <dbl> <dbl> <dbl>
+ #> 1 1 2 3
+ compare_dplyr_binding(
+ .input %>% mutate(z = x + y, .keep = "unused") %>% collect(),
+ df
+ )
+ #> # A tibble: 1 x 3
+ #> a b z
+ #> <chr> <chr> <dbl>
+ #> 1 a b 3
+ compare_dplyr_binding(
+ .input %>% mutate(z = x + y, .keep = "none") %>% collect(), # same as transmute()
+ df
+ )
+ #> # A tibble: 1 x 1
+ #> z
+ #> <dbl>
+ #> 1 3
+
+ # Grouping ----------------------------------------
+ # The mutate operation may yield different results on grouped
+ # tibbles because the expressions are computed within groups.
+ # The following normalises `mass` by the global average:
+ # TODO: ARROW-13926
+ compare_dplyr_binding(
+ .input %>%
+ select(name, mass, species) %>%
+ mutate(mass_norm = mass / mean(mass, na.rm = TRUE)) %>%
+ collect(),
+ starwars,
+ warning = "window function"
+ )
+})
+
+test_that("Can mutate after group_by as long as there are no aggregations", {
+ compare_dplyr_binding(
+ .input %>%
+ select(int, chr) %>%
+ group_by(chr) %>%
+ mutate(int = int + 6L) %>%
+ collect(),
+ tbl
+ )
+ compare_dplyr_binding(
+ .input %>%
+ select(mean = int, chr) %>%
+ # rename `int` to `mean` and use `mean` in `mutate()` to test that
+ # `all_funs()` does not incorrectly identify it as an aggregate function
+ group_by(chr) %>%
+ mutate(mean = mean + 6L) %>%
+ collect(),
+ tbl
+ )
+ expect_warning(
+ tbl %>%
+ Table$create() %>%
+ select(int, chr) %>%
+ group_by(chr) %>%
+ mutate(avg_int = mean(int)) %>%
+ collect(),
+ "window functions not currently supported in Arrow; pulling data into R",
+ fixed = TRUE
+ )
+ expect_warning(
+ tbl %>%
+ Table$create() %>%
+ select(mean = int, chr) %>%
+ # rename `int` to `mean` and use `mean(mean)` in `mutate()` to test that
+ # `all_funs()` detects `mean()` despite the collision with a column name
+ group_by(chr) %>%
+ mutate(avg_int = mean(mean)) %>%
+ collect(),
+ "window functions not currently supported in Arrow; pulling data into R",
+ fixed = TRUE
+ )
+})
+
+test_that("handle bad expressions", {
+ # TODO: search for functions other than mean() (see above test)
+ # that need to be forced to fail because they error ambiguously
+
+ with_language("fr", {
+ # expect_warning(., NA) because the usual behavior when it hits a filter
+ # that it can't evaluate is to raise a warning, collect() to R, and retry
+ # the filter. But we want this to error the first time because it's
+ # a user error, not solvable by retrying in R
+ expect_warning(
+ expect_error(
+ Table$create(tbl) %>% mutate(newvar = NOTAVAR + 2),
+ "objet 'NOTAVAR' introuvable"
+ ),
+ NA
+ )
+ })
+})
+
+test_that("Can't just add a vector column with mutate()", {
+ expect_warning(
+ expect_equal(
+ Table$create(tbl) %>%
+ select(int) %>%
+ mutate(again = 1:10),
+ tibble::tibble(int = tbl$int, again = 1:10)
+ ),
+ "In again = 1:10, only values of size one are recycled; pulling data into R"
+ )
+})
+
+test_that("print a mutated table", {
+ expect_output(
+ Table$create(tbl) %>%
+ select(int) %>%
+ mutate(twice = int * 2) %>%
+ print(),
+ "InMemoryDataset (query)
+int: int32
+twice: double (multiply_checked(int, 2))
+
+See $.data for the source Arrow object",
+ fixed = TRUE
+ )
+})
+
+test_that("mutate and write_dataset", {
+ skip_if_not_available("dataset")
+ # See related test in test-dataset.R
+
+ first_date <- lubridate::ymd_hms("2015-04-29 03:12:39")
+ df1 <- tibble(
+ int = 1:10,
+ dbl = as.numeric(1:10),
+ lgl = rep(c(TRUE, FALSE, NA, TRUE, FALSE), 2),
+ chr = letters[1:10],
+ fct = factor(LETTERS[1:10]),
+ ts = first_date + lubridate::days(1:10)
+ )
+
+ second_date <- lubridate::ymd_hms("2017-03-09 07:01:02")
+ df2 <- tibble(
+ int = 101:110,
+ dbl = c(as.numeric(51:59), NaN),
+ lgl = rep(c(TRUE, FALSE, NA, TRUE, FALSE), 2),
+ chr = letters[10:1],
+ fct = factor(LETTERS[10:1]),
+ ts = second_date + lubridate::days(10:1)
+ )
+
+ dst_dir <- tempfile()
+ stacked <- record_batch(rbind(df1, df2))
+ stacked %>%
+ mutate(twice = int * 2) %>%
+ group_by(int) %>%
+ write_dataset(dst_dir, format = "feather")
+ expect_true(dir.exists(dst_dir))
+ expect_identical(dir(dst_dir), sort(paste("int", c(1:10, 101:110), sep = "=")))
+
+ new_ds <- open_dataset(dst_dir, format = "feather")
+
+ expect_equal(
+ new_ds %>%
+ select(string = chr, integer = int, twice) %>%
+ filter(integer > 6 & integer < 11) %>%
+ collect() %>%
+ summarize(mean = mean(integer)),
+ df1 %>%
+ select(string = chr, integer = int) %>%
+ mutate(twice = integer * 2) %>%
+ filter(integer > 6) %>%
+ summarize(mean = mean(integer))
+ )
+})
+
+test_that("mutate and pmin/pmax", {
+ df <- tibble(
+ city = c("Chillan", "Valdivia", "Osorno"),
+ val1 = c(200, 300, NA),
+ val2 = c(100, NA, NA),
+ val3 = c(0, NA, NA)
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ mutate(
+ max_val_1 = pmax(val1, val2, val3),
+ max_val_2 = pmax(val1, val2, val3, na.rm = TRUE),
+ min_val_1 = pmin(val1, val2, val3),
+ min_val_2 = pmin(val1, val2, val3, na.rm = TRUE)
+ ) %>%
+ collect(),
+ df
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ mutate(
+ max_val_1 = pmax(val1 - 100, 200, val1 * 100, na.rm = TRUE),
+ min_val_1 = pmin(val1 - 100, 100, val1 * 100, na.rm = TRUE),
+ ) %>%
+ collect(),
+ df
+ )
+})
diff --git a/src/arrow/r/tests/testthat/test-dplyr-query.R b/src/arrow/r/tests/testthat/test-dplyr-query.R
new file mode 100644
index 000000000..21a55f4b4
--- /dev/null
+++ b/src/arrow/r/tests/testthat/test-dplyr-query.R
@@ -0,0 +1,296 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+skip_if_not_available("dataset")
+
+library(dplyr, warn.conflicts = FALSE)
+library(stringr)
+
+tbl <- example_data
+# Add some better string data
+tbl$verses <- verses[[1]]
+# c(" a ", " b ", " c ", ...) increasing padding
+# nchar = 3 5 7 9 11 13 15 17 19 21
+tbl$padded_strings <- stringr::str_pad(letters[1:10], width = 2 * (1:10) + 1, side = "both")
+tbl$another_chr <- tail(letters, 10)
+
+test_that("basic select/filter/collect", {
+ batch <- record_batch(tbl)
+
+ b2 <- batch %>%
+ select(int, chr) %>%
+ filter(int > 5)
+
+ expect_s3_class(b2, "arrow_dplyr_query")
+ t2 <- collect(b2)
+ expect_equal(t2, tbl[tbl$int > 5 & !is.na(tbl$int), c("int", "chr")])
+ # Test that the original object is not affected
+ expect_identical(collect(batch), tbl)
+})
+
+test_that("dim() on query", {
+ compare_dplyr_binding(
+ .input %>%
+ filter(int > 5) %>%
+ select(int, chr) %>%
+ dim(),
+ tbl
+ )
+})
+
+test_that("Print method", {
+ expect_output(
+ record_batch(tbl) %>%
+ filter(dbl > 2, chr == "d" | chr == "f") %>%
+ select(chr, int, lgl) %>%
+ filter(int < 5) %>%
+ select(int, chr) %>%
+ print(),
+ 'InMemoryDataset (query)
+int: int32
+chr: string
+
+* Filter: (((dbl > 2) and ((chr == "d") or (chr == "f"))) and (int < 5))
+See $.data for the source Arrow object',
+ fixed = TRUE
+ )
+})
+
+test_that("pull", {
+ compare_dplyr_binding(
+ .input %>% pull(),
+ tbl
+ )
+ compare_dplyr_binding(
+ .input %>% pull(1),
+ tbl
+ )
+ compare_dplyr_binding(
+ .input %>% pull(chr),
+ tbl
+ )
+ compare_dplyr_binding(
+ .input %>%
+ filter(int > 4) %>%
+ rename(strng = chr) %>%
+ pull(strng),
+ tbl
+ )
+})
+
+test_that("collect(as_data_frame=FALSE)", {
+ batch <- record_batch(tbl)
+
+ b1 <- batch %>% collect(as_data_frame = FALSE)
+
+ expect_r6_class(b1, "RecordBatch")
+
+ b2 <- batch %>%
+ select(int, chr) %>%
+ filter(int > 5) %>%
+ collect(as_data_frame = FALSE)
+
+  # collect(as_data_frame = FALSE) on a query always returns a Table now
+ expect_r6_class(b2, "Table")
+ expected <- tbl[tbl$int > 5 & !is.na(tbl$int), c("int", "chr")]
+ expect_equal(as.data.frame(b2), expected)
+
+ b3 <- batch %>%
+ select(int, strng = chr) %>%
+ filter(int > 5) %>%
+ collect(as_data_frame = FALSE)
+ expect_r6_class(b3, "Table")
+ expect_equal(as.data.frame(b3), set_names(expected, c("int", "strng")))
+
+ b4 <- batch %>%
+ select(int, strng = chr) %>%
+ filter(int > 5) %>%
+ group_by(int) %>%
+ collect(as_data_frame = FALSE)
+ expect_s3_class(b4, "arrow_dplyr_query")
+ expect_equal(
+ as.data.frame(b4),
+ expected %>%
+ rename(strng = chr) %>%
+ group_by(int)
+ )
+})
+
+test_that("compute()", {
+ batch <- record_batch(tbl)
+
+ b1 <- batch %>% compute()
+
+ expect_r6_class(b1, "RecordBatch")
+
+ b2 <- batch %>%
+ select(int, chr) %>%
+ filter(int > 5) %>%
+ compute()
+
+ expect_r6_class(b2, "Table")
+ expected <- tbl[tbl$int > 5 & !is.na(tbl$int), c("int", "chr")]
+ expect_equal(as.data.frame(b2), expected)
+
+ b3 <- batch %>%
+ select(int, strng = chr) %>%
+ filter(int > 5) %>%
+ compute()
+ expect_r6_class(b3, "Table")
+ expect_equal(as.data.frame(b3), set_names(expected, c("int", "strng")))
+
+ b4 <- batch %>%
+ select(int, strng = chr) %>%
+ filter(int > 5) %>%
+ group_by(int) %>%
+ compute()
+ expect_s3_class(b4, "arrow_dplyr_query")
+ expect_equal(
+ as.data.frame(b4),
+ expected %>%
+ rename(strng = chr) %>%
+ group_by(int)
+ )
+})
+
+test_that("head", {
+ batch <- record_batch(tbl)
+
+ b2 <- batch %>%
+ select(int, chr) %>%
+ filter(int > 5) %>%
+ head(2)
+ expect_s3_class(b2, "arrow_dplyr_query")
+ expected <- tbl[tbl$int > 5 & !is.na(tbl$int), c("int", "chr")][1:2, ]
+ expect_equal(collect(b2), expected)
+
+ b3 <- batch %>%
+ select(int, strng = chr) %>%
+ filter(int > 5) %>%
+ head(2)
+ expect_s3_class(b3, "arrow_dplyr_query")
+ expect_equal(as.data.frame(b3), set_names(expected, c("int", "strng")))
+
+ b4 <- batch %>%
+ select(int, strng = chr) %>%
+ filter(int > 5) %>%
+ group_by(int) %>%
+ head(2)
+ expect_s3_class(b4, "arrow_dplyr_query")
+ expect_equal(
+ as.data.frame(b4),
+ expected %>%
+ rename(strng = chr) %>%
+ group_by(int)
+ )
+
+ expect_equal(
+ batch %>%
+ select(int, strng = chr) %>%
+ filter(int > 5) %>%
+ head(2) %>%
+ mutate(twice = int * 2) %>%
+ collect(),
+ expected %>%
+ rename(strng = chr) %>%
+ mutate(twice = int * 2)
+ )
+
+  # This would fail if we evaluated head() after filter(): head(2) takes the
+  # first two unfiltered rows, neither of which passes int > 5, so the result
+  # is empty
+ expect_equal(
+ batch %>%
+ select(int, strng = chr) %>%
+ head(2) %>%
+ filter(int > 5) %>%
+ collect(),
+ expected %>%
+ rename(strng = chr) %>%
+ filter(FALSE)
+ )
+})
+
+test_that("arrange then head returns the right data (ARROW-14162)", {
+ compare_dplyr_binding(
+ .input %>%
+ # mpg has ties so we need to sort by two things to get deterministic order
+ arrange(mpg, disp) %>%
+ head(4) %>%
+ collect(),
+ mtcars,
+ ignore_attr = "row.names"
+ )
+})
+
+test_that("arrange then tail returns the right data", {
+ compare_dplyr_binding(
+ .input %>%
+ # mpg has ties so we need to sort by two things to get deterministic order
+ arrange(mpg, disp) %>%
+ tail(4) %>%
+ collect(),
+ mtcars,
+ ignore_attr = "row.names"
+ )
+})
+
+test_that("tail", {
+ batch <- record_batch(tbl)
+
+ b2 <- batch %>%
+ select(int, chr) %>%
+ filter(int > 5) %>%
+ arrange(int) %>%
+ tail(2)
+
+ expect_s3_class(b2, "arrow_dplyr_query")
+ expected <- tail(tbl[tbl$int > 5 & !is.na(tbl$int), c("int", "chr")], 2)
+ expect_equal(as.data.frame(b2), expected)
+
+ b3 <- batch %>%
+ select(int, strng = chr) %>%
+ filter(int > 5) %>%
+ arrange(int) %>%
+ tail(2)
+ expect_s3_class(b3, "arrow_dplyr_query")
+ expect_equal(as.data.frame(b3), set_names(expected, c("int", "strng")))
+
+ b4 <- batch %>%
+ select(int, strng = chr) %>%
+ filter(int > 5) %>%
+ group_by(int) %>%
+ arrange(int) %>%
+ tail(2)
+ expect_s3_class(b4, "arrow_dplyr_query")
+ expect_equal(
+ as.data.frame(b4),
+ expected %>%
+ rename(strng = chr) %>%
+ group_by(int)
+ )
+})
+
+test_that("No duplicate field names are allowed in an arrow_dplyr_query", {
+ expect_error(
+ Table$create(tbl, tbl) %>%
+ filter(int > 0),
+ regexp = paste0(
+ 'The following field names were found more than once in the data: "int", "dbl", ',
+ '"dbl2", "lgl", "false", "chr", "fct", "verses", "padded_strings"'
+ )
+ )
+})
diff --git a/src/arrow/r/tests/testthat/test-dplyr-select.R b/src/arrow/r/tests/testthat/test-dplyr-select.R
new file mode 100644
index 000000000..2ca2b100e
--- /dev/null
+++ b/src/arrow/r/tests/testthat/test-dplyr-select.R
@@ -0,0 +1,146 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+skip_if_not_available("dataset")
+
+library(dplyr, warn.conflicts = FALSE)
+library(stringr)
+
+tbl <- example_data
+
+test_that("Empty select returns no columns", {
+ compare_dplyr_binding(
+ .input %>% select() %>% collect(),
+ tbl,
+ skip_table = "Table with 0 cols doesn't know how many rows it should have"
+ )
+})
+test_that("Empty select still includes the group_by columns", {
+ expect_message(
+ compare_dplyr_binding(
+ .input %>% group_by(chr) %>% select() %>% collect(),
+ tbl
+ ),
+ "Adding missing grouping variables"
+ )
+})
+
+test_that("select/rename", {
+ compare_dplyr_binding(
+ .input %>%
+ select(string = chr, int) %>%
+ collect(),
+ tbl
+ )
+ compare_dplyr_binding(
+ .input %>%
+ rename(string = chr) %>%
+ collect(),
+ tbl
+ )
+ compare_dplyr_binding(
+ .input %>%
+ rename(strng = chr) %>%
+ rename(other = strng) %>%
+ collect(),
+ tbl
+ )
+})
+
+test_that("select/rename with selection helpers", {
+ # TODO: add some passing tests here
+
+ expect_error(
+ compare_dplyr_binding(
+ .input %>%
+ select(where(is.numeric)) %>%
+ collect(),
+ tbl
+ ),
+ "Unsupported selection helper"
+ )
+})
+
+test_that("filtering with rename", {
+ compare_dplyr_binding(
+ .input %>%
+ filter(chr == "b") %>%
+ select(string = chr, int) %>%
+ collect(),
+ tbl
+ )
+ compare_dplyr_binding(
+ .input %>%
+ select(string = chr, int) %>%
+ filter(string == "b") %>%
+ collect(),
+ tbl
+ )
+})
+
+test_that("relocate", {
+ df <- tibble(a = 1, b = 1, c = 1, d = "a", e = "a", f = "a")
+ compare_dplyr_binding(
+ .input %>% relocate(f) %>% collect(),
+ df,
+ )
+ compare_dplyr_binding(
+ .input %>% relocate(a, .after = c) %>% collect(),
+ df,
+ )
+ compare_dplyr_binding(
+ .input %>% relocate(f, .before = b) %>% collect(),
+ df,
+ )
+ compare_dplyr_binding(
+ .input %>% relocate(a, .after = last_col()) %>% collect(),
+ df,
+ )
+ compare_dplyr_binding(
+ .input %>% relocate(ff = f) %>% collect(),
+ df,
+ )
+})
+
+test_that("relocate with selection helpers", {
+ df <- tibble(a = 1, b = 1, c = 1, d = "a", e = "a", f = "a")
+ compare_dplyr_binding(
+ .input %>% relocate(any_of(c("a", "e", "i", "o", "u"))) %>% collect(),
+ df
+ )
+ compare_dplyr_binding(
+ .input %>% relocate(where(is.character)) %>% collect(),
+ df
+ )
+ compare_dplyr_binding(
+ .input %>% relocate(a, b, c, .after = where(is.character)) %>% collect(),
+ df
+ )
+ compare_dplyr_binding(
+ .input %>% relocate(d, e, f, .before = where(is.numeric)) %>% collect(),
+ df
+ )
+ # works after other dplyr verbs
+ compare_dplyr_binding(
+ .input %>%
+ mutate(c = as.character(c)) %>%
+ relocate(d, e, f, .after = where(is.numeric)) %>%
+ collect(),
+ df
+ )
+})
diff --git a/src/arrow/r/tests/testthat/test-dplyr-summarize.R b/src/arrow/r/tests/testthat/test-dplyr-summarize.R
new file mode 100644
index 000000000..3988412b8
--- /dev/null
+++ b/src/arrow/r/tests/testthat/test-dplyr-summarize.R
@@ -0,0 +1,881 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+skip_if_not_available("dataset")
+
+withr::local_options(list(arrow.summarise.sort = TRUE))
+
+library(dplyr, warn.conflicts = FALSE)
+library(stringr)
+
+tbl <- example_data
+# Add some better string data
+tbl$verses <- verses[[1]]
+# c(" a ", " b ", " c ", ...) increasing padding
+# nchar = 3 5 7 9 11 13 15 17 19 21
+tbl$padded_strings <- stringr::str_pad(letters[1:10], width = 2 * (1:10) + 1, side = "both")
+tbl$some_grouping <- rep(c(1, 2), 5)
+
+test_that("summarize() doesn't evaluate eagerly", {
+ expect_s3_class(
+ Table$create(tbl) %>%
+ summarize(total = sum(int)),
+ "arrow_dplyr_query"
+ )
+ expect_r6_class(
+ Table$create(tbl) %>%
+ summarize(total = sum(int)) %>%
+ compute(),
+ "ArrowTabular"
+ )
+})
+
+test_that("Can aggregate in Arrow", {
+ compare_dplyr_binding(
+ .input %>%
+ summarize(total = sum(int, na.rm = TRUE)) %>%
+ collect(),
+ tbl
+ )
+ compare_dplyr_binding(
+ .input %>%
+ summarize(total = sum(int)) %>%
+ collect(),
+ tbl
+ )
+})
+
+test_that("Group by sum on dataset", {
+ compare_dplyr_binding(
+ .input %>%
+ group_by(some_grouping) %>%
+ summarize(total = sum(int, na.rm = TRUE)) %>%
+ collect(),
+ tbl
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ group_by(some_grouping) %>%
+ summarize(total = sum(int * 4, na.rm = TRUE)) %>%
+ collect(),
+ tbl
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ group_by(some_grouping) %>%
+ summarize(total = sum(int)) %>%
+ collect(),
+ tbl,
+ )
+})
+
+test_that("Group by mean on dataset", {
+ compare_dplyr_binding(
+ .input %>%
+ group_by(some_grouping) %>%
+ summarize(mean = mean(int, na.rm = TRUE)) %>%
+ collect(),
+ tbl
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ group_by(some_grouping) %>%
+ summarize(mean = mean(int, na.rm = FALSE)) %>%
+ collect(),
+ tbl
+ )
+})
+
+test_that("Group by sd on dataset", {
+ compare_dplyr_binding(
+ .input %>%
+ group_by(some_grouping) %>%
+ summarize(sd = sd(int, na.rm = TRUE)) %>%
+ collect(),
+ tbl
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ group_by(some_grouping) %>%
+ summarize(sd = sd(int, na.rm = FALSE)) %>%
+ collect(),
+ tbl
+ )
+})
+
+test_that("Group by var on dataset", {
+ compare_dplyr_binding(
+ .input %>%
+ group_by(some_grouping) %>%
+ summarize(var = var(int, na.rm = TRUE)) %>%
+ collect(),
+ tbl
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ group_by(some_grouping) %>%
+ summarize(var = var(int, na.rm = FALSE)) %>%
+ collect(),
+ tbl
+ )
+})
+
+test_that("n()", {
+ compare_dplyr_binding(
+ .input %>%
+ summarize(counts = n()) %>%
+ collect(),
+ tbl
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ group_by(some_grouping) %>%
+ summarize(counts = n()) %>%
+ arrange(some_grouping) %>%
+ collect(),
+ tbl
+ )
+})
+
+test_that("Group by any/all", {
+ compare_dplyr_binding(
+ .input %>%
+ group_by(some_grouping) %>%
+ summarize(any(lgl, na.rm = TRUE)) %>%
+ collect(),
+ tbl
+ )
+ compare_dplyr_binding(
+ .input %>%
+ group_by(some_grouping) %>%
+ summarize(all(lgl, na.rm = TRUE)) %>%
+ collect(),
+ tbl
+ )
+ compare_dplyr_binding(
+ .input %>%
+ group_by(some_grouping) %>%
+ summarize(any(lgl, na.rm = FALSE)) %>%
+ collect(),
+ tbl
+ )
+ compare_dplyr_binding(
+ .input %>%
+ group_by(some_grouping) %>%
+ summarize(all(lgl, na.rm = FALSE)) %>%
+ collect(),
+ tbl
+ )
+
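+  # nchar() is never negative, so has_words below is never TRUE; this
+  # exercises any()/all() where the aggregate is FALSE (or NA) in every group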
+ compare_dplyr_binding(
+ .input %>%
+ mutate(has_words = nchar(verses) < 0) %>%
+ group_by(some_grouping) %>%
+ summarize(any(has_words, na.rm = TRUE)) %>%
+ collect(),
+ tbl
+ )
+ compare_dplyr_binding(
+ .input %>%
+ mutate(has_words = nchar(verses) < 0) %>%
+ group_by(some_grouping) %>%
+ summarize(all(has_words, na.rm = TRUE)) %>%
+ collect(),
+ tbl
+ )
+ compare_dplyr_binding(
+ .input %>%
+ group_by(some_grouping) %>%
+ summarize(has_words = all(nchar(verses) < 0, na.rm = TRUE)) %>%
+ collect(),
+ tbl
+ )
+})
+
+test_that("n_distinct() on dataset", {
+ # With groupby
+ compare_dplyr_binding(
+ .input %>%
+ group_by(some_grouping) %>%
+ summarize(distinct = n_distinct(lgl, na.rm = FALSE)) %>%
+ collect(),
+ tbl
+ )
+ compare_dplyr_binding(
+ .input %>%
+ group_by(some_grouping) %>%
+ summarize(distinct = n_distinct(lgl, na.rm = TRUE)) %>%
+ collect(),
+ tbl
+ )
+ # Without groupby
+ compare_dplyr_binding(
+ .input %>%
+ summarize(distinct = n_distinct(lgl, na.rm = FALSE)) %>%
+ collect(),
+ tbl
+ )
+ compare_dplyr_binding(
+ .input %>%
+ summarize(distinct = n_distinct(lgl, na.rm = TRUE)) %>%
+ collect(),
+ tbl
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ summarize(distinct = n_distinct(int, lgl)) %>%
+ collect(),
+ tbl,
+ warning = "Multiple arguments"
+ )
+ compare_dplyr_binding(
+ .input %>%
+ group_by(some_grouping) %>%
+ summarize(distinct = n_distinct(int, lgl)) %>%
+ collect(),
+ tbl,
+ warning = "Multiple arguments"
+ )
+})
+
+test_that("Functions that take ... but we only accept a single arg", {
+ compare_dplyr_binding(
+ .input %>%
+ summarize(distinct = n_distinct()) %>%
+ collect(),
+ tbl,
+ warning = "0 arguments"
+ )
+ compare_dplyr_binding(
+ .input %>%
+ summarize(distinct = n_distinct(int, lgl)) %>%
+ collect(),
+ tbl,
+ warning = "Multiple arguments"
+ )
+ # Now that we've demonstrated that the whole machinery works, let's test
+ # the agg_funcs directly
+ expect_error(agg_funcs$n_distinct(), "n_distinct() with 0 arguments", fixed = TRUE)
+ expect_error(agg_funcs$sum(), "sum() with 0 arguments", fixed = TRUE)
+ expect_error(agg_funcs$any(), "any() with 0 arguments", fixed = TRUE)
+ expect_error(agg_funcs$all(), "all() with 0 arguments", fixed = TRUE)
+ expect_error(agg_funcs$min(), "min() with 0 arguments", fixed = TRUE)
+ expect_error(agg_funcs$max(), "max() with 0 arguments", fixed = TRUE)
+ expect_error(agg_funcs$n_distinct(1, 2), "Multiple arguments to n_distinct()")
+ expect_error(agg_funcs$sum(1, 2), "Multiple arguments to sum")
+ expect_error(agg_funcs$any(1, 2), "Multiple arguments to any()")
+ expect_error(agg_funcs$all(1, 2), "Multiple arguments to all()")
+ expect_error(agg_funcs$min(1, 2), "Multiple arguments to min()")
+ expect_error(agg_funcs$max(1, 2), "Multiple arguments to max()")
+})
+
+test_that("median()", {
+ # When medians are integer-valued, stats::median() sometimes returns output of
+  # type integer, whereas the Arrow approx_median kernels always return
+ # output of type float64. The calls to median(int, ...) in the tests below
+ # are enclosed in as.double() to work around this known difference.
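+  #
+  # A minimal illustration of the difference (not executed as part of this
+  # test):
+  #   median(1:3)             # 2L, an integer, in base R
+  #   as.double(median(1:3))  # 2, a double, matching Arrow's float64 output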
+
+ # Use old testthat behavior here so we don't have to assert the same warning
+ # over and over
+ local_edition(2)
+
+ # with groups
+ compare_dplyr_binding(
+ .input %>%
+ group_by(some_grouping) %>%
+ summarize(
+ med_dbl = median(dbl),
+ med_int = as.double(median(int)),
+ med_dbl_narmf = median(dbl, FALSE),
+ med_int_narmf = as.double(median(int, na.rm = FALSE)),
+ med_dbl_narmt = median(dbl, na.rm = TRUE),
+ med_int_narmt = as.double(median(int, TRUE))
+ ) %>%
+ arrange(some_grouping) %>%
+ collect(),
+ tbl,
+ warning = "median\\(\\) currently returns an approximate median in Arrow"
+ )
+ # without groups, with na.rm = TRUE
+ compare_dplyr_binding(
+ .input %>%
+ summarize(
+ med_dbl_narmt = median(dbl, na.rm = TRUE),
+ med_int_narmt = as.double(median(int, TRUE))
+ ) %>%
+ collect(),
+ tbl,
+ warning = "median\\(\\) currently returns an approximate median in Arrow"
+ )
+ # without groups, with na.rm = FALSE (the default)
+ compare_dplyr_binding(
+ .input %>%
+ summarize(
+ med_dbl = median(dbl),
+ med_int = as.double(median(int)),
+ med_dbl_narmf = median(dbl, FALSE),
+ med_int_narmf = as.double(median(int, na.rm = FALSE))
+ ) %>%
+ collect(),
+ tbl,
+ warning = "median\\(\\) currently returns an approximate median in Arrow"
+ )
+ local_edition(3)
+})
+
+test_that("quantile()", {
+ # The default method for stats::quantile() throws an error when na.rm = FALSE
+ # and the input contains NA or NaN, whereas the Arrow tdigest kernels return
+ # null in this situation. To work around this known difference, the tests
+ # below always use na.rm = TRUE when the data contains NA or NaN.
+
+ # The default method for stats::quantile() has an argument `names` that
+ # controls whether the result has a names attribute. It defaults to
+ # names = TRUE. With Arrow, it is not possible to give the result a names
+ # attribute, so the quantile() binding in Arrow does not accept a `names`
+ # argument. Differences in this names attribute cause compare_dplyr_binding() to
+ # report that the objects are not equal, so we do not use compare_dplyr_binding()
+ # in the tests below.
+
+ # The tests below all use probs = 0.5 because other values cause differences
+ # between the exact quantiles returned by R and the approximate quantiles
+ # returned by Arrow.
+
+ # When quantiles are integer-valued, stats::quantile() sometimes returns
+  # output of type integer, whereas the Arrow tdigest kernels always
+ # return output of type float64. The calls to quantile(int, ...) in the tests
+ # below are enclosed in as.double() to work around this known difference.
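+  #
+  # For example (illustration only, not executed here):
+  #   quantile(1:10, probs = 0.5)                 # c(`50%` = 5.5), named
+  #   quantile(1:10, probs = 0.5, names = FALSE)  # 5.5, comparable to Arrow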
+
+ local_edition(2)
+ # with groups
+ expect_warning(
+ expect_equal(
+ tbl %>%
+ group_by(some_grouping) %>%
+ summarize(
+ q_dbl = quantile(dbl, probs = 0.5, na.rm = TRUE, names = FALSE),
+ q_int = as.double(
+ quantile(int, probs = 0.5, na.rm = TRUE, names = FALSE)
+ )
+ ) %>%
+ arrange(some_grouping),
+ Table$create(tbl) %>%
+ group_by(some_grouping) %>%
+ summarize(
+ q_dbl = quantile(dbl, probs = 0.5, na.rm = TRUE),
+ q_int = as.double(quantile(int, probs = 0.5, na.rm = TRUE))
+ ) %>%
+ arrange(some_grouping) %>%
+ collect()
+ ),
+ "quantile() currently returns an approximate quantile in Arrow",
+ fixed = TRUE
+ )
+
+ # without groups
+ expect_warning(
+ expect_equal(
+ tbl %>%
+ summarize(
+ q_dbl = quantile(dbl, probs = 0.5, na.rm = TRUE, names = FALSE),
+ q_int = as.double(
+ quantile(int, probs = 0.5, na.rm = TRUE, names = FALSE)
+ )
+ ),
+ Table$create(tbl) %>%
+ summarize(
+ q_dbl = quantile(dbl, probs = 0.5, na.rm = TRUE),
+ q_int = as.double(quantile(int, probs = 0.5, na.rm = TRUE))
+ ) %>%
+ collect()
+ ),
+ "quantile() currently returns an approximate quantile in Arrow",
+ fixed = TRUE
+ )
+
+ # with missing values and na.rm = FALSE
+ expect_warning(
+ expect_equal(
+ tibble(
+ q_dbl = NA_real_,
+ q_int = NA_real_
+ ),
+ Table$create(tbl) %>%
+ summarize(
+ q_dbl = quantile(dbl, probs = 0.5, na.rm = FALSE),
+ q_int = as.double(quantile(int, probs = 0.5, na.rm = FALSE))
+ ) %>%
+ collect()
+ ),
+ "quantile() currently returns an approximate quantile in Arrow",
+ fixed = TRUE
+ )
+ local_edition(3)
+
+ # with a vector of 2+ probs
+ expect_warning(
+ Table$create(tbl) %>%
+ summarize(q = quantile(dbl, probs = c(0.2, 0.8), na.rm = TRUE)),
+ "quantile() with length(probs) != 1 not supported by Arrow",
+ fixed = TRUE
+ )
+})
+
+test_that("summarize() with min() and max()", {
+ compare_dplyr_binding(
+ .input %>%
+ select(int, chr) %>%
+ filter(int > 5) %>% # this filters out the NAs in `int`
+ summarize(min_int = min(int), max_int = max(int)) %>%
+ collect(),
+ tbl,
+ )
+ compare_dplyr_binding(
+ .input %>%
+ select(int, chr) %>%
+ filter(int > 5) %>% # this filters out the NAs in `int`
+ summarize(
+ min_int = min(int + 4) / 2,
+ max_int = 3 / max(42 - int)
+ ) %>%
+ collect(),
+ tbl,
+ )
+ compare_dplyr_binding(
+ .input %>%
+ select(int, chr) %>%
+ summarize(min_int = min(int), max_int = max(int)) %>%
+ collect(),
+ tbl,
+ )
+ compare_dplyr_binding(
+ .input %>%
+ select(int) %>%
+ summarize(
+ min_int = min(int, na.rm = TRUE),
+ max_int = max(int, na.rm = TRUE)
+ ) %>%
+ collect(),
+ tbl,
+ )
+ compare_dplyr_binding(
+ .input %>%
+ select(dbl, int) %>%
+ summarize(
+ min_int = -min(log(ceiling(dbl)), na.rm = TRUE),
+ max_int = log(max(as.double(int), na.rm = TRUE))
+ ) %>%
+ collect(),
+ tbl,
+ )
+
+ # multiple dots arguments to min(), max() not supported
+ compare_dplyr_binding(
+ .input %>%
+ summarize(min_mult = min(dbl, int)) %>%
+ collect(),
+ tbl,
+ warning = "Multiple arguments to min\\(\\) not supported by Arrow"
+ )
+ compare_dplyr_binding(
+ .input %>%
+ select(int, dbl, dbl2) %>%
+ summarize(max_mult = max(int, dbl, dbl2)) %>%
+ collect(),
+ tbl,
+ warning = "Multiple arguments to max\\(\\) not supported by Arrow"
+ )
+
+ # min(logical) or max(logical) yields integer in R
+ # min(Boolean) or max(Boolean) yields Boolean in Arrow
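+  # e.g. max(c(TRUE, NA), na.rm = TRUE) is 1L in base R, so both sides are
+  # wrapped in as.logical() to make the results comparable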
+ compare_dplyr_binding(
+ .input %>%
+ select(lgl) %>%
+ summarize(
+ max_lgl = as.logical(max(lgl, na.rm = TRUE)),
+ min_lgl = as.logical(min(lgl, na.rm = TRUE))
+ ) %>%
+ collect(),
+ tbl,
+ )
+})
+
+test_that("min() and max() on character strings", {
+ compare_dplyr_binding(
+ .input %>%
+ summarize(
+ min_chr = min(chr, na.rm = TRUE),
+ max_chr = max(chr, na.rm = TRUE)
+ ) %>%
+ collect(),
+ tbl,
+ )
+ skip("Strings not supported by hash_min_max (ARROW-13988)")
+ compare_dplyr_binding(
+ .input %>%
+ group_by(fct) %>%
+ summarize(
+ min_chr = min(chr, na.rm = TRUE),
+ max_chr = max(chr, na.rm = TRUE)
+ ) %>%
+ collect(),
+ tbl,
+ )
+})
+
+test_that("summarise() with !!sym()", {
+ test_chr_col <- "int"
+ test_dbl_col <- "dbl"
+ test_lgl_col <- "lgl"
+ compare_dplyr_binding(
+ .input %>%
+ group_by(false) %>%
+ summarise(
+ sum = sum(!!sym(test_dbl_col)),
+ any = any(!!sym(test_lgl_col)),
+ all = all(!!sym(test_lgl_col)),
+ mean = mean(!!sym(test_dbl_col)),
+ sd = sd(!!sym(test_dbl_col)),
+ var = var(!!sym(test_dbl_col)),
+ n_distinct = n_distinct(!!sym(test_chr_col)),
+ min = min(!!sym(test_dbl_col)),
+ max = max(!!sym(test_dbl_col))
+ ) %>%
+ collect(),
+ tbl
+ )
+})
+
+test_that("Filter and aggregate", {
+ compare_dplyr_binding(
+ .input %>%
+ filter(some_grouping == 2) %>%
+ summarize(total = sum(int, na.rm = TRUE)) %>%
+ collect(),
+ tbl
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ filter(int > 5) %>%
+ summarize(total = sum(int, na.rm = TRUE)) %>%
+ collect(),
+ tbl
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ filter(some_grouping == 2) %>%
+ group_by(some_grouping) %>%
+ summarize(total = sum(int, na.rm = TRUE)) %>%
+ collect(),
+ tbl
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ filter(int > 5) %>%
+ group_by(some_grouping) %>%
+ summarize(total = sum(int, na.rm = TRUE)) %>%
+ collect(),
+ tbl
+ )
+})
+
+test_that("Group by edge cases", {
+ compare_dplyr_binding(
+ .input %>%
+ group_by(some_grouping * 2) %>%
+ summarize(total = sum(int, na.rm = TRUE)) %>%
+ collect(),
+ tbl
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ group_by(alt = some_grouping * 2) %>%
+ summarize(total = sum(int, na.rm = TRUE)) %>%
+ collect(),
+ tbl
+ )
+})
+
+test_that("Do things after summarize", {
+ group2_sum <- tbl %>%
+ group_by(some_grouping) %>%
+ filter(int > 5) %>%
+ summarize(total = sum(int, na.rm = TRUE)) %>%
+ pull() %>%
+ tail(1)
+
+ compare_dplyr_binding(
+ .input %>%
+ group_by(some_grouping) %>%
+ filter(int > 5) %>%
+ summarize(total = sum(int, na.rm = TRUE)) %>%
+ filter(total == group2_sum) %>%
+ mutate(extra = total * 5) %>%
+ collect(),
+ tbl
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ filter(dbl > 2) %>%
+ select(chr, int, lgl) %>%
+ mutate(twice = int * 2L) %>%
+ group_by(lgl) %>%
+ summarize(
+ count = n(),
+ total = sum(twice, na.rm = TRUE)
+ ) %>%
+ mutate(mean = total / count) %>%
+ collect(),
+ tbl
+ )
+})
+
+test_that("Expressions on aggregations", {
+  # This expanded form is effectively what the concise version below does
+ compare_dplyr_binding(
+ .input %>%
+ group_by(some_grouping) %>%
+ summarize(
+ any = any(lgl),
+ all = all(lgl)
+ ) %>%
+ ungroup() %>% # TODO: loosen the restriction on mutate after group_by
+ mutate(some = any & !all) %>%
+ select(some_grouping, some) %>%
+ collect(),
+ tbl
+ )
+ # More concisely:
+ compare_dplyr_binding(
+ .input %>%
+ group_by(some_grouping) %>%
+ summarize(any(lgl) & !all(lgl)) %>%
+ collect(),
+ tbl
+ )
+
+ # Save one of the aggregates first
+ compare_dplyr_binding(
+ .input %>%
+ group_by(some_grouping) %>%
+ summarize(
+ any_lgl = any(lgl),
+ some = any_lgl & !all(lgl)
+ ) %>%
+ collect(),
+ tbl
+ )
+
+ # Make sure order of columns in result is correct
+ compare_dplyr_binding(
+ .input %>%
+ group_by(some_grouping) %>%
+ summarize(
+ any_lgl = any(lgl),
+ some = any_lgl & !all(lgl),
+ n()
+ ) %>%
+ collect(),
+ tbl
+ )
+
+  # Aggregate on an aggregate (trivial, but dplyr allows it)
+ skip("Aggregate on an aggregate not supported")
+ compare_dplyr_binding(
+ .input %>%
+ group_by(some_grouping) %>%
+ summarize(
+ any_lgl = any(any(lgl))
+ ) %>%
+ collect(),
+ tbl
+ )
+})
+
+test_that("Summarize with 0 arguments", {
+ compare_dplyr_binding(
+ .input %>%
+ group_by(some_grouping) %>%
+ summarize() %>%
+ collect(),
+ tbl
+ )
+})
+
+test_that("Not (yet) supported: implicit join", {
+ withr::local_options(list(arrow.debug = TRUE))
+ compare_dplyr_binding(
+ .input %>%
+ group_by(some_grouping) %>%
+ summarize(
+ sum((dbl - mean(dbl))^2)
+ ) %>%
+ collect(),
+ tbl,
+ warning = "Expression sum\\(\\(dbl - mean\\(dbl\\)\\)\\^2\\) not supported in Arrow; pulling data into R"
+ )
+ compare_dplyr_binding(
+ .input %>%
+ group_by(some_grouping) %>%
+ summarize(
+ sum(dbl - mean(dbl))
+ ) %>%
+ collect(),
+ tbl,
+ warning = "Expression sum\\(dbl - mean\\(dbl\\)\\) not supported in Arrow; pulling data into R"
+ )
+ compare_dplyr_binding(
+ .input %>%
+ group_by(some_grouping) %>%
+ summarize(
+ sqrt(sum((dbl - mean(dbl))^2) / (n() - 1L))
+ ) %>%
+ collect(),
+ tbl,
+ warning = "Expression sum\\(\\(dbl - mean\\(dbl\\)\\)\\^2\\) not supported in Arrow; pulling data into R"
+ )
+
+ compare_dplyr_binding(
+ .input %>%
+ group_by(some_grouping) %>%
+ summarize(
+ dbl - mean(dbl)
+ ) %>%
+ collect(),
+ tbl,
+ warning = "Expression dbl - mean\\(dbl\\) not supported in Arrow; pulling data into R"
+ )
+
+ # This one could possibly be supported--in mutate()
+ compare_dplyr_binding(
+ .input %>%
+ group_by(some_grouping) %>%
+ summarize(
+ dbl - int
+ ) %>%
+ collect(),
+ tbl,
+ warning = "Expression dbl - int not supported in Arrow; pulling data into R"
+ )
+})
+
+test_that(".groups argument", {
+ compare_dplyr_binding(
+ .input %>%
+ group_by(some_grouping, int < 6) %>%
+ summarize(count = n()) %>%
+ collect(),
+ tbl
+ )
+ compare_dplyr_binding(
+ .input %>%
+ group_by(some_grouping, int < 6) %>%
+ summarize(count = n(), .groups = "drop_last") %>%
+ collect(),
+ tbl
+ )
+ compare_dplyr_binding(
+ .input %>%
+ group_by(some_grouping, int < 6) %>%
+ summarize(count = n(), .groups = "keep") %>%
+ collect(),
+ tbl
+ )
+ compare_dplyr_binding(
+ .input %>%
+ group_by(some_grouping, int < 6) %>%
+ summarize(count = n(), .groups = "drop") %>%
+ collect(),
+ tbl
+ )
+ compare_dplyr_binding(
+ .input %>%
+ group_by(some_grouping, int < 6) %>%
+ summarize(count = n(), .groups = "rowwise") %>%
+ collect(),
+ tbl,
+ warning = TRUE
+ )
+
+ # abandon_ship() raises the warning, then dplyr itself errors
+ # This isn't ideal but it's fine and won't be an issue on Datasets
+ expect_error(
+ expect_warning(
+ Table$create(tbl) %>%
+ group_by(some_grouping, int < 6) %>%
+ summarize(count = n(), .groups = "NOTVALID"),
+ "Invalid .groups argument"
+ ),
+ "NOTVALID"
+ )
+})
+
+test_that("summarize() handles group_by .drop", {
+ # Error: Type error: Sorting not supported for type dictionary<values=string, indices=int8, ordered=0>
+ withr::local_options(list(arrow.summarise.sort = FALSE))
+
+ tbl <- tibble(
+ x = 1:10,
+ y = factor(rep(c("a", "c"), each = 5), levels = c("a", "b", "c"))
+ )
+ compare_dplyr_binding(
+ .input %>%
+ group_by(y) %>%
+ count() %>%
+ collect() %>%
+ arrange(y),
+ tbl
+ )
+ # Not supported: check message
+ compare_dplyr_binding(
+ .input %>%
+ group_by(y, .drop = FALSE) %>%
+ count() %>%
+ collect() %>%
+ # Because it's not supported, we have to filter out the (empty) row
+      # that dplyr keeps, so that the results otherwise compare equal
+ filter(y != "b") %>%
+ arrange(y),
+ tbl,
+ warning = ".drop = FALSE currently not supported in Arrow aggregation"
+ )
+
+ # But this is ok because there is no factor group
+ compare_dplyr_binding(
+ .input %>%
+ group_by(y, .drop = FALSE) %>%
+ count() %>%
+ collect() %>%
+ arrange(y),
+ tibble(
+ x = 1:10,
+ y = rep(c("a", "c"), each = 5)
+ )
+ )
+})
diff --git a/src/arrow/r/tests/testthat/test-duckdb.R b/src/arrow/r/tests/testthat/test-duckdb.R
new file mode 100644
index 000000000..decd6e80e
--- /dev/null
+++ b/src/arrow/r/tests/testthat/test-duckdb.R
@@ -0,0 +1,217 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+skip_if_not_installed("duckdb", minimum_version = "0.2.8")
+skip_if_not_installed("dbplyr")
+skip_if_not_available("dataset")
+skip_on_cran()
+
+library(duckdb, quietly = TRUE)
+library(dplyr, warn.conflicts = FALSE)
+
+test_that("to_duckdb", {
+ ds <- InMemoryDataset$create(example_data)
+
+ expect_identical(
+ ds %>%
+ to_duckdb() %>%
+ collect() %>%
+ # factors don't roundtrip https://github.com/duckdb/duckdb/issues/1879
+ select(!fct),
+ select(example_data, !fct)
+ )
+
+ expect_identical(
+ ds %>%
+ select(int, lgl, dbl) %>%
+ to_duckdb() %>%
+ group_by(lgl) %>%
+ summarise(mean_int = mean(int, na.rm = TRUE), mean_dbl = mean(dbl, na.rm = TRUE)) %>%
+ collect(),
+ tibble::tibble(
+ lgl = c(TRUE, NA, FALSE),
+ mean_int = c(3, 6.25, 8.5),
+ mean_dbl = c(3.1, 6.35, 6.1)
+ )
+ )
+
+ # can group_by before the to_duckdb
+ expect_identical(
+ ds %>%
+ select(int, lgl, dbl) %>%
+ group_by(lgl) %>%
+ to_duckdb() %>%
+ summarise(mean_int = mean(int, na.rm = TRUE), mean_dbl = mean(dbl, na.rm = TRUE)) %>%
+ collect(),
+ tibble::tibble(
+ lgl = c(TRUE, NA, FALSE),
+ mean_int = c(3, 6.25, 8.5),
+ mean_dbl = c(3.1, 6.35, 6.1)
+ )
+ )
+})
+
+test_that("to_duckdb then to_arrow", {
+ ds <- InMemoryDataset$create(example_data)
+
+ ds_rt <- ds %>%
+ to_duckdb() %>%
+ # factors don't roundtrip https://github.com/duckdb/duckdb/issues/1879
+ select(-fct) %>%
+ to_arrow()
+
+ expect_identical(
+ collect(ds_rt),
+ ds %>%
+ select(-fct) %>%
+ collect()
+ )
+
+ # And we can continue the pipeline
+ ds_rt <- ds %>%
+ to_duckdb() %>%
+ # factors don't roundtrip https://github.com/duckdb/duckdb/issues/1879
+ select(-fct) %>%
+ to_arrow() %>%
+ filter(int > 5)
+
+ expect_identical(
+ collect(ds_rt),
+ ds %>%
+ select(-fct) %>%
+ filter(int > 5) %>%
+ collect()
+ )
+
+ # Now check errors
+ ds_rt <- ds %>%
+ to_duckdb() %>%
+ # factors don't roundtrip https://github.com/duckdb/duckdb/issues/1879
+ select(-fct)
+
+ # alter the class of ds_rt's connection to simulate some other database
+ class(ds_rt$src$con) <- "some_other_connection"
+
+ expect_error(
+ to_arrow(ds_rt),
+ "to_arrow\\(\\) currently only supports Arrow tables, Arrow datasets,"
+ )
+})
+
+# The next set of tests use an already-extant connection to test features of
+# persistence and querying against the table without using the `tbl` itself, so
+# we need to create a connection separate from the ephemeral one that is made
+# with arrow_duck_connection()
+con <- dbConnect(duckdb::duckdb())
+dbExecute(con, "PRAGMA threads=2")
+on.exit(dbDisconnect(con, shutdown = TRUE), add = TRUE)
+
+# write one table to the connection so it is kept open
+DBI::dbWriteTable(con, "mtcars", mtcars)
+
+test_that("Joining, auto-cleanup enabled", {
+ ds <- InMemoryDataset$create(example_data)
+
+ table_one_name <- "my_arrow_table_1"
+ table_one <- to_duckdb(ds, con = con, table_name = table_one_name, auto_disconnect = TRUE)
+ table_two_name <- "my_arrow_table_2"
+ table_two <- to_duckdb(ds, con = con, table_name = table_two_name, auto_disconnect = TRUE)
+
+ res <- dbGetQuery(
+ con,
+ paste0(
+ "SELECT * FROM ", table_one_name,
+ " INNER JOIN ", table_two_name,
+ " ON ", table_one_name, ".int = ", table_two_name, ".int"
+ )
+ )
+ expect_identical(dim(res), c(9L, 14L))
+
+ # clean up cleans up the tables
+ expect_true(all(c(table_one_name, table_two_name) %in% DBI::dbListTables(con)))
+ rm(table_one, table_two)
+ gc()
+ expect_false(any(c(table_one_name, table_two_name) %in% DBI::dbListTables(con)))
+})
+
+test_that("Joining, auto-cleanup disabled", {
+ ds <- InMemoryDataset$create(example_data)
+
+ table_three_name <- "my_arrow_table_3"
+ table_three <- to_duckdb(ds, con = con, table_name = table_three_name)
+
+ # clean up does *not* clean these tables
+ expect_true(table_three_name %in% DBI::dbListTables(con))
+ rm(table_three)
+ gc()
+  # but because we aren't auto_disconnecting, we still have this table.
+ expect_true(table_three_name %in% DBI::dbListTables(con))
+})
+
+test_that("to_duckdb with a table", {
+ tab <- Table$create(example_data)
+
+ expect_identical(
+ tab %>%
+ to_duckdb() %>%
+ group_by(int > 4) %>%
+ summarise(
+ int_mean = mean(int, na.rm = TRUE),
+ dbl_mean = mean(dbl, na.rm = TRUE)
+ ) %>%
+ collect(),
+ tibble::tibble(
+ "int > 4" = c(FALSE, NA, TRUE),
+ int_mean = c(2, NA, 7.5),
+ dbl_mean = c(2.1, 4.1, 7.3)
+ )
+ )
+})
+
+test_that("to_duckdb passing a connection", {
+ ds <- InMemoryDataset$create(example_data)
+
+ con_separate <- dbConnect(duckdb::duckdb())
+ # we always want to test in parallel
+ dbExecute(con_separate, "PRAGMA threads=2")
+ on.exit(dbDisconnect(con_separate, shutdown = TRUE), add = TRUE)
+
+ # create a table to join to that we know is in our con_separate
+ new_df <- data.frame(
+ int = 1:10,
+ char = letters[26:17],
+ stringsAsFactors = FALSE
+ )
+ DBI::dbWriteTable(con_separate, "separate_join_table", new_df)
+
+ table_four <- ds %>%
+ select(int, lgl, dbl) %>%
+ to_duckdb(con = con_separate, auto_disconnect = FALSE)
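+  # to_duckdb() registered the data under a generated table name, which
+  # dbplyr keeps in the ops tree; we use it in the raw SQL below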
+ table_four_name <- table_four$ops$x
+
+ result <- DBI::dbGetQuery(
+ con_separate,
+ paste0(
+ "SELECT * FROM ", table_four_name,
+ " INNER JOIN separate_join_table ",
+ "ON separate_join_table.int = ", table_four_name, ".int"
+ )
+ )
+
+ expect_identical(dim(result), c(9L, 5L))
+ expect_identical(result$char, new_df[new_df$int != 4, ]$char)
+})
diff --git a/src/arrow/r/tests/testthat/test-expression.R b/src/arrow/r/tests/testthat/test-expression.R
new file mode 100644
index 000000000..c4aab718d
--- /dev/null
+++ b/src/arrow/r/tests/testthat/test-expression.R
@@ -0,0 +1,128 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+test_that("C++ expressions", {
+ skip_if_not_available("dataset")
+ f <- Expression$field_ref("f")
+ expect_identical(f$field_name, "f")
+ g <- Expression$field_ref("g")
+ date <- Expression$scalar(as.Date("2020-01-15"))
+ ts <- Expression$scalar(as.POSIXct("2020-01-17 11:11:11"))
+ i64 <- Expression$scalar(bit64::as.integer64(42))
+ time <- Expression$scalar(hms::hms(56, 34, 12))
+
+ expect_r6_class(f == g, "Expression")
+ expect_r6_class(f == 4, "Expression")
+ expect_r6_class(f == "", "Expression")
+ expect_r6_class(f == NULL, "Expression")
+ expect_r6_class(f == date, "Expression")
+ expect_r6_class(f == i64, "Expression")
+ expect_r6_class(f == time, "Expression")
+ # can't seem to make this work right now because of R Ops.method dispatch
+ # expect_r6_class(f == as.Date("2020-01-15"), "Expression") # nolint
+ expect_r6_class(f == ts, "Expression")
+ expect_r6_class(f <= 2L, "Expression")
+ expect_r6_class(f != FALSE, "Expression")
+ expect_r6_class(f > 4, "Expression")
+ expect_r6_class(f < 4 & f > 2, "Expression")
+ expect_r6_class(f < 4 | f > 2, "Expression")
+ expect_r6_class(!(f < 4), "Expression")
+ expect_output(
+ print(f > 4),
+ "Expression\n(f > 4)",
+ fixed = TRUE
+ )
+ expect_equal(
+ f$type(schema(f = float64())),
+ float64()
+ )
+ expect_equal(
+ (f > 4)$type(schema(f = float64())),
+ bool()
+ )
+  # A vector on the right-hand side is interpreted as a list-type scalar
+ expect_r6_class(f == c(1L, 2L), "Expression")
+
+ expect_error(
+ Expression$create("add", 1, 2),
+ "Expression arguments must be Expression objects"
+ )
+})
+
+test_that("Field reference expression schemas and types", {
+ x <- Expression$field_ref("x")
+
+ # type() throws error when schema is NULL
+ expect_error(x$type(), "schema")
+
+ # type() returns type when schema is set
+ x$schema <- Schema$create(x = int32())
+ expect_equal(x$type(), int32())
+})
+
+test_that("Scalar expression schemas and types", {
+ # type() works on scalars without setting the schema
+ expect_equal(
+ Expression$scalar("foo")$type(),
+ arrow::string()
+ )
+ expect_equal(
+ Expression$scalar(42L)$type(),
+ int32()
+ )
+})
+
+test_that("Expression schemas and types", {
+ x <- Expression$field_ref("x")
+ y <- Expression$field_ref("y")
+ z <- Expression$scalar(42L)
+
+ # type() throws error when both schemas are unset
+ expect_error(
+ Expression$create("add_checked", x, y)$type(),
+ "schema"
+ )
+
+ # type() throws error when left schema is unset
+ y$schema <- Schema$create(y = float64())
+ expect_error(
+ Expression$create("add_checked", x, y)$type(),
+ "schema"
+ )
+
+ # type() throws error when right schema is unset
+ x$schema <- Schema$create(x = int32())
+ y$schema <- NULL
+ expect_error(
+ Expression$create("add_checked", x, y)$type(),
+ "schema"
+ )
+
+ # type() returns type when both schemas are set
+ y$schema <- Schema$create(y = float64())
+ expect_equal(
+ Expression$create("add_checked", x, y)$type(),
+ float64()
+ )
+
+ # type() returns type when one arg has schema set and one is scalar
+ expect_equal(
+ Expression$create("add_checked", x, z)$type(),
+ int32()
+ )
+})
diff --git a/src/arrow/r/tests/testthat/test-feather.R b/src/arrow/r/tests/testthat/test-feather.R
new file mode 100644
index 000000000..136474dea
--- /dev/null
+++ b/src/arrow/r/tests/testthat/test-feather.R
@@ -0,0 +1,256 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+feather_file <- tempfile()
+tib <- tibble::tibble(x = 1:10, y = rnorm(10), z = letters[1:10])
+
+test_that("Write a feather file", {
+ tib_out <- write_feather(tib, feather_file)
+ expect_true(file.exists(feather_file))
+ # Input is returned unmodified
+ expect_identical(tib_out, tib)
+})
+
+expect_feather_roundtrip <- function(write_fun) {
+ tf2 <- normalizePath(tempfile(), mustWork = FALSE)
+ tf3 <- tempfile()
+ on.exit({
+ unlink(tf2)
+ unlink(tf3)
+ })
+
+ # Write two ways. These are what varies with each run
+ write_fun(tib, tf2)
+ expect_true(file.exists(tf2))
+
+ stream <- FileOutputStream$create(tf3)
+ write_fun(tib, stream)
+ stream$close()
+ expect_true(file.exists(tf3))
+
+ # Read both back
+ tab2 <- read_feather(tf2)
+ expect_s3_class(tab2, "data.frame")
+
+ tab3 <- read_feather(tf3)
+ expect_s3_class(tab3, "data.frame")
+
+ # reading directly from arrow::io::MemoryMappedFile
+ tab4 <- read_feather(mmap_open(tf3))
+ expect_s3_class(tab4, "data.frame")
+
+ # reading directly from arrow::io::ReadableFile
+ tab5 <- read_feather(ReadableFile$create(tf3))
+ expect_s3_class(tab5, "data.frame")
+
+ expect_equal(tib, tab2)
+ expect_equal(tib, tab3)
+ expect_equal(tib, tab4)
+ expect_equal(tib, tab5)
+}
+
+test_that("feather read/write round trip", {
+ expect_feather_roundtrip(function(x, f) write_feather(x, f, version = 1))
+ expect_feather_roundtrip(function(x, f) write_feather(x, f, version = 2))
+ expect_feather_roundtrip(function(x, f) write_feather(x, f, chunk_size = 32))
+ if (codec_is_available("lz4")) {
+ expect_feather_roundtrip(function(x, f) write_feather(x, f, compression = "lz4"))
+ }
+ if (codec_is_available("zstd")) {
+ expect_feather_roundtrip(function(x, f) write_feather(x, f, compression = "zstd"))
+ expect_feather_roundtrip(function(x, f) write_feather(x, f, compression = "zstd", compression_level = 3))
+ }
+
+ # Write from Arrow data structures
+ expect_feather_roundtrip(function(x, f) write_feather(RecordBatch$create(x), f))
+ expect_feather_roundtrip(function(x, f) write_feather(Table$create(x), f))
+})
+
+test_that("write_feather option error handling", {
+ tf <- tempfile()
+ expect_false(file.exists(tf))
+ expect_error(
+ write_feather(tib, tf, version = 1, chunk_size = 1024),
+ "Feather version 1 does not support the 'chunk_size' option"
+ )
+ expect_error(
+ write_feather(tib, tf, version = 1, compression = "lz4"),
+ "Feather version 1 does not support the 'compression' option"
+ )
+ expect_error(
+ write_feather(tib, tf, version = 1, compression_level = 1024),
+ "Feather version 1 does not support the 'compression_level' option"
+ )
+ expect_error(
+ write_feather(tib, tf, compression_level = 1024),
+ "Can only specify a 'compression_level' when 'compression' is 'zstd'"
+ )
+ expect_match_arg_error(write_feather(tib, tf, compression = "bz2"))
+ expect_false(file.exists(tf))
+})
+
+test_that("write_feather with invalid input type", {
+ bad_input <- Array$create(1:5)
+ expect_error(
+ write_feather(bad_input, feather_file),
+ regexp = "x must be an object of class 'data.frame', 'RecordBatch', or 'Table', not 'Array'."
+ )
+})
+
+test_that("read_feather supports col_select = <names>", {
+ tab1 <- read_feather(feather_file, col_select = c("x", "y"))
+ expect_s3_class(tab1, "data.frame")
+
+ expect_equal(tib$x, tab1$x)
+ expect_equal(tib$y, tab1$y)
+})
+
+test_that("feather handles col_select = <integer>", {
+ tab1 <- read_feather(feather_file, col_select = 1:2)
+ expect_s3_class(tab1, "data.frame")
+
+ expect_equal(tib$x, tab1$x)
+ expect_equal(tib$y, tab1$y)
+})
+
+test_that("feather handles col_select = <tidyselect helper>", {
+ tab1 <- read_feather(feather_file, col_select = everything())
+ expect_identical(tib, tab1)
+
+ tab2 <- read_feather(feather_file, col_select = starts_with("x"))
+ expect_identical(tab2, tib[, "x", drop = FALSE])
+
+ tab3 <- read_feather(feather_file, col_select = c(starts_with("x"), contains("y")))
+ expect_identical(tab3, tib[, c("x", "y"), drop = FALSE])
+
+ tab4 <- read_feather(feather_file, col_select = -z)
+ expect_identical(tab4, tib[, c("x", "y"), drop = FALSE])
+})
+
+test_that("feather read/write round trip", {
+ tab1 <- read_feather(feather_file, as_data_frame = FALSE)
+ expect_r6_class(tab1, "Table")
+
+ expect_equal(tib, as.data.frame(tab1))
+})
+
+test_that("Read feather from raw vector", {
+ test_raw <- readBin(feather_file, what = "raw", n = 5000)
+ df <- read_feather(test_raw)
+ expect_s3_class(df, "data.frame")
+})
+
+test_that("FeatherReader", {
+ v1 <- tempfile()
+ v2 <- tempfile()
+ on.exit({
+ unlink(v1)
+ unlink(v2)
+ })
+ write_feather(tib, v1, version = 1)
+ write_feather(tib, v2)
+ f1 <- make_readable_file(v1)
+ reader1 <- FeatherReader$create(f1)
+ f1$close()
+ expect_identical(reader1$version, 1L)
+ f2 <- make_readable_file(v2)
+ reader2 <- FeatherReader$create(f2)
+ expect_identical(reader2$version, 2L)
+ f2$close()
+})
+
+test_that("read_feather requires RandomAccessFile and errors nicely otherwise (ARROW-8615)", {
+ skip_if_not_available("gzip")
+ expect_error(
+ read_feather(CompressedInputStream$create(feather_file)),
+ 'file must be a "RandomAccessFile"'
+ )
+})
+
+test_that("read_feather closes connection to file", {
+ tf <- tempfile()
+ on.exit(unlink(tf))
+ write_feather(tib, sink = tf)
+ expect_true(file.exists(tf))
+ read_feather(tf)
+ expect_error(file.remove(tf), NA)
+ expect_false(file.exists(tf))
+})
+
+test_that("Character vectors > 2GB can write to feather", {
+ skip_on_cran()
+ skip_if_not_running_large_memory_tests()
+ df <- tibble::tibble(big = make_big_string())
+ tf <- tempfile()
+ on.exit(unlink(tf))
+ write_feather(df, tf)
+ expect_identical(read_feather(tf), df)
+})
+
+test_that("FeatherReader methods", {
+ # Setup a feather file to use in the test
+ feather_temp <- tempfile()
+ on.exit({
+ unlink(feather_temp)
+ })
+ write_feather(tib, feather_temp)
+ feather_temp_RA <- make_readable_file(feather_temp)
+
+ reader <- FeatherReader$create(feather_temp_RA)
+ feather_temp_RA$close()
+
+ # column_names
+ expect_identical(
+ reader$column_names,
+ c("x", "y", "z")
+ )
+
+ # print method
+ expect_identical(
+ capture.output(print(reader)),
+ # TODO: can we get rows/columns?
+ c("FeatherReader:", "Schema", "x: int32", "y: double", "z: string")
+ )
+})
+
+unlink(feather_file)
+
+ft_file <- test_path("golden-files/data-arrow_2.0.0_lz4.feather")
+
+test_that("Error messages are shown when the compression algorithm lz4 is not found", {
+ msg <- paste0(
+ "NotImplemented: Support for codec 'lz4' not built\nIn order to read this file, ",
+ "you will need to reinstall arrow with additional features enabled.\nSet one of ",
+ "these environment variables before installing:\n\n * LIBARROW_MINIMAL=false ",
+ "(for all optional features, including 'lz4')\n * ARROW_WITH_LZ4=ON (for just 'lz4')",
+ "\n\nSee https://arrow.apache.org/docs/r/articles/install.html for details"
+ )
+
+ if (codec_is_available("lz4")) {
+ d <- read_feather(ft_file)
+ expect_s3_class(d, "data.frame")
+ } else {
+ expect_error(read_feather(ft_file), msg, fixed = TRUE)
+ }
+})
+
+test_that("Error is created when feather reads a parquet file", {
+ expect_error(
+ read_feather(system.file("v0.7.1.parquet", package = "arrow")),
+ "Not a Feather V1 or Arrow IPC file"
+ )
+})
diff --git a/src/arrow/r/tests/testthat/test-field.R b/src/arrow/r/tests/testthat/test-field.R
new file mode 100644
index 000000000..1be36c064
--- /dev/null
+++ b/src/arrow/r/tests/testthat/test-field.R
@@ -0,0 +1,67 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+test_that("field() factory", {
+ x <- field("x", int32())
+ expect_equal(x$type, int32())
+ expect_equal(x$name, "x")
+ expect_true(x$nullable)
+ expect_true(x == x)
+ expect_false(x == field("x", int64()))
+})
+
+test_that("Field with nullable values", {
+ x <- field("x", int32(), nullable = FALSE)
+ expect_equal(x$type, int32())
+ expect_false(x$nullable)
+ expect_true(x == x)
+ expect_false(x == field("x", int32()))
+})
+
+test_that("Field validation", {
+ expect_error(schema(b = 32), "b must be a DataType, not numeric")
+})
+
+test_that("Print method for field", {
+ expect_output(print(field("x", int32())), "Field\nx: int32")
+ expect_output(
+ print(field("zz", dictionary())),
+ "Field\nzz: dictionary<values=string, indices=int32>"
+ )
+
+ expect_output(
+ print(field("x", int32(), nullable = FALSE)),
+ "Field\nx: int32 not null"
+ )
+})
+
+test_that("Field to C-interface", {
+ field <- field("x", time32("s"))
+
+ # export the field via the C-interface
+ ptr <- allocate_arrow_schema()
+ field$export_to_c(ptr)
+
+ # then import it and check that the roundtripped value is the same
+ circle <- Field$import_from_c(ptr)
+ expect_equal(circle, field)
+
+ # must clean up the pointer or we leak
+ delete_arrow_schema(ptr)
+})
diff --git a/src/arrow/r/tests/testthat/test-filesystem.R b/src/arrow/r/tests/testthat/test-filesystem.R
new file mode 100644
index 000000000..5ee096f13
--- /dev/null
+++ b/src/arrow/r/tests/testthat/test-filesystem.R
@@ -0,0 +1,178 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+test_that("LocalFilesystem", {
+ fs <- LocalFileSystem$create()
+ expect_identical(fs$type_name, "local")
+ DESCRIPTION <- system.file("DESCRIPTION", package = "arrow")
+ info <- fs$GetFileInfo(DESCRIPTION)[[1]]
+ expect_equal(info$base_name(), "DESCRIPTION")
+ expect_equal(info$extension(), "")
+ expect_equal(info$type, FileType$File)
+ expect_equal(info$path, DESCRIPTION)
+  info2 <- file.info(DESCRIPTION)
+
+  expect_equal(info$size, info2$size)
+  # This fails due to a subsecond difference on Appveyor on Windows with R 3.3 only,
+  # so add a greater tolerance to allow for that
+  expect_equal(info$mtime, info2$mtime, tolerance = 1)
+
+ tf <- tempfile(fileext = ".txt")
+ fs$CopyFile(DESCRIPTION, tf)
+ info <- fs$GetFileInfo(tf)[[1]]
+ expect_equal(info$extension(), "txt")
+  expect_equal(info$size, info2$size)
+ expect_equal(readLines(DESCRIPTION), readLines(tf))
+
+ tf2 <- tempfile(fileext = ".txt")
+ fs$Move(tf, tf2)
+ infos <- fs$GetFileInfo(c(tf, tf2, dirname(tf)))
+ expect_equal(infos[[1]]$type, FileType$NotFound)
+ expect_equal(infos[[2]]$type, FileType$File)
+ expect_equal(infos[[3]]$type, FileType$Directory)
+
+ fs$DeleteFile(tf2)
+ expect_equal(fs$GetFileInfo(tf2)[[1L]]$type, FileType$NotFound)
+ expect_true(!file.exists(tf2))
+
+ expect_equal(fs$GetFileInfo(tf)[[1L]]$type, FileType$NotFound)
+ expect_true(!file.exists(tf))
+
+ td <- tempfile()
+ fs$CreateDir(td)
+ expect_equal(fs$GetFileInfo(td)[[1L]]$type, FileType$Directory)
+ fs$CopyFile(DESCRIPTION, file.path(td, "DESCRIPTION"))
+ fs$DeleteDirContents(td)
+ expect_equal(length(dir(td)), 0L)
+ fs$DeleteDir(td)
+ expect_equal(fs$GetFileInfo(td)[[1L]]$type, FileType$NotFound)
+
+ tf3 <- tempfile()
+ os <- fs$OpenOutputStream(path = tf3)
+ bytes <- as.raw(1:40)
+ os$write(bytes)
+ os$close()
+
+ is <- fs$OpenInputStream(tf3)
+ buf <- is$Read(40)
+ expect_equal(buf$data(), bytes)
+ is$close()
+})
+
+test_that("SubTreeFilesystem", {
+ dir.create(td <- tempfile())
+ DESCRIPTION <- system.file("DESCRIPTION", package = "arrow")
+ file.copy(DESCRIPTION, file.path(td, "DESCRIPTION"))
+
+ st_fs <- SubTreeFileSystem$create(td)
+ expect_r6_class(st_fs, "SubTreeFileSystem")
+ expect_r6_class(st_fs, "FileSystem")
+ expect_r6_class(st_fs$base_fs, "LocalFileSystem")
+ expect_identical(
+ capture.output(print(st_fs)),
+ paste0("SubTreeFileSystem: ", "file://", st_fs$base_path)
+ )
+
+ # FIXME windows has a trailing slash for one but not the other
+ # expect_identical(normalizePath(st_fs$base_path), normalizePath(td)) # nolint
+
+ st_fs$CreateDir("test")
+ st_fs$CopyFile("DESCRIPTION", "DESC.txt")
+ infos <- st_fs$GetFileInfo(c("DESCRIPTION", "test", "nope", "DESC.txt"))
+ expect_equal(infos[[1L]]$type, FileType$File)
+ expect_equal(infos[[2L]]$type, FileType$Directory)
+ expect_equal(infos[[3L]]$type, FileType$NotFound)
+ expect_equal(infos[[4L]]$type, FileType$File)
+ expect_equal(infos[[4L]]$extension(), "txt")
+
+ local_fs <- LocalFileSystem$create()
+ local_fs$DeleteDirContents(td)
+ infos <- st_fs$GetFileInfo(c("DESCRIPTION", "test", "nope", "DESC.txt"))
+ expect_equal(infos[[1L]]$type, FileType$NotFound)
+ expect_equal(infos[[2L]]$type, FileType$NotFound)
+ expect_equal(infos[[3L]]$type, FileType$NotFound)
+ expect_equal(infos[[4L]]$type, FileType$NotFound)
+})
+
+test_that("LocalFileSystem + Selector", {
+ fs <- LocalFileSystem$create()
+ dir.create(td <- tempfile())
+ writeLines("blah blah", file.path(td, "one.txt"))
+ writeLines("yada yada", file.path(td, "two.txt"))
+ dir.create(file.path(td, "dir"))
+ writeLines("...", file.path(td, "dir", "three.txt"))
+
+ selector <- FileSelector$create(td, recursive = TRUE)
+ infos <- fs$GetFileInfo(selector)
+ expect_equal(length(infos), 4L)
+ types <- sapply(infos, function(.x) .x$type)
+ expect_equal(sum(types == FileType$File), 3L)
+ expect_equal(sum(types == FileType$Directory), 1L)
+
+ selector <- FileSelector$create(td, recursive = FALSE)
+ infos <- fs$GetFileInfo(selector)
+ expect_equal(length(infos), 3L)
+ types <- sapply(infos, function(.x) .x$type)
+ expect_equal(sum(types == FileType$File), 2L)
+ expect_equal(sum(types == FileType$Directory), 1L)
+})
+
+test_that("FileSystem$from_uri", {
+ skip_on_cran()
+ skip_if_not_available("s3")
+ skip_if_offline()
+ fs_and_path <- FileSystem$from_uri("s3://ursa-labs-taxi-data")
+ expect_r6_class(fs_and_path$fs, "S3FileSystem")
+ expect_identical(fs_and_path$fs$region, "us-east-2")
+})
+
+test_that("SubTreeFileSystem$create() with URI", {
+ skip_on_cran()
+ skip_if_not_available("s3")
+ skip_if_offline()
+ fs <- SubTreeFileSystem$create("s3://ursa-labs-taxi-data")
+ expect_r6_class(fs, "SubTreeFileSystem")
+ expect_identical(
+ capture.output(print(fs)),
+ "SubTreeFileSystem: s3://ursa-labs-taxi-data/"
+ )
+})
+
+test_that("S3FileSystem", {
+ skip_on_cran()
+ skip_if_not_available("s3")
+ skip_if_offline()
+ s3fs <- S3FileSystem$create()
+ expect_r6_class(s3fs, "S3FileSystem")
+})
+
+test_that("s3_bucket", {
+ skip_on_cran()
+ skip_if_not_available("s3")
+ skip_if_offline()
+ bucket <- s3_bucket("ursa-labs-r-test")
+ expect_r6_class(bucket, "SubTreeFileSystem")
+ expect_r6_class(bucket$base_fs, "S3FileSystem")
+ expect_identical(bucket$region, "us-west-2")
+ expect_identical(
+ capture.output(print(bucket)),
+ "SubTreeFileSystem: s3://ursa-labs-r-test/"
+ )
+ skip_on_os("windows") # FIXME
+ expect_identical(bucket$base_path, "ursa-labs-r-test/")
+})
diff --git a/src/arrow/r/tests/testthat/test-install-arrow.R b/src/arrow/r/tests/testthat/test-install-arrow.R
new file mode 100644
index 000000000..977f9d77d
--- /dev/null
+++ b/src/arrow/r/tests/testthat/test-install-arrow.R
@@ -0,0 +1,37 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+r_only({
+ test_that("arrow_repos", {
+ cran <- "https://cloud.r-project.org/"
+ ours <- "https://dl.example.com/ursalabs/fake_repo"
+ other <- "https://cran.fiocruz.br/"
+
+ opts <- list(
+    repos = c(CRAN = "@CRAN@"), # Restore default
+ arrow.dev_repo = ours
+ )
+ withr::with_options(opts, {
+ expect_identical(arrow_repos(), cran)
+ expect_identical(arrow_repos(c(cran, ours)), cran)
+ expect_identical(arrow_repos(c(ours, other)), other)
+ expect_identical(arrow_repos(nightly = TRUE), c(ours, cran))
+ expect_identical(arrow_repos(c(cran, ours), nightly = TRUE), c(ours, cran))
+ expect_identical(arrow_repos(c(ours, other), nightly = TRUE), c(ours, other))
+ })
+ })
+})
diff --git a/src/arrow/r/tests/testthat/test-json.R b/src/arrow/r/tests/testthat/test-json.R
new file mode 100644
index 000000000..825511b97
--- /dev/null
+++ b/src/arrow/r/tests/testthat/test-json.R
@@ -0,0 +1,255 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+skip_if_not_available("json")
+
+test_that("Can read json file with scalars columns (ARROW-5503)", {
+ tf <- tempfile()
+ on.exit(unlink(tf))
+ writeLines('
+ { "hello": 3.5, "world": false, "yo": "thing" }
+ { "hello": 3.25, "world": null }
+ { "hello": 3.125, "world": null, "yo": "\u5fcd" }
+ { "hello": 0.0, "world": true, "yo": null }
+ ', tf, useBytes = TRUE)
+
+ tab1 <- read_json_arrow(tf, as_data_frame = FALSE)
+ tab2 <- read_json_arrow(mmap_open(tf), as_data_frame = FALSE)
+ tab3 <- read_json_arrow(ReadableFile$create(tf), as_data_frame = FALSE)
+
+ expect_equal(tab1, tab2)
+ expect_equal(tab1, tab3)
+
+ expect_equal(
+ tab1$schema,
+ schema(hello = float64(), world = boolean(), yo = utf8())
+ )
+ tib <- as.data.frame(tab1)
+ expect_equal(tib$hello, c(3.5, 3.25, 3.125, 0))
+ expect_equal(tib$world, c(FALSE, NA, NA, TRUE))
+ expect_equal(tib$yo, c("thing", NA, "\u5fcd", NA))
+})
+
+test_that("read_json_arrow() converts to tibble", {
+ tf <- tempfile()
+ on.exit(unlink(tf))
+ writeLines('
+ { "hello": 3.5, "world": false, "yo": "thing" }
+ { "hello": 3.25, "world": null }
+ { "hello": 3.125, "world": null, "yo": "\u5fcd" }
+ { "hello": 0.0, "world": true, "yo": null }
+ ', tf, useBytes = TRUE)
+
+ tab1 <- read_json_arrow(tf)
+ tab2 <- read_json_arrow(mmap_open(tf))
+ tab3 <- read_json_arrow(ReadableFile$create(tf))
+
+ expect_s3_class(tab1, "tbl_df")
+ expect_s3_class(tab2, "tbl_df")
+ expect_s3_class(tab3, "tbl_df")
+
+ expect_equal(tab1, tab2)
+ expect_equal(tab1, tab3)
+
+ expect_equal(tab1$hello, c(3.5, 3.25, 3.125, 0))
+ expect_equal(tab1$world, c(FALSE, NA, NA, TRUE))
+ expect_equal(tab1$yo, c("thing", NA, "\u5fcd", NA))
+})
+
+test_that("read_json_arrow() supports col_select=", {
+ tf <- tempfile()
+ writeLines('
+ { "hello": 3.5, "world": false, "yo": "thing" }
+ { "hello": 3.25, "world": null }
+ { "hello": 3.125, "world": null, "yo": "\u5fcd" }
+ { "hello": 0.0, "world": true, "yo": null }
+ ', tf)
+
+ tab1 <- read_json_arrow(tf, col_select = c(hello, world))
+ expect_equal(names(tab1), c("hello", "world"))
+
+ tab2 <- read_json_arrow(tf, col_select = 1:2)
+ expect_equal(names(tab2), c("hello", "world"))
+})
+
+test_that("read_json_arrow(schema=) with empty schema", {
+ tf <- tempfile()
+ writeLines('
+ { "hello": 3.5, "world": 2, "third_col": 99}
+ { "hello": 3.25, "world": 5, "third_col": 98}
+ { "hello": 3.125, "world": 8, "third_col": 97 }
+ { "hello": 0.0, "world": 10, "third_col": 96}
+ ', tf)
+
+ tab1 <- read_json_arrow(tf, schema = schema())
+
+ expect_identical(
+ tab1,
+ tibble::tibble(
+ hello = c(3.5, 3.25, 3.125, 0),
+ world = c(2L, 5L, 8L, 10L),
+ third_col = c(99L, 98L, 97L, 96L)
+ )
+ )
+})
+
+test_that("read_json_arrow(schema=) with partial schema", {
+ tf <- tempfile()
+ writeLines('
+ { "hello": 3.5, "world": 2, "third_col": 99}
+ { "hello": 3.25, "world": 5, "third_col": 98}
+ { "hello": 3.125, "world": 8, "third_col": 97 }
+ { "hello": 0.0, "world": 10, "third_col": 96}
+ ', tf)
+
+ tab1 <- read_json_arrow(tf, schema = schema(third_col = float64(), world = float64()))
+
+ expect_identical(
+ tab1,
+ tibble::tibble(
+ third_col = c(99, 98, 97, 96),
+ world = c(2, 5, 8, 10),
+ hello = c(3.5, 3.25, 3.125, 0)
+ )
+ )
+
+ tf2 <- tempfile()
+ writeLines('
+ { "hello": 3.5, "world": 2, "third_col": "99"}
+ { "hello": 3.25, "world": 5, "third_col": "98"}
+ { "hello": 3.125, "world": 8, "third_col": "97"}
+ ', tf2)
+
+ tab2 <- read_json_arrow(tf2, schema = schema(third_col = string(), world = float64()))
+
+ expect_identical(
+ tab2,
+ tibble::tibble(
+ third_col = c("99", "98", "97"),
+ world = c(2, 5, 8),
+ hello = c(3.5, 3.25, 3.125)
+ )
+ )
+})
+
+test_that("read_json_arrow(schema=) with full schema", {
+ tf <- tempfile()
+ writeLines('
+ { "hello": 3.5, "world": 2, "third_col": 99}
+ { "hello": 3.25, "world": 5, "third_col": 98}
+ { "hello": 3.125, "world": 8, "third_col": 97}
+ { "hello": 0.0, "world": 10, "third_col": 96}
+ ', tf)
+
+ tab1 <- read_json_arrow(
+ tf,
+ schema = schema(
+ hello = float64(),
+ third_col = float64(),
+ world = float64()
+ )
+ )
+
+ expect_identical(
+ tab1,
+ tibble::tibble(
+ hello = c(3.5, 3.25, 3.125, 0),
+ third_col = c(99, 98, 97, 96),
+ world = c(2, 5, 8, 10)
+ )
+ )
+})
+
+test_that("Can read json file with nested columns (ARROW-5503)", {
+ tf <- tempfile()
+ on.exit(unlink(tf))
+ writeLines('
+ { "arr": [1.0, 2.0, 3.0], "nuf": {} }
+ { "arr": [2.0], "nuf": null }
+ { "arr": [], "nuf": { "ps": 78.0, "hello": "hi" } }
+ { "arr": null, "nuf": { "ps": 90.0, "hello": "bonjour" } }
+ { "arr": [5.0], "nuf": { "hello": "ciao" } }
+ { "arr": [5.0, 6.0], "nuf": { "ps": 19 } }
+ ', tf)
+
+ tab1 <- read_json_arrow(tf, as_data_frame = FALSE)
+ tab2 <- read_json_arrow(mmap_open(tf), as_data_frame = FALSE)
+ tab3 <- read_json_arrow(ReadableFile$create(tf), as_data_frame = FALSE)
+
+ expect_equal(tab1, tab2)
+ expect_equal(tab1, tab3)
+
+ expect_equal(
+ tab1$schema,
+ schema(
+ arr = list_of(float64()),
+ nuf = struct(ps = float64(), hello = utf8())
+ )
+ )
+
+ struct_array <- tab1$column(1)$chunk(0)
+ ps <- Array$create(c(NA, NA, 78, 90, NA, 19))
+ hello <- Array$create(c(NA, NA, "hi", "bonjour", "ciao", NA))
+ expect_equal(struct_array$field(0L), ps)
+ expect_equal(struct_array$GetFieldByName("ps"), ps)
+ struct_cols <- struct_array$Flatten()
+ expect_identical(length(struct_cols), 2L)
+ expect_equal(struct_cols[[1]], ps)
+ expect_equal(struct_cols[[2]], hello)
+ expect_equal(
+ as.vector(struct_array),
+ tibble::tibble(ps = ps$as_vector(), hello = hello$as_vector())
+ )
+
+ list_array_r <- list(
+ c(1, 2, 3),
+ c(2),
+ numeric(),
+ NULL,
+ 5,
+ c(5, 6)
+ )
+ list_array <- tab1$column(0)
+ expect_equal(
+ list_array$as_vector(),
+ list_array_r,
+ ignore_attr = TRUE
+ )
+
+ tib <- as.data.frame(tab1)
+ expect_equal(
+ tib,
+ tibble::tibble(
+ arr = list_array_r,
+ nuf = tibble::tibble(ps = ps$as_vector(), hello = hello$as_vector())
+ ),
+ ignore_attr = TRUE
+ )
+})
+
+test_that("Can read json file with list<struct<T...>> nested columns (ARROW-7740)", {
+ tf <- tempfile()
+ on.exit(unlink(tf))
+ writeLines('
+ {"a":[{"b":1.0},{"b":2.0}]}
+ {"a":[{"b":1.0},{"b":2.0}]}
+ ', tf)
+
+ one <- tibble::tibble(b = c(1, 2))
+ expected <- tibble::tibble(a = c(list(one), list(one)))
+ expect_equal(read_json_arrow(tf), expected, ignore_attr = TRUE)
+})
diff --git a/src/arrow/r/tests/testthat/test-memory-pool.R b/src/arrow/r/tests/testthat/test-memory-pool.R
new file mode 100644
index 000000000..0aa18aadc
--- /dev/null
+++ b/src/arrow/r/tests/testthat/test-memory-pool.R
@@ -0,0 +1,26 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+test_that("default_memory_pool and its attributes", {
+ pool <- default_memory_pool()
+  # Not an integer because it can exceed 2 GB, so we cast to double
+ expect_type(pool$bytes_allocated, "double")
+ expect_type(pool$max_memory, "double")
+ expect_true(pool$backend_name %in% c("system", "jemalloc", "mimalloc"))
+
+ expect_true(all(supported_memory_backends() %in% c("system", "jemalloc", "mimalloc")))
+})
diff --git a/src/arrow/r/tests/testthat/test-message-reader.R b/src/arrow/r/tests/testthat/test-message-reader.R
new file mode 100644
index 000000000..44f3fe4f7
--- /dev/null
+++ b/src/arrow/r/tests/testthat/test-message-reader.R
@@ -0,0 +1,85 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+test_that("MessageReader can be created from raw vectors", {
+ batch <- record_batch(x = 1:10)
+ bytes <- batch$serialize()
+
+ reader <- MessageReader$create(bytes)
+
+ message <- reader$ReadNextMessage()
+ expect_r6_class(message, "Message")
+ expect_equal(message$type, MessageType$RECORD_BATCH)
+ expect_r6_class(message$body, "Buffer")
+ expect_r6_class(message$metadata, "Buffer")
+
+ message <- reader$ReadNextMessage()
+ expect_null(message)
+
+ schema <- schema(x = int32())
+ bytes <- schema$serialize()
+
+ reader <- MessageReader$create(bytes)
+
+ message <- reader$ReadNextMessage()
+ expect_r6_class(message, "Message")
+ expect_equal(message$type, MessageType$SCHEMA)
+ expect_r6_class(message$body, "Buffer")
+ expect_r6_class(message$metadata, "Buffer")
+
+ message <- reader$ReadNextMessage()
+ expect_null(message)
+})
+
+test_that("MessageReader can be created from input stream", {
+ batch <- record_batch(x = 1:10)
+ bytes <- batch$serialize()
+
+ stream <- BufferReader$create(bytes)
+ expect_r6_class(stream, "BufferReader")
+
+ reader <- MessageReader$create(stream)
+ expect_r6_class(reader, "MessageReader")
+
+ message <- reader$ReadNextMessage()
+ expect_r6_class(message, "Message")
+ expect_equal(message$type, MessageType$RECORD_BATCH)
+ expect_r6_class(message$body, "Buffer")
+ expect_r6_class(message$metadata, "Buffer")
+
+ message <- reader$ReadNextMessage()
+ expect_null(message)
+
+ schema <- schema(x = int32())
+ bytes <- schema$serialize()
+
+ stream <- BufferReader$create(bytes)
+ expect_r6_class(stream, "BufferReader")
+
+ reader <- MessageReader$create(stream)
+ expect_r6_class(reader, "MessageReader")
+
+ message <- reader$ReadNextMessage()
+ expect_r6_class(message, "Message")
+ expect_equal(message$type, MessageType$SCHEMA)
+ expect_r6_class(message$body, "Buffer")
+ expect_r6_class(message$metadata, "Buffer")
+
+ message <- reader$ReadNextMessage()
+ expect_null(message)
+})
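+
+# Usage sketch (illustrative): a MessageReader is typically drained in a loop
+# until ReadNextMessage() returns NULL, which the tests above check pairwise:
+#   reader <- MessageReader$create(record_batch(x = 1:10)$serialize())
+#   while (!is.null(msg <- reader$ReadNextMessage())) print(msg$type)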
diff --git a/src/arrow/r/tests/testthat/test-message.R b/src/arrow/r/tests/testthat/test-message.R
new file mode 100644
index 000000000..c9ee4cb72
--- /dev/null
+++ b/src/arrow/r/tests/testthat/test-message.R
@@ -0,0 +1,63 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+test_that("read_message can read from input stream", {
+ batch <- record_batch(x = 1:10)
+ bytes <- batch$serialize()
+ stream <- BufferReader$create(bytes)
+
+ message <- read_message(stream)
+ expect_r6_class(message, "Message")
+ expect_equal(message$type, MessageType$RECORD_BATCH)
+ expect_r6_class(message$body, "Buffer")
+ expect_r6_class(message$metadata, "Buffer")
+
+  # The stream is exhausted after the single record batch message
+  expect_null(read_message(stream))
+})
+
+test_that("read_message() can read Schema messages", {
+ bytes <- schema(x = int32())$serialize()
+ stream <- BufferReader$create(bytes)
+ message <- read_message(stream)
+
+ expect_r6_class(message, "Message")
+ expect_equal(message$type, MessageType$SCHEMA)
+ expect_r6_class(message$body, "Buffer")
+ expect_r6_class(message$metadata, "Buffer")
+
+  # The stream is exhausted after the single schema message
+  expect_null(read_message(stream))
+})
+
+test_that("read_message() can handle raw vectors", {
+ batch <- record_batch(x = 1:10)
+ bytes <- batch$serialize()
+ stream <- BufferReader$create(bytes)
+
+ message_stream <- read_message(stream)
+ message_raw <- read_message(bytes)
+ expect_equal(message_stream, message_raw)
+
+ bytes <- schema(x = int32())$serialize()
+ stream <- BufferReader$create(bytes)
+ message_stream <- read_message(stream)
+ message_raw <- read_message(bytes)
+
+ expect_equal(message_stream, message_raw)
+})
diff --git a/src/arrow/r/tests/testthat/test-metadata.R b/src/arrow/r/tests/testthat/test-metadata.R
new file mode 100644
index 000000000..4c4d8a767
--- /dev/null
+++ b/src/arrow/r/tests/testthat/test-metadata.R
@@ -0,0 +1,369 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+test_that("Schema metadata", {
+ s <- schema(b = double())
+ expect_equal(s$metadata, empty_named_list())
+ expect_false(s$HasMetadata)
+ s$metadata <- list(test = TRUE)
+ expect_identical(s$metadata, list(test = "TRUE"))
+ expect_true(s$HasMetadata)
+ s$metadata$foo <- 42
+ expect_identical(s$metadata, list(test = "TRUE", foo = "42"))
+ expect_true(s$HasMetadata)
+ s$metadata$foo <- NULL
+ expect_identical(s$metadata, list(test = "TRUE"))
+ expect_true(s$HasMetadata)
+ s$metadata <- NULL
+ expect_equal(s$metadata, empty_named_list())
+ expect_false(s$HasMetadata)
+ expect_error(
+ s$metadata <- 4,
+ "Key-value metadata must be a named list or character vector"
+ )
+})
+
+test_that("Table metadata", {
+ tab <- Table$create(x = 1:2, y = c("a", "b"))
+ expect_equal(tab$metadata, empty_named_list())
+ tab$metadata <- list(test = TRUE)
+ expect_identical(tab$metadata, list(test = "TRUE"))
+ tab$metadata$foo <- 42
+ expect_identical(tab$metadata, list(test = "TRUE", foo = "42"))
+ tab$metadata$foo <- NULL
+ expect_identical(tab$metadata, list(test = "TRUE"))
+ tab$metadata <- NULL
+ expect_equal(tab$metadata, empty_named_list())
+})
+
+test_that("Table R metadata", {
+ tab <- Table$create(example_with_metadata)
+ expect_output(print(tab$metadata), "arrow_r_metadata")
+ expect_identical(as.data.frame(tab), example_with_metadata)
+})
+
+test_that("R metadata is not stored for types that map to Arrow types (factor, Date, etc.)", {
+ tab <- Table$create(example_data[1:6])
+ expect_null(tab$metadata$r)
+
+ expect_null(Table$create(example_with_times[1:3])$metadata$r)
+})
+
+test_that("classes are not stored for arrow_binary/arrow_large_binary/arrow_fixed_size_binary (ARROW-14140)", {
+ raws <- charToRaw("bonjour")
+
+ binary <- Array$create(list(raws), binary())
+ large_binary <- Array$create(list(raws), large_binary())
+ fixed_size_binary <- Array$create(list(raws), fixed_size_binary(7L))
+
+ expect_null(RecordBatch$create(b = binary)$metadata$r)
+ expect_null(RecordBatch$create(b = large_binary)$metadata$r)
+ expect_null(RecordBatch$create(b = fixed_size_binary)$metadata$r)
+
+ expect_null(Table$create(b = binary)$metadata$r)
+ expect_null(Table$create(b = large_binary)$metadata$r)
+ expect_null(Table$create(b = fixed_size_binary)$metadata$r)
+})
+
+test_that("Garbage R metadata doesn't break things", {
+ tab <- Table$create(example_data[1:6])
+ tab$metadata$r <- "garbage"
+ expect_warning(
+ expect_identical(as.data.frame(tab), example_data[1:6]),
+ "Invalid metadata$r",
+ fixed = TRUE
+ )
+ # serialize data like .serialize_arrow_r_metadata does, but don't call that
+ # directly since it checks to ensure that the data is a list
+ tab$metadata$r <- rawToChar(serialize("garbage", NULL, ascii = TRUE))
+ expect_warning(
+ expect_identical(as.data.frame(tab), example_data[1:6]),
+ "Invalid metadata$r",
+ fixed = TRUE
+ )
+})
+
+test_that("Metadata serialization compression", {
+ # attributes that (when serialized) are just under 100kb are not compressed,
+ # and simply serialized
+ strings <- as.list(rep(make_string_of_size(1), 98))
+ small <- .serialize_arrow_r_metadata(strings)
+ expect_equal(
+ object.size(small),
+ object.size(rawToChar(serialize(strings, NULL, ascii = TRUE)))
+ )
+
+ # Large strings will be compressed
+ large_strings <- as.list(rep(make_string_of_size(1), 100))
+ large <- .serialize_arrow_r_metadata(large_strings)
+ expect_lt(
+ object.size(large),
+ object.size(rawToChar(serialize(large_strings, NULL, ascii = TRUE)))
+ )
+ # and this compression ends up being smaller than even the "small" strings
+ expect_lt(object.size(large), object.size(small))
+
+  # However, strings for which compression is not effective end up no worse
+  # than serialization alone
+ large_few_strings <- as.list(rep(make_random_string_of_size(50), 2))
+ large_few <- .serialize_arrow_r_metadata(large_few_strings)
+ expect_equal(
+ object.size(large_few),
+ object.size(rawToChar(serialize(large_few_strings, NULL, ascii = TRUE)))
+ )
+
+ # But we can disable compression
+ op <- options(arrow.compress_metadata = FALSE)
+ on.exit(options(op))
+
+ large_strings <- as.list(rep(make_string_of_size(1), 100))
+ large <- .serialize_arrow_r_metadata(large_strings)
+ expect_equal(
+ object.size(large),
+ object.size(rawToChar(serialize(large_strings, NULL, ascii = TRUE)))
+ )
+})
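+
+# Round-trip sketch (illustrative; these are internal helpers and may change):
+# .unserialize_arrow_r_metadata() inverts .serialize_arrow_r_metadata(),
+# whether or not the payload was compressed:
+#   x <- list(test = TRUE)
+#   identical(.unserialize_arrow_r_metadata(.serialize_arrow_r_metadata(x)), x)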
+
+test_that("RecordBatch metadata", {
+ rb <- RecordBatch$create(x = 1:2, y = c("a", "b"))
+ expect_equal(rb$metadata, empty_named_list())
+ rb$metadata <- list(test = TRUE)
+ expect_identical(rb$metadata, list(test = "TRUE"))
+ rb$metadata$foo <- 42
+ expect_identical(rb$metadata, list(test = "TRUE", foo = "42"))
+ rb$metadata$foo <- NULL
+ expect_identical(rb$metadata, list(test = "TRUE"))
+ rb$metadata <- NULL
+ expect_equal(rb$metadata, empty_named_list())
+})
+
+test_that("RecordBatch R metadata", {
+ expect_identical(as.data.frame(record_batch(example_with_metadata)), example_with_metadata)
+})
+
+test_that("R metadata roundtrip via parquet", {
+ skip_if_not_available("parquet")
+ tf <- tempfile()
+ on.exit(unlink(tf))
+
+ write_parquet(example_with_metadata, tf)
+ expect_identical(read_parquet(tf), example_with_metadata)
+})
+
+test_that("R metadata roundtrip via feather", {
+ tf <- tempfile()
+ on.exit(unlink(tf))
+
+ write_feather(example_with_metadata, tf)
+ expect_identical(read_feather(tf), example_with_metadata)
+})
+
+test_that("haven types roundtrip via feather", {
+ tf <- tempfile()
+ on.exit(unlink(tf))
+
+ write_feather(haven_data, tf)
+ expect_identical(read_feather(tf), haven_data)
+})
+
+test_that("Date/time type roundtrip", {
+ rb <- record_batch(example_with_times)
+ expect_r6_class(rb$schema$posixlt$type, "StructType")
+ expect_identical(as.data.frame(rb), example_with_times)
+})
+
+test_that("metadata keeps attribute of top level data frame", {
+ df <- structure(data.frame(x = 1, y = 2), foo = "bar")
+ tab <- Table$create(df)
+ expect_identical(attr(as.data.frame(tab), "foo"), "bar")
+ expect_identical(as.data.frame(tab), df)
+})
+
+
+test_that("metadata drops readr's problems attribute", {
+ readr_like <- tibble::tibble(
+ dbl = 1.1,
+ not_here = NA_character_
+ )
+ attributes(readr_like) <- append(
+ attributes(readr_like),
+ list(problems = tibble::tibble(
+ row = 1L,
+ col = NA_character_,
+ expected = "2 columns",
+ actual = "1 columns",
+ file = "'test'"
+ ))
+ )
+
+ tab <- Table$create(readr_like)
+ expect_null(attr(as.data.frame(tab), "problems"))
+})
+
+test_that("Row-level metadata (does not by default) roundtrip", {
+ # First tracked at ARROW-10386, though it was later determined that row-level
+ # metadata should be handled separately ARROW-14020, ARROW-12542
+ df <- data.frame(x = I(list(structure(1, foo = "bar"), structure(2, baz = "qux"))))
+ tab <- Table$create(df)
+ r_metadata <- tab$r_metadata
+ expect_type(r_metadata, "list")
+ expect_null(r_metadata$columns$x$columns)
+
+ # But we can re-enable this / read data that has already been written with
+ # row-level metadata
+ withr::with_options(
+ list("arrow.preserve_row_level_metadata" = TRUE), {
+ tab <- Table$create(df)
+ expect_identical(attr(as.data.frame(tab)$x[[1]], "foo"), "bar")
+ expect_identical(attr(as.data.frame(tab)$x[[2]], "baz"), "qux")
+ })
+})
+
+
+test_that("Row-level metadata (does not) roundtrip in datasets", {
+ # First tracked at ARROW-10386, though it was later determined that row-level
+ # metadata should be handled separately ARROW-14020, ARROW-12542
+ skip_if_not_available("dataset")
+ skip_if_not_available("parquet")
+
+ library(dplyr, warn.conflicts = FALSE)
+
+ df <- tibble::tibble(
+ metadata = list(
+ structure(1, my_value_as_attr = 1),
+ structure(2, my_value_as_attr = 2),
+ structure(3, my_value_as_attr = 3),
+ structure(4, my_value_as_attr = 3)
+ ),
+ int = 1L:4L,
+ part = c(1, 3, 2, 1)
+ )
+
+ dst_dir <- make_temp_dir()
+
+ withr::with_options(
+ list("arrow.preserve_row_level_metadata" = TRUE), {
+ expect_warning(
+ write_dataset(df, dst_dir, partitioning = "part"),
+ "Row-level metadata is not compatible with datasets and will be discarded"
+ )
+
+ # Reset directory as previous write will have created some files and the default
+ # behavior is to error on existing
+ dst_dir <- make_temp_dir()
+ # but we need to write a dataset with row-level metadata to make sure when
+ # reading ones that have been written with them we warn appropriately
+ fake_func_name <- write_dataset
+ fake_func_name(df, dst_dir, partitioning = "part")
+
+ ds <- open_dataset(dst_dir)
+ expect_warning(
+ df_from_ds <- collect(ds),
+ "Row-level metadata is not compatible with this operation and has been ignored"
+ )
+ expect_equal(
+ arrange(df_from_ds, int),
+ arrange(df, int),
+ ignore_attr = TRUE
+ )
+
+ # however there is *no* warning if we don't select the metadata column
+ expect_warning(
+ df_from_ds <- ds %>% select(int) %>% collect(),
+ NA
+ )
+ })
+})
+
+test_that("When we encounter SF cols, we warn", {
+ df <- data.frame(x = I(list(structure(1, foo = "bar"), structure(2, baz = "qux"))))
+ class(df$x) <- c("sfc_MULTIPOLYGON", "sfc", "list")
+
+ expect_warning(
+ tab <- Table$create(df),
+ "One of the columns given appears to be an"
+ )
+
+  # but the table was created fine, just without the row-level metadata
+ r_metadata <- .unserialize_arrow_r_metadata(tab$metadata$r)
+ expect_null(r_metadata$columns$x$columns)
+
+ # But we can re-enable this / read data that has already been written with
+ # row-level metadata without a warning
+ withr::with_options(
+ list("arrow.preserve_row_level_metadata" = TRUE), {
+ expect_warning(tab <- Table$create(df), NA)
+ expect_identical(attr(as.data.frame(tab)$x[[1]], "foo"), "bar")
+ expect_identical(attr(as.data.frame(tab)$x[[2]], "baz"), "qux")
+ })
+})
+
+test_that("dplyr with metadata", {
+ skip_if_not_available("dataset")
+
+ compare_dplyr_binding(
+ .input %>%
+ collect(),
+ example_with_metadata
+ )
+ compare_dplyr_binding(
+ .input %>%
+ select(a) %>%
+ collect(),
+ example_with_metadata
+ )
+ compare_dplyr_binding(
+ .input %>%
+ mutate(z = b * 4) %>%
+ select(z, a) %>%
+ collect(),
+ example_with_metadata
+ )
+ compare_dplyr_binding(
+ .input %>%
+ mutate(z = nchar(a)) %>%
+ select(z, a) %>%
+ collect(),
+ example_with_metadata
+ )
+ # dplyr drops top-level attributes if you do summarize, though attributes
+ # of grouping columns appear to come through
+ compare_dplyr_binding(
+ .input %>%
+ group_by(a) %>%
+ summarize(n()) %>%
+ collect(),
+ example_with_metadata
+ )
+ # Same name in output but different data, so the column metadata shouldn't
+ # carry through
+ compare_dplyr_binding(
+ .input %>%
+ mutate(a = nchar(a)) %>%
+ select(a) %>%
+ collect(),
+ example_with_metadata
+ )
+})
+
+test_that("grouped_df metadata is recorded (efficiently)", {
+ grouped <- group_by(tibble(a = 1:2, b = 3:4), a)
+ expect_s3_class(grouped, "grouped_df")
+ grouped_tab <- Table$create(grouped)
+ expect_r6_class(grouped_tab, "Table")
+ expect_equal(grouped_tab$r_metadata$attributes$.group_vars, "a")
+})
diff --git a/src/arrow/r/tests/testthat/test-na-omit.R b/src/arrow/r/tests/testthat/test-na-omit.R
new file mode 100644
index 000000000..fafebb4ff
--- /dev/null
+++ b/src/arrow/r/tests/testthat/test-na-omit.R
@@ -0,0 +1,94 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+data_no_na <- 2:10
+data_na <- c(data_no_na, NA_real_)
+
+test_that("na.fail on Scalar", {
+ scalar_na <- Scalar$create(NA)
+ scalar_one <- Scalar$create(1)
+ expect_as_vector(na.fail(scalar_one), 1)
+ expect_error(na.fail(scalar_na), "missing values in object")
+})
+
+test_that("na.omit on Array and ChunkedArray", {
+ compare_expression(na.omit(.input), data_no_na)
+ compare_expression(na.omit(.input), data_na, ignore_attr = TRUE)
+})
+
+test_that("na.exclude on Array and ChunkedArray", {
+ compare_expression(na.exclude(.input), data_no_na)
+ compare_expression(na.exclude(.input), data_na, ignore_attr = TRUE)
+})
+
+test_that("na.fail on Array and ChunkedArray", {
+ compare_expression(na.fail(.input), data_no_na, ignore_attr = TRUE)
+ compare_expression_error(na.fail(.input), data_na)
+})
+
+test_that("na.fail on Scalar", {
+ scalar_one <- Scalar$create(1)
+ expect_error(na.fail(scalar_na), regexp = "missing values in object")
+ expect_as_vector(na.fail(scalar_one), na.fail(1))
+})
+
+test_that("na.omit on Table", {
+ tbl <- Table$create(example_data)
+ expect_equal(
+ as.data.frame(na.omit(tbl)),
+ na.omit(example_data),
+ # We don't include an attribute with the rows omitted
+ ignore_attr = "na.action"
+ )
+})
+
+test_that("na.exclude on Table", {
+ tbl <- Table$create(example_data)
+ expect_equal(
+ as.data.frame(na.exclude(tbl)),
+ na.exclude(example_data),
+ ignore_attr = "na.action"
+ )
+})
+
+test_that("na.fail on Table", {
+ tbl <- Table$create(example_data)
+ expect_error(na.fail(tbl), "missing values in object")
+})
+
+test_that("na.omit on RecordBatch", {
+ batch <- record_batch(example_data)
+ expect_equal(
+ as.data.frame(na.omit(batch)),
+ na.omit(example_data),
+ ignore_attr = "na.action"
+ )
+})
+
+test_that("na.exclude on RecordBatch", {
+ batch <- record_batch(example_data)
+ expect_equal(
+ as.data.frame(na.exclude(batch)),
+    na.exclude(example_data),
+ ignore_attr = "na.action"
+ )
+})
+
+test_that("na.fail on RecordBatch", {
+ batch <- record_batch(example_data)
+ expect_error(na.fail(batch), "missing values in object")
+})
diff --git a/src/arrow/r/tests/testthat/test-parquet.R b/src/arrow/r/tests/testthat/test-parquet.R
new file mode 100644
index 000000000..55d86b532
--- /dev/null
+++ b/src/arrow/r/tests/testthat/test-parquet.R
@@ -0,0 +1,274 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+skip_if_not_available("parquet")
+
+pq_file <- system.file("v0.7.1.parquet", package = "arrow")
+
+test_that("reading a known Parquet file to tibble", {
+ skip_if_not_available("snappy")
+ df <- read_parquet(pq_file)
+ expect_true(tibble::is_tibble(df))
+ expect_identical(dim(df), c(10L, 11L))
+ # TODO: assert more about the contents
+})
+
+test_that("simple int column roundtrip", {
+ df <- tibble::tibble(x = 1:5)
+  pq_tmp_file <- tempfile() # a .parquet extension is optional here
+
+ write_parquet(df, pq_tmp_file)
+ df_read <- read_parquet(pq_tmp_file)
+ expect_equal(df, df_read)
+ # Make sure file connection is cleaned up
+ expect_error(file.remove(pq_tmp_file), NA)
+ expect_false(file.exists(pq_tmp_file))
+})
+
+test_that("read_parquet() supports col_select", {
+ skip_if_not_available("snappy")
+ df <- read_parquet(pq_file, col_select = c(x, y, z))
+ expect_equal(names(df), c("x", "y", "z"))
+
+ df <- read_parquet(pq_file, col_select = starts_with("c"))
+ expect_equal(names(df), c("carat", "cut", "color", "clarity"))
+})
+
+test_that("read_parquet() with raw data", {
+ skip_if_not_available("snappy")
+ test_raw <- readBin(pq_file, what = "raw", n = 5000)
+ df <- read_parquet(test_raw)
+ expect_identical(dim(df), c(10L, 11L))
+})
+
+test_that("write_parquet() handles various compression= specs", {
+ skip_if_not_available("snappy")
+ tab <- Table$create(x1 = 1:5, x2 = 1:5, y = 1:5)
+
+ expect_parquet_roundtrip(tab, compression = "snappy")
+ expect_parquet_roundtrip(tab, compression = rep("snappy", 3L))
+ expect_parquet_roundtrip(tab, compression = c(x1 = "snappy", x2 = "snappy"))
+})
+
+test_that("write_parquet() handles various compression_level= specs", {
+ skip_if_not_available("gzip")
+ tab <- Table$create(x1 = 1:5, x2 = 1:5, y = 1:5)
+
+ expect_parquet_roundtrip(tab, compression = "gzip", compression_level = 4)
+ expect_parquet_roundtrip(tab, compression = "gzip", compression_level = rep(4L, 3L))
+ expect_parquet_roundtrip(tab, compression = "gzip", compression_level = c(x1 = 5L, x2 = 3L))
+})
+
+test_that("write_parquet() handles various use_dictionary= specs", {
+ tab <- Table$create(x1 = 1:5, x2 = 1:5, y = 1:5)
+
+ expect_parquet_roundtrip(tab, use_dictionary = TRUE)
+ expect_parquet_roundtrip(tab, use_dictionary = c(TRUE, FALSE, TRUE))
+ expect_parquet_roundtrip(tab, use_dictionary = c(x1 = TRUE, x2 = TRUE))
+ expect_error(
+ write_parquet(tab, tempfile(), use_dictionary = c(TRUE, FALSE)),
+ "unsupported use_dictionary= specification"
+ )
+ expect_error(
+ write_parquet(tab, tempfile(), use_dictionary = 12),
+ "is.logical(use_dictionary) is not TRUE",
+ fixed = TRUE
+ )
+})
+
+test_that("write_parquet() handles various write_statistics= specs", {
+ tab <- Table$create(x1 = 1:5, x2 = 1:5, y = 1:5)
+
+ expect_parquet_roundtrip(tab, write_statistics = TRUE)
+ expect_parquet_roundtrip(tab, write_statistics = c(TRUE, FALSE, TRUE))
+ expect_parquet_roundtrip(tab, write_statistics = c(x1 = TRUE, x2 = TRUE))
+})
+
+test_that("write_parquet() accepts RecordBatch too", {
+ batch <- RecordBatch$create(x1 = 1:5, x2 = 1:5, y = 1:5)
+ tab <- parquet_roundtrip(batch)
+ expect_equal(tab, Table$create(batch))
+})
+
+test_that("write_parquet() handles grouped_df", {
+ library(dplyr, warn.conflicts = FALSE)
+ df <- tibble::tibble(a = 1:4, b = 5) %>% group_by(b)
+ # Since `df` is a "grouped_df", this test asserts that we get a grouped_df back
+ expect_parquet_roundtrip(df, as_data_frame = TRUE)
+})
+
+test_that("write_parquet() with invalid input type", {
+ bad_input <- Array$create(1:5)
+ expect_error(
+ write_parquet(bad_input, tempfile()),
+ regexp = "x must be an object of class 'data.frame', 'RecordBatch', or 'Table', not 'Array'."
+ )
+})
+
+test_that("write_parquet() can truncate timestamps", {
+ tab <- Table$create(x1 = as.POSIXct("2020/06/03 18:00:00", tz = "UTC"))
+ expect_type_equal(tab$x1, timestamp("us", "UTC"))
+
+ tf <- tempfile()
+ on.exit(unlink(tf))
+
+ write_parquet(tab, tf, coerce_timestamps = "ms", allow_truncated_timestamps = TRUE)
+ new <- read_parquet(tf, as_data_frame = FALSE)
+ expect_type_equal(new$x1, timestamp("ms", "UTC"))
+ expect_equal(as.data.frame(tab), as.data.frame(new))
+})
+
+test_that("make_valid_version()", {
+ expect_equal(make_valid_version("1.0"), ParquetVersionType$PARQUET_1_0)
+ expect_equal(make_valid_version("2.0"), ParquetVersionType$PARQUET_2_0)
+
+ expect_equal(make_valid_version(1), ParquetVersionType$PARQUET_1_0)
+ expect_equal(make_valid_version(2), ParquetVersionType$PARQUET_2_0)
+
+ expect_equal(make_valid_version(1.0), ParquetVersionType$PARQUET_1_0)
+ expect_equal(make_valid_version(2.0), ParquetVersionType$PARQUET_2_0)
+})
+
+test_that("write_parquet() defaults to snappy compression", {
+ skip_if_not_available("snappy")
+ tmp1 <- tempfile()
+ tmp2 <- tempfile()
+ write_parquet(mtcars, tmp1)
+ write_parquet(mtcars, tmp2, compression = "snappy")
+ expect_equal(file.size(tmp1), file.size(tmp2))
+})
+
+test_that("Factors are preserved when writing/reading from Parquet", {
+ fct <- factor(c("a", "b"), levels = c("c", "a", "b"))
+ ord <- factor(c("a", "b"), levels = c("c", "a", "b"), ordered = TRUE)
+ chr <- c("a", "b")
+ df <- tibble::tibble(fct = fct, ord = ord, chr = chr)
+
+ pq_tmp_file <- tempfile()
+ on.exit(unlink(pq_tmp_file))
+
+ write_parquet(df, pq_tmp_file)
+ df_read <- read_parquet(pq_tmp_file)
+ expect_equal(df, df_read)
+})
+
+test_that("Lists are preserved when writing/reading from Parquet", {
+ bool <- list(logical(0), NA, c(TRUE, FALSE))
+ int <- list(integer(0), NA_integer_, 1:4)
+ num <- list(numeric(0), NA_real_, c(1, 2))
+ char <- list(character(0), NA_character_, c("itsy", "bitsy"))
+ df <- tibble::tibble(bool = bool, int = int, num = num, char = char)
+
+ pq_tmp_file <- tempfile()
+ on.exit(unlink(pq_tmp_file))
+
+ write_parquet(df, pq_tmp_file)
+ df_read <- read_parquet(pq_tmp_file)
+ expect_equal(df, df_read, ignore_attr = TRUE)
+})
+
+test_that("write_parquet() to stream", {
+ df <- tibble::tibble(x = 1:5)
+ tf <- tempfile()
+ con <- FileOutputStream$create(tf)
+ on.exit(unlink(tf))
+ write_parquet(df, con)
+ con$close()
+ expect_equal(read_parquet(tf), df)
+})
+
+test_that("write_parquet() returns its input", {
+ df <- tibble::tibble(x = 1:5)
+ tf <- tempfile()
+ on.exit(unlink(tf))
+ df_out <- write_parquet(df, tf)
+ expect_equal(df, df_out)
+})
+
+test_that("write_parquet() handles version argument", {
+ df <- tibble::tibble(x = 1:5)
+ tf <- tempfile()
+ on.exit(unlink(tf))
+
+ purrr::walk(list("1.0", "2.0", 1.0, 2.0, 1L, 2L), ~ {
+ write_parquet(df, tf, version = .x)
+ expect_identical(read_parquet(tf), df)
+ })
+ purrr::walk(list("3.0", 3.0, 3L, "A"), ~ {
+ expect_error(write_parquet(df, tf, version = .x))
+ })
+})
+
+test_that("ParquetFileWriter raises an error for non-OutputStream sink", {
+ sch <- schema(a = float32())
+ # ARROW-9946
+ expect_error(
+ ParquetFileWriter$create(schema = sch, sink = tempfile()),
+    regexp = "OutputStream"
+ )
+})
+
+test_that("ParquetFileReader $ReadRowGroup(s) methods", {
+ tab <- Table$create(x = 1:100)
+ tf <- tempfile()
+ on.exit(unlink(tf))
+ write_parquet(tab, tf, chunk_size = 10)
+
+ reader <- ParquetFileReader$create(tf)
+ expect_true(reader$ReadRowGroup(0) == Table$create(x = 1:10))
+ expect_true(reader$ReadRowGroup(9) == Table$create(x = 91:100))
+ expect_error(reader$ReadRowGroup(-1), "Some index in row_group_indices")
+ expect_error(reader$ReadRowGroup(111), "Some index in row_group_indices")
+ expect_error(reader$ReadRowGroup(c(1, 2)))
+ expect_error(reader$ReadRowGroup("a"))
+
+ expect_true(reader$ReadRowGroups(c(0, 1)) == Table$create(x = 1:20))
+ expect_error(reader$ReadRowGroups(c(0, 1, -2))) # although it gives a weird error
+ expect_error(reader$ReadRowGroups(c(0, 1, 31))) # ^^
+ expect_error(reader$ReadRowGroups(c("a", "b")))
+
+ ## -- with column_indices
+ expect_true(reader$ReadRowGroup(0, 0) == Table$create(x = 1:10))
+ expect_error(reader$ReadRowGroup(0, 1))
+
+ expect_true(reader$ReadRowGroups(c(0, 1), 0) == Table$create(x = 1:20))
+ expect_error(reader$ReadRowGroups(c(0, 1), 1))
+})
+
+test_that("Error messages are shown when the compression algorithm snappy is not found", {
+ msg <- paste0(
+ "NotImplemented: Support for codec 'snappy' not built\nIn order to read this file, ",
+ "you will need to reinstall arrow with additional features enabled.\nSet one of these ",
+ "environment variables before installing:\n\n * LIBARROW_MINIMAL=false (for all optional ",
+ "features, including 'snappy')\n * ARROW_WITH_SNAPPY=ON (for just 'snappy')\n\n",
+ "See https://arrow.apache.org/docs/r/articles/install.html for details"
+ )
+
+ if (codec_is_available("snappy")) {
+ d <- read_parquet(pq_file)
+ expect_s3_class(d, "data.frame")
+ } else {
+ expect_error(read_parquet(pq_file), msg, fixed = TRUE)
+ }
+})
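+
+# Install sketch (illustrative; mirrors the advice in the message above):
+#   Sys.setenv(LIBARROW_MINIMAL = "false")  # or ARROW_WITH_SNAPPY = "ON"
+#   install.packages("arrow")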
+
+test_that("Error is created when parquet reads a feather file", {
+ expect_error(
+ read_parquet(test_path("golden-files/data-arrow_2.0.0_lz4.feather")),
+ "Parquet magic bytes not found in footer"
+ )
+})
diff --git a/src/arrow/r/tests/testthat/test-python-flight.R b/src/arrow/r/tests/testthat/test-python-flight.R
new file mode 100644
index 000000000..c87f3a562
--- /dev/null
+++ b/src/arrow/r/tests/testthat/test-python-flight.R
@@ -0,0 +1,62 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Assumes:
+# * We've already done arrow::install_pyarrow()
+# * R -e 'arrow::load_flight_server("demo_flight_server")$DemoFlightServer(port = 8089)$serve()'
+# TODO: set up CI job to test this, or some way of running a background process
+if (process_is_running("demo_flight_server")) {
+ client <- flight_connect(port = 8089)
+ flight_obj <- tempfile()
+
+ test_that("flight_path_exists", {
+ expect_false(flight_path_exists(client, flight_obj))
+ expect_false(flight_obj %in% list_flights(client))
+ })
+
+ test_that("flight_put", {
+ flight_put(client, example_data, path = flight_obj)
+ expect_true(flight_path_exists(client, flight_obj))
+ expect_true(flight_obj %in% list_flights(client))
+ })
+
+ test_that("flight_get", {
+ expect_identical(as.data.frame(flight_get(client, flight_obj)), example_data)
+ })
+
+ test_that("flight_put with RecordBatch", {
+ flight_obj2 <- tempfile()
+ flight_put(client, RecordBatch$create(example_data), path = flight_obj2)
+ expect_identical(as.data.frame(flight_get(client, flight_obj2)), example_data)
+ })
+
+ test_that("flight_put with overwrite = FALSE", {
+ expect_error(
+ flight_put(client, example_with_times, path = flight_obj, overwrite = FALSE),
+ "exists"
+ )
+ # Default is TRUE so this will overwrite
+ flight_put(client, example_with_times, path = flight_obj)
+ expect_identical(as.data.frame(flight_get(client, flight_obj)), example_with_times)
+ })
+} else {
+ # Kinda hacky, let's put a skipped test here, just so we note that the tests
+ # didn't run
+ test_that("Flight tests", {
+ skip("Flight server is not running")
+ })
+}
diff --git a/src/arrow/r/tests/testthat/test-python.R b/src/arrow/r/tests/testthat/test-python.R
new file mode 100644
index 000000000..5ad7513fb
--- /dev/null
+++ b/src/arrow/r/tests/testthat/test-python.R
@@ -0,0 +1,145 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+test_that("install_pyarrow", {
+ skip_on_cran()
+ skip_if_offline()
+ skip_if_not_dev_mode()
+ # Windows CI machine doesn't pick up the right python or something
+ skip_on_os("windows")
+ skip_if_not_installed("reticulate")
+
+ venv <- try(reticulate::virtualenv_create("arrow-test"))
+ # Bail out if virtualenv isn't available
+ skip_if(inherits(venv, "try-error"))
+ expect_error(install_pyarrow("arrow-test", nightly = TRUE), NA)
+ # Set this up for the following tests
+ reticulate::use_virtualenv("arrow-test")
+})
+
+skip_if_no_pyarrow()
+
+test_that("Array from Python", {
+ pa <- reticulate::import("pyarrow")
+ py <- pa$array(c(1, 2, 3))
+ expect_equal(py, Array$create(c(1, 2, 3)))
+})
+
+test_that("Array to Python", {
+ pa <- reticulate::import("pyarrow", convert = FALSE)
+ r <- Array$create(c(1, 2, 3))
+ py <- pa$concat_arrays(list(r))
+ expect_s3_class(py, "pyarrow.lib.Array")
+ expect_equal(reticulate::py_to_r(py), r)
+})
+
+test_that("RecordBatch to/from Python", {
+ pa <- reticulate::import("pyarrow", convert = FALSE)
+ batch <- record_batch(col1 = c(1, 2, 3), col2 = letters[1:3])
+ py <- reticulate::r_to_py(batch)
+ expect_s3_class(py, "pyarrow.lib.RecordBatch")
+ expect_equal(reticulate::py_to_r(py), batch)
+})
+
+test_that("Table and ChunkedArray from Python", {
+ pa <- reticulate::import("pyarrow", convert = FALSE)
+ batch <- record_batch(col1 = c(1, 2, 3), col2 = letters[1:3])
+ tab <- Table$create(batch, batch)
+ pybatch <- reticulate::r_to_py(batch)
+ pytab <- pa$Table$from_batches(list(pybatch, pybatch))
+ expect_s3_class(pytab, "pyarrow.lib.Table")
+ expect_s3_class(pytab[0], "pyarrow.lib.ChunkedArray")
+ expect_equal(reticulate::py_to_r(pytab[0]), tab$col1)
+ expect_equal(reticulate::py_to_r(pytab), tab)
+})
+
+test_that("Table and ChunkedArray to Python", {
+ batch <- record_batch(col1 = c(1, 2, 3), col2 = letters[1:3])
+ tab <- Table$create(batch, batch)
+
+ pychunked <- reticulate::r_to_py(tab$col1)
+ expect_s3_class(pychunked, "pyarrow.lib.ChunkedArray")
+ expect_equal(reticulate::py_to_r(pychunked), tab$col1)
+
+ pytab <- reticulate::r_to_py(tab)
+ expect_s3_class(pytab, "pyarrow.lib.Table")
+ expect_equal(reticulate::py_to_r(pytab), tab)
+})
+
+test_that("RecordBatch with metadata roundtrip", {
+ batch <- RecordBatch$create(example_with_times)
+ pybatch <- reticulate::r_to_py(batch)
+ expect_s3_class(pybatch, "pyarrow.lib.RecordBatch")
+ expect_equal(reticulate::py_to_r(pybatch), batch)
+ expect_identical(as.data.frame(reticulate::py_to_r(pybatch)), example_with_times)
+})
+
+test_that("Table with metadata roundtrip", {
+ tab <- Table$create(example_with_times)
+ pytab <- reticulate::r_to_py(tab)
+ expect_s3_class(pytab, "pyarrow.lib.Table")
+ expect_equal(reticulate::py_to_r(pytab), tab)
+ expect_identical(as.data.frame(reticulate::py_to_r(pytab)), example_with_times)
+})
+
+test_that("DataType roundtrip", {
+ r <- timestamp("ms", timezone = "Pacific/Marquesas")
+ py <- reticulate::r_to_py(r)
+ expect_s3_class(py, "pyarrow.lib.DataType")
+ expect_equal(reticulate::py_to_r(py), r)
+})
+
+test_that("Field roundtrip", {
+ r <- field("x", time32("s"))
+ py <- reticulate::r_to_py(r)
+ expect_s3_class(py, "pyarrow.lib.Field")
+ expect_equal(reticulate::py_to_r(py), r)
+})
+
+test_that("RecordBatchReader to python", {
+  library(dplyr, warn.conflicts = FALSE)
+
+ tab <- Table$create(example_data)
+ scan <- tab %>%
+ select(int, lgl) %>%
+ filter(int > 6) %>%
+ Scanner$create()
+ reader <- scan$ToRecordBatchReader()
+ pyreader <- reticulate::r_to_py(reader)
+ expect_s3_class(pyreader, "pyarrow.lib.RecordBatchReader")
+ pytab <- pyreader$read_all()
+ expect_s3_class(pytab, "pyarrow.lib.Table")
+ back_to_r <- reticulate::py_to_r(pytab)
+ expect_r6_class(back_to_r, "Table")
+ expect_identical(
+ as.data.frame(back_to_r),
+ example_data %>%
+ select(int, lgl) %>%
+ filter(int > 6)
+ )
+})
+
+test_that("RecordBatchReader from python", {
+ tab <- Table$create(example_data)
+ scan <- Scanner$create(tab)
+ reader <- scan$ToRecordBatchReader()
+ pyreader <- reticulate::r_to_py(reader)
+ back_to_r <- reticulate::py_to_r(pyreader)
+ rt_table <- back_to_r$read_table()
+ expect_r6_class(rt_table, "Table")
+ expect_identical(as.data.frame(rt_table), example_data)
+})
diff --git a/src/arrow/r/tests/testthat/test-read-record-batch.R b/src/arrow/r/tests/testthat/test-read-record-batch.R
new file mode 100644
index 000000000..ba109da6c
--- /dev/null
+++ b/src/arrow/r/tests/testthat/test-read-record-batch.R
@@ -0,0 +1,78 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+test_that("RecordBatchFileWriter / RecordBatchFileReader roundtrips", {
+ tab <- Table$create(
+ int = 1:10,
+ dbl = as.numeric(1:10),
+ lgl = sample(c(TRUE, FALSE, NA), 10, replace = TRUE),
+ chr = letters[1:10]
+ )
+
+ tf <- tempfile()
+ expect_error(
+ RecordBatchFileWriter$create(tf, tab$schema),
+ "RecordBatchFileWriter$create() requires an Arrow InputStream. Try providing FileOutputStream$create(tf)",
+ fixed = TRUE
+ )
+
+ stream <- FileOutputStream$create(tf)
+ writer <- RecordBatchFileWriter$create(stream, tab$schema)
+ expect_r6_class(writer, "RecordBatchWriter")
+ writer$write_table(tab)
+ writer$close()
+ stream$close()
+
+ expect_equal(read_feather(tf, as_data_frame = FALSE), tab)
+ # Make sure connections are closed
+ expect_error(file.remove(tf), NA)
+ skip_on_os("windows") # This should pass, we've closed the stream
+ expect_false(file.exists(tf))
+})
+
+test_that("record_batch() handles (raw|Buffer|InputStream, Schema) (ARROW-3450, ARROW-3505)", {
+ tbl <- tibble::tibble(
+ int = 1:10, dbl = as.numeric(1:10),
+ lgl = sample(c(TRUE, FALSE, NA), 10, replace = TRUE),
+ chr = letters[1:10]
+ )
+ batch <- record_batch(!!!tbl)
+ schema <- batch$schema
+
+ raw <- batch$serialize()
+ batch2 <- record_batch(raw, schema = schema)
+  batch3 <- record_batch(buffer(raw), schema = schema)
+  stream <- BufferReader$create(raw)
+  batch4 <- record_batch(stream, schema = schema)
+  stream$close()
+
+  expect_equal(batch, batch2)
+  expect_equal(batch, batch3)
+  expect_equal(batch, batch4)
+})
+
+test_that("record_batch() can handle (Message, Schema) parameters (ARROW-3499)", {
+ batch <- record_batch(x = 1:10)
+ schema <- batch$schema
+
+ raw <- batch$serialize()
+ stream <- BufferReader$create(raw)
+
+ message <- read_message(stream)
+ batch2 <- record_batch(message, schema = schema)
+ expect_equal(batch, batch2)
+ stream$close()
+})
diff --git a/src/arrow/r/tests/testthat/test-read-write.R b/src/arrow/r/tests/testthat/test-read-write.R
new file mode 100644
index 000000000..66f6db56d
--- /dev/null
+++ b/src/arrow/r/tests/testthat/test-read-write.R
@@ -0,0 +1,125 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+test_that("table round trip", {
+ tbl <- tibble::tibble(
+ int = 1:10,
+ dbl = as.numeric(1:10),
+ raw = as.raw(1:10)
+ )
+
+ tab <- Table$create(!!!tbl)
+ expect_equal(tab$num_columns, 3L)
+ expect_equal(tab$num_rows, 10L)
+
+ # ChunkedArray
+ chunked_array_int <- tab$column(0)
+ expect_equal(chunked_array_int$length(), 10L)
+ expect_equal(chunked_array_int$null_count, 0L)
+ expect_equal(chunked_array_int$as_vector(), tbl$int)
+
+ # Array
+ chunks_int <- chunked_array_int$chunks
+ expect_equal(length(chunks_int), chunked_array_int$num_chunks)
+ for (i in seq_along(chunks_int)) {
+ expect_equal(chunked_array_int$chunk(i - 1L), chunks_int[[i]])
+ }
+
+ # ChunkedArray
+ chunked_array_dbl <- tab$column(1)
+ expect_equal(chunked_array_dbl$length(), 10L)
+ expect_equal(chunked_array_dbl$null_count, 0L)
+ expect_equal(chunked_array_dbl$as_vector(), tbl$dbl)
+
+ # Array
+ chunks_dbl <- chunked_array_dbl$chunks
+ expect_equal(length(chunks_dbl), chunked_array_dbl$num_chunks)
+ for (i in seq_along(chunks_dbl)) {
+ expect_equal(chunked_array_dbl$chunk(i - 1L), chunks_dbl[[i]])
+ }
+
+ # ChunkedArray
+ chunked_array_raw <- tab$column(2)
+ expect_equal(chunked_array_raw$length(), 10L)
+ expect_equal(chunked_array_raw$null_count, 0L)
+ expect_equal(chunked_array_raw$as_vector(), as.integer(tbl$raw))
+
+ # Array
+ chunks_raw <- chunked_array_raw$chunks
+ expect_equal(length(chunks_raw), chunked_array_raw$num_chunks)
+ for (i in seq_along(chunks_raw)) {
+ expect_equal(chunked_array_raw$chunk(i - 1L), chunks_raw[[i]])
+ }
+ tf <- tempfile()
+ write_feather(tbl, tf)
+
+ res <- read_feather(tf)
+ expect_identical(tbl$int, res$int)
+ expect_identical(tbl$dbl, res$dbl)
+ expect_identical(as.integer(tbl$raw), res$raw)
+ unlink(tf)
+})
+
+test_that("table round trip handles NA in integer and numeric", {
+ tbl <- tibble::tibble(
+ int = c(NA, 2:10),
+ dbl = as.numeric(c(1:5, NA, 7:9, NA)),
+ raw = as.raw(1:10)
+ )
+
+ tab <- Table$create(!!!tbl)
+ expect_equal(tab$num_columns, 3L)
+ expect_equal(tab$num_rows, 10L)
+
+ expect_equal(tab$column(0)$length(), 10L)
+ expect_equal(tab$column(1)$length(), 10L)
+ expect_equal(tab$column(2)$length(), 10L)
+
+ expect_equal(tab$column(0)$null_count, 1L)
+ expect_equal(tab$column(1)$null_count, 2L)
+ expect_equal(tab$column(2)$null_count, 0L)
+
+ expect_equal(tab$column(0)$type, int32())
+ expect_equal(tab$column(1)$type, float64())
+ expect_equal(tab$column(2)$type, uint8())
+
+ tf <- tempfile()
+ write_feather(tbl, tf)
+
+ res <- read_feather(tf)
+ expect_identical(tbl$int, res$int)
+ expect_identical(tbl$dbl, res$dbl)
+ expect_identical(as.integer(tbl$raw), res$raw)
+
+ expect_true(is.na(res$int[1]))
+ expect_true(is.na(res$dbl[6]))
+ expect_true(is.na(res$dbl[10]))
+ unlink(tf)
+})
+
+test_that("reading/writing a raw vector (sparklyr integration)", {
+ # These are effectively what sparklyr calls to get data to/from Spark
+ read_from_raw_test <- function(x) {
+ as.data.frame(RecordBatchStreamReader$create(x)$read_next_batch())
+ }
+ bytes <- write_to_raw(example_data)
+ expect_type(bytes, "raw")
+ expect_identical(read_from_raw_test(bytes), example_data)
+ # this could just be `read_ipc_stream(x)`; propose that
+ expect_identical(read_ipc_stream(bytes), example_data)
+})
diff --git a/src/arrow/r/tests/testthat/test-record-batch-reader.R b/src/arrow/r/tests/testthat/test-record-batch-reader.R
new file mode 100644
index 000000000..3992670dc
--- /dev/null
+++ b/src/arrow/r/tests/testthat/test-record-batch-reader.R
@@ -0,0 +1,141 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+test_that("RecordBatchStreamReader / Writer", {
+ tbl <- tibble::tibble(
+ x = 1:10,
+ y = letters[1:10]
+ )
+ batch <- record_batch(tbl)
+ tab <- Table$create(tbl)
+
+ sink <- BufferOutputStream$create()
+ expect_equal(sink$tell(), 0)
+ writer <- RecordBatchStreamWriter$create(sink, batch$schema)
+ expect_r6_class(writer, "RecordBatchWriter")
+ writer$write(batch)
+ writer$write(tab)
+ writer$write(tbl)
+ expect_true(sink$tell() > 0)
+ writer$close()
+
+ buf <- sink$finish()
+ expect_r6_class(buf, "Buffer")
+
+ reader <- RecordBatchStreamReader$create(buf)
+ expect_r6_class(reader, "RecordBatchStreamReader")
+
+ batch1 <- reader$read_next_batch()
+ expect_r6_class(batch1, "RecordBatch")
+ expect_equal(batch, batch1)
+ batch2 <- reader$read_next_batch()
+ expect_r6_class(batch2, "RecordBatch")
+ expect_equal(batch, batch2)
+ batch3 <- reader$read_next_batch()
+ expect_r6_class(batch3, "RecordBatch")
+ expect_equal(batch, batch3)
+ expect_null(reader$read_next_batch())
+})
+
+test_that("RecordBatchFileReader / Writer", {
+ sink <- BufferOutputStream$create()
+ writer <- RecordBatchFileWriter$create(sink, batch$schema)
+ expect_r6_class(writer, "RecordBatchWriter")
+ writer$write(batch)
+ writer$write(tab)
+ writer$write(tbl)
+ writer$close()
+
+ buf <- sink$finish()
+ expect_r6_class(buf, "Buffer")
+
+ reader <- RecordBatchFileReader$create(buf)
+ expect_r6_class(reader, "RecordBatchFileReader")
+
+ batch1 <- reader$get_batch(0)
+ expect_r6_class(batch1, "RecordBatch")
+ expect_equal(batch, batch1)
+
+ expect_equal(reader$num_record_batches, 3)
+})
+
+test_that("StreamReader read_table", {
+ sink <- BufferOutputStream$create()
+ writer <- RecordBatchStreamWriter$create(sink, batch$schema)
+ expect_r6_class(writer, "RecordBatchWriter")
+ writer$write(batch)
+ writer$write(tab)
+ writer$write(tbl)
+ writer$close()
+ buf <- sink$finish()
+
+ reader <- RecordBatchStreamReader$create(buf)
+ out <- reader$read_table()
+ expect_identical(dim(out), c(30L, 2L))
+})
+
+test_that("FileReader read_table", {
+ sink <- BufferOutputStream$create()
+ writer <- RecordBatchFileWriter$create(sink, batch$schema)
+ expect_r6_class(writer, "RecordBatchWriter")
+ writer$write(batch)
+ writer$write(tab)
+ writer$write(tbl)
+ writer$close()
+ buf <- sink$finish()
+
+ reader <- RecordBatchFileReader$create(buf)
+ out <- reader$read_table()
+ expect_identical(dim(out), c(30L, 2L))
+})
+
+test_that("MetadataFormat", {
+ expect_identical(get_ipc_metadata_version(5), 4L)
+ expect_identical(get_ipc_metadata_version("V4"), 3L)
+ expect_identical(get_ipc_metadata_version(NULL), 4L)
+ Sys.setenv(ARROW_PRE_0_15_IPC_FORMAT = 1)
+ expect_identical(get_ipc_metadata_version(NULL), 3L)
+ Sys.setenv(ARROW_PRE_0_15_IPC_FORMAT = "")
+
+ expect_identical(get_ipc_metadata_version(NULL), 4L)
+ Sys.setenv(ARROW_PRE_1_0_METADATA_VERSION = 1)
+ expect_identical(get_ipc_metadata_version(NULL), 3L)
+ Sys.setenv(ARROW_PRE_1_0_METADATA_VERSION = "")
+
+ expect_error(
+ get_ipc_metadata_version(99),
+ "99 is not a valid IPC MetadataVersion"
+ )
+ expect_error(
+ get_ipc_metadata_version("45"),
+ '"45" is not a valid IPC MetadataVersion'
+ )
+})
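+
+# Equivalent sketch using withr (already a dependency of these tests), which
+# restores the environment variable even if an expectation fails:
+#   withr::with_envvar(c(ARROW_PRE_0_15_IPC_FORMAT = "1"), {
+#     expect_identical(get_ipc_metadata_version(NULL), 3L)
+#   })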
+
+test_that("reader with 0 batches", {
+ # IPC stream containing only a schema (ARROW-10642)
+ sink <- BufferOutputStream$create()
+ writer <- RecordBatchStreamWriter$create(sink, schema(a = int32()))
+ writer$close()
+ buf <- sink$finish()
+
+ reader <- RecordBatchStreamReader$create(buf)
+ tab <- reader$read_table()
+ expect_r6_class(tab, "Table")
+ expect_identical(dim(tab), c(0L, 1L))
+})
diff --git a/src/arrow/r/tests/testthat/test-s3-minio.R b/src/arrow/r/tests/testthat/test-s3-minio.R
new file mode 100644
index 000000000..e2c1dc2e7
--- /dev/null
+++ b/src/arrow/r/tests/testthat/test-s3-minio.R
@@ -0,0 +1,228 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+if (arrow_with_s3() && process_is_running("minio server")) {
+ # Get minio config, with expected defaults
+ minio_key <- Sys.getenv("MINIO_ACCESS_KEY", "minioadmin")
+ minio_secret <- Sys.getenv("MINIO_SECRET_KEY", "minioadmin")
+ minio_port <- Sys.getenv("MINIO_PORT", "9000")
+
+ # Helper function for minio URIs
+ minio_uri <- function(...) {
+ template <- "s3://%s:%s@%s?scheme=http&endpoint_override=localhost%s%s"
+ sprintf(template, minio_key, minio_secret, minio_path(...), "%3A", minio_port)
+ }
+ minio_path <- function(...) paste(now, ..., sep = "/")
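+  # e.g. with the defaults above, minio_uri("test.feather") evaluates to
+  # "s3://minioadmin:minioadmin@<now>/test.feather?scheme=http&endpoint_override=localhost%3A9000"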
+
+ test_that("minio setup", {
+ # Create a "bucket" on minio for this test run, which we'll delete when done.
+ fs <- S3FileSystem$create(
+ access_key = minio_key,
+ secret_key = minio_secret,
+ scheme = "http",
+ endpoint_override = paste0("localhost:", minio_port)
+ )
+ expect_r6_class(fs, "S3FileSystem")
+ now <- as.character(as.numeric(Sys.time()))
+ # If minio isn't running, this will hang for a few seconds and fail with a
+ # curl timeout, causing `run_these` to be set to FALSE and skipping the tests
+ fs$CreateDir(now)
+ })
+ # Clean up when we're all done
+ on.exit(fs$DeleteDir(now))
+
+ test_that("read/write Feather on minio", {
+ write_feather(example_data, minio_uri("test.feather"))
+ expect_identical(read_feather(minio_uri("test.feather")), example_data)
+ })
+
+ test_that("read/write Feather by filesystem, not URI", {
+ write_feather(example_data, fs$path(minio_path("test2.feather")))
+ expect_identical(
+ read_feather(fs$path(minio_path("test2.feather"))),
+ example_data
+ )
+ })
+
+ test_that("read/write stream", {
+ write_ipc_stream(example_data, fs$path(minio_path("test3.ipc")))
+ expect_identical(
+ read_ipc_stream(fs$path(minio_path("test3.ipc"))),
+ example_data
+ )
+ })
+
+ test_that("read/write Parquet on minio", {
+ skip_if_not_available("parquet")
+    write_parquet(example_data, fs$path(minio_path("test.parquet")))
+ expect_identical(read_parquet(minio_uri("test.parquet")), example_data)
+ })
+
+ if (arrow_with_dataset()) {
+ library(dplyr)
+
+ make_temp_dir <- function() {
+ path <- tempfile()
+ dir.create(path)
+ normalizePath(path, winslash = "/")
+ }
+
+ test_that("open_dataset with an S3 file (not directory) URI", {
+ skip_if_not_available("parquet")
+ expect_identical(
+ open_dataset(minio_uri("test.parquet")) %>% collect() %>% arrange(int),
+ example_data %>% arrange(int)
+ )
+ })
+
+ test_that("open_dataset with vector of S3 file URIs", {
+ expect_identical(
+ open_dataset(
+ c(minio_uri("test.feather"), minio_uri("test2.feather")),
+ format = "feather"
+ ) %>%
+ arrange(int) %>%
+ collect(),
+ rbind(example_data, example_data) %>% arrange(int)
+ )
+ })
+
+ test_that("open_dataset errors on URIs for different file systems", {
+ td <- make_temp_dir()
+ expect_error(
+ open_dataset(
+ c(
+ minio_uri("test.feather"),
+ paste0("file://", file.path(td, "fake.feather"))
+ ),
+ format = "feather"
+ ),
+ "Vectors of URIs for different file systems are not supported"
+ )
+ })
+
+ # Dataset test setup, cf. test-dataset.R
+ first_date <- lubridate::ymd_hms("2015-04-29 03:12:39")
+ df1 <- tibble(
+ int = 1:10,
+ dbl = as.numeric(1:10),
+ lgl = rep(c(TRUE, FALSE, NA, TRUE, FALSE), 2),
+ chr = letters[1:10],
+ fct = factor(LETTERS[1:10]),
+ ts = first_date + lubridate::days(1:10)
+ )
+
+ second_date <- lubridate::ymd_hms("2017-03-09 07:01:02")
+ df2 <- tibble(
+ int = 101:110,
+ dbl = as.numeric(51:60),
+ lgl = rep(c(TRUE, FALSE, NA, TRUE, FALSE), 2),
+ chr = letters[10:1],
+ fct = factor(LETTERS[10:1]),
+ ts = second_date + lubridate::days(10:1)
+ )
+
+ # This is also to set up the dataset tests
+ test_that("write_parquet with filesystem arg", {
+ skip_if_not_available("parquet")
+ fs$CreateDir(minio_path("hive_dir", "group=1", "other=xxx"))
+ fs$CreateDir(minio_path("hive_dir", "group=2", "other=yyy"))
+ expect_length(fs$ls(minio_path("hive_dir")), 2)
+ write_parquet(df1, fs$path(minio_path("hive_dir", "group=1", "other=xxx", "file1.parquet")))
+ write_parquet(df2, fs$path(minio_path("hive_dir", "group=2", "other=yyy", "file2.parquet")))
+ expect_identical(
+ read_parquet(fs$path(minio_path("hive_dir", "group=1", "other=xxx", "file1.parquet"))),
+ df1
+ )
+ })
+
+ test_that("open_dataset with fs", {
+ ds <- open_dataset(fs$path(minio_path("hive_dir")))
+ expect_identical(
+ ds %>% select(int, dbl, lgl) %>% collect() %>% arrange(int),
+ rbind(df1[, c("int", "dbl", "lgl")], df2[, c("int", "dbl", "lgl")]) %>% arrange(int)
+ )
+ })
+
+ test_that("write_dataset with fs", {
+ ds <- open_dataset(fs$path(minio_path("hive_dir")))
+ write_dataset(ds, fs$path(minio_path("new_dataset_dir")))
+ expect_length(fs$ls(minio_path("new_dataset_dir")), 1)
+ })
+
+ test_that("Let's test copy_files too", {
+ td <- make_temp_dir()
+ copy_files(minio_uri("hive_dir"), td)
+ expect_length(dir(td), 2)
+ ds <- open_dataset(td)
+ expect_identical(
+ ds %>% select(int, dbl, lgl) %>% collect() %>% arrange(int),
+ rbind(df1[, c("int", "dbl", "lgl")], df2[, c("int", "dbl", "lgl")]) %>% arrange(int)
+ )
+
+ # Let's copy the other way and use a SubTreeFileSystem rather than URI
+ copy_files(td, fs$path(minio_path("hive_dir2")))
+ ds2 <- open_dataset(fs$path(minio_path("hive_dir2")))
+ expect_identical(
+ ds2 %>% select(int, dbl, lgl) %>% collect() %>% arrange(int),
+ rbind(df1[, c("int", "dbl", "lgl")], df2[, c("int", "dbl", "lgl")]) %>% arrange(int)
+ )
+ })
+ }
+
+ test_that("S3FileSystem input validation", {
+ expect_error(
+ S3FileSystem$create(access_key = "foo"),
+ "Key authentication requires both access_key and secret_key"
+ )
+ expect_error(
+ S3FileSystem$create(secret_key = "foo"),
+ "Key authentication requires both access_key and secret_key"
+ )
+ expect_error(
+ S3FileSystem$create(session_token = "foo"),
+ paste0(
+ "In order to initialize a session with temporary credentials, ",
+ "both secret_key and access_key must be provided ",
+ "in addition to session_token."
+ )
+ )
+ expect_error(
+ S3FileSystem$create(access_key = "foo", secret_key = "asdf", anonymous = TRUE),
+ 'Cannot specify "access_key" and "secret_key" when anonymous = TRUE'
+ )
+ expect_error(
+ S3FileSystem$create(access_key = "foo", secret_key = "asdf", role_arn = "qwer"),
+ "Cannot provide both key authentication and role_arn"
+ )
+ expect_error(
+ S3FileSystem$create(access_key = "foo", secret_key = "asdf", external_id = "qwer"),
+ 'Cannot specify "external_id" without providing a role_arn string'
+ )
+ expect_error(
+ S3FileSystem$create(external_id = "foo"),
+ 'Cannot specify "external_id" without providing a role_arn string'
+ )
+ })
+} else {
+ # Kinda hacky, let's put a skipped test here, just so we note that the tests
+ # didn't run
+ test_that("S3FileSystem tests with Minio", {
+ skip("Minio is not running")
+ })
+}
diff --git a/src/arrow/r/tests/testthat/test-s3.R b/src/arrow/r/tests/testthat/test-s3.R
new file mode 100644
index 000000000..298b15bb8
--- /dev/null
+++ b/src/arrow/r/tests/testthat/test-s3.R
@@ -0,0 +1,55 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+run_these <- tryCatch(
+ expr = {
+ if (arrow_with_s3() &&
+ identical(tolower(Sys.getenv("ARROW_R_DEV")), "true") &&
+ !identical(Sys.getenv("AWS_ACCESS_KEY_ID"), "") &&
+ !identical(Sys.getenv("AWS_SECRET_ACCESS_KEY"), "")) {
+ # See if we have access to the test bucket
+ bucket <- s3_bucket("ursa-labs-r-test")
+ bucket$GetFileInfo("")
+ TRUE
+ } else {
+ FALSE
+ }
+ },
+ error = function(e) FALSE
+)
+
+bucket_uri <- function(..., bucket = "s3://ursa-labs-r-test/%s?region=us-west-2") {
+ segments <- paste(..., sep = "/")
+ sprintf(bucket, segments)
+}
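+# e.g. bucket_uri("some_dir", "file.parquet") returns
+# "s3://ursa-labs-r-test/some_dir/file.parquet?region=us-west-2"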
+
+if (run_these) {
+ now <- as.numeric(Sys.time())
+ on.exit(bucket$DeleteDir(now))
+
+ test_that("read/write Feather on S3", {
+ write_feather(example_data, bucket_uri(now, "test.feather"))
+ expect_identical(read_feather(bucket_uri(now, "test.feather")), example_data)
+ })
+
+ test_that("read/write Parquet on S3", {
+ skip_if_not_available("parquet")
+ write_parquet(example_data, bucket_uri(now, "test.parquet"))
+ expect_identical(read_parquet(bucket_uri(now, "test.parquet")), example_data)
+ })
+}
diff --git a/src/arrow/r/tests/testthat/test-scalar.R b/src/arrow/r/tests/testthat/test-scalar.R
new file mode 100644
index 000000000..3afccf743
--- /dev/null
+++ b/src/arrow/r/tests/testthat/test-scalar.R
@@ -0,0 +1,112 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
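+# Check that an R value roundtrips through Scalar$create(): the resulting
+# Scalar should have the expected type and length 1, report missingness
+# consistently with the input, and (for non-nested types) convert back to
+# the original R value.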
+expect_scalar_roundtrip <- function(x, type) {
+ s <- Scalar$create(x)
+ expect_r6_class(s, "Scalar")
+ expect_equal(s$type, type)
+ expect_identical(length(s), 1L)
+ if (inherits(type, "NestedType")) {
+    # Open question: should a nested Scalar be NA when all of its elements are NA?
+ # expect_identical(is.na(s), all(is.na(x))) # nolint
+ } else {
+ expect_identical(as.vector(is.na(s)), is.na(x))
+ # MakeArrayFromScalar not implemented for list types
+ expect_as_vector(s, x)
+ }
+}
+
+test_that("Scalar object roundtrip", {
+ expect_scalar_roundtrip(2, float64())
+ expect_scalar_roundtrip(2L, int32())
+ expect_scalar_roundtrip(c(2, 4), list_of(float64()))
+ expect_scalar_roundtrip(c(NA, NA), list_of(bool()))
+ expect_scalar_roundtrip(data.frame(a = 2, b = 4L), struct(a = double(), b = int32()))
+})
+
+test_that("Scalar print", {
+ expect_output(print(Scalar$create(4)), "Scalar\n4")
+})
+
+test_that("Creating Scalars of a different type and casting them", {
+ expect_equal(Scalar$create(4L, int8())$type, int8())
+ expect_equal(Scalar$create(4L)$cast(float32())$type, float32())
+})
+
+test_that("Scalar to Array", {
+ a <- Scalar$create(42)
+ expect_equal(a$as_array(), Array$create(42))
+ expect_equal(Array$create(a), Array$create(42))
+})
+
+test_that("Scalar$Equals", {
+ a <- Scalar$create(42)
+ aa <- Array$create(42)
+ b <- Scalar$create(42)
+ d <- Scalar$create(43)
+ expect_equal(a, b)
+ expect_true(a$Equals(b))
+ expect_false(a$Equals(d))
+ expect_false(a$Equals(aa))
+})
+
+test_that("Scalar$ApproxEquals", {
+ a <- Scalar$create(1.0000000000001)
+ aa <- Array$create(1.0000000000001)
+ b <- Scalar$create(1.0)
+ d <- 2.400000000000001
+ expect_false(a$Equals(b))
+ expect_true(a$ApproxEquals(b))
+ expect_false(a$ApproxEquals(d))
+ expect_false(a$ApproxEquals(aa))
+})
+
+test_that("Handling string data with embedded nuls", {
+ raws <- as.raw(c(0x6d, 0x61, 0x00, 0x6e))
+ expect_error(
+ rawToChar(raws),
+ "embedded nul in string: 'ma\\0n'", # See?
+ fixed = TRUE
+ )
+ scalar_with_nul <- Scalar$create(raws, binary())$cast(utf8())
+
+  # The behavior of the warnings/errors is slightly different with and without
+  # altrep. Without it (i.e. R 3.5.0 and below), the error triggers immediately
+  # on `as.vector()`, whereas with altrep it only happens on materialization.
+ skip_if_r_version("3.5.0")
+ v <- expect_error(as.vector(scalar_with_nul), NA)
+ expect_error(
+ v[1],
+ paste0(
+ "embedded nul in string: 'ma\\0n'; to strip nuls when converting from Arrow to R, ",
+ "set options(arrow.skip_nul = TRUE)"
+ ),
+ fixed = TRUE
+ )
+
+ withr::with_options(list(arrow.skip_nul = TRUE), {
+ expect_warning(
+ expect_identical(
+ as.vector(scalar_with_nul)[],
+ "man"
+ ),
+ "Stripping '\\0' (nul) from character vector",
+ fixed = TRUE
+ )
+ })
+})
diff --git a/src/arrow/r/tests/testthat/test-schema.R b/src/arrow/r/tests/testthat/test-schema.R
new file mode 100644
index 000000000..8473550df
--- /dev/null
+++ b/src/arrow/r/tests/testthat/test-schema.R
@@ -0,0 +1,220 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+test_that("Alternate type names are supported", {
+ expect_equal(
+ schema(b = double(), c = bool(), d = string(), e = float(), f = halffloat()),
+ schema(b = float64(), c = boolean(), d = utf8(), e = float32(), f = float16())
+ )
+ expect_equal(names(schema(b = double(), c = bool(), d = string())), c("b", "c", "d"))
+})
+
+test_that("Schema print method", {
+ expect_output(
+ print(schema(b = double(), c = bool(), d = string())),
+ paste(
+ "Schema",
+ "b: double",
+ "c: bool",
+ "d: string",
+ sep = "\n"
+ ),
+ fixed = TRUE
+ )
+})
+
+test_that("Schema with non-nullable fields", {
+ expect_output(
+ print(schema(field("b", double()),
+ field("c", bool(), nullable = FALSE),
+ field("d", string()))),
+ paste(
+ "Schema",
+ "b: double",
+ "c: bool not null",
+ "d: string",
+ sep = "\n"
+ ),
+ fixed = TRUE
+ )
+})
+
+test_that("Schema $GetFieldByName", {
+ schm <- schema(b = double(), c = string())
+ expect_equal(schm$GetFieldByName("b"), field("b", double()))
+ expect_null(schm$GetFieldByName("f"))
+ # TODO: schema(b = double(), b = string())$GetFieldByName("b") # nolint
+ # also returns NULL and probably should error bc duplicated names
+})
+
+test_that("Schema extract (returns Field)", {
+ # TODO: should this return a Field or the Type?
+ # I think of Schema like list(name = type, name = type, ...)
+ # but in practice it is more like list(list(name, type), list(name, type), ...)
+ # -> Field names in a Schema may be duplicated
+ # -> Fields may have metadata (though we don't really handle that in R)
+ schm <- schema(b = double(), c = string())
+ expect_equal(schm$b, field("b", double()))
+ expect_equal(schm[["b"]], field("b", double()))
+ expect_equal(schm[[1]], field("b", double()))
+
+ expect_null(schm[["ZZZ"]])
+ expect_error(schm[[42]]) # Should have better error message
+})
+
+test_that("Schema slicing", {
+ schm <- schema(b = double(), c = string(), d = int8())
+ expect_equal(schm[2:3], schema(c = string(), d = int8()))
+ expect_equal(schm[-1], schema(c = string(), d = int8()))
+ expect_equal(schm[c("d", "c")], schema(d = int8(), c = string()))
+ expect_equal(schm[c(FALSE, TRUE, TRUE)], schema(c = string(), d = int8()))
+ expect_error(schm[c("c", "ZZZ")], 'Invalid field name: "ZZZ"')
+ expect_error(schm[c("XXX", "c", "ZZZ")], 'Invalid field names: "XXX" and "ZZZ"')
+})
+
+test_that("Schema modification", {
+ schm <- schema(b = double(), c = string(), d = int8())
+ schm$c <- boolean()
+ expect_equal(schm, schema(b = double(), c = boolean(), d = int8()))
+ schm[["d"]] <- int16()
+ expect_equal(schm, schema(b = double(), c = boolean(), d = int16()))
+ schm$b <- NULL
+ expect_equal(schm, schema(c = boolean(), d = int16()))
+  # Assigning NULL to a field that doesn't exist is a no-op
+ schm$zzzz <- NULL
+ expect_equal(schm, schema(c = boolean(), d = int16()))
+ # Adding a field
+ schm$fff <- int32()
+ expect_equal(schm, schema(c = boolean(), d = int16(), fff = int32()))
+
+ # By index
+ schm <- schema(b = double(), c = string(), d = int8())
+ schm[[2]] <- int32()
+ expect_equal(schm, schema(b = double(), c = int32(), d = int8()))
+
+ # Adding actual Fields
+  # When assigning a Field by name, the Field's own name replaces that name
+ schm <- schema(b = double(), c = string(), d = int8())
+ schm$c <- field("x", int32())
+ expect_equal(schm, schema(b = double(), x = int32(), d = int8()))
+ schm[[2]] <- field("y", int64())
+ expect_equal(schm, schema(b = double(), y = int64(), d = int8()))
+
+ # Error handling
+ expect_error(schm$c <- 4, "value must be a DataType")
+ expect_error(schm[[-3]] <- int32(), "i not greater than 0")
+ expect_error(schm[[0]] <- int32(), "i not greater than 0")
+ expect_error(schm[[NA_integer_]] <- int32(), "!is.na(i) is not TRUE", fixed = TRUE)
+ expect_error(schm[[TRUE]] <- int32(), "i is not a numeric or integer vector")
+ expect_error(schm[[c(2, 4)]] <- int32(), "length(i) not equal to 1", fixed = TRUE)
+})
+
+test_that("Metadata is preserved when modifying Schema", {
+ schm <- schema(b = double(), c = string(), d = int8())
+ schm$metadata$foo <- "bar"
+ expect_identical(schm$metadata, list(foo = "bar"))
+ schm$c <- field("x", int32())
+ expect_identical(schm$metadata, list(foo = "bar"))
+})
+
+test_that("reading schema from Buffer", {
+ # TODO: this uses the streaming format, i.e. from RecordBatchStreamWriter
+ # maybe there is an easier way to serialize a schema
+ batch <- record_batch(x = 1:10)
+ expect_r6_class(batch, "RecordBatch")
+
+ stream <- BufferOutputStream$create()
+ writer <- RecordBatchStreamWriter$create(stream, batch$schema)
+ expect_r6_class(writer, "RecordBatchWriter")
+ writer$close()
+
+ buffer <- stream$finish()
+ expect_r6_class(buffer, "Buffer")
+
+ reader <- MessageReader$create(buffer)
+ expect_r6_class(reader, "MessageReader")
+
+ message <- reader$ReadNextMessage()
+ expect_r6_class(message, "Message")
+ expect_equal(message$type, MessageType$SCHEMA)
+
+ stream <- BufferReader$create(buffer)
+ expect_r6_class(stream, "BufferReader")
+ message <- read_message(stream)
+ expect_r6_class(message, "Message")
+ expect_equal(message$type, MessageType$SCHEMA)
+})
+
+test_that("Input validation when creating a table with a schema", {
+ expect_error(
+ Table$create(b = 1, schema = c(b = float64())), # list not Schema
+ "`schema` must be an arrow::Schema or NULL"
+ )
+})
+
+test_that("Schema$Equals", {
+ a <- schema(b = double(), c = bool())
+ b <- a$WithMetadata(list(some = "metadata"))
+
+ # different metadata
+ expect_failure(expect_equal(a, b))
+ expect_false(a$Equals(b, check_metadata = TRUE))
+
+ # Metadata not checked
+ expect_equal(a, b, ignore_attr = TRUE)
+
+ # Non-schema object
+ expect_false(a$Equals(42))
+})
+
+test_that("unify_schemas", {
+ a <- schema(b = double(), c = bool())
+ z <- schema(b = double(), k = utf8())
+ expect_equal(
+ unify_schemas(a, z),
+ schema(b = double(), c = bool(), k = utf8())
+ )
+ # returns NULL when any arg is NULL
+ expect_null(
+ unify_schemas(a, NULL, z)
+ )
+ # returns NULL when all args are NULL
+ expect_null(
+ unify_schemas(NULL, NULL)
+ )
+ # errors when no args
+ expect_error(
+ unify_schemas(),
+ "Must provide at least one schema to unify"
+ )
+})
+
+test_that("Schema to C-interface", {
+ schema <- schema(b = double(), c = bool())
+
+ # export the schema via the C-interface
+ ptr <- allocate_arrow_schema()
+ schema$export_to_c(ptr)
+
+ # then import it and check that the roundtripped value is the same
+ circle <- Schema$import_from_c(ptr)
+ expect_equal(circle, schema)
+
+ # must clean up the pointer or we leak
+ delete_arrow_schema(ptr)
+})
diff --git a/src/arrow/r/tests/testthat/test-thread-pool.R b/src/arrow/r/tests/testthat/test-thread-pool.R
new file mode 100644
index 000000000..baf410368
--- /dev/null
+++ b/src/arrow/r/tests/testthat/test-thread-pool.R
@@ -0,0 +1,33 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+test_that("can set/get cpu thread pool capacity", {
+ old <- cpu_count()
+ set_cpu_count(19)
+ expect_equal(cpu_count(), 19L)
+ set_cpu_count(old)
+ expect_equal(cpu_count(), old)
+})
+
+test_that("can set/get I/O thread pool capacity", {
+ old <- io_thread_count()
+ set_io_thread_count(19)
+ expect_equal(io_thread_count(), 19L)
+ set_io_thread_count(old)
+ expect_equal(io_thread_count(), old)
+})
diff --git a/src/arrow/r/tests/testthat/test-type.R b/src/arrow/r/tests/testthat/test-type.R
new file mode 100644
index 000000000..3821fb450
--- /dev/null
+++ b/src/arrow/r/tests/testthat/test-type.R
@@ -0,0 +1,211 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+test_that("type() gets the right type for arrow::Array", {
+ a <- Array$create(1:10)
+ expect_equal(type(a), a$type)
+})
+
+test_that("type() gets the right type for ChunkedArray", {
+ a <- chunked_array(1:10, 1:10)
+ expect_equal(type(a), a$type)
+})
+
+test_that("type() infers from R type", {
+ expect_equal(type(1:10), int32())
+ expect_equal(type(1), float64())
+ expect_equal(type(TRUE), boolean())
+ expect_equal(type(raw()), uint8())
+ expect_equal(type(""), utf8())
+ expect_equal(
+ type(example_data$fct),
+ dictionary(int8(), utf8(), FALSE)
+ )
+ expect_equal(
+ type(lubridate::ymd_hms("2019-02-14 13:55:05")),
+ timestamp(TimeUnit$MICRO, "UTC")
+ )
+ expect_equal(
+ type(hms::hms(56, 34, 12)),
+ time32(unit = TimeUnit$SECOND)
+ )
+ expect_equal(
+ type(bit64::integer64()),
+ int64()
+ )
+})
+
+test_that("type() can infer struct types from data frames", {
+ df <- tibble::tibble(x = 1:10, y = rnorm(10), z = letters[1:10])
+ expect_equal(type(df), struct(x = int32(), y = float64(), z = utf8()))
+})
+
+test_that("DataType$Equals", {
+ a <- int32()
+ b <- int32()
+ z <- float64()
+ expect_true(a == b)
+ expect_true(a$Equals(b))
+ expect_false(a == z)
+ expect_equal(a, b)
+ expect_failure(expect_equal(a, z))
+ expect_failure(expect_equal(a, z))
+ expect_false(a$Equals(32L))
+})
+
+test_that("Masked data type functions still work", {
+ skip("Work around masking of data type functions (ARROW-12322)")
+
+ # Works when type function is masked
+ string <- rlang::string
+ expect_equal(
+ Array$create("abc", type = string()),
+ arrow::string()
+ )
+ rm(string)
+
+  # Works with a non-Arrow function that returns an Arrow type, when that
+  # function has the same name as a base R function...
+ str <- arrow::string
+ expect_equal(
+ Array$create("abc", type = str()),
+ arrow::string()
+ )
+ rm(str)
+
+ # ... and when it has the same name as an Arrow function
+ type <- arrow::string
+ expect_equal(
+ Array$create("abc", type = type()),
+ arrow::string()
+ )
+ rm(type)
+
+ # Works with local variable whose value is an Arrow type
+ type <- arrow::string()
+ expect_equal(
+ Array$create("abc", type = type),
+ arrow::string()
+ )
+ rm(type)
+})
+
+test_that("Type strings are correctly canonicalized", {
+ # data types without arguments
+ expect_equal(canonical_type_str("int8"), int8()$ToString())
+ expect_equal(canonical_type_str("int16"), int16()$ToString())
+ expect_equal(canonical_type_str("int32"), int32()$ToString())
+ expect_equal(canonical_type_str("int64"), int64()$ToString())
+ expect_equal(canonical_type_str("uint8"), uint8()$ToString())
+ expect_equal(canonical_type_str("uint16"), uint16()$ToString())
+ expect_equal(canonical_type_str("uint32"), uint32()$ToString())
+ expect_equal(canonical_type_str("uint64"), uint64()$ToString())
+ expect_equal(canonical_type_str("float16"), float16()$ToString())
+ expect_equal(canonical_type_str("halffloat"), halffloat()$ToString())
+ expect_equal(canonical_type_str("float32"), float32()$ToString())
+ expect_equal(canonical_type_str("float"), float()$ToString())
+ expect_equal(canonical_type_str("float64"), float64()$ToString())
+ expect_equal(canonical_type_str("double"), float64()$ToString())
+ expect_equal(canonical_type_str("boolean"), boolean()$ToString())
+ expect_equal(canonical_type_str("bool"), bool()$ToString())
+ expect_equal(canonical_type_str("utf8"), utf8()$ToString())
+ expect_equal(canonical_type_str("large_utf8"), large_utf8()$ToString())
+ expect_equal(canonical_type_str("large_string"), large_utf8()$ToString())
+ expect_equal(canonical_type_str("binary"), binary()$ToString())
+ expect_equal(canonical_type_str("large_binary"), large_binary()$ToString())
+ expect_equal(canonical_type_str("string"), arrow::string()$ToString())
+ expect_equal(canonical_type_str("null"), null()$ToString())
+
+ # data types with arguments
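+  # (each sub() below keeps only the leading type name from ToString(),
+  # stripping everything from the first "(", "[", or "<" onward)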
+ expect_equal(
+ canonical_type_str("fixed_size_binary"),
+ sub("^([^([<]+).*$", "\\1", fixed_size_binary(42)$ToString())
+ )
+ expect_equal(
+ canonical_type_str("date32"),
+ sub("^([^([<]+).*$", "\\1", date32()$ToString())
+ )
+ expect_equal(
+ canonical_type_str("date64"),
+ sub("^([^([<]+).*$", "\\1", date64()$ToString())
+ )
+ expect_equal(
+ canonical_type_str("time32"),
+ sub("^([^([<]+).*$", "\\1", time32()$ToString())
+ )
+ expect_equal(
+ canonical_type_str("time64"),
+ sub("^([^([<]+).*$", "\\1", time64()$ToString())
+ )
+ expect_equal(
+ canonical_type_str("timestamp"),
+ sub("^([^([<]+).*$", "\\1", timestamp()$ToString())
+ )
+ expect_equal(
+ canonical_type_str("decimal"),
+ sub("^([^([<]+).*$", "\\1", decimal(3, 2)$ToString())
+ )
+ expect_equal(
+ canonical_type_str("struct"),
+ sub("^([^([<]+).*$", "\\1", struct(foo = int32())$ToString())
+ )
+ expect_equal(
+ canonical_type_str("list_of"),
+ sub("^([^([<]+).*$", "\\1", list_of(int32())$ToString())
+ )
+ expect_equal(
+ canonical_type_str("list"),
+ sub("^([^([<]+).*$", "\\1", list_of(int32())$ToString())
+ )
+ expect_equal(
+ canonical_type_str("large_list_of"),
+ sub("^([^([<]+).*$", "\\1", large_list_of(int32())$ToString())
+ )
+ expect_equal(
+ canonical_type_str("large_list"),
+ sub("^([^([<]+).*$", "\\1", large_list_of(int32())$ToString())
+ )
+ expect_equal(
+ canonical_type_str("fixed_size_list_of"),
+ sub("^([^([<]+).*$", "\\1", fixed_size_list_of(int32(), 42)$ToString())
+ )
+ expect_equal(
+ canonical_type_str("fixed_size_list"),
+ sub("^([^([<]+).*$", "\\1", fixed_size_list_of(int32(), 42)$ToString())
+ )
+
+ # unsupported data types
+ expect_error(
+ canonical_type_str("decimal128(3, 2)"),
+ "parameters"
+ )
+ expect_error(
+ canonical_type_str("list<item: int32>"),
+ "parameters"
+ )
+ expect_error(
+ canonical_type_str("time32[s]"),
+ "parameters"
+ )
+
+ # unrecognized data types
+ expect_error(
+ canonical_type_str("foo"),
+ "Unrecognized"
+ )
+})
diff --git a/src/arrow/r/tests/testthat/test-utf.R b/src/arrow/r/tests/testthat/test-utf.R
new file mode 100644
index 000000000..69d196274
--- /dev/null
+++ b/src/arrow/r/tests/testthat/test-utf.R
@@ -0,0 +1,24 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+test_that("We handle non-UTF strings", {
+  # The code with non-UTF strings lives in a separate file so that this file
+  # parses cleanly on all platforms
+ skip_on_cran()
+ source("latin1.R", encoding = "latin1")
+})