Diffstat (limited to 'src/arrow/r/R')
-rw-r--r--  src/arrow/r/R/array-data.R            53
-rw-r--r--  src/arrow/r/R/array.R                329
-rw-r--r--  src/arrow/r/R/arrow-datum.R          266
-rw-r--r--  src/arrow/r/R/arrow-package.R        351
-rw-r--r--  src/arrow/r/R/arrow-tabular.R        272
-rw-r--r--  src/arrow/r/R/arrowExports.R        1801
-rw-r--r--  src/arrow/r/R/buffer.R                78
-rw-r--r--  src/arrow/r/R/chunked-array.R        153
-rw-r--r--  src/arrow/r/R/compression.R          124
-rw-r--r--  src/arrow/r/R/compute.R              309
-rw-r--r--  src/arrow/r/R/config.R                44
-rw-r--r--  src/arrow/r/R/csv.R                  644
-rw-r--r--  src/arrow/r/R/dataset-factory.R      170
-rw-r--r--  src/arrow/r/R/dataset-format.R       353
-rw-r--r--  src/arrow/r/R/dataset-partition.R    132
-rw-r--r--  src/arrow/r/R/dataset-scan.R         262
-rw-r--r--  src/arrow/r/R/dataset-write.R        144
-rw-r--r--  src/arrow/r/R/dataset.R              367
-rw-r--r--  src/arrow/r/R/deprecated.R            40
-rw-r--r--  src/arrow/r/R/dictionary.R            69
-rw-r--r--  src/arrow/r/R/dplyr-arrange.R         98
-rw-r--r--  src/arrow/r/R/dplyr-collect.R        121
-rw-r--r--  src/arrow/r/R/dplyr-count.R           60
-rw-r--r--  src/arrow/r/R/dplyr-distinct.R        46
-rw-r--r--  src/arrow/r/R/dplyr-eval.R           123
-rw-r--r--  src/arrow/r/R/dplyr-filter.R          91
-rw-r--r--  src/arrow/r/R/dplyr-functions.R     1087
-rw-r--r--  src/arrow/r/R/dplyr-group-by.R        86
-rw-r--r--  src/arrow/r/R/dplyr-join.R           126
-rw-r--r--  src/arrow/r/R/dplyr-mutate.R         140
-rw-r--r--  src/arrow/r/R/dplyr-select.R         125
-rw-r--r--  src/arrow/r/R/dplyr-summarize.R      289
-rw-r--r--  src/arrow/r/R/dplyr.R                259
-rw-r--r--  src/arrow/r/R/duckdb.R               165
-rw-r--r--  src/arrow/r/R/enums.R                178
-rw-r--r--  src/arrow/r/R/expression.R           240
-rw-r--r--  src/arrow/r/R/feather.R              219
-rw-r--r--  src/arrow/r/R/field.R                 84
-rw-r--r--  src/arrow/r/R/filesystem.R           505
-rw-r--r--  src/arrow/r/R/flight.R               124
-rw-r--r--  src/arrow/r/R/install-arrow.R        239
-rw-r--r--  src/arrow/r/R/io.R                   295
-rw-r--r--  src/arrow/r/R/ipc_stream.R           123
-rw-r--r--  src/arrow/r/R/json.R                 102
-rw-r--r--  src/arrow/r/R/memory-pool.R           61
-rw-r--r--  src/arrow/r/R/message.R               97
-rw-r--r--  src/arrow/r/R/metadata.R             210
-rw-r--r--  src/arrow/r/R/parquet.R              585
-rw-r--r--  src/arrow/r/R/python.R               225
-rw-r--r--  src/arrow/r/R/query-engine.R         298
-rw-r--r--  src/arrow/r/R/record-batch-reader.R  164
-rw-r--r--  src/arrow/r/R/record-batch-writer.R  194
-rw-r--r--  src/arrow/r/R/record-batch.R         193
-rw-r--r--  src/arrow/r/R/reexports-bit64.R       22
-rw-r--r--  src/arrow/r/R/reexports-tidyselect.R  46
-rw-r--r--  src/arrow/r/R/scalar.R               101
-rw-r--r--  src/arrow/r/R/schema.R               330
-rw-r--r--  src/arrow/r/R/table.R                170
-rw-r--r--  src/arrow/r/R/type.R                 541
-rw-r--r--  src/arrow/r/R/util.R                 195
60 files changed, 14318 insertions, 0 deletions
diff --git a/src/arrow/r/R/array-data.R b/src/arrow/r/R/array-data.R
new file mode 100644
index 000000000..99c24fdcf
--- /dev/null
+++ b/src/arrow/r/R/array-data.R
@@ -0,0 +1,53 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+#' @title ArrayData class
+#' @usage NULL
+#' @format NULL
+#' @docType class
+#' @description The `ArrayData` class allows you to get and inspect the data
+#' inside an `arrow::Array`.
+#'
+#' @section Usage:
+#'
+#' ```
+#' data <- Array$create(x)$data()
+#'
+#' data$type
+#' data$length
+#' data$null_count
+#' data$offset
+#' data$buffers
+#' ```
+#'
+#' @section Methods:
+#'
+#' ...
+#'
+#' @rdname ArrayData
+#' @name ArrayData
+#' @include type.R
+ArrayData <- R6Class("ArrayData",
+ inherit = ArrowObject,
+ active = list(
+ type = function() ArrayData__get_type(self),
+ length = function() ArrayData__get_length(self),
+ null_count = function() ArrayData__get_null_count(self),
+ offset = function() ArrayData__get_offset(self),
+ buffers = function() ArrayData__buffers(self)
+ )
+)
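
The active bindings above delegate directly to the C++ accessors. A minimal sketch of inspecting `ArrayData` from R (assuming the package was built with the Arrow C++ library available):

```
library(arrow)

# Build an Array with one null, then inspect its underlying ArrayData
a <- Array$create(c(1.5, NA, 3.0))
data <- a$data()

data$type        # double (float64)
data$length      # 3
data$null_count  # 1
data$offset      # 0 for a freshly created array
data$buffers     # list of Buffer objects (validity bitmap, values)
```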
diff --git a/src/arrow/r/R/array.R b/src/arrow/r/R/array.R
new file mode 100644
index 000000000..46acc14ff
--- /dev/null
+++ b/src/arrow/r/R/array.R
@@ -0,0 +1,329 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+#' @include arrow-datum.R
+
+#' @title Arrow Arrays
+#' @description An `Array` is an immutable data array with some logical type
+#' and some length. Most logical types are contained in the base
+#' `Array` class; there are also subclasses for `DictionaryArray`, `ListArray`,
+#' and `StructArray`.
+#' @usage NULL
+#' @format NULL
+#' @docType class
+#'
+#' @section Factory:
+#' The `Array$create()` factory method instantiates an `Array` and
+#' takes the following arguments:
+#' * `x`: an R vector, list, or `data.frame`
+#' * `type`: an optional [data type][data-type] for `x`. If omitted, the type
+#' will be inferred from the data.
+#'
+#' `Array$create()` will return the appropriate subclass of `Array`, such as
+#' `DictionaryArray` when given an R factor.
+#'
+#' To compose a `DictionaryArray` directly, call `DictionaryArray$create()`,
+#' which takes two arguments:
+#' * `x`: an R vector or `Array` of integers for the dictionary indices
+#' * `dict`: an R vector or `Array` of dictionary values (like R factor levels
+#' but not limited to strings only)
+#' @section Usage:
+#'
+#' ```
+#' a <- Array$create(x)
+#' length(a)
+#'
+#' print(a)
+#' a == a
+#' ```
+#'
+#' @section Methods:
+#'
+#' - `$IsNull(i)`: Return true if value at index is null. Does not boundscheck
+#' - `$IsValid(i)`: Return true if value at index is valid. Does not boundscheck
+#' - `$length()`: Size in the number of elements this array contains
+#' - `$offset`: A relative position into another array's data, to enable zero-copy slicing
+#' - `$null_count`: The number of null entries in the array
+#' - `$type`: logical type of data
+#' - `$type_id()`: type id
+#' - `$Equals(other)` : is this array equal to `other`
+#' - `$ApproxEquals(other)` : is this array approximately equal to `other`
+#' - `$Diff(other)` : return a string expressing the difference between two arrays
+#' - `$data()`: return the underlying [ArrayData][ArrayData]
+#' - `$as_vector()`: convert to an R vector
+#' - `$ToString()`: string representation of the array
+#' - `$Slice(offset, length = NULL)`: Construct a zero-copy slice of the array
+#' with the indicated offset and length. If length is `NULL`, the slice goes
+#' until the end of the array.
+#' - `$Take(i)`: return an `Array` with values at positions given by integers
+#' (R vector or Arrow Array) `i`.
+#' - `$Filter(i, keep_na = TRUE)`: return an `Array` with values at positions where logical
+#' vector (or Arrow boolean Array) `i` is `TRUE`.
+#' - `$SortIndices(descending = FALSE)`: return an `Array` of integer positions that can be
+#' used to rearrange the `Array` in ascending or descending order
+#' - `$RangeEquals(other, start_idx, end_idx, other_start_idx)` : are the values
+#'   of this array in the range `[start_idx, end_idx)` equal to the values of
+#'   `other`, starting the comparison at `other_start_idx`
+#' - `$cast(target_type, safe = TRUE, options = cast_options(safe))`: Alter the
+#' data in the array to change its type.
+#' - `$View(type)`: Construct a zero-copy view of this array with the given type.
+#' - `$Validate()` : Perform any validation checks to determine obvious inconsistencies
+#' within the array's internal data. This can be an expensive check, potentially `O(length)`
+#'
+#' @rdname array
+#' @name array
+#' @examplesIf arrow_available()
+#' my_array <- Array$create(1:10)
+#' my_array$type
+#' my_array$cast(int8())
+#'
+#' # Check if value is null; zero-indexed
+#' na_array <- Array$create(c(1:5, NA))
+#' na_array$IsNull(0)
+#' na_array$IsNull(5)
+#' na_array$IsValid(5)
+#' na_array$null_count
+#'
+#' # zero-copy slicing; the offset of the new Array will be the same as the index passed to $Slice
+#' new_array <- na_array$Slice(5)
+#' new_array$offset
+#'
+#' # Compare 2 arrays
+#' na_array2 <- na_array
+#' na_array2 == na_array # element-wise comparison
+#' na_array2$Equals(na_array) # overall comparison
+#' @export
+Array <- R6Class("Array",
+ inherit = ArrowDatum,
+ public = list(
+ IsNull = function(i) Array__IsNull(self, i),
+ IsValid = function(i) Array__IsValid(self, i),
+ length = function() Array__length(self),
+ type_id = function() Array__type_id(self),
+ Equals = function(other, ...) {
+ inherits(other, "Array") && Array__Equals(self, other)
+ },
+ ApproxEquals = function(other) {
+ inherits(other, "Array") && Array__ApproxEquals(self, other)
+ },
+ Diff = function(other) {
+ if (!inherits(other, "Array")) {
+ other <- Array$create(other)
+ }
+ Array__Diff(self, other)
+ },
+ data = function() Array__data(self),
+ as_vector = function() Array__as_vector(self),
+ ToString = function() {
+ typ <- paste0("<", self$type$ToString(), ">")
+ paste(typ, Array__ToString(self), sep = "\n")
+ },
+ Slice = function(offset, length = NULL) {
+ if (is.null(length)) {
+ Array__Slice1(self, offset)
+ } else {
+ Array__Slice2(self, offset, length)
+ }
+ },
+ Take = function(i) {
+ if (is.numeric(i)) {
+ i <- as.integer(i)
+ }
+ if (is.integer(i)) {
+ i <- Array$create(i)
+ }
+ call_function("take", self, i)
+ },
+ Filter = function(i, keep_na = TRUE) {
+ if (is.logical(i)) {
+ i <- Array$create(i)
+ }
+ assert_is(i, "Array")
+ call_function("filter", self, i, options = list(keep_na = keep_na))
+ },
+ SortIndices = function(descending = FALSE) {
+ assert_that(is.logical(descending))
+ assert_that(length(descending) == 1L)
+ assert_that(!is.na(descending))
+ call_function("array_sort_indices", self, options = list(order = descending))
+ },
+ RangeEquals = function(other, start_idx, end_idx, other_start_idx = 0L) {
+ assert_is(other, "Array")
+ Array__RangeEquals(self, other, start_idx, end_idx, other_start_idx)
+ },
+ View = function(type) {
+ Array$create(Array__View(self, as_type(type)))
+ },
+ Validate = function() Array__Validate(self),
+ export_to_c = function(array_ptr, schema_ptr) ExportArray(self, array_ptr, schema_ptr)
+ ),
+ active = list(
+ null_count = function() Array__null_count(self),
+ offset = function() Array__offset(self),
+ type = function() Array__type(self)
+ )
+)
+Array$create <- function(x, type = NULL) {
+ if (!is.null(type)) {
+ type <- as_type(type)
+ }
+ if (inherits(x, "Scalar")) {
+ out <- x$as_array()
+ if (!is.null(type)) {
+ out <- out$cast(type)
+ }
+ return(out)
+ }
+ vec_to_arrow(x, type)
+}
+#' @include arrowExports.R
+Array$import_from_c <- ImportArray
+
+#' @rdname array
+#' @usage NULL
+#' @format NULL
+#' @export
+DictionaryArray <- R6Class("DictionaryArray",
+ inherit = Array,
+ public = list(
+ indices = function() DictionaryArray__indices(self),
+ dictionary = function() DictionaryArray__dictionary(self)
+ ),
+ active = list(
+ ordered = function() self$type$ordered
+ )
+)
+DictionaryArray$create <- function(x, dict = NULL) {
+ if (is.factor(x)) {
+ # The simple case: converting a factor.
+ # Ignoring `dict`; should probably error if dict is not NULL
+ return(Array$create(x))
+ }
+
+ assert_that(!is.null(dict))
+ if (!is.Array(x)) {
+ x <- Array$create(x)
+ }
+ if (!is.Array(dict)) {
+ dict <- Array$create(dict)
+ }
+ type <- DictionaryType$create(x$type, dict$type)
+ DictionaryArray__FromArrays(type, x, dict)
+}
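
A minimal sketch of the two ways this factory can be reached: converting an R factor, or composing indices and a dictionary directly. The indices here are assumed to be zero-based (Arrow's convention); the values are illustrative only.

```
# From an R factor: Array$create() returns a DictionaryArray
f <- Array$create(factor(c("a", "b", "a")))
f$dictionary()
f$indices()

# Directly from indices + dictionary values (0-based indices)
d <- DictionaryArray$create(c(0L, 1L, 0L), c("a", "b"))
d$type$ordered
```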
+
+#' @rdname array
+#' @usage NULL
+#' @format NULL
+#' @export
+StructArray <- R6Class("StructArray",
+ inherit = Array,
+ public = list(
+ field = function(i) StructArray__field(self, i),
+ GetFieldByName = function(name) StructArray__GetFieldByName(self, name),
+ Flatten = function() StructArray__Flatten(self)
+ )
+)
+
+
+#' @export
+`[[.StructArray` <- function(x, i, ...) {
+ if (is.character(i)) {
+ x$GetFieldByName(i)
+ } else if (is.numeric(i)) {
+ x$field(i - 1)
+ } else {
+ stop("'i' must be character or numeric, not ", class(i), call. = FALSE)
+ }
+}
+
+#' @export
+`$.StructArray` <- function(x, name, ...) {
+ assert_that(is.string(name))
+ if (name %in% ls(x)) {
+ get(name, x)
+ } else {
+ x$GetFieldByName(name)
+ }
+}
+
+#' @export
+names.StructArray <- function(x, ...) StructType__field_names(x$type)
+
+#' @export
+dim.StructArray <- function(x, ...) c(length(x), x$type$num_fields)
+
+#' @export
+as.data.frame.StructArray <- function(x, row.names = NULL, optional = FALSE, ...) {
+ as.vector(x)
+}
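
The accessors above give a `StructArray` data-frame-like behavior. A minimal sketch (column names are illustrative):

```
# Creating an Array from a data.frame yields a StructArray
sa <- Array$create(data.frame(x = 1:3, y = c("a", "b", "c")))

names(sa)         # "x" "y"
dim(sa)           # 3 2
sa$x              # same as sa[["x"]] or sa$GetFieldByName("x")
sa[[2]]           # 1-based in R; calls sa$field(1) under the hood
as.data.frame(sa)
```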
+
+#' @rdname array
+#' @usage NULL
+#' @format NULL
+#' @export
+ListArray <- R6Class("ListArray",
+ inherit = Array,
+ public = list(
+ values = function() ListArray__values(self),
+ value_length = function(i) ListArray__value_length(self, i),
+ value_offset = function(i) ListArray__value_offset(self, i),
+ raw_value_offsets = function() ListArray__raw_value_offsets(self)
+ ),
+ active = list(
+ value_type = function() ListArray__value_type(self)
+ )
+)
+
+#' @rdname array
+#' @usage NULL
+#' @format NULL
+#' @export
+LargeListArray <- R6Class("LargeListArray",
+ inherit = Array,
+ public = list(
+ values = function() LargeListArray__values(self),
+ value_length = function(i) LargeListArray__value_length(self, i),
+ value_offset = function(i) LargeListArray__value_offset(self, i),
+ raw_value_offsets = function() LargeListArray__raw_value_offsets(self)
+ ),
+ active = list(
+ value_type = function() LargeListArray__value_type(self)
+ )
+)
+
+#' @rdname array
+#' @usage NULL
+#' @format NULL
+#' @export
+FixedSizeListArray <- R6Class("FixedSizeListArray",
+ inherit = Array,
+ public = list(
+ values = function() FixedSizeListArray__values(self),
+ value_length = function(i) FixedSizeListArray__value_length(self, i),
+ value_offset = function(i) FixedSizeListArray__value_offset(self, i)
+ ),
+ active = list(
+ value_type = function() FixedSizeListArray__value_type(self),
+ list_size = function() self$type$list_size
+ )
+)
+
+is.Array <- function(x, type = NULL) { # nolint
+ is_it <- inherits(x, c("Array", "ChunkedArray"))
+ if (is_it && !is.null(type)) {
+ is_it <- x$type$ToString() %in% type
+ }
+ is_it
+}
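
For the list-array subclasses defined above, `Array$create()` on an R list produces a `ListArray`. A minimal sketch of its accessors (element indices are zero-based):

```
la <- Array$create(list(1:2, 3:5, integer(0)))

la$value_type          # int32
la$values()            # flat Array of the underlying values: 1 2 3 4 5
la$value_length(1)     # length of the second list element: 3
la$value_offset(2)     # offset of the third element into values(): 5
la$raw_value_offsets() # the list-element offsets as an R integer vector
```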
diff --git a/src/arrow/r/R/arrow-datum.R b/src/arrow/r/R/arrow-datum.R
new file mode 100644
index 000000000..557321f68
--- /dev/null
+++ b/src/arrow/r/R/arrow-datum.R
@@ -0,0 +1,266 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+#' @include arrow-package.R
+
+# Base class for Array, ChunkedArray, and Scalar, for S3 method dispatch only.
+# Does not exist in C++ class hierarchy
+ArrowDatum <- R6Class("ArrowDatum",
+ inherit = ArrowObject,
+ public = list(
+ cast = function(target_type, safe = TRUE, ...) {
+ opts <- cast_options(safe, ...)
+ opts$to_type <- as_type(target_type)
+ call_function("cast", self, options = opts)
+ }
+ )
+)
+
+#' @export
+length.ArrowDatum <- function(x) x$length()
+
+#' @export
+is.finite.ArrowDatum <- function(x) {
+ is_fin <- call_function("is_finite", x)
+ # for compatibility with base::is.finite(), return FALSE for NA_real_
+ is_fin & !is.na(is_fin)
+}
+
+#' @export
+is.infinite.ArrowDatum <- function(x) {
+ is_inf <- call_function("is_inf", x)
+ # for compatibility with base::is.infinite(), return FALSE for NA_real_
+ is_inf & !is.na(is_inf)
+}
+
+#' @export
+is.na.ArrowDatum <- function(x) {
+ call_function("is_null", x, options = list(nan_is_null = TRUE))
+}
+
+#' @export
+is.nan.ArrowDatum <- function(x) {
+ if (x$type_id() %in% TYPES_WITH_NAN) {
+ # TODO: if an option is added to the is_nan kernel to treat NA as NaN,
+ # use that to simplify the code here (ARROW-13366)
+ call_function("is_nan", x) & call_function("is_valid", x)
+ } else {
+ Scalar$create(FALSE)$as_array(length(x))
+ }
+}
+
+#' @export
+as.vector.ArrowDatum <- function(x, mode) {
+ x$as_vector()
+}
+
+#' @export
+Ops.ArrowDatum <- function(e1, e2) {
+ if (.Generic == "!") {
+ eval_array_expression(.Generic, e1)
+ } else if (.Generic %in% names(.array_function_map)) {
+ eval_array_expression(.Generic, e1, e2)
+ } else {
+ stop(paste0("Unsupported operation on `", class(e1)[1L], "` : "), .Generic, call. = FALSE)
+ }
+}
+
+# Wrapper around call_function that:
+# (1) maps R function names to Arrow C++ compute ("/" --> "divide_checked")
+# (2) wraps R input args as Array or Scalar
+eval_array_expression <- function(FUN,
+ ...,
+ args = list(...),
+ options = empty_named_list()) {
+ if (FUN == "-" && length(args) == 1L) {
+ if (inherits(args[[1]], "ArrowObject")) {
+ return(eval_array_expression("negate_checked", args[[1]]))
+ } else {
+ return(-args[[1]])
+ }
+ }
+ args <- lapply(args, .wrap_arrow, FUN)
+
+ # In Arrow, "divide" is one function, which does integer division on
+ # integer inputs and floating-point division on floats
+ if (FUN == "/") {
+ # TODO: omg so many ways it's wrong to assume these types
+ args <- map(args, ~ .$cast(float64()))
+ } else if (FUN == "%/%") {
+ # In R, integer division works like floor(float division)
+ out <- eval_array_expression("/", args = args, options = options)
+ return(out$cast(int32(), allow_float_truncate = TRUE))
+ } else if (FUN == "%%") {
+ # We can't simply do {e1 - e2 * ( e1 %/% e2 )} since Ops.Array evaluates
+ # eagerly, but we can build that up
+ quotient <- eval_array_expression("%/%", args = args)
+ base <- eval_array_expression("*", quotient, args[[2]])
+ # this cast is to ensure that the result of this and e1 are the same
+ # (autocasting only applies to scalars)
+ base <- base$cast(args[[1]]$type)
+ return(eval_array_expression("-", args[[1]], base))
+ }
+
+ call_function(
+ .array_function_map[[FUN]] %||% FUN,
+ args = args,
+ options = options
+ )
+}
+
+.wrap_arrow <- function(arg, fun) {
+ if (!inherits(arg, "ArrowObject")) {
+ # TODO: Array$create if lengths are equal?
+ if (fun == "%in%") {
+ arg <- Array$create(arg)
+ } else {
+ arg <- Scalar$create(arg)
+ }
+ }
+ arg
+}
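
A minimal sketch of what the `Ops` group generic and `eval_array_expression()` above provide; inputs that are not Arrow objects are wrapped as `Scalar` (or `Array` for `%in%`), and results come back as Arrow objects, not R vectors:

```
a <- Array$create(c(1, 2, 3, NA))

a + 1     # arithmetic maps through .array_function_map to Arrow compute kernels
a / 2     # both inputs are cast to float64 before dividing
a %/% 2   # implemented as float division followed by an integer cast
a %% 2    # built up from %/% as in the branch above
a > 1     # comparison returns a boolean Array; NA stays NA
!(a > 1)  # unary "!" dispatches with a single argument
```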
+
+#' @export
+na.omit.ArrowDatum <- function(object, ...) {
+ object$Filter(!is.na(object))
+}
+
+#' @export
+na.exclude.ArrowDatum <- na.omit.ArrowDatum
+
+#' @export
+na.fail.ArrowDatum <- function(object, ...) {
+ if (object$null_count > 0) {
+ stop("missing values in object", call. = FALSE)
+ }
+ object
+}
+
+filter_rows <- function(x, i, keep_na = TRUE, ...) {
+ # General purpose function for [ row subsetting with R semantics
+ # Based on the input for `i`, calls x$Filter, x$Slice, or x$Take
+ nrows <- x$num_rows %||% x$length() # Depends on whether Array or Table-like
+ if (is.logical(i)) {
+ if (isTRUE(i)) {
+ # Shortcut without doing any work
+ x
+ } else {
+ i <- rep_len(i, nrows) # For R recycling behavior; consider vctrs::vec_recycle()
+ x$Filter(i, keep_na)
+ }
+ } else if (is.numeric(i)) {
+ if (all(i < 0)) {
+ # in R, negative i means "everything but i"
+ i <- setdiff(seq_len(nrows), -1 * i)
+ }
+ if (is.sliceable(i)) {
+ x$Slice(i[1] - 1, length(i))
+ } else if (all(i > 0)) {
+ x$Take(i - 1)
+ } else {
+ stop("Cannot mix positive and negative indices", call. = FALSE)
+ }
+ } else if (is.Array(i, INTEGER_TYPES)) {
+ # NOTE: this doesn't do the - 1 offset
+ x$Take(i)
+ } else if (is.Array(i, "bool")) {
+ x$Filter(i, keep_na)
+ } else {
+ # Unsupported cases
+ if (is.Array(i)) {
+ stop("Cannot extract rows with an Array of type ", i$type$ToString(), call. = FALSE)
+ }
+ stop("Cannot extract rows with an object of class ", class(i), call. = FALSE)
+ }
+}
+
+#' @export
+`[.ArrowDatum` <- filter_rows
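
A minimal sketch of the row-subsetting semantics that `filter_rows()` implements for `[` (the same helper is reused by `[.ArrowTabular` further below):

```
a <- Array$create(c(10, 20, NA, 40))

a[1:2]             # contiguous ascending positions become a zero-copy $Slice()
a[c(4, 1)]         # other positive positions go through $Take()
a[-1]              # negative indices mean "all but", as in base R
a[c(TRUE, FALSE)]  # logical vectors are recycled, then passed to $Filter()
a[a > 15]          # a boolean Arrow Array filters directly (NAs kept by default)
```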
+
+#' @importFrom utils head
+#' @export
+head.ArrowDatum <- function(x, n = 6L, ...) {
+ assert_is(n, c("numeric", "integer"))
+ assert_that(length(n) == 1)
+ len <- NROW(x)
+ if (n < 0) {
+ # head(x, negative) means all but the last n rows
+ n <- max(len + n, 0)
+ } else {
+ n <- min(len, n)
+ }
+ if (n == len) {
+ return(x)
+ }
+ x$Slice(0, n)
+}
+
+#' @importFrom utils tail
+#' @export
+tail.ArrowDatum <- function(x, n = 6L, ...) {
+ assert_is(n, c("numeric", "integer"))
+ assert_that(length(n) == 1)
+ len <- NROW(x)
+ if (n < 0) {
+ # tail(x, negative) means all but the first n rows
+ n <- min(-n, len)
+ } else {
+ n <- max(len - n, 0)
+ }
+ if (n == 0) {
+ return(x)
+ }
+ x$Slice(n)
+}
+
+is.sliceable <- function(i) {
+ # Determine whether `i` can be expressed as a $Slice() command
+ is.numeric(i) &&
+ length(i) > 0 &&
+ all(i > 0) &&
+ i[1] <= i[length(i)] &&
+ identical(as.integer(i), i[1]:i[length(i)])
+}
+
+#' @export
+as.double.ArrowDatum <- function(x, ...) as.double(as.vector(x), ...)
+
+#' @export
+as.integer.ArrowDatum <- function(x, ...) as.integer(as.vector(x), ...)
+
+#' @export
+as.character.ArrowDatum <- function(x, ...) as.character(as.vector(x), ...)
+
+#' @export
+sort.ArrowDatum <- function(x, decreasing = FALSE, na.last = NA, ...) {
+ # Arrow always sorts nulls at the end of the array. This corresponds to
+ # sort(na.last = TRUE). For the other two cases (na.last = NA and
+ # na.last = FALSE) we need to use workarounds.
+ # TODO: Implement this more cleanly after ARROW-12063
+ if (is.na(na.last)) {
+ # Filter out NAs before sorting
+ x <- x$Filter(!is.na(x))
+ x$Take(x$SortIndices(descending = decreasing))
+ } else if (na.last) {
+ x$Take(x$SortIndices(descending = decreasing))
+ } else {
+ # Create a new array that encodes missing values as 1 and non-missing values
+ # as 0. Sort descending by that array first to get the NAs at the beginning
+ tbl <- Table$create(x = x, `is_na` = as.integer(is.na(x)))
+ tbl$x$Take(tbl$SortIndices(names = c("is_na", "x"), descending = c(TRUE, decreasing)))
+ }
+}
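
A minimal sketch of the head/tail/sort methods defined above; negative `n` and the three `na.last` modes mirror base R:

```
a <- Array$create(c(3, 1, NA, 2))

head(a, 2)                  # zero-copy Slice of the first 2 values
tail(a, -1)                 # everything but the first value
sort(a)                     # na.last = NA: drop NAs, then sort ascending
sort(a, decreasing = TRUE)  # 3 2 1
sort(a, na.last = TRUE)     # 1 2 3 NA (Arrow's native null placement)
sort(a, na.last = FALSE)    # NA 1 2 3, via the two-column SortIndices trick
```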
diff --git a/src/arrow/r/R/arrow-package.R b/src/arrow/r/R/arrow-package.R
new file mode 100644
index 000000000..edc2652b6
--- /dev/null
+++ b/src/arrow/r/R/arrow-package.R
@@ -0,0 +1,351 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+#' @importFrom stats quantile median na.omit na.exclude na.pass na.fail
+#' @importFrom R6 R6Class
+#' @importFrom purrr as_mapper map map2 map_chr map2_chr map_dfr map_int map_lgl keep imap imap_chr
+#' @importFrom assertthat assert_that is.string
+#' @importFrom rlang list2 %||% is_false abort dots_n warn enquo quo_is_null enquos is_integerish quos
+#' @importFrom rlang eval_tidy new_data_mask syms env new_environment env_bind set_names exec
+#' @importFrom rlang is_bare_character quo_get_expr quo_get_env quo_set_expr .data seq2 is_interactive
+#' @importFrom rlang expr caller_env is_character quo_name is_quosure enexpr enexprs as_quosure
+#' @importFrom tidyselect vars_pull vars_rename vars_select eval_select
+#' @useDynLib arrow, .registration = TRUE
+#' @keywords internal
+"_PACKAGE"
+
+#' @importFrom vctrs s3_register vec_size vec_cast vec_unique
+.onLoad <- function(...) {
+ dplyr_methods <- paste0(
+ "dplyr::",
+ c(
+ "select", "filter", "collect", "summarise", "group_by", "groups",
+ "group_vars", "group_by_drop_default", "ungroup", "mutate", "transmute",
+ "arrange", "rename", "pull", "relocate", "compute", "collapse",
+ "distinct", "left_join", "right_join", "inner_join", "full_join",
+ "semi_join", "anti_join", "count", "tally"
+ )
+ )
+ for (cl in c("Dataset", "ArrowTabular", "arrow_dplyr_query")) {
+ for (m in dplyr_methods) {
+ s3_register(m, cl)
+ }
+ }
+ s3_register("dplyr::tbl_vars", "arrow_dplyr_query")
+
+ for (cl in c(
+ "Array", "RecordBatch", "ChunkedArray", "Table", "Schema",
+ "Field", "DataType", "RecordBatchReader"
+ )) {
+ s3_register("reticulate::py_to_r", paste0("pyarrow.lib.", cl))
+ s3_register("reticulate::r_to_py", cl)
+ }
+
+ # Create these once, at package build time
+ if (arrow_available()) {
+ # Also include all available Arrow Compute functions,
+ # namespaced as arrow_fun.
+ # We can't do this at install time because list_compute_functions() may error
+ all_arrow_funs <- list_compute_functions()
+ arrow_funcs <- set_names(
+ lapply(all_arrow_funs, function(fun) {
+ force(fun)
+ function(...) build_expr(fun, ...)
+ }),
+ paste0("arrow_", all_arrow_funs)
+ )
+ .cache$functions <- c(nse_funcs, arrow_funcs)
+ }
+
+ if (tolower(Sys.info()[["sysname"]]) == "windows") {
+ # Disable multithreading on Windows
+ # See https://issues.apache.org/jira/browse/ARROW-8379
+ options(arrow.use_threads = FALSE)
+ }
+
+ invisible()
+}
+
+.onAttach <- function(libname, pkgname) {
+ if (!arrow_available()) {
+ msg <- paste(
+ "The Arrow C++ library is not available. To retry installation with debug output, run:",
+ " install_arrow(verbose = TRUE)",
+ "See https://arrow.apache.org/docs/r/articles/install.html for more guidance and troubleshooting.",
+ sep = "\n"
+ )
+ packageStartupMessage(msg)
+ } else {
+ # Just to be extra safe, let's wrap this in a try();
+ # we don't want a failed startup message to prevent the package from loading
+ try({
+ features <- arrow_info()$capabilities
+ # That has all of the #ifdef features, plus the compression libs and the
+ # string libraries (but not the memory allocators, they're added elsewhere)
+ #
+ # Let's print a message if some are off
+ if (some_features_are_off(features)) {
+ packageStartupMessage("See arrow_info() for available features")
+ }
+ })
+ }
+}
+
+#' Is the C++ Arrow library available?
+#'
+#' You won't generally need to call these functions, but they're made available
+#' for diagnostic purposes.
+#' @return `TRUE` or `FALSE` depending on whether the package was installed
+#' with:
+#' * The Arrow C++ library (check with `arrow_available()`)
+#' * Arrow Dataset support enabled (check with `arrow_with_dataset()`)
+#' * Parquet support enabled (check with `arrow_with_parquet()`)
+#' * JSON support enabled (check with `arrow_with_json()`)
+#' * Amazon S3 support enabled (check with `arrow_with_s3()`)
+#' @export
+#' @examples
+#' arrow_available()
+#' arrow_with_dataset()
+#' arrow_with_parquet()
+#' arrow_with_json()
+#' arrow_with_s3()
+#' @seealso If any of these are `FALSE`, see
+#' `vignette("install", package = "arrow")` for guidance on reinstalling the
+#' package.
+arrow_available <- function() {
+ tryCatch(.Call(`_arrow_available`), error = function(e) {
+ return(FALSE)
+ })
+}
+
+#' @rdname arrow_available
+#' @export
+arrow_with_dataset <- function() {
+ is_32bit <- .Machine$sizeof.pointer < 8
+ is_old_r <- getRversion() < "4.0.0"
+ is_windows <- tolower(Sys.info()[["sysname"]]) == "windows"
+ if (is_32bit && is_old_r && is_windows) {
+ # 32-bit rtools 3.5 does not properly implement the std::thread expectations
+ # but we can't just disable ARROW_DATASET in that build,
+ # so report it as "off" here.
+ return(FALSE)
+ }
+ tryCatch(.Call(`_dataset_available`), error = function(e) {
+ return(FALSE)
+ })
+}
+
+#' @rdname arrow_available
+#' @export
+arrow_with_parquet <- function() {
+ tryCatch(.Call(`_parquet_available`), error = function(e) {
+ return(FALSE)
+ })
+}
+
+#' @rdname arrow_available
+#' @export
+arrow_with_s3 <- function() {
+ tryCatch(.Call(`_s3_available`), error = function(e) {
+ return(FALSE)
+ })
+}
+
+#' @rdname arrow_available
+#' @export
+arrow_with_json <- function() {
+ tryCatch(.Call(`_json_available`), error = function(e) {
+ return(FALSE)
+ })
+}
+
+option_use_threads <- function() {
+ !is_false(getOption("arrow.use_threads"))
+}
+
+#' Report information on the package's capabilities
+#'
+#' This function summarizes a number of build-time configurations and run-time
+#' settings for the Arrow package. It may be useful for diagnostics.
+#' @return A list including version information, boolean "capabilities",
+#' statistics from Arrow's memory allocator, and Arrow's run-time
+#' information.
+#' @export
+#' @importFrom utils packageVersion
+arrow_info <- function() {
+ opts <- options()
+ out <- list(
+ version = packageVersion("arrow"),
+ libarrow = arrow_available(),
+ options = opts[grep("^arrow\\.", names(opts))]
+ )
+ if (out$libarrow) {
+ pool <- default_memory_pool()
+ runtimeinfo <- runtime_info()
+ buildinfo <- build_info()
+ compute_funcs <- list_compute_functions()
+ out <- c(out, list(
+ capabilities = c(
+ dataset = arrow_with_dataset(),
+ parquet = arrow_with_parquet(),
+ json = arrow_with_json(),
+ s3 = arrow_with_s3(),
+ utf8proc = "utf8_upper" %in% compute_funcs,
+ re2 = "replace_substring_regex" %in% compute_funcs,
+ vapply(tolower(names(CompressionType)[-1]), codec_is_available, logical(1))
+ ),
+ memory_pool = list(
+ backend_name = pool$backend_name,
+ bytes_allocated = pool$bytes_allocated,
+ max_memory = pool$max_memory,
+ available_backends = supported_memory_backends()
+ ),
+ runtime_info = list(
+ simd_level = runtimeinfo[1],
+ detected_simd_level = runtimeinfo[2]
+ ),
+ build_info = list(
+ cpp_version = buildinfo[1],
+ cpp_compiler = buildinfo[2],
+ cpp_compiler_version = buildinfo[3],
+ cpp_compiler_flags = buildinfo[4],
+ # git_id is "" if not built from a git checkout
+ # convert that to NULL
+ git_id = if (nzchar(buildinfo[5])) buildinfo[5]
+ )
+ ))
+ }
+ structure(out, class = "arrow_info")
+}
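
A minimal sketch of using `arrow_info()` interactively; the print method defined below renders the same information as key-value tables:

```
info <- arrow_info()

info$version       # package version
info$libarrow      # TRUE when the C++ library is available
info$capabilities  # named logical vector: dataset, parquet, s3, ...
info$memory_pool$backend_name
info$options       # any options() whose names start with "arrow."
```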
+
+some_features_are_off <- function(features) {
+ # `features` is a named logical vector (as in arrow_info()$capabilities)
+ # Let's exclude some less relevant ones
+ blocklist <- c("lzo", "bz2", "brotli")
+ # Return TRUE if any of the other features are FALSE
+ !all(features[setdiff(names(features), blocklist)])
+}
+
+#' @export
+print.arrow_info <- function(x, ...) {
+ print_key_values <- function(title, vals, ...) {
+ # Make a key-value table for printing, no column names
+ df <- data.frame(vals, stringsAsFactors = FALSE, ...)
+ names(df) <- ""
+
+ cat(title, ":\n", sep = "")
+ print(df)
+ cat("\n")
+ }
+ cat("Arrow package version: ", format(x$version), "\n\n", sep = "")
+ if (x$libarrow) {
+ print_key_values("Capabilities", c(
+ x$capabilities,
+ jemalloc = "jemalloc" %in% x$memory_pool$available_backends,
+ mimalloc = "mimalloc" %in% x$memory_pool$available_backends
+ ))
+ if (some_features_are_off(x$capabilities) && identical(tolower(Sys.info()[["sysname"]]), "linux")) {
+ # Only on linux because (e.g.) we disable certain features on purpose on rtools35 and solaris
+ cat(
+ "To reinstall with more optional capabilities enabled, see\n",
+ " https://arrow.apache.org/docs/r/articles/install.html\n\n"
+ )
+ }
+
+ if (length(x$options)) {
+ print_key_values("Arrow options()", map_chr(x$options, format))
+ }
+
+ format_bytes <- function(b, units = "auto", digits = 2L, ...) {
+ format(structure(b, class = "object_size"), units = units, digits = digits, ...)
+ }
+ print_key_values("Memory", c(
+ Allocator = x$memory_pool$backend_name,
+ # utils:::format.object_size is not properly vectorized
+ Current = format_bytes(x$memory_pool$bytes_allocated, ...),
+ Max = format_bytes(x$memory_pool$max_memory, ...)
+ ))
+ print_key_values("Runtime", c(
+ `SIMD Level` = x$runtime_info$simd_level,
+ `Detected SIMD Level` = x$runtime_info$detected_simd_level
+ ))
+ print_key_values("Build", c(
+ `C++ Library Version` = x$build_info$cpp_version,
+ `C++ Compiler` = x$build_info$cpp_compiler,
+ `C++ Compiler Version` = x$build_info$cpp_compiler_version,
+ `Git ID` = x$build_info$git_id
+ ))
+ } else {
+ cat(
+ "Arrow C++ library not available. See https://arrow.apache.org/docs/r/articles/install.html ",
+ "for troubleshooting.\n"
+ )
+ }
+ invisible(x)
+}
+
+option_compress_metadata <- function() {
+ !is_false(getOption("arrow.compress_metadata"))
+}
+
+#' @include enums.R
+ArrowObject <- R6Class("ArrowObject",
+ public = list(
+ initialize = function(xp) self$set_pointer(xp),
+ pointer = function() get(".:xp:.", envir = self),
+ `.:xp:.` = NULL,
+ set_pointer = function(xp) {
+ if (!inherits(xp, "externalptr")) {
+ stop(
+ class(self)[1], "$new() requires a pointer as input: ",
+ "did you mean $create() instead?",
+ call. = FALSE
+ )
+ }
+ assign(".:xp:.", xp, envir = self)
+ },
+ print = function(...) {
+ if (!is.null(self$.class_title)) {
+ # Allow subclasses to override just printing the class name first
+ class_title <- self$.class_title()
+ } else {
+ class_title <- class(self)[[1]]
+ }
+ cat(class_title, "\n", sep = "")
+ if (!is.null(self$ToString)) {
+ cat(self$ToString(), "\n", sep = "")
+ }
+ invisible(self)
+ },
+ invalidate = function() {
+ assign(".:xp:.", NULL, envir = self)
+ }
+ )
+)
+
+#' @export
+`!=.ArrowObject` <- function(lhs, rhs) !(lhs == rhs) # nolint
+
+#' @export
+`==.ArrowObject` <- function(x, y) { # nolint
+ x$Equals(y)
+}
+
+#' @export
+all.equal.ArrowObject <- function(target, current, ..., check.attributes = TRUE) {
+ target$Equals(current, check_metadata = check.attributes)
+}
diff --git a/src/arrow/r/R/arrow-tabular.R b/src/arrow/r/R/arrow-tabular.R
new file mode 100644
index 000000000..43110ccf2
--- /dev/null
+++ b/src/arrow/r/R/arrow-tabular.R
@@ -0,0 +1,272 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+#' @include arrow-datum.R
+
+# Base class for RecordBatch and Table for S3 method dispatch only.
+# Does not exist in C++ class hierarchy
+ArrowTabular <- R6Class("ArrowTabular",
+ inherit = ArrowObject,
+ public = list(
+ ToString = function() {
+ sch <- unlist(strsplit(self$schema$ToString(), "\n"))
+ sch <- sub("(.*): (.*)", "$\\1 <\\2>", sch)
+ dims <- sprintf("%s rows x %s columns", self$num_rows, self$num_columns)
+ paste(c(dims, sch), collapse = "\n")
+ },
+ Take = function(i) {
+ if (is.numeric(i)) {
+ i <- as.integer(i)
+ }
+ if (is.integer(i)) {
+ i <- Array$create(i)
+ }
+ assert_that(is.Array(i))
+ call_function("take", self, i)
+ },
+ Filter = function(i, keep_na = TRUE) {
+ if (is.logical(i)) {
+ i <- Array$create(i)
+ }
+ assert_that(is.Array(i, "bool"))
+ call_function("filter", self, i, options = list(keep_na = keep_na))
+ },
+ SortIndices = function(names, descending = FALSE) {
+ assert_that(is.character(names))
+ assert_that(length(names) > 0)
+ assert_that(!any(is.na(names)))
+ if (length(descending) == 1L) {
+ descending <- rep_len(descending, length(names))
+ }
+ assert_that(is.logical(descending))
+ assert_that(identical(length(names), length(descending)))
+ assert_that(!any(is.na(descending)))
+ call_function(
+ "sort_indices",
+ self,
+ # cpp11 does not support logical vectors so convert to integer
+ options = list(names = names, orders = as.integer(descending))
+ )
+ }
+ ),
+ active = list(
+ metadata = function(new) {
+ if (missing(new)) {
+ # Get the metadata (from the schema)
+ self$schema$metadata
+ } else {
+ # Set the metadata
+ new <- prepare_key_value_metadata(new)
+ out <- self$ReplaceSchemaMetadata(new)
+ # ReplaceSchemaMetadata returns a new object but we're modifying in place,
+ # so swap in that new C++ object pointer into our R6 object
+ self$set_pointer(out$pointer())
+ self
+ }
+ },
+ r_metadata = function(new) {
+ # Helper for the R metadata that handles the serialization
+ # See also method on Schema
+ if (missing(new)) {
+ out <- self$metadata$r
+ if (!is.null(out)) {
+ # Can't unserialize NULL
+ out <- .unserialize_arrow_r_metadata(out)
+ }
+ # Returns either NULL or a named list
+ out
+ } else {
+ # Set the R metadata
+ self$metadata$r <- .serialize_arrow_r_metadata(new)
+ self
+ }
+ }
+ )
+)
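
A minimal sketch of the shared `Take`/`Filter`/`SortIndices` methods above, applied to a Table (column names are illustrative):

```
tbl <- Table$create(x = c(3, 1, 2), y = c("c", "a", "b"))

# Multi-column-capable sort: compute indices, then Take to materialize the order
idx <- tbl$SortIndices(names = "x", descending = FALSE)
sorted <- tbl$Take(idx)

# Keep only the rows flagged TRUE
tbl$Filter(Array$create(c(TRUE, FALSE, TRUE)))
```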
+
+#' @export
+as.data.frame.ArrowTabular <- function(x, row.names = NULL, optional = FALSE, ...) {
+ df <- x$to_data_frame()
+
+ if (!is.null(r_metadata <- x$metadata$r)) {
+ df <- apply_arrow_r_metadata(df, .unserialize_arrow_r_metadata(r_metadata))
+ }
+ df
+}
+
+#' @export
+`names<-.ArrowTabular` <- function(x, value) x$RenameColumns(value)
+
+#' @importFrom methods as
+#' @export
+`[.ArrowTabular` <- function(x, i, j, ..., drop = FALSE) {
+ if (nargs() == 2L) {
+ # List-like column extraction (x[i])
+ return(x[, i])
+ }
+ if (!missing(j)) {
+ # Selecting columns is cheaper than filtering rows, so do it first.
+ # That way, if we're filtering too, we have fewer arrays to filter/slice/take
+ if (is.character(j)) {
+ j_new <- match(j, names(x))
+ if (any(is.na(j_new))) {
+ stop("Column not found: ", oxford_paste(j[is.na(j_new)]), call. = FALSE)
+ }
+ j <- j_new
+ }
+ if (is_integerish(j)) {
+ if (any(is.na(j))) {
+ stop("Column indices cannot be NA", call. = FALSE)
+ }
+ if (length(j) && all(j < 0)) {
+ # in R, negative j means "everything but j"
+ j <- setdiff(seq_len(x$num_columns), -1 * j)
+ }
+ x <- x$SelectColumns(as.integer(j) - 1L)
+ }
+
+ if (drop && ncol(x) == 1L) {
+ x <- x$column(0)
+ }
+ }
+ if (!missing(i)) {
+ x <- filter_rows(x, i, ...)
+ }
+ x
+}
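
A minimal sketch of the `[` method above on a Table; column selection happens before row filtering, and `drop = TRUE` with a single remaining column returns that column:

```
tbl <- Table$create(x = 1:5, y = letters[1:5], z = rnorm(5))

tbl[, c("x", "y")]       # select columns by name
tbl[, -3]                # or drop columns with negative indices
tbl[1:3, ]               # first three rows (zero-copy slice)
tbl[tbl$x > 2, "y"]      # filter rows, then keep one column (still a Table)
tbl[, "z", drop = TRUE]  # a ChunkedArray rather than a one-column Table
```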
+
+#' @export
+`[[.ArrowTabular` <- function(x, i, ...) {
+ if (is.character(i)) {
+ x$GetColumnByName(i)
+ } else if (is.numeric(i)) {
+ x$column(i - 1)
+ } else {
+ stop("'i' must be character or numeric, not ", class(i), call. = FALSE)
+ }
+}
+
+#' @export
+`$.ArrowTabular` <- function(x, name, ...) {
+ assert_that(is.string(name))
+ if (name %in% ls(x)) {
+ get(name, x)
+ } else {
+ x$GetColumnByName(name)
+ }
+}
+
+#' @export
+`[[<-.ArrowTabular` <- function(x, i, value) {
+ if (!is.character(i) & !is.numeric(i)) {
+ stop("'i' must be character or numeric, not ", class(i), call. = FALSE)
+ }
+ assert_that(length(i) == 1, !is.na(i))
+
+ if (is.null(value)) {
+ if (is.character(i)) {
+ i <- match(i, names(x))
+ }
+ x <- x$RemoveColumn(i - 1L)
+ } else {
+ if (!is.character(i)) {
+ # get or create a/the column name
+ if (i <= x$num_columns) {
+ i <- names(x)[i]
+ } else {
+ i <- as.character(i)
+ }
+ }
+
+ # auto-magic recycling on non-ArrowObjects
+ if (!inherits(value, "ArrowObject")) {
+ value <- vctrs::vec_recycle(value, x$num_rows)
+ }
+
+ # construct the field
+ if (inherits(x, "RecordBatch") && !inherits(value, "Array")) {
+ value <- Array$create(value)
+ } else if (inherits(x, "Table") && !inherits(value, "ChunkedArray")) {
+ value <- ChunkedArray$create(value)
+ }
+ new_field <- field(i, value$type)
+
+ if (i %in% names(x)) {
+ i <- match(i, names(x)) - 1L
+ x <- x$SetColumn(i, new_field, value)
+ } else {
+ i <- x$num_columns
+ x <- x$AddColumn(i, new_field, value)
+ }
+ }
+ x
+}
+
+#' @export
+`$<-.ArrowTabular` <- function(x, i, value) {
+ assert_that(is.string(i))
+ # We need to check if `i` is in names in case it is an active binding (e.g.
+ # `metadata`); in that case we use assign() to change the active binding
+ # instead of the column in the table
+ if (i %in% ls(x)) {
+ assign(i, value, x)
+ } else {
+ x[[i]] <- value
+ }
+ x
+}
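
A minimal sketch of the `[[<-` and `$<-` methods above; plain R vectors are recycled and converted to the appropriate Arrow type, and assigning `NULL` removes a column:

```
tbl <- Table$create(x = 1:3, y = c("a", "b", "c"))

tbl$z <- tbl$x * 2  # compute a new ChunkedArray column
tbl[["w"]] <- 0L    # a length-1 vector is recycled to num_rows
tbl[["y"]] <- NULL  # remove a column
names(tbl)          # "x" "z" "w"
```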
+
+#' @export
+dim.ArrowTabular <- function(x) c(x$num_rows, x$num_columns)
+
+#' @export
+length.ArrowTabular <- function(x) x$num_columns
+
+#' @export
+as.list.ArrowTabular <- function(x, ...) as.list(as.data.frame(x, ...))
+
+#' @export
+row.names.ArrowTabular <- function(x) as.character(seq_len(nrow(x)))
+
+#' @export
+dimnames.ArrowTabular <- function(x) list(row.names(x), names(x))
+
+#' @export
+head.ArrowTabular <- head.ArrowDatum
+
+#' @export
+tail.ArrowTabular <- tail.ArrowDatum
+
+#' @export
+na.fail.ArrowTabular <- function(object, ...) {
+ for (col in seq_len(object$num_columns)) {
+ if (object$column(col - 1L)$null_count > 0) {
+ stop("missing values in object", call. = FALSE)
+ }
+ }
+ object
+}
+
+#' @export
+na.omit.ArrowTabular <- function(object, ...) {
+ not_na <- map(object$columns, ~ call_function("is_valid", .x))
+ not_na_agg <- Reduce("&", not_na)
+ object$Filter(not_na_agg)
+}
+
+#' @export
+na.exclude.ArrowTabular <- na.omit.ArrowTabular
diff --git a/src/arrow/r/R/arrowExports.R b/src/arrow/r/R/arrowExports.R
new file mode 100644
index 000000000..014b1641f
--- /dev/null
+++ b/src/arrow/r/R/arrowExports.R
@@ -0,0 +1,1801 @@
+# Generated by using data-raw/codegen.R -> do not edit by hand
+
+test_SET_STRING_ELT <- function(s) {
+ invisible(.Call(`_arrow_test_SET_STRING_ELT`, s))
+}
+
+test_same_Array <- function(x, y) {
+ .Call(`_arrow_test_same_Array`, x, y)
+}
+
+is_arrow_altrep <- function(x) {
+ .Call(`_arrow_is_arrow_altrep`, x)
+}
+
+Array__Slice1 <- function(array, offset) {
+ .Call(`_arrow_Array__Slice1`, array, offset)
+}
+
+Array__Slice2 <- function(array, offset, length) {
+ .Call(`_arrow_Array__Slice2`, array, offset, length)
+}
+
+Array__IsNull <- function(x, i) {
+ .Call(`_arrow_Array__IsNull`, x, i)
+}
+
+Array__IsValid <- function(x, i) {
+ .Call(`_arrow_Array__IsValid`, x, i)
+}
+
+Array__length <- function(x) {
+ .Call(`_arrow_Array__length`, x)
+}
+
+Array__offset <- function(x) {
+ .Call(`_arrow_Array__offset`, x)
+}
+
+Array__null_count <- function(x) {
+ .Call(`_arrow_Array__null_count`, x)
+}
+
+Array__type <- function(x) {
+ .Call(`_arrow_Array__type`, x)
+}
+
+Array__ToString <- function(x) {
+ .Call(`_arrow_Array__ToString`, x)
+}
+
+Array__type_id <- function(x) {
+ .Call(`_arrow_Array__type_id`, x)
+}
+
+Array__Equals <- function(lhs, rhs) {
+ .Call(`_arrow_Array__Equals`, lhs, rhs)
+}
+
+Array__ApproxEquals <- function(lhs, rhs) {
+ .Call(`_arrow_Array__ApproxEquals`, lhs, rhs)
+}
+
+Array__Diff <- function(lhs, rhs) {
+ .Call(`_arrow_Array__Diff`, lhs, rhs)
+}
+
+Array__data <- function(array) {
+ .Call(`_arrow_Array__data`, array)
+}
+
+Array__RangeEquals <- function(self, other, start_idx, end_idx, other_start_idx) {
+ .Call(`_arrow_Array__RangeEquals`, self, other, start_idx, end_idx, other_start_idx)
+}
+
+Array__View <- function(array, type) {
+ .Call(`_arrow_Array__View`, array, type)
+}
+
+Array__Validate <- function(array) {
+ invisible(.Call(`_arrow_Array__Validate`, array))
+}
+
+DictionaryArray__indices <- function(array) {
+ .Call(`_arrow_DictionaryArray__indices`, array)
+}
+
+DictionaryArray__dictionary <- function(array) {
+ .Call(`_arrow_DictionaryArray__dictionary`, array)
+}
+
+StructArray__field <- function(array, i) {
+ .Call(`_arrow_StructArray__field`, array, i)
+}
+
+StructArray__GetFieldByName <- function(array, name) {
+ .Call(`_arrow_StructArray__GetFieldByName`, array, name)
+}
+
+StructArray__Flatten <- function(array) {
+ .Call(`_arrow_StructArray__Flatten`, array)
+}
+
+ListArray__value_type <- function(array) {
+ .Call(`_arrow_ListArray__value_type`, array)
+}
+
+LargeListArray__value_type <- function(array) {
+ .Call(`_arrow_LargeListArray__value_type`, array)
+}
+
+ListArray__values <- function(array) {
+ .Call(`_arrow_ListArray__values`, array)
+}
+
+LargeListArray__values <- function(array) {
+ .Call(`_arrow_LargeListArray__values`, array)
+}
+
+ListArray__value_length <- function(array, i) {
+ .Call(`_arrow_ListArray__value_length`, array, i)
+}
+
+LargeListArray__value_length <- function(array, i) {
+ .Call(`_arrow_LargeListArray__value_length`, array, i)
+}
+
+FixedSizeListArray__value_length <- function(array, i) {
+ .Call(`_arrow_FixedSizeListArray__value_length`, array, i)
+}
+
+ListArray__value_offset <- function(array, i) {
+ .Call(`_arrow_ListArray__value_offset`, array, i)
+}
+
+LargeListArray__value_offset <- function(array, i) {
+ .Call(`_arrow_LargeListArray__value_offset`, array, i)
+}
+
+FixedSizeListArray__value_offset <- function(array, i) {
+ .Call(`_arrow_FixedSizeListArray__value_offset`, array, i)
+}
+
+ListArray__raw_value_offsets <- function(array) {
+ .Call(`_arrow_ListArray__raw_value_offsets`, array)
+}
+
+LargeListArray__raw_value_offsets <- function(array) {
+ .Call(`_arrow_LargeListArray__raw_value_offsets`, array)
+}
+
+Array__as_vector <- function(array) {
+ .Call(`_arrow_Array__as_vector`, array)
+}
+
+ChunkedArray__as_vector <- function(chunked_array, use_threads) {
+ .Call(`_arrow_ChunkedArray__as_vector`, chunked_array, use_threads)
+}
+
+RecordBatch__to_dataframe <- function(batch, use_threads) {
+ .Call(`_arrow_RecordBatch__to_dataframe`, batch, use_threads)
+}
+
+Table__to_dataframe <- function(table, use_threads) {
+ .Call(`_arrow_Table__to_dataframe`, table, use_threads)
+}
+
+ArrayData__get_type <- function(x) {
+ .Call(`_arrow_ArrayData__get_type`, x)
+}
+
+ArrayData__get_length <- function(x) {
+ .Call(`_arrow_ArrayData__get_length`, x)
+}
+
+ArrayData__get_null_count <- function(x) {
+ .Call(`_arrow_ArrayData__get_null_count`, x)
+}
+
+ArrayData__get_offset <- function(x) {
+ .Call(`_arrow_ArrayData__get_offset`, x)
+}
+
+ArrayData__buffers <- function(x) {
+ .Call(`_arrow_ArrayData__buffers`, x)
+}
+
+Buffer__is_mutable <- function(buffer) {
+ .Call(`_arrow_Buffer__is_mutable`, buffer)
+}
+
+Buffer__ZeroPadding <- function(buffer) {
+ invisible(.Call(`_arrow_Buffer__ZeroPadding`, buffer))
+}
+
+Buffer__capacity <- function(buffer) {
+ .Call(`_arrow_Buffer__capacity`, buffer)
+}
+
+Buffer__size <- function(buffer) {
+ .Call(`_arrow_Buffer__size`, buffer)
+}
+
+r___RBuffer__initialize <- function(x) {
+ .Call(`_arrow_r___RBuffer__initialize`, x)
+}
+
+Buffer__data <- function(buffer) {
+ .Call(`_arrow_Buffer__data`, buffer)
+}
+
+Buffer__Equals <- function(x, y) {
+ .Call(`_arrow_Buffer__Equals`, x, y)
+}
+
+ChunkedArray__length <- function(chunked_array) {
+ .Call(`_arrow_ChunkedArray__length`, chunked_array)
+}
+
+ChunkedArray__null_count <- function(chunked_array) {
+ .Call(`_arrow_ChunkedArray__null_count`, chunked_array)
+}
+
+ChunkedArray__num_chunks <- function(chunked_array) {
+ .Call(`_arrow_ChunkedArray__num_chunks`, chunked_array)
+}
+
+ChunkedArray__chunk <- function(chunked_array, i) {
+ .Call(`_arrow_ChunkedArray__chunk`, chunked_array, i)
+}
+
+ChunkedArray__chunks <- function(chunked_array) {
+ .Call(`_arrow_ChunkedArray__chunks`, chunked_array)
+}
+
+ChunkedArray__type <- function(chunked_array) {
+ .Call(`_arrow_ChunkedArray__type`, chunked_array)
+}
+
+ChunkedArray__Slice1 <- function(chunked_array, offset) {
+ .Call(`_arrow_ChunkedArray__Slice1`, chunked_array, offset)
+}
+
+ChunkedArray__Slice2 <- function(chunked_array, offset, length) {
+ .Call(`_arrow_ChunkedArray__Slice2`, chunked_array, offset, length)
+}
+
+ChunkedArray__View <- function(array, type) {
+ .Call(`_arrow_ChunkedArray__View`, array, type)
+}
+
+ChunkedArray__Validate <- function(chunked_array) {
+ invisible(.Call(`_arrow_ChunkedArray__Validate`, chunked_array))
+}
+
+ChunkedArray__Equals <- function(x, y) {
+ .Call(`_arrow_ChunkedArray__Equals`, x, y)
+}
+
+ChunkedArray__ToString <- function(x) {
+ .Call(`_arrow_ChunkedArray__ToString`, x)
+}
+
+ChunkedArray__from_list <- function(chunks, s_type) {
+ .Call(`_arrow_ChunkedArray__from_list`, chunks, s_type)
+}
+
+util___Codec__Create <- function(codec, compression_level) {
+ .Call(`_arrow_util___Codec__Create`, codec, compression_level)
+}
+
+util___Codec__name <- function(codec) {
+ .Call(`_arrow_util___Codec__name`, codec)
+}
+
+util___Codec__IsAvailable <- function(codec) {
+ .Call(`_arrow_util___Codec__IsAvailable`, codec)
+}
+
+io___CompressedOutputStream__Make <- function(codec, raw) {
+ .Call(`_arrow_io___CompressedOutputStream__Make`, codec, raw)
+}
+
+io___CompressedInputStream__Make <- function(codec, raw) {
+ .Call(`_arrow_io___CompressedInputStream__Make`, codec, raw)
+}
+
+ExecPlan_create <- function(use_threads) {
+ .Call(`_arrow_ExecPlan_create`, use_threads)
+}
+
+ExecPlan_run <- function(plan, final_node, sort_options, head) {
+ .Call(`_arrow_ExecPlan_run`, plan, final_node, sort_options, head)
+}
+
+ExecPlan_StopProducing <- function(plan) {
+ invisible(.Call(`_arrow_ExecPlan_StopProducing`, plan))
+}
+
+ExecNode_output_schema <- function(node) {
+ .Call(`_arrow_ExecNode_output_schema`, node)
+}
+
+ExecNode_Scan <- function(plan, dataset, filter, materialized_field_names) {
+ .Call(`_arrow_ExecNode_Scan`, plan, dataset, filter, materialized_field_names)
+}
+
+ExecNode_Filter <- function(input, filter) {
+ .Call(`_arrow_ExecNode_Filter`, input, filter)
+}
+
+ExecNode_Project <- function(input, exprs, names) {
+ .Call(`_arrow_ExecNode_Project`, input, exprs, names)
+}
+
+ExecNode_Aggregate <- function(input, options, target_names, out_field_names, key_names) {
+ .Call(`_arrow_ExecNode_Aggregate`, input, options, target_names, out_field_names, key_names)
+}
+
+ExecNode_Join <- function(input, type, right_data, left_keys, right_keys, left_output, right_output) {
+ .Call(`_arrow_ExecNode_Join`, input, type, right_data, left_keys, right_keys, left_output, right_output)
+}
+
+ExecNode_ReadFromRecordBatchReader <- function(plan, reader) {
+ .Call(`_arrow_ExecNode_ReadFromRecordBatchReader`, plan, reader)
+}
+
+RecordBatch__cast <- function(batch, schema, options) {
+ .Call(`_arrow_RecordBatch__cast`, batch, schema, options)
+}
+
+Table__cast <- function(table, schema, options) {
+ .Call(`_arrow_Table__cast`, table, schema, options)
+}
+
+compute__CallFunction <- function(func_name, args, options) {
+ .Call(`_arrow_compute__CallFunction`, func_name, args, options)
+}
+
+compute__GetFunctionNames <- function() {
+ .Call(`_arrow_compute__GetFunctionNames`)
+}
+
+build_info <- function() {
+ .Call(`_arrow_build_info`)
+}
+
+runtime_info <- function() {
+ .Call(`_arrow_runtime_info`)
+}
+
+csv___WriteOptions__initialize <- function(options) {
+ .Call(`_arrow_csv___WriteOptions__initialize`, options)
+}
+
+csv___ReadOptions__initialize <- function(options) {
+ .Call(`_arrow_csv___ReadOptions__initialize`, options)
+}
+
+csv___ParseOptions__initialize <- function(options) {
+ .Call(`_arrow_csv___ParseOptions__initialize`, options)
+}
+
+csv___ReadOptions__column_names <- function(options) {
+ .Call(`_arrow_csv___ReadOptions__column_names`, options)
+}
+
+csv___ConvertOptions__initialize <- function(options) {
+ .Call(`_arrow_csv___ConvertOptions__initialize`, options)
+}
+
+csv___TableReader__Make <- function(input, read_options, parse_options, convert_options) {
+ .Call(`_arrow_csv___TableReader__Make`, input, read_options, parse_options, convert_options)
+}
+
+csv___TableReader__Read <- function(table_reader) {
+ .Call(`_arrow_csv___TableReader__Read`, table_reader)
+}
+
+TimestampParser__kind <- function(parser) {
+ .Call(`_arrow_TimestampParser__kind`, parser)
+}
+
+TimestampParser__format <- function(parser) {
+ .Call(`_arrow_TimestampParser__format`, parser)
+}
+
+TimestampParser__MakeStrptime <- function(format) {
+ .Call(`_arrow_TimestampParser__MakeStrptime`, format)
+}
+
+TimestampParser__MakeISO8601 <- function() {
+ .Call(`_arrow_TimestampParser__MakeISO8601`)
+}
+
+csv___WriteCSV__Table <- function(table, write_options, stream) {
+ invisible(.Call(`_arrow_csv___WriteCSV__Table`, table, write_options, stream))
+}
+
+csv___WriteCSV__RecordBatch <- function(record_batch, write_options, stream) {
+ invisible(.Call(`_arrow_csv___WriteCSV__RecordBatch`, record_batch, write_options, stream))
+}
+
+dataset___Dataset__NewScan <- function(ds) {
+ .Call(`_arrow_dataset___Dataset__NewScan`, ds)
+}
+
+dataset___Dataset__schema <- function(dataset) {
+ .Call(`_arrow_dataset___Dataset__schema`, dataset)
+}
+
+dataset___Dataset__type_name <- function(dataset) {
+ .Call(`_arrow_dataset___Dataset__type_name`, dataset)
+}
+
+dataset___Dataset__ReplaceSchema <- function(dataset, schm) {
+ .Call(`_arrow_dataset___Dataset__ReplaceSchema`, dataset, schm)
+}
+
+dataset___UnionDataset__create <- function(datasets, schm) {
+ .Call(`_arrow_dataset___UnionDataset__create`, datasets, schm)
+}
+
+dataset___InMemoryDataset__create <- function(table) {
+ .Call(`_arrow_dataset___InMemoryDataset__create`, table)
+}
+
+dataset___UnionDataset__children <- function(ds) {
+ .Call(`_arrow_dataset___UnionDataset__children`, ds)
+}
+
+dataset___FileSystemDataset__format <- function(dataset) {
+ .Call(`_arrow_dataset___FileSystemDataset__format`, dataset)
+}
+
+dataset___FileSystemDataset__filesystem <- function(dataset) {
+ .Call(`_arrow_dataset___FileSystemDataset__filesystem`, dataset)
+}
+
+dataset___FileSystemDataset__files <- function(dataset) {
+ .Call(`_arrow_dataset___FileSystemDataset__files`, dataset)
+}
+
+dataset___DatasetFactory__Finish1 <- function(factory, unify_schemas) {
+ .Call(`_arrow_dataset___DatasetFactory__Finish1`, factory, unify_schemas)
+}
+
+dataset___DatasetFactory__Finish2 <- function(factory, schema) {
+ .Call(`_arrow_dataset___DatasetFactory__Finish2`, factory, schema)
+}
+
+dataset___DatasetFactory__Inspect <- function(factory, unify_schemas) {
+ .Call(`_arrow_dataset___DatasetFactory__Inspect`, factory, unify_schemas)
+}
+
+dataset___UnionDatasetFactory__Make <- function(children) {
+ .Call(`_arrow_dataset___UnionDatasetFactory__Make`, children)
+}
+
+dataset___FileSystemDatasetFactory__Make0 <- function(fs, paths, format) {
+ .Call(`_arrow_dataset___FileSystemDatasetFactory__Make0`, fs, paths, format)
+}
+
+dataset___FileSystemDatasetFactory__Make2 <- function(fs, selector, format, partitioning) {
+ .Call(`_arrow_dataset___FileSystemDatasetFactory__Make2`, fs, selector, format, partitioning)
+}
+
+dataset___FileSystemDatasetFactory__Make1 <- function(fs, selector, format) {
+ .Call(`_arrow_dataset___FileSystemDatasetFactory__Make1`, fs, selector, format)
+}
+
+dataset___FileSystemDatasetFactory__Make3 <- function(fs, selector, format, factory) {
+ .Call(`_arrow_dataset___FileSystemDatasetFactory__Make3`, fs, selector, format, factory)
+}
+
+dataset___FileFormat__type_name <- function(format) {
+ .Call(`_arrow_dataset___FileFormat__type_name`, format)
+}
+
+dataset___FileFormat__DefaultWriteOptions <- function(fmt) {
+ .Call(`_arrow_dataset___FileFormat__DefaultWriteOptions`, fmt)
+}
+
+dataset___ParquetFileFormat__Make <- function(options, dict_columns) {
+ .Call(`_arrow_dataset___ParquetFileFormat__Make`, options, dict_columns)
+}
+
+dataset___FileWriteOptions__type_name <- function(options) {
+ .Call(`_arrow_dataset___FileWriteOptions__type_name`, options)
+}
+
+dataset___ParquetFileWriteOptions__update <- function(options, writer_props, arrow_writer_props) {
+ invisible(.Call(`_arrow_dataset___ParquetFileWriteOptions__update`, options, writer_props, arrow_writer_props))
+}
+
+dataset___IpcFileWriteOptions__update2 <- function(ipc_options, use_legacy_format, codec, metadata_version) {
+ invisible(.Call(`_arrow_dataset___IpcFileWriteOptions__update2`, ipc_options, use_legacy_format, codec, metadata_version))
+}
+
+dataset___IpcFileWriteOptions__update1 <- function(ipc_options, use_legacy_format, metadata_version) {
+ invisible(.Call(`_arrow_dataset___IpcFileWriteOptions__update1`, ipc_options, use_legacy_format, metadata_version))
+}
+
+dataset___CsvFileWriteOptions__update <- function(csv_options, write_options) {
+ invisible(.Call(`_arrow_dataset___CsvFileWriteOptions__update`, csv_options, write_options))
+}
+
+dataset___IpcFileFormat__Make <- function() {
+ .Call(`_arrow_dataset___IpcFileFormat__Make`)
+}
+
+dataset___CsvFileFormat__Make <- function(parse_options, convert_options, read_options) {
+ .Call(`_arrow_dataset___CsvFileFormat__Make`, parse_options, convert_options, read_options)
+}
+
+dataset___FragmentScanOptions__type_name <- function(fragment_scan_options) {
+ .Call(`_arrow_dataset___FragmentScanOptions__type_name`, fragment_scan_options)
+}
+
+dataset___CsvFragmentScanOptions__Make <- function(convert_options, read_options) {
+ .Call(`_arrow_dataset___CsvFragmentScanOptions__Make`, convert_options, read_options)
+}
+
+dataset___ParquetFragmentScanOptions__Make <- function(use_buffered_stream, buffer_size, pre_buffer) {
+ .Call(`_arrow_dataset___ParquetFragmentScanOptions__Make`, use_buffered_stream, buffer_size, pre_buffer)
+}
+
+dataset___DirectoryPartitioning <- function(schm, segment_encoding) {
+ .Call(`_arrow_dataset___DirectoryPartitioning`, schm, segment_encoding)
+}
+
+dataset___DirectoryPartitioning__MakeFactory <- function(field_names, segment_encoding) {
+ .Call(`_arrow_dataset___DirectoryPartitioning__MakeFactory`, field_names, segment_encoding)
+}
+
+dataset___HivePartitioning <- function(schm, null_fallback, segment_encoding) {
+ .Call(`_arrow_dataset___HivePartitioning`, schm, null_fallback, segment_encoding)
+}
+
+dataset___HivePartitioning__MakeFactory <- function(null_fallback, segment_encoding) {
+ .Call(`_arrow_dataset___HivePartitioning__MakeFactory`, null_fallback, segment_encoding)
+}
+
+dataset___ScannerBuilder__ProjectNames <- function(sb, cols) {
+ invisible(.Call(`_arrow_dataset___ScannerBuilder__ProjectNames`, sb, cols))
+}
+
+dataset___ScannerBuilder__ProjectExprs <- function(sb, exprs, names) {
+ invisible(.Call(`_arrow_dataset___ScannerBuilder__ProjectExprs`, sb, exprs, names))
+}
+
+dataset___ScannerBuilder__Filter <- function(sb, expr) {
+ invisible(.Call(`_arrow_dataset___ScannerBuilder__Filter`, sb, expr))
+}
+
+dataset___ScannerBuilder__UseThreads <- function(sb, threads) {
+ invisible(.Call(`_arrow_dataset___ScannerBuilder__UseThreads`, sb, threads))
+}
+
+dataset___ScannerBuilder__UseAsync <- function(sb, use_async) {
+ invisible(.Call(`_arrow_dataset___ScannerBuilder__UseAsync`, sb, use_async))
+}
+
+dataset___ScannerBuilder__BatchSize <- function(sb, batch_size) {
+ invisible(.Call(`_arrow_dataset___ScannerBuilder__BatchSize`, sb, batch_size))
+}
+
+dataset___ScannerBuilder__FragmentScanOptions <- function(sb, options) {
+ invisible(.Call(`_arrow_dataset___ScannerBuilder__FragmentScanOptions`, sb, options))
+}
+
+dataset___ScannerBuilder__schema <- function(sb) {
+ .Call(`_arrow_dataset___ScannerBuilder__schema`, sb)
+}
+
+dataset___ScannerBuilder__Finish <- function(sb) {
+ .Call(`_arrow_dataset___ScannerBuilder__Finish`, sb)
+}
+
+dataset___ScannerBuilder__FromRecordBatchReader <- function(reader) {
+ .Call(`_arrow_dataset___ScannerBuilder__FromRecordBatchReader`, reader)
+}
+
+dataset___Scanner__ToTable <- function(scanner) {
+ .Call(`_arrow_dataset___Scanner__ToTable`, scanner)
+}
+
+dataset___Scanner__ScanBatches <- function(scanner) {
+ .Call(`_arrow_dataset___Scanner__ScanBatches`, scanner)
+}
+
+dataset___Scanner__ToRecordBatchReader <- function(scanner) {
+ .Call(`_arrow_dataset___Scanner__ToRecordBatchReader`, scanner)
+}
+
+dataset___Scanner__head <- function(scanner, n) {
+ .Call(`_arrow_dataset___Scanner__head`, scanner, n)
+}
+
+dataset___Scanner__schema <- function(sc) {
+ .Call(`_arrow_dataset___Scanner__schema`, sc)
+}
+
+dataset___ScanTask__get_batches <- function(scan_task) {
+ .Call(`_arrow_dataset___ScanTask__get_batches`, scan_task)
+}
+
+dataset___Dataset__Write <- function(file_write_options, filesystem, base_dir, partitioning, basename_template, scanner, existing_data_behavior) {
+ invisible(.Call(`_arrow_dataset___Dataset__Write`, file_write_options, filesystem, base_dir, partitioning, basename_template, scanner, existing_data_behavior))
+}
+
+dataset___Scanner__TakeRows <- function(scanner, indices) {
+ .Call(`_arrow_dataset___Scanner__TakeRows`, scanner, indices)
+}
+
+dataset___Scanner__CountRows <- function(scanner) {
+ .Call(`_arrow_dataset___Scanner__CountRows`, scanner)
+}
+
+Int8__initialize <- function() {
+ .Call(`_arrow_Int8__initialize`)
+}
+
+Int16__initialize <- function() {
+ .Call(`_arrow_Int16__initialize`)
+}
+
+Int32__initialize <- function() {
+ .Call(`_arrow_Int32__initialize`)
+}
+
+Int64__initialize <- function() {
+ .Call(`_arrow_Int64__initialize`)
+}
+
+UInt8__initialize <- function() {
+ .Call(`_arrow_UInt8__initialize`)
+}
+
+UInt16__initialize <- function() {
+ .Call(`_arrow_UInt16__initialize`)
+}
+
+UInt32__initialize <- function() {
+ .Call(`_arrow_UInt32__initialize`)
+}
+
+UInt64__initialize <- function() {
+ .Call(`_arrow_UInt64__initialize`)
+}
+
+Float16__initialize <- function() {
+ .Call(`_arrow_Float16__initialize`)
+}
+
+Float32__initialize <- function() {
+ .Call(`_arrow_Float32__initialize`)
+}
+
+Float64__initialize <- function() {
+ .Call(`_arrow_Float64__initialize`)
+}
+
+Boolean__initialize <- function() {
+ .Call(`_arrow_Boolean__initialize`)
+}
+
+Utf8__initialize <- function() {
+ .Call(`_arrow_Utf8__initialize`)
+}
+
+LargeUtf8__initialize <- function() {
+ .Call(`_arrow_LargeUtf8__initialize`)
+}
+
+Binary__initialize <- function() {
+ .Call(`_arrow_Binary__initialize`)
+}
+
+LargeBinary__initialize <- function() {
+ .Call(`_arrow_LargeBinary__initialize`)
+}
+
+Date32__initialize <- function() {
+ .Call(`_arrow_Date32__initialize`)
+}
+
+Date64__initialize <- function() {
+ .Call(`_arrow_Date64__initialize`)
+}
+
+Null__initialize <- function() {
+ .Call(`_arrow_Null__initialize`)
+}
+
+Decimal128Type__initialize <- function(precision, scale) {
+ .Call(`_arrow_Decimal128Type__initialize`, precision, scale)
+}
+
+FixedSizeBinary__initialize <- function(byte_width) {
+ .Call(`_arrow_FixedSizeBinary__initialize`, byte_width)
+}
+
+Timestamp__initialize <- function(unit, timezone) {
+ .Call(`_arrow_Timestamp__initialize`, unit, timezone)
+}
+
+Time32__initialize <- function(unit) {
+ .Call(`_arrow_Time32__initialize`, unit)
+}
+
+Time64__initialize <- function(unit) {
+ .Call(`_arrow_Time64__initialize`, unit)
+}
+
+list__ <- function(x) {
+ .Call(`_arrow_list__`, x)
+}
+
+large_list__ <- function(x) {
+ .Call(`_arrow_large_list__`, x)
+}
+
+fixed_size_list__ <- function(x, list_size) {
+ .Call(`_arrow_fixed_size_list__`, x, list_size)
+}
+
+struct__ <- function(fields) {
+ .Call(`_arrow_struct__`, fields)
+}
+
+DataType__ToString <- function(type) {
+ .Call(`_arrow_DataType__ToString`, type)
+}
+
+DataType__name <- function(type) {
+ .Call(`_arrow_DataType__name`, type)
+}
+
+DataType__Equals <- function(lhs, rhs) {
+ .Call(`_arrow_DataType__Equals`, lhs, rhs)
+}
+
+DataType__num_fields <- function(type) {
+ .Call(`_arrow_DataType__num_fields`, type)
+}
+
+DataType__fields <- function(type) {
+ .Call(`_arrow_DataType__fields`, type)
+}
+
+DataType__id <- function(type) {
+ .Call(`_arrow_DataType__id`, type)
+}
+
+ListType__ToString <- function(type) {
+ .Call(`_arrow_ListType__ToString`, type)
+}
+
+FixedWidthType__bit_width <- function(type) {
+ .Call(`_arrow_FixedWidthType__bit_width`, type)
+}
+
+DateType__unit <- function(type) {
+ .Call(`_arrow_DateType__unit`, type)
+}
+
+TimeType__unit <- function(type) {
+ .Call(`_arrow_TimeType__unit`, type)
+}
+
+DecimalType__precision <- function(type) {
+ .Call(`_arrow_DecimalType__precision`, type)
+}
+
+DecimalType__scale <- function(type) {
+ .Call(`_arrow_DecimalType__scale`, type)
+}
+
+TimestampType__timezone <- function(type) {
+ .Call(`_arrow_TimestampType__timezone`, type)
+}
+
+TimestampType__unit <- function(type) {
+ .Call(`_arrow_TimestampType__unit`, type)
+}
+
+DictionaryType__initialize <- function(index_type, value_type, ordered) {
+ .Call(`_arrow_DictionaryType__initialize`, index_type, value_type, ordered)
+}
+
+DictionaryType__index_type <- function(type) {
+ .Call(`_arrow_DictionaryType__index_type`, type)
+}
+
+DictionaryType__value_type <- function(type) {
+ .Call(`_arrow_DictionaryType__value_type`, type)
+}
+
+DictionaryType__name <- function(type) {
+ .Call(`_arrow_DictionaryType__name`, type)
+}
+
+DictionaryType__ordered <- function(type) {
+ .Call(`_arrow_DictionaryType__ordered`, type)
+}
+
+StructType__GetFieldByName <- function(type, name) {
+ .Call(`_arrow_StructType__GetFieldByName`, type, name)
+}
+
+StructType__GetFieldIndex <- function(type, name) {
+ .Call(`_arrow_StructType__GetFieldIndex`, type, name)
+}
+
+StructType__field_names <- function(type) {
+ .Call(`_arrow_StructType__field_names`, type)
+}
+
+ListType__value_field <- function(type) {
+ .Call(`_arrow_ListType__value_field`, type)
+}
+
+ListType__value_type <- function(type) {
+ .Call(`_arrow_ListType__value_type`, type)
+}
+
+LargeListType__value_field <- function(type) {
+ .Call(`_arrow_LargeListType__value_field`, type)
+}
+
+LargeListType__value_type <- function(type) {
+ .Call(`_arrow_LargeListType__value_type`, type)
+}
+
+FixedSizeListType__value_field <- function(type) {
+ .Call(`_arrow_FixedSizeListType__value_field`, type)
+}
+
+FixedSizeListType__value_type <- function(type) {
+ .Call(`_arrow_FixedSizeListType__value_type`, type)
+}
+
+FixedSizeListType__list_size <- function(type) {
+ .Call(`_arrow_FixedSizeListType__list_size`, type)
+}
+
+compute___expr__equals <- function(lhs, rhs) {
+ .Call(`_arrow_compute___expr__equals`, lhs, rhs)
+}
+
+compute___expr__call <- function(func_name, argument_list, options) {
+ .Call(`_arrow_compute___expr__call`, func_name, argument_list, options)
+}
+
+field_names_in_expression <- function(x) {
+ .Call(`_arrow_field_names_in_expression`, x)
+}
+
+compute___expr__get_field_ref_name <- function(x) {
+ .Call(`_arrow_compute___expr__get_field_ref_name`, x)
+}
+
+compute___expr__field_ref <- function(name) {
+ .Call(`_arrow_compute___expr__field_ref`, name)
+}
+
+compute___expr__scalar <- function(x) {
+ .Call(`_arrow_compute___expr__scalar`, x)
+}
+
+compute___expr__ToString <- function(x) {
+ .Call(`_arrow_compute___expr__ToString`, x)
+}
+
+compute___expr__type <- function(x, schema) {
+ .Call(`_arrow_compute___expr__type`, x, schema)
+}
+
+compute___expr__type_id <- function(x, schema) {
+ .Call(`_arrow_compute___expr__type_id`, x, schema)
+}
+
+ipc___WriteFeather__Table <- function(stream, table, version, chunk_size, compression, compression_level) {
+ invisible(.Call(`_arrow_ipc___WriteFeather__Table`, stream, table, version, chunk_size, compression, compression_level))
+}
+
+ipc___feather___Reader__version <- function(reader) {
+ .Call(`_arrow_ipc___feather___Reader__version`, reader)
+}
+
+ipc___feather___Reader__Read <- function(reader, columns) {
+ .Call(`_arrow_ipc___feather___Reader__Read`, reader, columns)
+}
+
+ipc___feather___Reader__Open <- function(stream) {
+ .Call(`_arrow_ipc___feather___Reader__Open`, stream)
+}
+
+ipc___feather___Reader__schema <- function(reader) {
+ .Call(`_arrow_ipc___feather___Reader__schema`, reader)
+}
+
+Field__initialize <- function(name, field, nullable) {
+ .Call(`_arrow_Field__initialize`, name, field, nullable)
+}
+
+Field__ToString <- function(field) {
+ .Call(`_arrow_Field__ToString`, field)
+}
+
+Field__name <- function(field) {
+ .Call(`_arrow_Field__name`, field)
+}
+
+Field__Equals <- function(field, other) {
+ .Call(`_arrow_Field__Equals`, field, other)
+}
+
+Field__nullable <- function(field) {
+ .Call(`_arrow_Field__nullable`, field)
+}
+
+Field__type <- function(field) {
+ .Call(`_arrow_Field__type`, field)
+}
+
+fs___FileInfo__type <- function(x) {
+ .Call(`_arrow_fs___FileInfo__type`, x)
+}
+
+fs___FileInfo__set_type <- function(x, type) {
+ invisible(.Call(`_arrow_fs___FileInfo__set_type`, x, type))
+}
+
+fs___FileInfo__path <- function(x) {
+ .Call(`_arrow_fs___FileInfo__path`, x)
+}
+
+fs___FileInfo__set_path <- function(x, path) {
+ invisible(.Call(`_arrow_fs___FileInfo__set_path`, x, path))
+}
+
+fs___FileInfo__size <- function(x) {
+ .Call(`_arrow_fs___FileInfo__size`, x)
+}
+
+fs___FileInfo__set_size <- function(x, size) {
+ invisible(.Call(`_arrow_fs___FileInfo__set_size`, x, size))
+}
+
+fs___FileInfo__base_name <- function(x) {
+ .Call(`_arrow_fs___FileInfo__base_name`, x)
+}
+
+fs___FileInfo__extension <- function(x) {
+ .Call(`_arrow_fs___FileInfo__extension`, x)
+}
+
+fs___FileInfo__mtime <- function(x) {
+ .Call(`_arrow_fs___FileInfo__mtime`, x)
+}
+
+fs___FileInfo__set_mtime <- function(x, time) {
+ invisible(.Call(`_arrow_fs___FileInfo__set_mtime`, x, time))
+}
+
+fs___FileSelector__base_dir <- function(selector) {
+ .Call(`_arrow_fs___FileSelector__base_dir`, selector)
+}
+
+fs___FileSelector__allow_not_found <- function(selector) {
+ .Call(`_arrow_fs___FileSelector__allow_not_found`, selector)
+}
+
+fs___FileSelector__recursive <- function(selector) {
+ .Call(`_arrow_fs___FileSelector__recursive`, selector)
+}
+
+fs___FileSelector__create <- function(base_dir, allow_not_found, recursive) {
+ .Call(`_arrow_fs___FileSelector__create`, base_dir, allow_not_found, recursive)
+}
+
+fs___FileSystem__GetTargetInfos_Paths <- function(file_system, paths) {
+ .Call(`_arrow_fs___FileSystem__GetTargetInfos_Paths`, file_system, paths)
+}
+
+fs___FileSystem__GetTargetInfos_FileSelector <- function(file_system, selector) {
+ .Call(`_arrow_fs___FileSystem__GetTargetInfos_FileSelector`, file_system, selector)
+}
+
+fs___FileSystem__CreateDir <- function(file_system, path, recursive) {
+ invisible(.Call(`_arrow_fs___FileSystem__CreateDir`, file_system, path, recursive))
+}
+
+fs___FileSystem__DeleteDir <- function(file_system, path) {
+ invisible(.Call(`_arrow_fs___FileSystem__DeleteDir`, file_system, path))
+}
+
+fs___FileSystem__DeleteDirContents <- function(file_system, path) {
+ invisible(.Call(`_arrow_fs___FileSystem__DeleteDirContents`, file_system, path))
+}
+
+fs___FileSystem__DeleteFile <- function(file_system, path) {
+ invisible(.Call(`_arrow_fs___FileSystem__DeleteFile`, file_system, path))
+}
+
+fs___FileSystem__DeleteFiles <- function(file_system, paths) {
+ invisible(.Call(`_arrow_fs___FileSystem__DeleteFiles`, file_system, paths))
+}
+
+fs___FileSystem__Move <- function(file_system, src, dest) {
+ invisible(.Call(`_arrow_fs___FileSystem__Move`, file_system, src, dest))
+}
+
+fs___FileSystem__CopyFile <- function(file_system, src, dest) {
+ invisible(.Call(`_arrow_fs___FileSystem__CopyFile`, file_system, src, dest))
+}
+
+fs___FileSystem__OpenInputStream <- function(file_system, path) {
+ .Call(`_arrow_fs___FileSystem__OpenInputStream`, file_system, path)
+}
+
+fs___FileSystem__OpenInputFile <- function(file_system, path) {
+ .Call(`_arrow_fs___FileSystem__OpenInputFile`, file_system, path)
+}
+
+fs___FileSystem__OpenOutputStream <- function(file_system, path) {
+ .Call(`_arrow_fs___FileSystem__OpenOutputStream`, file_system, path)
+}
+
+fs___FileSystem__OpenAppendStream <- function(file_system, path) {
+ .Call(`_arrow_fs___FileSystem__OpenAppendStream`, file_system, path)
+}
+
+fs___FileSystem__type_name <- function(file_system) {
+ .Call(`_arrow_fs___FileSystem__type_name`, file_system)
+}
+
+fs___LocalFileSystem__create <- function() {
+ .Call(`_arrow_fs___LocalFileSystem__create`)
+}
+
+fs___SubTreeFileSystem__create <- function(base_path, base_fs) {
+ .Call(`_arrow_fs___SubTreeFileSystem__create`, base_path, base_fs)
+}
+
+fs___SubTreeFileSystem__base_fs <- function(file_system) {
+ .Call(`_arrow_fs___SubTreeFileSystem__base_fs`, file_system)
+}
+
+fs___SubTreeFileSystem__base_path <- function(file_system) {
+ .Call(`_arrow_fs___SubTreeFileSystem__base_path`, file_system)
+}
+
+fs___FileSystemFromUri <- function(path) {
+ .Call(`_arrow_fs___FileSystemFromUri`, path)
+}
+
+fs___CopyFiles <- function(source_fs, source_sel, destination_fs, destination_base_dir, chunk_size, use_threads) {
+ invisible(.Call(`_arrow_fs___CopyFiles`, source_fs, source_sel, destination_fs, destination_base_dir, chunk_size, use_threads))
+}
+
+fs___S3FileSystem__create <- function(anonymous, access_key, secret_key, session_token, role_arn, session_name, external_id, load_frequency, region, endpoint_override, scheme, background_writes) {
+ .Call(`_arrow_fs___S3FileSystem__create`, anonymous, access_key, secret_key, session_token, role_arn, session_name, external_id, load_frequency, region, endpoint_override, scheme, background_writes)
+}
+
+fs___S3FileSystem__region <- function(fs) {
+ .Call(`_arrow_fs___S3FileSystem__region`, fs)
+}
+
+io___Readable__Read <- function(x, nbytes) {
+ .Call(`_arrow_io___Readable__Read`, x, nbytes)
+}
+
+io___InputStream__Close <- function(x) {
+ invisible(.Call(`_arrow_io___InputStream__Close`, x))
+}
+
+io___OutputStream__Close <- function(x) {
+ invisible(.Call(`_arrow_io___OutputStream__Close`, x))
+}
+
+io___RandomAccessFile__GetSize <- function(x) {
+ .Call(`_arrow_io___RandomAccessFile__GetSize`, x)
+}
+
+io___RandomAccessFile__supports_zero_copy <- function(x) {
+ .Call(`_arrow_io___RandomAccessFile__supports_zero_copy`, x)
+}
+
+io___RandomAccessFile__Seek <- function(x, position) {
+ invisible(.Call(`_arrow_io___RandomAccessFile__Seek`, x, position))
+}
+
+io___RandomAccessFile__Tell <- function(x) {
+ .Call(`_arrow_io___RandomAccessFile__Tell`, x)
+}
+
+io___RandomAccessFile__Read0 <- function(x) {
+ .Call(`_arrow_io___RandomAccessFile__Read0`, x)
+}
+
+io___RandomAccessFile__ReadAt <- function(x, position, nbytes) {
+ .Call(`_arrow_io___RandomAccessFile__ReadAt`, x, position, nbytes)
+}
+
+io___MemoryMappedFile__Create <- function(path, size) {
+ .Call(`_arrow_io___MemoryMappedFile__Create`, path, size)
+}
+
+io___MemoryMappedFile__Open <- function(path, mode) {
+ .Call(`_arrow_io___MemoryMappedFile__Open`, path, mode)
+}
+
+io___MemoryMappedFile__Resize <- function(x, size) {
+ invisible(.Call(`_arrow_io___MemoryMappedFile__Resize`, x, size))
+}
+
+io___ReadableFile__Open <- function(path) {
+ .Call(`_arrow_io___ReadableFile__Open`, path)
+}
+
+io___BufferReader__initialize <- function(buffer) {
+ .Call(`_arrow_io___BufferReader__initialize`, buffer)
+}
+
+io___Writable__write <- function(stream, buf) {
+ invisible(.Call(`_arrow_io___Writable__write`, stream, buf))
+}
+
+io___OutputStream__Tell <- function(stream) {
+ .Call(`_arrow_io___OutputStream__Tell`, stream)
+}
+
+io___FileOutputStream__Open <- function(path) {
+ .Call(`_arrow_io___FileOutputStream__Open`, path)
+}
+
+io___BufferOutputStream__Create <- function(initial_capacity) {
+ .Call(`_arrow_io___BufferOutputStream__Create`, initial_capacity)
+}
+
+io___BufferOutputStream__capacity <- function(stream) {
+ .Call(`_arrow_io___BufferOutputStream__capacity`, stream)
+}
+
+io___BufferOutputStream__Finish <- function(stream) {
+ .Call(`_arrow_io___BufferOutputStream__Finish`, stream)
+}
+
+io___BufferOutputStream__Tell <- function(stream) {
+ .Call(`_arrow_io___BufferOutputStream__Tell`, stream)
+}
+
+io___BufferOutputStream__Write <- function(stream, bytes) {
+ invisible(.Call(`_arrow_io___BufferOutputStream__Write`, stream, bytes))
+}
+
+json___ReadOptions__initialize <- function(use_threads, block_size) {
+ .Call(`_arrow_json___ReadOptions__initialize`, use_threads, block_size)
+}
+
+json___ParseOptions__initialize1 <- function(newlines_in_values) {
+ .Call(`_arrow_json___ParseOptions__initialize1`, newlines_in_values)
+}
+
+json___ParseOptions__initialize2 <- function(newlines_in_values, explicit_schema) {
+ .Call(`_arrow_json___ParseOptions__initialize2`, newlines_in_values, explicit_schema)
+}
+
+json___TableReader__Make <- function(input, read_options, parse_options) {
+ .Call(`_arrow_json___TableReader__Make`, input, read_options, parse_options)
+}
+
+json___TableReader__Read <- function(table_reader) {
+ .Call(`_arrow_json___TableReader__Read`, table_reader)
+}
+
+MemoryPool__default <- function() {
+ .Call(`_arrow_MemoryPool__default`)
+}
+
+MemoryPool__bytes_allocated <- function(pool) {
+ .Call(`_arrow_MemoryPool__bytes_allocated`, pool)
+}
+
+MemoryPool__max_memory <- function(pool) {
+ .Call(`_arrow_MemoryPool__max_memory`, pool)
+}
+
+MemoryPool__backend_name <- function(pool) {
+ .Call(`_arrow_MemoryPool__backend_name`, pool)
+}
+
+supported_memory_backends <- function() {
+ .Call(`_arrow_supported_memory_backends`)
+}
+
+ipc___Message__body_length <- function(message) {
+ .Call(`_arrow_ipc___Message__body_length`, message)
+}
+
+ipc___Message__metadata <- function(message) {
+ .Call(`_arrow_ipc___Message__metadata`, message)
+}
+
+ipc___Message__body <- function(message) {
+ .Call(`_arrow_ipc___Message__body`, message)
+}
+
+ipc___Message__Verify <- function(message) {
+ .Call(`_arrow_ipc___Message__Verify`, message)
+}
+
+ipc___Message__type <- function(message) {
+ .Call(`_arrow_ipc___Message__type`, message)
+}
+
+ipc___Message__Equals <- function(x, y) {
+ .Call(`_arrow_ipc___Message__Equals`, x, y)
+}
+
+ipc___ReadRecordBatch__Message__Schema <- function(message, schema) {
+ .Call(`_arrow_ipc___ReadRecordBatch__Message__Schema`, message, schema)
+}
+
+ipc___ReadSchema_InputStream <- function(stream) {
+ .Call(`_arrow_ipc___ReadSchema_InputStream`, stream)
+}
+
+ipc___ReadSchema_Message <- function(message) {
+ .Call(`_arrow_ipc___ReadSchema_Message`, message)
+}
+
+ipc___MessageReader__Open <- function(stream) {
+ .Call(`_arrow_ipc___MessageReader__Open`, stream)
+}
+
+ipc___MessageReader__ReadNextMessage <- function(reader) {
+ .Call(`_arrow_ipc___MessageReader__ReadNextMessage`, reader)
+}
+
+ipc___ReadMessage <- function(stream) {
+ .Call(`_arrow_ipc___ReadMessage`, stream)
+}
+
+parquet___arrow___ArrowReaderProperties__Make <- function(use_threads) {
+ .Call(`_arrow_parquet___arrow___ArrowReaderProperties__Make`, use_threads)
+}
+
+parquet___arrow___ArrowReaderProperties__set_use_threads <- function(properties, use_threads) {
+ invisible(.Call(`_arrow_parquet___arrow___ArrowReaderProperties__set_use_threads`, properties, use_threads))
+}
+
+parquet___arrow___ArrowReaderProperties__get_use_threads <- function(properties, use_threads) {
+ .Call(`_arrow_parquet___arrow___ArrowReaderProperties__get_use_threads`, properties, use_threads)
+}
+
+parquet___arrow___ArrowReaderProperties__get_read_dictionary <- function(properties, column_index) {
+ .Call(`_arrow_parquet___arrow___ArrowReaderProperties__get_read_dictionary`, properties, column_index)
+}
+
+parquet___arrow___ArrowReaderProperties__set_read_dictionary <- function(properties, column_index, read_dict) {
+ invisible(.Call(`_arrow_parquet___arrow___ArrowReaderProperties__set_read_dictionary`, properties, column_index, read_dict))
+}
+
+parquet___arrow___FileReader__OpenFile <- function(file, props) {
+ .Call(`_arrow_parquet___arrow___FileReader__OpenFile`, file, props)
+}
+
+parquet___arrow___FileReader__ReadTable1 <- function(reader) {
+ .Call(`_arrow_parquet___arrow___FileReader__ReadTable1`, reader)
+}
+
+parquet___arrow___FileReader__ReadTable2 <- function(reader, column_indices) {
+ .Call(`_arrow_parquet___arrow___FileReader__ReadTable2`, reader, column_indices)
+}
+
+parquet___arrow___FileReader__ReadRowGroup1 <- function(reader, i) {
+ .Call(`_arrow_parquet___arrow___FileReader__ReadRowGroup1`, reader, i)
+}
+
+parquet___arrow___FileReader__ReadRowGroup2 <- function(reader, i, column_indices) {
+ .Call(`_arrow_parquet___arrow___FileReader__ReadRowGroup2`, reader, i, column_indices)
+}
+
+parquet___arrow___FileReader__ReadRowGroups1 <- function(reader, row_groups) {
+ .Call(`_arrow_parquet___arrow___FileReader__ReadRowGroups1`, reader, row_groups)
+}
+
+parquet___arrow___FileReader__ReadRowGroups2 <- function(reader, row_groups, column_indices) {
+ .Call(`_arrow_parquet___arrow___FileReader__ReadRowGroups2`, reader, row_groups, column_indices)
+}
+
+parquet___arrow___FileReader__num_rows <- function(reader) {
+ .Call(`_arrow_parquet___arrow___FileReader__num_rows`, reader)
+}
+
+parquet___arrow___FileReader__num_columns <- function(reader) {
+ .Call(`_arrow_parquet___arrow___FileReader__num_columns`, reader)
+}
+
+parquet___arrow___FileReader__num_row_groups <- function(reader) {
+ .Call(`_arrow_parquet___arrow___FileReader__num_row_groups`, reader)
+}
+
+parquet___arrow___FileReader__ReadColumn <- function(reader, i) {
+ .Call(`_arrow_parquet___arrow___FileReader__ReadColumn`, reader, i)
+}
+
+parquet___ArrowWriterProperties___create <- function(allow_truncated_timestamps, use_deprecated_int96_timestamps, timestamp_unit) {
+ .Call(`_arrow_parquet___ArrowWriterProperties___create`, allow_truncated_timestamps, use_deprecated_int96_timestamps, timestamp_unit)
+}
+
+parquet___WriterProperties___Builder__create <- function() {
+ .Call(`_arrow_parquet___WriterProperties___Builder__create`)
+}
+
+parquet___WriterProperties___Builder__version <- function(builder, version) {
+ invisible(.Call(`_arrow_parquet___WriterProperties___Builder__version`, builder, version))
+}
+
+parquet___ArrowWriterProperties___Builder__set_compressions <- function(builder, paths, types) {
+ invisible(.Call(`_arrow_parquet___ArrowWriterProperties___Builder__set_compressions`, builder, paths, types))
+}
+
+parquet___ArrowWriterProperties___Builder__set_compression_levels <- function(builder, paths, levels) {
+ invisible(.Call(`_arrow_parquet___ArrowWriterProperties___Builder__set_compression_levels`, builder, paths, levels))
+}
+
+parquet___ArrowWriterProperties___Builder__set_use_dictionary <- function(builder, paths, use_dictionary) {
+ invisible(.Call(`_arrow_parquet___ArrowWriterProperties___Builder__set_use_dictionary`, builder, paths, use_dictionary))
+}
+
+parquet___ArrowWriterProperties___Builder__set_write_statistics <- function(builder, paths, write_statistics) {
+ invisible(.Call(`_arrow_parquet___ArrowWriterProperties___Builder__set_write_statistics`, builder, paths, write_statistics))
+}
+
+parquet___ArrowWriterProperties___Builder__data_page_size <- function(builder, data_page_size) {
+ invisible(.Call(`_arrow_parquet___ArrowWriterProperties___Builder__data_page_size`, builder, data_page_size))
+}
+
+parquet___WriterProperties___Builder__build <- function(builder) {
+ .Call(`_arrow_parquet___WriterProperties___Builder__build`, builder)
+}
+
+parquet___arrow___ParquetFileWriter__Open <- function(schema, sink, properties, arrow_properties) {
+ .Call(`_arrow_parquet___arrow___ParquetFileWriter__Open`, schema, sink, properties, arrow_properties)
+}
+
+parquet___arrow___FileWriter__WriteTable <- function(writer, table, chunk_size) {
+ invisible(.Call(`_arrow_parquet___arrow___FileWriter__WriteTable`, writer, table, chunk_size))
+}
+
+parquet___arrow___FileWriter__Close <- function(writer) {
+ invisible(.Call(`_arrow_parquet___arrow___FileWriter__Close`, writer))
+}
+
+parquet___arrow___WriteTable <- function(table, sink, properties, arrow_properties) {
+ invisible(.Call(`_arrow_parquet___arrow___WriteTable`, table, sink, properties, arrow_properties))
+}
+
+parquet___arrow___FileReader__GetSchema <- function(reader) {
+ .Call(`_arrow_parquet___arrow___FileReader__GetSchema`, reader)
+}
+
+allocate_arrow_schema <- function() {
+ .Call(`_arrow_allocate_arrow_schema`)
+}
+
+delete_arrow_schema <- function(ptr) {
+ invisible(.Call(`_arrow_delete_arrow_schema`, ptr))
+}
+
+allocate_arrow_array <- function() {
+ .Call(`_arrow_allocate_arrow_array`)
+}
+
+delete_arrow_array <- function(ptr) {
+ invisible(.Call(`_arrow_delete_arrow_array`, ptr))
+}
+
+allocate_arrow_array_stream <- function() {
+ .Call(`_arrow_allocate_arrow_array_stream`)
+}
+
+delete_arrow_array_stream <- function(ptr) {
+ invisible(.Call(`_arrow_delete_arrow_array_stream`, ptr))
+}
+
+ImportArray <- function(array, schema) {
+ .Call(`_arrow_ImportArray`, array, schema)
+}
+
+ImportRecordBatch <- function(array, schema) {
+ .Call(`_arrow_ImportRecordBatch`, array, schema)
+}
+
+ImportSchema <- function(schema) {
+ .Call(`_arrow_ImportSchema`, schema)
+}
+
+ImportField <- function(field) {
+ .Call(`_arrow_ImportField`, field)
+}
+
+ImportType <- function(type) {
+ .Call(`_arrow_ImportType`, type)
+}
+
+ImportRecordBatchReader <- function(stream) {
+ .Call(`_arrow_ImportRecordBatchReader`, stream)
+}
+
+ExportType <- function(type, ptr) {
+ invisible(.Call(`_arrow_ExportType`, type, ptr))
+}
+
+ExportField <- function(field, ptr) {
+ invisible(.Call(`_arrow_ExportField`, field, ptr))
+}
+
+ExportSchema <- function(schema, ptr) {
+ invisible(.Call(`_arrow_ExportSchema`, schema, ptr))
+}
+
+ExportArray <- function(array, array_ptr, schema_ptr) {
+ invisible(.Call(`_arrow_ExportArray`, array, array_ptr, schema_ptr))
+}
+
+ExportRecordBatch <- function(batch, array_ptr, schema_ptr) {
+ invisible(.Call(`_arrow_ExportRecordBatch`, batch, array_ptr, schema_ptr))
+}
+
+ExportRecordBatchReader <- function(reader, stream_ptr) {
+ invisible(.Call(`_arrow_ExportRecordBatchReader`, reader, stream_ptr))
+}
+
+Table__from_dots <- function(lst, schema_sxp, use_threads) {
+ .Call(`_arrow_Table__from_dots`, lst, schema_sxp, use_threads)
+}
+
+vec_to_arrow <- function(x, s_type) {
+ .Call(`_arrow_vec_to_arrow`, x, s_type)
+}
+
+DictionaryArray__FromArrays <- function(type, indices, dict) {
+ .Call(`_arrow_DictionaryArray__FromArrays`, type, indices, dict)
+}
+
+RecordBatch__num_columns <- function(x) {
+ .Call(`_arrow_RecordBatch__num_columns`, x)
+}
+
+RecordBatch__num_rows <- function(x) {
+ .Call(`_arrow_RecordBatch__num_rows`, x)
+}
+
+RecordBatch__schema <- function(x) {
+ .Call(`_arrow_RecordBatch__schema`, x)
+}
+
+RecordBatch__RenameColumns <- function(batch, names) {
+ .Call(`_arrow_RecordBatch__RenameColumns`, batch, names)
+}
+
+RecordBatch__ReplaceSchemaMetadata <- function(x, metadata) {
+ .Call(`_arrow_RecordBatch__ReplaceSchemaMetadata`, x, metadata)
+}
+
+RecordBatch__columns <- function(batch) {
+ .Call(`_arrow_RecordBatch__columns`, batch)
+}
+
+RecordBatch__column <- function(batch, i) {
+ .Call(`_arrow_RecordBatch__column`, batch, i)
+}
+
+RecordBatch__GetColumnByName <- function(batch, name) {
+ .Call(`_arrow_RecordBatch__GetColumnByName`, batch, name)
+}
+
+RecordBatch__SelectColumns <- function(batch, indices) {
+ .Call(`_arrow_RecordBatch__SelectColumns`, batch, indices)
+}
+
+RecordBatch__Equals <- function(self, other, check_metadata) {
+ .Call(`_arrow_RecordBatch__Equals`, self, other, check_metadata)
+}
+
+RecordBatch__AddColumn <- function(batch, i, field, column) {
+ .Call(`_arrow_RecordBatch__AddColumn`, batch, i, field, column)
+}
+
+RecordBatch__SetColumn <- function(batch, i, field, column) {
+ .Call(`_arrow_RecordBatch__SetColumn`, batch, i, field, column)
+}
+
+RecordBatch__RemoveColumn <- function(batch, i) {
+ .Call(`_arrow_RecordBatch__RemoveColumn`, batch, i)
+}
+
+RecordBatch__column_name <- function(batch, i) {
+ .Call(`_arrow_RecordBatch__column_name`, batch, i)
+}
+
+RecordBatch__names <- function(batch) {
+ .Call(`_arrow_RecordBatch__names`, batch)
+}
+
+RecordBatch__Slice1 <- function(self, offset) {
+ .Call(`_arrow_RecordBatch__Slice1`, self, offset)
+}
+
+RecordBatch__Slice2 <- function(self, offset, length) {
+ .Call(`_arrow_RecordBatch__Slice2`, self, offset, length)
+}
+
+ipc___SerializeRecordBatch__Raw <- function(batch) {
+ .Call(`_arrow_ipc___SerializeRecordBatch__Raw`, batch)
+}
+
+ipc___ReadRecordBatch__InputStream__Schema <- function(stream, schema) {
+ .Call(`_arrow_ipc___ReadRecordBatch__InputStream__Schema`, stream, schema)
+}
+
+RecordBatch__from_arrays <- function(schema_sxp, lst) {
+ .Call(`_arrow_RecordBatch__from_arrays`, schema_sxp, lst)
+}
+
+RecordBatchReader__schema <- function(reader) {
+ .Call(`_arrow_RecordBatchReader__schema`, reader)
+}
+
+RecordBatchReader__ReadNext <- function(reader) {
+ .Call(`_arrow_RecordBatchReader__ReadNext`, reader)
+}
+
+RecordBatchReader__batches <- function(reader) {
+ .Call(`_arrow_RecordBatchReader__batches`, reader)
+}
+
+Table__from_RecordBatchReader <- function(reader) {
+ .Call(`_arrow_Table__from_RecordBatchReader`, reader)
+}
+
+ipc___RecordBatchStreamReader__Open <- function(stream) {
+ .Call(`_arrow_ipc___RecordBatchStreamReader__Open`, stream)
+}
+
+ipc___RecordBatchFileReader__schema <- function(reader) {
+ .Call(`_arrow_ipc___RecordBatchFileReader__schema`, reader)
+}
+
+ipc___RecordBatchFileReader__num_record_batches <- function(reader) {
+ .Call(`_arrow_ipc___RecordBatchFileReader__num_record_batches`, reader)
+}
+
+ipc___RecordBatchFileReader__ReadRecordBatch <- function(reader, i) {
+ .Call(`_arrow_ipc___RecordBatchFileReader__ReadRecordBatch`, reader, i)
+}
+
+ipc___RecordBatchFileReader__Open <- function(file) {
+ .Call(`_arrow_ipc___RecordBatchFileReader__Open`, file)
+}
+
+Table__from_RecordBatchFileReader <- function(reader) {
+ .Call(`_arrow_Table__from_RecordBatchFileReader`, reader)
+}
+
+ipc___RecordBatchFileReader__batches <- function(reader) {
+ .Call(`_arrow_ipc___RecordBatchFileReader__batches`, reader)
+}
+
+ipc___RecordBatchWriter__WriteRecordBatch <- function(batch_writer, batch) {
+ invisible(.Call(`_arrow_ipc___RecordBatchWriter__WriteRecordBatch`, batch_writer, batch))
+}
+
+ipc___RecordBatchWriter__WriteTable <- function(batch_writer, table) {
+ invisible(.Call(`_arrow_ipc___RecordBatchWriter__WriteTable`, batch_writer, table))
+}
+
+ipc___RecordBatchWriter__Close <- function(batch_writer) {
+ invisible(.Call(`_arrow_ipc___RecordBatchWriter__Close`, batch_writer))
+}
+
+ipc___RecordBatchFileWriter__Open <- function(stream, schema, use_legacy_format, metadata_version) {
+ .Call(`_arrow_ipc___RecordBatchFileWriter__Open`, stream, schema, use_legacy_format, metadata_version)
+}
+
+ipc___RecordBatchStreamWriter__Open <- function(stream, schema, use_legacy_format, metadata_version) {
+ .Call(`_arrow_ipc___RecordBatchStreamWriter__Open`, stream, schema, use_legacy_format, metadata_version)
+}
+
+Array__GetScalar <- function(x, i) {
+ .Call(`_arrow_Array__GetScalar`, x, i)
+}
+
+Scalar__ToString <- function(s) {
+ .Call(`_arrow_Scalar__ToString`, s)
+}
+
+StructScalar__field <- function(s, i) {
+ .Call(`_arrow_StructScalar__field`, s, i)
+}
+
+StructScalar__GetFieldByName <- function(s, name) {
+ .Call(`_arrow_StructScalar__GetFieldByName`, s, name)
+}
+
+Scalar__as_vector <- function(scalar) {
+ .Call(`_arrow_Scalar__as_vector`, scalar)
+}
+
+MakeArrayFromScalar <- function(scalar, n) {
+ .Call(`_arrow_MakeArrayFromScalar`, scalar, n)
+}
+
+Scalar__is_valid <- function(s) {
+ .Call(`_arrow_Scalar__is_valid`, s)
+}
+
+Scalar__type <- function(s) {
+ .Call(`_arrow_Scalar__type`, s)
+}
+
+Scalar__Equals <- function(lhs, rhs) {
+ .Call(`_arrow_Scalar__Equals`, lhs, rhs)
+}
+
+Scalar__ApproxEquals <- function(lhs, rhs) {
+ .Call(`_arrow_Scalar__ApproxEquals`, lhs, rhs)
+}
+
+schema_ <- function(fields) {
+ .Call(`_arrow_schema_`, fields)
+}
+
+Schema__ToString <- function(s) {
+ .Call(`_arrow_Schema__ToString`, s)
+}
+
+Schema__num_fields <- function(s) {
+ .Call(`_arrow_Schema__num_fields`, s)
+}
+
+Schema__field <- function(s, i) {
+ .Call(`_arrow_Schema__field`, s, i)
+}
+
+Schema__AddField <- function(s, i, field) {
+ .Call(`_arrow_Schema__AddField`, s, i, field)
+}
+
+Schema__SetField <- function(s, i, field) {
+ .Call(`_arrow_Schema__SetField`, s, i, field)
+}
+
+Schema__RemoveField <- function(s, i) {
+ .Call(`_arrow_Schema__RemoveField`, s, i)
+}
+
+Schema__GetFieldByName <- function(s, x) {
+ .Call(`_arrow_Schema__GetFieldByName`, s, x)
+}
+
+Schema__fields <- function(schema) {
+ .Call(`_arrow_Schema__fields`, schema)
+}
+
+Schema__field_names <- function(schema) {
+ .Call(`_arrow_Schema__field_names`, schema)
+}
+
+Schema__HasMetadata <- function(schema) {
+ .Call(`_arrow_Schema__HasMetadata`, schema)
+}
+
+Schema__metadata <- function(schema) {
+ .Call(`_arrow_Schema__metadata`, schema)
+}
+
+Schema__WithMetadata <- function(schema, metadata) {
+ .Call(`_arrow_Schema__WithMetadata`, schema, metadata)
+}
+
+Schema__serialize <- function(schema) {
+ .Call(`_arrow_Schema__serialize`, schema)
+}
+
+Schema__Equals <- function(schema, other, check_metadata) {
+ .Call(`_arrow_Schema__Equals`, schema, other, check_metadata)
+}
+
+arrow__UnifySchemas <- function(schemas) {
+ .Call(`_arrow_arrow__UnifySchemas`, schemas)
+}
+
+Table__num_columns <- function(x) {
+ .Call(`_arrow_Table__num_columns`, x)
+}
+
+Table__num_rows <- function(x) {
+ .Call(`_arrow_Table__num_rows`, x)
+}
+
+Table__schema <- function(x) {
+ .Call(`_arrow_Table__schema`, x)
+}
+
+Table__ReplaceSchemaMetadata <- function(x, metadata) {
+ .Call(`_arrow_Table__ReplaceSchemaMetadata`, x, metadata)
+}
+
+Table__column <- function(table, i) {
+ .Call(`_arrow_Table__column`, table, i)
+}
+
+Table__field <- function(table, i) {
+ .Call(`_arrow_Table__field`, table, i)
+}
+
+Table__columns <- function(table) {
+ .Call(`_arrow_Table__columns`, table)
+}
+
+Table__ColumnNames <- function(table) {
+ .Call(`_arrow_Table__ColumnNames`, table)
+}
+
+Table__RenameColumns <- function(table, names) {
+ .Call(`_arrow_Table__RenameColumns`, table, names)
+}
+
+Table__Slice1 <- function(table, offset) {
+ .Call(`_arrow_Table__Slice1`, table, offset)
+}
+
+Table__Slice2 <- function(table, offset, length) {
+ .Call(`_arrow_Table__Slice2`, table, offset, length)
+}
+
+Table__Equals <- function(lhs, rhs, check_metadata) {
+ .Call(`_arrow_Table__Equals`, lhs, rhs, check_metadata)
+}
+
+Table__Validate <- function(table) {
+ .Call(`_arrow_Table__Validate`, table)
+}
+
+Table__ValidateFull <- function(table) {
+ .Call(`_arrow_Table__ValidateFull`, table)
+}
+
+Table__GetColumnByName <- function(table, name) {
+ .Call(`_arrow_Table__GetColumnByName`, table, name)
+}
+
+Table__RemoveColumn <- function(table, i) {
+ .Call(`_arrow_Table__RemoveColumn`, table, i)
+}
+
+Table__AddColumn <- function(table, i, field, column) {
+ .Call(`_arrow_Table__AddColumn`, table, i, field, column)
+}
+
+Table__SetColumn <- function(table, i, field, column) {
+ .Call(`_arrow_Table__SetColumn`, table, i, field, column)
+}
+
+Table__SelectColumns <- function(table, indices) {
+ .Call(`_arrow_Table__SelectColumns`, table, indices)
+}
+
+all_record_batches <- function(lst) {
+ .Call(`_arrow_all_record_batches`, lst)
+}
+
+Table__from_record_batches <- function(batches, schema_sxp) {
+ .Call(`_arrow_Table__from_record_batches`, batches, schema_sxp)
+}
+
+GetCpuThreadPoolCapacity <- function() {
+ .Call(`_arrow_GetCpuThreadPoolCapacity`)
+}
+
+SetCpuThreadPoolCapacity <- function(threads) {
+ invisible(.Call(`_arrow_SetCpuThreadPoolCapacity`, threads))
+}
+
+GetIOThreadPoolCapacity <- function() {
+ .Call(`_arrow_GetIOThreadPoolCapacity`)
+}
+
+SetIOThreadPoolCapacity <- function(threads) {
+ invisible(.Call(`_arrow_SetIOThreadPoolCapacity`, threads))
+}
+
+Array__infer_type <- function(x) {
+ .Call(`_arrow_Array__infer_type`, x)
+}
diff --git a/src/arrow/r/R/buffer.R b/src/arrow/r/R/buffer.R
new file mode 100644
index 000000000..a9424fd0d
--- /dev/null
+++ b/src/arrow/r/R/buffer.R
@@ -0,0 +1,78 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+#' @title Buffer class
+#' @usage NULL
+#' @format NULL
+#' @docType class
+#' @description A Buffer is an object containing a pointer to a piece of
+#' contiguous memory with a particular size.
+#' @section Factory:
+#' `buffer()` lets you create an `arrow::Buffer` from an R object
+#' @section Methods:
+#'
+#' - `$is_mutable` : is this buffer mutable?
+#' - `$ZeroPadding()` : zero bytes in padding, i.e. bytes between size and capacity
+#' - `$size` : size in memory, in bytes
+#' - `$capacity`: possible capacity, in bytes
+#'
+#' @rdname buffer
+#' @name buffer
+#' @examplesIf arrow_available()
+#' my_buffer <- buffer(c(1, 2, 3, 4))
+#' my_buffer$is_mutable
+#' my_buffer$ZeroPadding()
+#' my_buffer$size
+#' my_buffer$capacity
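+#'
+#' # A minimal sketch using the methods defined below: a Buffer can also wrap
+#' # a raw vector, and as.raw() recovers the bytes
+#' raw_buffer <- buffer(as.raw(1:3))
+#' as.raw(raw_buffer)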
+#' @export
+#' @include arrow-package.R
+#' @include enums.R
+Buffer <- R6Class("Buffer",
+ inherit = ArrowObject,
+ public = list(
+ ZeroPadding = function() Buffer__ZeroPadding(self),
+ data = function() Buffer__data(self),
+ Equals = function(other, ...) {
+ inherits(other, "Buffer") && Buffer__Equals(self, other)
+ }
+ ),
+ active = list(
+ is_mutable = function() Buffer__is_mutable(self),
+ size = function() Buffer__size(self),
+ capacity = function() Buffer__capacity(self)
+ )
+)
+
+Buffer$create <- function(x) {
+ if (inherits(x, "Buffer")) {
+ x
+ } else if (inherits(x, c("raw", "numeric", "integer", "complex"))) {
+ r___RBuffer__initialize(x)
+ } else if (inherits(x, "BufferOutputStream")) {
+ x$finish()
+ } else {
+ stop("Cannot convert object of class ", class(x), " to arrow::Buffer")
+ }
+}
+
+#' @param x R object. Only raw, numeric, integer, and complex vectors are currently supported
+#' @return an instance of `Buffer` that borrows memory from `x`
+#' @export
+buffer <- Buffer$create
+
+#' @export
+as.raw.Buffer <- function(x) x$data()
diff --git a/src/arrow/r/R/chunked-array.R b/src/arrow/r/R/chunked-array.R
new file mode 100644
index 000000000..597180ea7
--- /dev/null
+++ b/src/arrow/r/R/chunked-array.R
@@ -0,0 +1,153 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+#' @include arrow-datum.R
+
+#' @title ChunkedArray class
+#' @usage NULL
+#' @format NULL
+#' @docType class
+#' @description A `ChunkedArray` is a data structure managing a list of
+#' primitive Arrow [Arrays][Array] logically as one large array. Chunked arrays
+#' may be grouped together in a [Table].
+#' @section Factory:
+#' The `ChunkedArray$create()` factory method instantiates the object from
+#' various Arrays or R vectors. `chunked_array()` is an alias for it.
+#'
+#' @section Methods:
+#'
+#' - `$length()`: The number of elements this array contains
+#' - `$chunk(i)`: Extract an `Array` chunk by integer position
+#' - `$as_vector()`: convert to an R vector
+#' - `$Slice(offset, length = NULL)`: Construct a zero-copy slice of the array
+#' with the indicated offset and length. If length is `NULL`, the slice goes
+#' until the end of the array.
+#' - `$Take(i)`: return a `ChunkedArray` with values at positions given by
+#'    integers `i`. `i` may be an R vector of integers or an Arrow integer
+#'    `Array` or `ChunkedArray`; either is passed to the `take` compute kernel.
+#' - `$Filter(i, keep_na = TRUE)`: return a `ChunkedArray` with values at positions where
+#' logical vector or Arrow boolean-type `(Chunked)Array` `i` is `TRUE`.
+#' - `$SortIndices(descending = FALSE)`: return an `Array` of integer positions that can be
+#' used to rearrange the `ChunkedArray` in ascending or descending order
+#' - `$cast(target_type, safe = TRUE, options = cast_options(safe))`: Alter the
+#' data in the array to change its type.
+#' - `$null_count`: The number of null entries in the array
+#' - `$chunks`: return a list of `Array`s
+#' - `$num_chunks`: integer number of chunks in the `ChunkedArray`
+#' - `$type`: logical type of data
+#' - `$View(type)`: Construct a zero-copy view of this `ChunkedArray` with the
+#' given type.
+#' - `$Validate()`: Perform any validation checks to determine obvious inconsistencies
+#' within the array's internal data. This can be an expensive check, potentially `O(length)`
+#'
+#' @rdname ChunkedArray
+#' @name ChunkedArray
+#' @seealso [Array]
+#' @examplesIf arrow_available()
+#' # Pass items into chunked_array as separate objects to create chunks
+#' class_scores <- chunked_array(c(87, 88, 89), c(94, 93, 92), c(71, 72, 73))
+#' class_scores$num_chunks
+#'
+#' # When taking a Slice from a chunked_array, chunks are preserved
+#' class_scores$Slice(2, length = 5)
+#'
+#' # You can combine Take and SortIndices to return a ChunkedArray with 1 chunk
+#' # containing all values, ordered.
+#' class_scores$Take(class_scores$SortIndices(descending = TRUE))
+#'
+#' # If you pass a list into chunked_array, the whole list becomes a single chunk
+#' list_scores <- chunked_array(list(c(9.9, 9.6, 9.5), c(8.2, 8.3, 8.4), c(10.0, 9.9, 9.8)))
+#' list_scores$num_chunks
+#'
+#' # When constructing a ChunkedArray, the first chunk is used to infer type.
+#' doubles <- chunked_array(c(1, 2, 3), c(5L, 6L, 7L))
+#' doubles$type
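+#'
+#' # A minimal sketch of Filter(): keep only the values where a logical
+#' # vector (or Arrow boolean (Chunked)Array) is TRUE
+#' class_scores$Filter(rep(c(TRUE, FALSE, TRUE), 3))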
+#' @export
+ChunkedArray <- R6Class("ChunkedArray",
+ inherit = ArrowDatum,
+ public = list(
+ length = function() ChunkedArray__length(self),
+ type_id = function() ChunkedArray__type(self)$id,
+ chunk = function(i) Array$create(ChunkedArray__chunk(self, i)),
+ as_vector = function() ChunkedArray__as_vector(self, option_use_threads()),
+ Slice = function(offset, length = NULL) {
+ if (is.null(length)) {
+ ChunkedArray__Slice1(self, offset)
+ } else {
+ ChunkedArray__Slice2(self, offset, length)
+ }
+ },
+ Take = function(i) {
+ if (is.numeric(i)) {
+ i <- as.integer(i)
+ }
+ if (is.integer(i)) {
+ i <- Array$create(i)
+ }
+ call_function("take", self, i)
+ },
+ Filter = function(i, keep_na = TRUE) {
+ if (is.logical(i)) {
+ i <- Array$create(i)
+ }
+ call_function("filter", self, i, options = list(keep_na = keep_na))
+ },
+ SortIndices = function(descending = FALSE) {
+ assert_that(is.logical(descending))
+ assert_that(length(descending) == 1L)
+ assert_that(!is.na(descending))
+ # TODO: after ARROW-12042 is closed, review whether this and the
+ # Array$SortIndices definition can be consolidated
+ call_function(
+ "sort_indices",
+ self,
+ options = list(names = "", orders = as.integer(descending))
+ )
+ },
+ View = function(type) {
+ ChunkedArray__View(self, as_type(type))
+ },
+ Validate = function() {
+ ChunkedArray__Validate(self)
+ },
+ ToString = function() {
+ ChunkedArray__ToString(self)
+ },
+ Equals = function(other, ...) {
+ inherits(other, "ChunkedArray") && ChunkedArray__Equals(self, other)
+ }
+ ),
+ active = list(
+ null_count = function() ChunkedArray__null_count(self),
+ num_chunks = function() ChunkedArray__num_chunks(self),
+ chunks = function() map(ChunkedArray__chunks(self), Array$create),
+ type = function() ChunkedArray__type(self)
+ )
+)
+
+ChunkedArray$create <- function(..., type = NULL) {
+ if (!is.null(type)) {
+ type <- as_type(type)
+ }
+ ChunkedArray__from_list(list2(...), type)
+}
+
+#' @param \dots Vectors to coerce
+#' @param type An optional data type; if omitted (`NULL`), the type is inferred from the data
+#' @rdname ChunkedArray
+#' @export
+chunked_array <- ChunkedArray$create
diff --git a/src/arrow/r/R/compression.R b/src/arrow/r/R/compression.R
new file mode 100644
index 000000000..7107012d0
--- /dev/null
+++ b/src/arrow/r/R/compression.R
@@ -0,0 +1,124 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+#' @include enums.R
+#' @include arrow-package.R
+#' @include io.R
+
+#' @title Compression Codec class
+#' @usage NULL
+#' @format NULL
+#' @docType class
+#' @description Codecs allow you to create [compressed input and output
+#' streams][compression].
+#' @section Factory:
+#' The `Codec$create()` factory method takes the following arguments:
+#' * `type`: string name of the compression method. Possible values are
+#' "uncompressed", "snappy", "gzip", "brotli", "zstd", "lz4", "lzo", or
+#' "bz2". `type` may be upper- or lower-cased. Not all methods may be
+#' available; support depends on build-time flags for the C++ library.
+#' See [codec_is_available()]. Most builds support at least "snappy" and
+#' "gzip". All support "uncompressed".
+#' * `compression_level`: compression level, the default value (`NA`) uses the
+#' default compression level for the selected compression `type`.
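+#'
+#' @examplesIf arrow_available()
+#' # A minimal sketch (assumes gzip support was enabled at build time;
+#' # see codec_is_available()): create a codec and inspect its name
+#' cd <- Codec$create("gzip")
+#' cd$name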
+#' @rdname Codec
+#' @name Codec
+#' @export
+Codec <- R6Class("Codec",
+ inherit = ArrowObject,
+ active = list(
+ name = function() util___Codec__name(self),
+ level = function() abort("Codec$level() not yet implemented")
+ )
+)
+Codec$create <- function(type = "gzip", compression_level = NA) {
+ if (is.string(type)) {
+ type <- util___Codec__Create(
+ compression_from_name(type), compression_level
+ )
+ }
+ assert_is(type, "Codec")
+ type
+}
+
+#' Check whether a compression codec is available
+#'
+#' Support for compression libraries depends on the build-time settings of
+#' the Arrow C++ library. This function lets you know which are available for
+#' use.
+#' @param type A string, one of "uncompressed", "snappy", "gzip", "brotli",
+#' "zstd", "lz4", "lzo", or "bz2", case insensitive.
+#' @return Logical: is `type` available?
+#' @export
+#' @examplesIf arrow_available()
+#' codec_is_available("gzip")
+codec_is_available <- function(type) {
+ util___Codec__IsAvailable(compression_from_name(type))
+}
+
+compression_from_name <- function(name) {
+ map_int(name, ~ CompressionType[[match.arg(toupper(.x), names(CompressionType))]])
+}
+
+#' @title Compressed stream classes
+#' @rdname compression
+#' @name compression
+#' @aliases CompressedInputStream CompressedOutputStream
+#' @docType class
+#' @usage NULL
+#' @format NULL
+#' @description `CompressedInputStream` and `CompressedOutputStream`
+#' allow you to apply a compression [Codec] to an
+#' input or output stream.
+#'
+#' @section Factory:
+#'
+#' The `CompressedInputStream$create()` and `CompressedOutputStream$create()`
+#' factory methods instantiate the object and take the following arguments:
+#'
+#' - `stream` An [InputStream] or [OutputStream], respectively
+#' - `codec` A `Codec`, either a [Codec][Codec] instance or a string
+#' - `compression_level` compression level for when the `codec` argument is given as a string
+#'
+#' @section Methods:
+#'
+#' Methods are inherited from [InputStream] and [OutputStream], respectively
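+#'
+#' @examplesIf arrow_available()
+#' # A minimal sketch (assumes gzip support and a writable temporary directory):
+#' # wrap a file path in a gzip-compressed output stream, then reopen it
+#' tf <- tempfile(fileext = ".gz")
+#' out <- CompressedOutputStream$create(tf)
+#' out$close()
+#' inp <- CompressedInputStream$create(tf)
+#' inp$close()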
+#' @export
+#' @include arrow-package.R
+CompressedOutputStream <- R6Class("CompressedOutputStream", inherit = OutputStream)
+CompressedOutputStream$create <- function(stream, codec = "gzip", compression_level = NA) {
+ codec <- Codec$create(codec, compression_level = compression_level)
+ if (is.string(stream)) {
+ stream <- FileOutputStream$create(stream)
+ }
+ assert_is(stream, "OutputStream")
+ io___CompressedOutputStream__Make(codec, stream)
+}
+
+#' @rdname compression
+#' @usage NULL
+#' @format NULL
+#' @export
+CompressedInputStream <- R6Class("CompressedInputStream", inherit = InputStream)
+CompressedInputStream$create <- function(stream, codec = "gzip", compression_level = NA) {
+ codec <- Codec$create(codec, compression_level = compression_level)
+ if (is.string(stream)) {
+ stream <- ReadableFile$create(stream)
+ }
+ assert_is(stream, "InputStream")
+ io___CompressedInputStream__Make(codec, stream)
+}
diff --git a/src/arrow/r/R/compute.R b/src/arrow/r/R/compute.R
new file mode 100644
index 000000000..0a7d77a09
--- /dev/null
+++ b/src/arrow/r/R/compute.R
@@ -0,0 +1,309 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+#' Call an Arrow compute function
+#'
+#' This function provides a lower-level API for calling Arrow functions by their
+#' string function name. You won't use it directly for most applications.
+#' Many Arrow compute functions are mapped to R methods,
+#' and in a `dplyr` evaluation context, [all Arrow functions][list_compute_functions()]
+#' are callable with an `arrow_` prefix.
+#' @param function_name string Arrow compute function name
+#' @param ... Function arguments, which may include `Array`, `ChunkedArray`, `Scalar`,
+#' `RecordBatch`, or `Table`.
+#' @param args list arguments as an alternative to specifying in `...`
+#' @param options named list of C++ function options.
+#' @details When passing indices in `...`, `args`, or `options`, express them as
+#' 0-based integers (consistent with C++).
+#' @return An `Array`, `ChunkedArray`, `Scalar`, `RecordBatch`, or `Table`, whatever the compute function results in.
+#' @seealso [Arrow C++ documentation](https://arrow.apache.org/docs/cpp/compute.html) for
+#' the functions and their respective options.
+#' @examplesIf arrow_available()
+#' a <- Array$create(c(1L, 2L, 3L, NA, 5L))
+#' s <- Scalar$create(4L)
+#' call_function("coalesce", a, s)
+#'
+#' a <- Array$create(rnorm(10000))
+#' call_function("quantile", a, options = list(q = seq(0, 1, 0.25)))
+#' @export
+#' @include array.R
+#' @include chunked-array.R
+#' @include scalar.R
+call_function <- function(function_name, ..., args = list(...), options = empty_named_list()) {
+ assert_that(is.string(function_name))
+ assert_that(is.list(options), !is.null(names(options)))
+
+ datum_classes <- c("Array", "ChunkedArray", "RecordBatch", "Table", "Scalar")
+ valid_args <- map_lgl(args, ~ inherits(., datum_classes))
+ if (!all(valid_args)) {
+ # Report only the first invalid argument
+ first_bad <- min(which(!valid_args))
+ stop(
+ "Argument ", first_bad, " is of class ", head(class(args[[first_bad]]), 1),
+ " but it must be one of ", oxford_paste(datum_classes, "or"),
+ call. = FALSE
+ )
+ }
+
+ compute__CallFunction(function_name, args, options)
+}
+
+#' List available Arrow C++ compute functions
+#'
+#' This function lists the names of all available Arrow C++ library compute functions.
+#' These can be called by passing to [call_function()], or they can be
+#' called by name with an `arrow_` prefix inside a `dplyr` verb.
+#'
+#' The resulting list describes the capabilities of your `arrow` build.
+#' Some functions, such as string and regular expression functions,
+#' require optional build-time C++ dependencies. If your `arrow` package
+#' was not compiled with those features enabled, those functions will
+#' not appear in this list.
+#'
+#' Some functions take options that need to be passed when calling them
+#' (in a list called `options`). These options require custom handling
+#' in C++; many functions already have that handling set up but not all do.
+#' If you encounter one that needs special handling for options, please
+#' report an issue.
+#'
+#' Note that this list does *not* enumerate all of the R bindings for these functions.
+#' The package includes Arrow methods for many base R functions that can
+#' be called directly on Arrow objects, as well as some tidyverse-flavored versions
+#' available inside `dplyr` verbs.
+#'
+#' @param pattern Optional regular expression to filter the function list
+#' @param ... Additional parameters passed to `grep()`
+#' @return A character vector of available Arrow C++ function names
+#' @examplesIf arrow_available()
+#' available_funcs <- list_compute_functions()
+#' utf8_funcs <- list_compute_functions(pattern = "^UTF8", ignore.case = TRUE)
+#' @export
+list_compute_functions <- function(pattern = NULL, ...) {
+ funcs <- compute__GetFunctionNames()
+ if (!is.null(pattern)) {
+ funcs <- grep(pattern, funcs, value = TRUE, ...)
+ }
+ # TODO: Filtering of hash funcs will already happen in C++ with ARROW-13943
+ funcs <- grep(
+ "^hash_",
+ funcs,
+ value = TRUE,
+ invert = TRUE
+ )
+ funcs
+}
+
+#' @export
+sum.ArrowDatum <- function(..., na.rm = FALSE) {
+ scalar_aggregate("sum", ..., na.rm = na.rm)
+}
+
+#' @export
+mean.ArrowDatum <- function(..., na.rm = FALSE) {
+ scalar_aggregate("mean", ..., na.rm = na.rm)
+}
+
+#' @export
+min.ArrowDatum <- function(..., na.rm = FALSE) {
+ scalar_aggregate("min_max", ..., na.rm = na.rm)$GetFieldByName("min")
+}
+
+#' @export
+max.ArrowDatum <- function(..., na.rm = FALSE) {
+ scalar_aggregate("min_max", ..., na.rm = na.rm)$GetFieldByName("max")
+}
+
+scalar_aggregate <- function(FUN, ..., na.rm = FALSE, min_count = 0L) {
+ a <- collect_arrays_from_dots(list(...))
+ if (FUN == "min_max" && na.rm && a$null_count == length(a)) {
+    # If na.rm == TRUE and all values in the array are NA, R returns
+    # Inf/-Inf, which are type double. Since Arrow is type-stable
+    # and does not do that, we handle this special case here.
+    Array$create(data.frame(min = Inf, max = -Inf))
+ } else {
+ call_function(FUN, a, options = list(skip_nulls = na.rm, min_count = min_count))
+ }
+}
+
+collect_arrays_from_dots <- function(dots) {
+ # Given a list that may contain both Arrays and ChunkedArrays,
+ # return a single ChunkedArray containing all of those chunks
+ # (may return a regular Array if there is only one element in dots)
+ # If there is only one element and it is a scalar, it returns the scalar
+ if (length(dots) == 1) {
+ return(dots[[1]])
+ }
+
+ assert_that(all(map_lgl(dots, is.Array)))
+ arrays <- unlist(lapply(dots, function(x) {
+ if (inherits(x, "ChunkedArray")) {
+ x$chunks
+ } else {
+ x
+ }
+ }))
+ ChunkedArray$create(!!!arrays)
+}
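+
+# Illustrative sketch: the ArrowDatum aggregate methods (sum, mean, min, max,
+# any, all) accept multiple Arrow arrays in `...`; scalar_aggregate() combines
+# them into a single ChunkedArray via collect_arrays_from_dots() before calling
+# the compute function, e.g.
+#   sum(Array$create(1:3), ChunkedArray$create(4:6), na.rm = TRUE)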
+
+#' @export
+quantile.ArrowDatum <- function(x,
+ probs = seq(0, 1, 0.25),
+ na.rm = FALSE,
+ type = 7,
+ interpolation = c("linear", "lower", "higher", "nearest", "midpoint"),
+ ...) {
+ if (inherits(x, "Scalar")) x <- Array$create(x)
+ assert_is(probs, c("numeric", "integer"))
+ assert_that(length(probs) > 0)
+ assert_that(all(probs >= 0 & probs <= 1))
+ if (!na.rm && x$null_count > 0) {
+ stop("Missing values not allowed if 'na.rm' is FALSE", call. = FALSE)
+ }
+ if (type != 7) {
+ stop(
+ "Argument `type` not supported in Arrow. To control the quantile ",
+ "interpolation algorithm, set argument `interpolation` to one of: ",
+ "\"linear\" (the default), \"lower\", \"higher\", \"nearest\", or ",
+ "\"midpoint\".",
+ call. = FALSE
+ )
+ }
+ interpolation <- QuantileInterpolation[[toupper(match.arg(interpolation))]]
+ out <- call_function("quantile", x, options = list(q = probs, interpolation = interpolation))
+ if (length(out) == 0) {
+ # When there are no non-missing values in the data, the Arrow quantile
+ # function returns an empty Array, but for consistency with the R quantile
+ # function, we want an Array of NA_real_ with the same length as probs
+ out <- Array$create(rep(NA_real_, length(probs)))
+ }
+ out
+}
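+
+# Illustrative sketch: select the interpolation algorithm with the
+# `interpolation` argument rather than base R's `type`, e.g.
+#   quantile(Array$create(c(1, 2, 3, 4)), probs = 0.5, interpolation = "midpoint")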
+
+#' @export
+median.ArrowDatum <- function(x, na.rm = FALSE, ...) {
+ if (!na.rm && x$null_count > 0) {
+ Scalar$create(NA_real_)
+ } else {
+ Scalar$create(quantile(x, probs = 0.5, na.rm = TRUE, ...))
+ }
+}
+
+#' @export
+unique.ArrowDatum <- function(x, incomparables = FALSE, ...) {
+ call_function("unique", x)
+}
+
+#' @export
+any.ArrowDatum <- function(..., na.rm = FALSE) {
+ scalar_aggregate("any", ..., na.rm = na.rm)
+}
+
+#' @export
+all.ArrowDatum <- function(..., na.rm = FALSE) {
+ scalar_aggregate("all", ..., na.rm = na.rm)
+}
+
+#' `match` and `%in%` for Arrow objects
+#'
+#' `base::match()` is not a generic, so we can't just define Arrow methods for
+#' it. This function exposes the analogous functions in the Arrow C++ library.
+#'
+#' @param x `Scalar`, `Array` or `ChunkedArray`
+#' @param table `Scalar`, `Array`, `ChunkedArray`, or R vector lookup table.
+#' @param ... additional arguments, ignored
+#' @return `match_arrow()` returns an `int32`-type Arrow object of the same length
+#' and class as `x`, containing the (0-based) indexes into `table`. `is_in()` returns a
+#' `boolean`-type Arrow object of the same length and class as `x`, with values indicating,
+#' for each element of `x`, whether it is present in `table`.
+#' @examplesIf arrow_available()
+#' # note that the returned value is 0-indexed
+#' cars_tbl <- arrow_table(name = rownames(mtcars), mtcars)
+#' match_arrow(Scalar$create("Mazda RX4 Wag"), cars_tbl$name)
+#'
+#' is_in(Array$create("Mazda RX4 Wag"), cars_tbl$name)
+#'
+#' # Although there are multiple matches, you are returned the index of the first
+#' # match, as with the base R equivalent
+#' match(4, mtcars$cyl) # 1-indexed
+#' match_arrow(Scalar$create(4), cars_tbl$cyl) # 0-indexed
+#'
+#' # If `x` contains multiple values, you are returned the indices of the first
+#' # match for each value.
+#' match(c(4, 6, 8), mtcars$cyl)
+#' match_arrow(Array$create(c(4, 6, 8)), cars_tbl$cyl)
+#'
+#' # Return type matches type of `x`
+#' is_in(c(4, 6, 8), mtcars$cyl) # returns vector
+#' is_in(Scalar$create(4), mtcars$cyl) # returns Scalar
+#' is_in(Array$create(c(4, 6, 8)), cars_tbl$cyl) # returns Array
+#' is_in(ChunkedArray$create(c(4, 6), 8), cars_tbl$cyl) # returns ChunkedArray
+#' @export
+match_arrow <- function(x, table, ...) {
+ if (!inherits(x, "ArrowDatum")) {
+ x <- Array$create(x)
+ }
+
+ if (!inherits(table, c("Array", "ChunkedArray"))) {
+ table <- Array$create(table)
+ }
+ call_function("index_in_meta_binary", x, table)
+}
+
+#' @rdname match_arrow
+#' @export
+is_in <- function(x, table, ...) {
+ if (!inherits(x, "ArrowDatum")) {
+ x <- Array$create(x)
+ }
+
+ if (!inherits(table, c("Array", "DictionaryArray", "ChunkedArray"))) {
+ table <- Array$create(table)
+ }
+ call_function("is_in_meta_binary", x, table)
+}
+
+#' `table` for Arrow objects
+#'
+#' This function tabulates the values in the array and returns a table of counts.
+#' @param x `Array` or `ChunkedArray`
+#' @return A `StructArray` containing two fields: "values" (same type as `x`) and
+#' "counts" (`Int64`).
+#' @examplesIf arrow_available()
+#' cyl_vals <- Array$create(mtcars$cyl)
+#' counts <- value_counts(cyl_vals)
+#' @export
+value_counts <- function(x) {
+ call_function("value_counts", x)
+}
+
+#' Cast options
+#'
+#' @param safe logical: enforce safe conversion? Default `TRUE`
+#' @param ... additional cast options, such as `allow_int_overflow`,
+#' `allow_time_truncate`, and `allow_float_truncate`, which are set to `!safe`
+#' by default
+#' @return A list
+#' @export
+#' @keywords internal
+cast_options <- function(safe = TRUE, ...) {
+ opts <- list(
+ allow_int_overflow = !safe,
+ allow_time_truncate = !safe,
+ allow_float_truncate = !safe
+ )
+ modifyList(opts, list(...))
+}
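+
+# Illustrative sketch: safe = FALSE relaxes all three checks unless a specific
+# option is overridden through `...`
+#   cast_options(safe = FALSE)
+#   cast_options(safe = FALSE, allow_float_truncate = FALSE)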
diff --git a/src/arrow/r/R/config.R b/src/arrow/r/R/config.R
new file mode 100644
index 000000000..af07ad9a9
--- /dev/null
+++ b/src/arrow/r/R/config.R
@@ -0,0 +1,44 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+#' Manage the global CPU thread pool in libarrow
+#'
+#' @export
+cpu_count <- function() {
+ GetCpuThreadPoolCapacity()
+}
+
+#' @rdname cpu_count
+#' @param num_threads integer: New number of threads for thread pool
+#' @export
+set_cpu_count <- function(num_threads) {
+ SetCpuThreadPoolCapacity(as.integer(num_threads))
+}
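+
+# Illustrative sketch: query and temporarily lower the CPU thread pool size
+#   old <- cpu_count()
+#   set_cpu_count(2L)
+#   set_cpu_count(old)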
+
+#' Manage the global I/O thread pool in libarrow
+#'
+#' @export
+io_thread_count <- function() {
+ GetIOThreadPoolCapacity()
+}
+
+#' @rdname io_thread_count
+#' @param num_threads integer: New number of threads for thread pool
+#' @export
+set_io_thread_count <- function(num_threads) {
+ SetIOThreadPoolCapacity(as.integer(num_threads))
+}
diff --git a/src/arrow/r/R/csv.R b/src/arrow/r/R/csv.R
new file mode 100644
index 000000000..ee890578f
--- /dev/null
+++ b/src/arrow/r/R/csv.R
@@ -0,0 +1,644 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+#' Read a CSV or other delimited file with Arrow
+#'
+#' These functions use the Arrow C++ CSV reader to read data into a `data.frame`.
+#' Arrow C++ options have been mapped to argument names that follow those of
+#' `readr::read_delim()`, and `col_select` was inspired by `vroom::vroom()`.
+#'
+#' `read_csv_arrow()` and `read_tsv_arrow()` are wrappers around
+#' `read_delim_arrow()` that specify a delimiter.
+#'
+#' Note that not all `readr` options are currently implemented here. Please file
+#' an issue if you encounter one that `arrow` should support.
+#'
+#' If you need to control Arrow-specific reader parameters that don't have an
+#' equivalent in `readr::read_csv()`, you can either provide them in the
+#' `parse_options`, `convert_options`, or `read_options` arguments, or you can
+#' use [CsvTableReader] directly for lower-level access.
+#'
+#' @section Specifying column types and names:
+#'
+#' By default, the CSV reader will infer the column names and data types from the file, but there
+#' are a few ways you can specify them directly.
+#'
+#' One way is to provide an Arrow [Schema] in the `schema` argument,
+#' which is an ordered map of column name to type.
+#' When provided, it satisfies both the `col_names` and `col_types` arguments.
+#' This is good if you know all of this information up front.
+#'
+#' You can also pass a `Schema` to the `col_types` argument. If you do this,
+#' column names will still be inferred from the file unless you also specify
+#' `col_names`. In either case, the column names in the `Schema` must match the
+#' data's column names, whether they are explicitly provided or inferred. That
+#' said, this `Schema` does not have to reference all columns: those omitted
+#' will have their types inferred.
+#'
+#' Alternatively, you can declare column types by providing the compact string representation
+#' that `readr` uses to the `col_types` argument. This means you provide a
+#' single string, one character per column, where the characters map to Arrow
+#' types analogously to the `readr` type mapping:
+#'
+#' * "c": `utf8()`
+#' * "i": `int32()`
+#' * "n": `float64()`
+#' * "d": `float64()`
+#' * "l": `bool()`
+#' * "f": `dictionary()`
+#' * "D": `date32()`
+#' * "T": `timestamp()`
+#' * "t": `time32()`
+#' * "_": `null()`
+#' * "-": `null()`
+#' * "?": infer the type from the data
+#'
+#' If you use the compact string representation for `col_types`, you must also
+#' specify `col_names`.
+#'
+#' Regardless of how types are specified, all columns with a `null()` type will
+#' be dropped.
+#'
+#' Note that if you are specifying column names, whether by `schema` or
+#' `col_names`, and the CSV file has a header row that would otherwise be used
+#' to identify column names, you'll need to add `skip = 1` to skip that row.
+#'
+#' @param file A character file name or URI, `raw` vector, an Arrow input stream,
+#' or a `FileSystem` with path (`SubTreeFileSystem`).
+#' If a file name, a memory-mapped Arrow [InputStream] will be opened and
+#' closed when finished; compression will be detected from the file extension
+#' and handled automatically. If an input stream is provided, it will be left
+#' open.
+#' @param delim Single character used to separate fields within a record.
+#' @param quote Single character used to quote strings.
+#' @param escape_double Does the file escape quotes by doubling them?
+#' i.e. If this option is `TRUE`, the value `""""` represents
+#' a single quote, `\"`.
+#' @param escape_backslash Does the file use backslashes to escape special
+#' characters? This is more general than `escape_double` as backslashes
+#' can be used to escape the delimiter character, the quote character, or
+#' to add special characters like `\\n`.
+#' @param schema [Schema] that describes the table. If provided, it will be
+#' used to satisfy both `col_names` and `col_types`.
+#' @param col_names If `TRUE`, the first row of the input will be used as the
+#' column names and will not be included in the data frame. If `FALSE`, column
+#' names will be generated by Arrow, starting with "f0", "f1", ..., "fN".
+#' Alternatively, you can specify a character vector of column names.
+#' @param col_types A compact string representation of the column types, or
+#' `NULL` (the default) to infer types from the data.
+#' @param col_select A character vector of column names to keep, as in the
+#' "select" argument to `data.table::fread()`, or a
+#' [tidy selection specification][tidyselect::vars_select()]
+#' of columns, as used in `dplyr::select()`.
+#' @param na A character vector of strings to interpret as missing values.
+#' @param quoted_na Should missing values inside quotes be treated as missing
+#' values (the default) or strings? (Note that this is different from the
+#' Arrow C++ default for the corresponding convert option,
+#' `strings_can_be_null`.)
+#' @param skip_empty_rows Should blank rows be ignored altogether? If
+#' `TRUE`, blank rows will not be represented at all. If `FALSE`, they will be
+#' filled with missing values.
+#' @param skip Number of lines to skip before reading data.
+#' @param timestamp_parsers User-defined timestamp parsers. If more than one
+#' parser is specified, the CSV conversion logic will try parsing values
+#' starting from the beginning of this vector. Possible values are:
+#' - `NULL`: the default, which uses the ISO-8601 parser
+#' - a character vector of [strptime][base::strptime()] parse strings
+#' - a list of [TimestampParser] objects
+#' @param parse_options see [file reader options][CsvReadOptions].
+#' If given, this overrides any
+#' parsing options provided in other arguments (e.g. `delim`, `quote`, etc.).
+#' @param convert_options see [file reader options][CsvReadOptions]
+#' @param read_options see [file reader options][CsvReadOptions]
+#' @param as_data_frame Should the function return a `data.frame` (default) or
+#' an Arrow [Table]?
+#'
+#' @return A `data.frame`, or a Table if `as_data_frame = FALSE`.
+#' @export
+#' @examplesIf arrow_available()
+#' tf <- tempfile()
+#' on.exit(unlink(tf))
+#' write.csv(mtcars, file = tf)
+#' df <- read_csv_arrow(tf)
+#' dim(df)
+#' # Can select columns
+#' df <- read_csv_arrow(tf, col_select = starts_with("d"))
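+#'
+#' # A minimal sketch of the compact string representation for `col_types`;
+#' # the compact form requires `col_names`, and `skip = 1` skips the header row
+#' # already present in the file
+#' tf2 <- tempfile()
+#' writeLines(c("x,y,z", "1,a,TRUE", "2,b,FALSE"), tf2)
+#' read_csv_arrow(tf2, col_types = "icl", col_names = c("x", "y", "z"), skip = 1)
+#' unlink(tf2)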
+read_delim_arrow <- function(file,
+ delim = ",",
+ quote = '"',
+ escape_double = TRUE,
+ escape_backslash = FALSE,
+ schema = NULL,
+ col_names = TRUE,
+ col_types = NULL,
+ col_select = NULL,
+ na = c("", "NA"),
+ quoted_na = TRUE,
+ skip_empty_rows = TRUE,
+ skip = 0L,
+ parse_options = NULL,
+ convert_options = NULL,
+ read_options = NULL,
+ as_data_frame = TRUE,
+ timestamp_parsers = NULL) {
+ if (inherits(schema, "Schema")) {
+ col_names <- names(schema)
+ col_types <- schema
+ }
+ if (is.null(parse_options)) {
+ parse_options <- readr_to_csv_parse_options(
+ delim,
+ quote,
+ escape_double,
+ escape_backslash,
+ skip_empty_rows
+ )
+ }
+ if (is.null(read_options)) {
+ read_options <- readr_to_csv_read_options(skip, col_names)
+ }
+ if (is.null(convert_options)) {
+ convert_options <- readr_to_csv_convert_options(
+ na,
+ quoted_na,
+ col_types = col_types,
+ col_names = read_options$column_names,
+ timestamp_parsers = timestamp_parsers
+ )
+ }
+
+ if (!inherits(file, "InputStream")) {
+ file <- make_readable_file(file)
+ on.exit(file$close())
+ }
+ reader <- CsvTableReader$create(
+ file,
+ read_options = read_options,
+ parse_options = parse_options,
+ convert_options = convert_options
+ )
+
+ tab <- reader$Read()
+
+ # TODO: move this into convert_options using include_columns
+ col_select <- enquo(col_select)
+ if (!quo_is_null(col_select)) {
+ tab <- tab[vars_select(names(tab), !!col_select)]
+ }
+
+ if (isTRUE(as_data_frame)) {
+ tab <- as.data.frame(tab)
+ }
+
+ tab
+}
+
+#' @rdname read_delim_arrow
+#' @export
+read_csv_arrow <- function(file,
+ quote = '"',
+ escape_double = TRUE,
+ escape_backslash = FALSE,
+ schema = NULL,
+ col_names = TRUE,
+ col_types = NULL,
+ col_select = NULL,
+ na = c("", "NA"),
+ quoted_na = TRUE,
+ skip_empty_rows = TRUE,
+ skip = 0L,
+ parse_options = NULL,
+ convert_options = NULL,
+ read_options = NULL,
+ as_data_frame = TRUE,
+ timestamp_parsers = NULL) {
+ mc <- match.call()
+ mc$delim <- ","
+ mc[[1]] <- get("read_delim_arrow", envir = asNamespace("arrow"))
+ eval.parent(mc)
+}
+
+#' @rdname read_delim_arrow
+#' @export
+read_tsv_arrow <- function(file,
+ quote = '"',
+ escape_double = TRUE,
+ escape_backslash = FALSE,
+ schema = NULL,
+ col_names = TRUE,
+ col_types = NULL,
+ col_select = NULL,
+ na = c("", "NA"),
+ quoted_na = TRUE,
+ skip_empty_rows = TRUE,
+ skip = 0L,
+ parse_options = NULL,
+ convert_options = NULL,
+ read_options = NULL,
+ as_data_frame = TRUE,
+ timestamp_parsers = NULL) {
+ mc <- match.call()
+ mc$delim <- "\t"
+ mc[[1]] <- get("read_delim_arrow", envir = asNamespace("arrow"))
+ eval.parent(mc)
+}
+
+#' @title Arrow CSV and JSON table reader classes
+#' @rdname CsvTableReader
+#' @name CsvTableReader
+#' @docType class
+#' @usage NULL
+#' @format NULL
+#' @description `CsvTableReader` and `JsonTableReader` wrap the Arrow C++ CSV
+#' and JSON table readers. See their usage in [read_csv_arrow()] and
+#' [read_json_arrow()], respectively.
+#'
+#' @section Factory:
+#'
+#' The `CsvTableReader$create()` and `JsonTableReader$create()` factory methods
+#' take the following arguments:
+#'
+#' - `file` An Arrow [InputStream]
+#' - `convert_options` (CSV only), `parse_options`, `read_options`: see
+#' [CsvReadOptions]
+#' - `...` additional parameters.
+#'
+#' @section Methods:
+#'
+#' - `$Read()`: returns an Arrow Table.
+#'
+#' @include arrow-package.R
+#' @export
+CsvTableReader <- R6Class("CsvTableReader",
+ inherit = ArrowObject,
+ public = list(
+ Read = function() csv___TableReader__Read(self)
+ )
+)
+CsvTableReader$create <- function(file,
+ read_options = CsvReadOptions$create(),
+ parse_options = CsvParseOptions$create(),
+ convert_options = CsvConvertOptions$create(),
+ ...) {
+ assert_is(file, "InputStream")
+ csv___TableReader__Make(file, read_options, parse_options, convert_options)
+}
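+
+# Illustrative sketch (the file path is hypothetical; ReadableFile is used here
+# to open it): read a CSV at this lower level, bypassing read_csv_arrow()
+#   reader <- CsvTableReader$create(ReadableFile$create("data.csv"))
+#   tab <- reader$Read()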
+
+#' @title File reader options
+#' @rdname CsvReadOptions
+#' @name CsvReadOptions
+#' @docType class
+#' @usage NULL
+#' @format NULL
+#' @description `CsvReadOptions`, `CsvParseOptions`, `CsvConvertOptions`,
+#' `JsonReadOptions`, `JsonParseOptions`, and `TimestampParser` are containers for various
+#' file reading options. See their usage in [read_csv_arrow()] and
+#' [read_json_arrow()], respectively.
+#'
+#' @section Factory:
+#'
+#' The `CsvReadOptions$create()` and `JsonReadOptions$create()` factory methods
+#' take the following arguments:
+#'
+#' - `use_threads` Whether to use the global CPU thread pool
+#' - `block_size` Block size we request from the IO layer; also determines
+#' the size of chunks when use_threads is `TRUE`. NB: if `FALSE`, JSON input
+#' must end with an empty line.
+#'
+#' `CsvReadOptions$create()` further accepts these additional arguments:
+#'
+#' - `skip_rows` Number of lines to skip before reading data (default 0)
+#' - `column_names` Character vector to supply column names. If length-0
+#' (the default), the first non-skipped row will be parsed to generate column
+#' names, unless `autogenerate_column_names` is `TRUE`.
+#' - `autogenerate_column_names` Logical: generate column names instead of
+#' using the first non-skipped row (the default)? If `TRUE`, column names will
+#' be "f0", "f1", ..., "fN".
+#'
+#' `CsvParseOptions$create()` takes the following arguments:
+#'
+#' - `delimiter` Field delimiting character (default `","`)
+#' - `quoting` Logical: are strings quoted? (default `TRUE`)
+#' - `quote_char` Quoting character, if `quoting` is `TRUE`
+#' - `double_quote` Logical: are quotes inside values double-quoted? (default `TRUE`)
+#' - `escaping` Logical: whether escaping is used (default `FALSE`)
+#' - `escape_char` Escaping character, if `escaping` is `TRUE`
+#' - `newlines_in_values` Logical: are values allowed to contain CR (`0x0d`)
+#' and LF (`0x0a`) characters? (default `FALSE`)
+#' - `ignore_empty_lines` Logical: should empty lines be ignored (default) or
+#' generate a row of missing values (if `FALSE`)?
+#'
+#' `JsonParseOptions$create()` accepts only the `newlines_in_values` argument.
+#'
+#' `CsvConvertOptions$create()` takes the following arguments:
+#'
+#' - `check_utf8` Logical: check UTF8 validity of string columns? (default `TRUE`)
+#' - `null_values` character vector of recognized spellings for null values.
+#' Analogous to the `na.strings` argument to
+#' [`read.csv()`][utils::read.csv()] or `na` in `readr::read_csv()`.
+#' - `strings_can_be_null` Logical: can string / binary columns have
+#' null values? Similar to the `quoted_na` argument to `readr::read_csv()`.
+#' (default `FALSE`)
+#' - `true_values` character vector of recognized spellings for `TRUE` values
+#' - `false_values` character vector of recognized spellings for `FALSE` values
+#' - `col_types` A `Schema` or `NULL` to infer types
+#' - `auto_dict_encode` Logical: Whether to try to automatically
+#' dictionary-encode string / binary data (think `stringsAsFactors`). Default `FALSE`.
+#' This setting is ignored for non-inferred columns (those in `col_types`).
+#' - `auto_dict_max_cardinality` If `auto_dict_encode`, string/binary columns
+#' are dictionary-encoded up to this number of unique values (default 50),
+#' after which it switches to regular encoding.
+#' - `include_columns` If non-empty, indicates the names of columns from the
+#' CSV file that should be actually read and converted (in the vector's order).
+#' - `include_missing_columns` Logical: if `include_columns` is provided, should
+#' columns named in it but not found in the data be included as a column of
+#' type `null()`? The default (`FALSE`) means that the reader will instead
+#' raise an error.
+#' - `timestamp_parsers` User-defined timestamp parsers. If more than one
+#' parser is specified, the CSV conversion logic will try parsing values
+#' starting from the beginning of this vector. Possible values are
+#' (a) `NULL`, the default, which uses the ISO-8601 parser;
+#' (b) a character vector of [strptime][base::strptime()] parse strings; or
+#' (c) a list of [TimestampParser] objects.
+#'
+#' `TimestampParser$create()` takes an optional `format` string argument.
+#' See [`strptime()`][base::strptime()] for example syntax.
+#' The default is to use an ISO-8601 format parser.
+#'
+#' The `CsvWriteOptions$create()` factory method takes the following arguments:
+#' - `include_header` Whether to write an initial header line with column names
+#' - `batch_size` Maximum number of rows processed at a time. Default is 1024.
+#'
+#' @section Active bindings:
+#'
+#' - `column_names`: from `CsvReadOptions`
+#'
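+#' @examplesIf arrow_available()
+#' # A minimal sketch: build option objects directly and pass them to
+#' # read_csv_arrow() through its parse_options/convert_options arguments
+#' tf <- tempfile()
+#' writeLines(c("x;y", "1;low", "2;high"), tf)
+#' read_csv_arrow(tf,
+#'   parse_options = CsvParseOptions$create(delimiter = ";"),
+#'   convert_options = CsvConvertOptions$create(auto_dict_encode = TRUE)
+#' )
+#' unlink(tf)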
+#' @export
+CsvReadOptions <- R6Class("CsvReadOptions",
+ inherit = ArrowObject,
+ active = list(
+ column_names = function() csv___ReadOptions__column_names(self)
+ )
+)
+CsvReadOptions$create <- function(use_threads = option_use_threads(),
+ block_size = 1048576L,
+ skip_rows = 0L,
+ column_names = character(0),
+ autogenerate_column_names = FALSE) {
+ csv___ReadOptions__initialize(
+ list(
+ use_threads = use_threads,
+ block_size = block_size,
+ skip_rows = skip_rows,
+ column_names = column_names,
+ autogenerate_column_names = autogenerate_column_names
+ )
+ )
+}
+
+#' @rdname CsvReadOptions
+#' @export
+CsvWriteOptions <- R6Class("CsvWriteOptions", inherit = ArrowObject)
+CsvWriteOptions$create <- function(include_header = TRUE, batch_size = 1024L) {
+ assert_that(is_integerish(batch_size, n = 1, finite = TRUE), batch_size > 0)
+ csv___WriteOptions__initialize(
+ list(
+ include_header = include_header,
+ batch_size = as.integer(batch_size)
+ )
+ )
+}
+
+readr_to_csv_read_options <- function(skip, col_names, col_types) {
+ if (isTRUE(col_names)) {
+ # C++ default to parse is 0-length string array
+ col_names <- character(0)
+ }
+ if (identical(col_names, FALSE)) {
+ CsvReadOptions$create(skip_rows = skip, autogenerate_column_names = TRUE)
+ } else {
+ CsvReadOptions$create(skip_rows = skip, column_names = col_names)
+ }
+}
+
+#' @rdname CsvReadOptions
+#' @usage NULL
+#' @format NULL
+#' @docType class
+#' @export
+CsvParseOptions <- R6Class("CsvParseOptions", inherit = ArrowObject)
+CsvParseOptions$create <- function(delimiter = ",",
+ quoting = TRUE,
+ quote_char = '"',
+ double_quote = TRUE,
+ escaping = FALSE,
+ escape_char = "\\",
+ newlines_in_values = FALSE,
+ ignore_empty_lines = TRUE) {
+ csv___ParseOptions__initialize(
+ list(
+ delimiter = delimiter,
+ quoting = quoting,
+ quote_char = quote_char,
+ double_quote = double_quote,
+ escaping = escaping,
+ escape_char = escape_char,
+ newlines_in_values = newlines_in_values,
+ ignore_empty_lines = ignore_empty_lines
+ )
+ )
+}
+
+readr_to_csv_parse_options <- function(delim = ",",
+ quote = '"',
+ escape_double = TRUE,
+ escape_backslash = FALSE,
+ skip_empty_rows = TRUE) {
+ # This function translates from the readr argument list to the arrow arg names
+ # TODO: validate inputs
+ CsvParseOptions$create(
+ delimiter = delim,
+ quoting = nzchar(quote),
+ quote_char = quote,
+ double_quote = escape_double,
+ escaping = escape_backslash,
+ escape_char = "\\",
+ newlines_in_values = escape_backslash,
+ ignore_empty_lines = skip_empty_rows
+ )
+}
+
+#' @rdname CsvReadOptions
+#' @usage NULL
+#' @format NULL
+#' @docType class
+#' @export
+TimestampParser <- R6Class("TimestampParser",
+ inherit = ArrowObject,
+ public = list(
+ kind = function() TimestampParser__kind(self),
+ format = function() TimestampParser__format(self)
+ )
+)
+TimestampParser$create <- function(format = NULL) {
+ if (is.null(format)) {
+ TimestampParser__MakeISO8601()
+ } else {
+ TimestampParser__MakeStrptime(format)
+ }
+}
+
+#' @rdname CsvReadOptions
+#' @usage NULL
+#' @format NULL
+#' @docType class
+#' @export
+CsvConvertOptions <- R6Class("CsvConvertOptions", inherit = ArrowObject)
+CsvConvertOptions$create <- function(check_utf8 = TRUE,
+ null_values = c("", "NA"),
+ true_values = c("T", "true", "TRUE"),
+ false_values = c("F", "false", "FALSE"),
+ strings_can_be_null = FALSE,
+ col_types = NULL,
+ auto_dict_encode = FALSE,
+ auto_dict_max_cardinality = 50L,
+ include_columns = character(),
+ include_missing_columns = FALSE,
+ timestamp_parsers = NULL) {
+ if (!is.null(col_types) && !inherits(col_types, "Schema")) {
+ abort(c(
+ "Unsupported `col_types` specification.",
+ i = "`col_types` must be NULL, or a <Schema>."
+ ))
+ }
+
+ csv___ConvertOptions__initialize(
+ list(
+ check_utf8 = check_utf8,
+ null_values = null_values,
+ strings_can_be_null = strings_can_be_null,
+ col_types = col_types,
+ true_values = true_values,
+ false_values = false_values,
+ auto_dict_encode = auto_dict_encode,
+ auto_dict_max_cardinality = auto_dict_max_cardinality,
+ include_columns = include_columns,
+ include_missing_columns = include_missing_columns,
+ timestamp_parsers = timestamp_parsers
+ )
+ )
+}
+
+readr_to_csv_convert_options <- function(na,
+ quoted_na,
+ col_types = NULL,
+ col_names = NULL,
+ timestamp_parsers = NULL) {
+ include_columns <- character()
+
+ if (is.character(col_types)) {
+ if (length(col_types) != 1L) {
+ abort("`col_types` is a character vector that is not of size 1")
+ }
+ n <- nchar(col_types)
+ specs <- substring(col_types, seq_len(n), seq_len(n))
+ if (!is_bare_character(col_names, n)) {
+ abort("Compact specification for `col_types` requires `col_names`")
+ }
+
+ col_types <- set_names(nm = col_names, map2(specs, col_names, ~ {
+ switch(.x,
+ "c" = utf8(),
+ "i" = int32(),
+ "n" = float64(),
+ "d" = float64(),
+ "l" = bool(),
+ "f" = dictionary(),
+ "D" = date32(),
+ "T" = timestamp(),
+ "t" = time32(),
+ "_" = null(),
+ "-" = null(),
+ "?" = NULL,
+        abort(paste0("Unsupported compact specification: '", .x, "' for column '", .y, "'"))
+ )
+ }))
+ # To "guess" types, omit them from col_types
+ col_types <- keep(col_types, ~ !is.null(.x))
+ col_types <- schema(!!!col_types)
+ }
+
+ if (!is.null(col_types)) {
+ assert_is(col_types, "Schema")
+ # If any columns are null(), drop them
+ # (by specifying the other columns in include_columns)
+ nulls <- map_lgl(col_types$fields, ~ .$type$Equals(null()))
+ if (any(nulls)) {
+ include_columns <- setdiff(col_names, names(col_types)[nulls])
+ }
+ }
+ CsvConvertOptions$create(
+ null_values = na,
+ strings_can_be_null = quoted_na,
+ col_types = col_types,
+ timestamp_parsers = timestamp_parsers,
+ include_columns = include_columns
+ )
+}
+
+#' Write CSV file to disk
+#'
+#' @param x `data.frame`, [RecordBatch], or [Table]
+#' @param sink A string file path, URI, or [OutputStream], or path in a file
+#' system (`SubTreeFileSystem`)
+#' @param include_header Whether to write an initial header line with column names
+#' @param batch_size Maximum number of rows processed at a time. Default is 1024.
+#'
+#' @return The input `x`, invisibly. Note that if `sink` is an [OutputStream],
+#' the stream will be left open.
+#' @export
+#' @examplesIf arrow_available()
+#' tf <- tempfile()
+#' on.exit(unlink(tf))
+#' write_csv_arrow(mtcars, tf)
+#' @include arrow-package.R
+write_csv_arrow <- function(x,
+ sink,
+ include_header = TRUE,
+ batch_size = 1024L) {
+ write_options <- CsvWriteOptions$create(include_header, batch_size)
+
+ x_out <- x
+ if (is.data.frame(x)) {
+ x <- Table$create(x)
+ }
+
+ assert_that(is_writable_table(x))
+
+ if (!inherits(sink, "OutputStream")) {
+ sink <- make_output_stream(sink)
+ on.exit(sink$close())
+ }
+
+ if (inherits(x, "RecordBatch")) {
+ csv___WriteCSV__RecordBatch(x, write_options, sink)
+ } else if (inherits(x, "Table")) {
+ csv___WriteCSV__Table(x, write_options, sink)
+ }
+
+ invisible(x_out)
+}
diff --git a/src/arrow/r/R/dataset-factory.R b/src/arrow/r/R/dataset-factory.R
new file mode 100644
index 000000000..c56a6b181
--- /dev/null
+++ b/src/arrow/r/R/dataset-factory.R
@@ -0,0 +1,170 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+#' @include dataset.R
+
+#' @usage NULL
+#' @format NULL
+#' @rdname Dataset
+#' @export
+DatasetFactory <- R6Class("DatasetFactory",
+ inherit = ArrowObject,
+ public = list(
+ Finish = function(schema = NULL, unify_schemas = FALSE) {
+ if (is.null(schema)) {
+ dataset___DatasetFactory__Finish1(self, unify_schemas)
+ } else {
+ assert_is(schema, "Schema")
+ dataset___DatasetFactory__Finish2(self, schema)
+ }
+ },
+ Inspect = function(unify_schemas = FALSE) {
+ dataset___DatasetFactory__Inspect(self, unify_schemas)
+ }
+ )
+)
+DatasetFactory$create <- function(x,
+ filesystem = NULL,
+ format = c("parquet", "arrow", "ipc", "feather", "csv", "tsv", "text"),
+ partitioning = NULL,
+ ...) {
+ if (is_list_of(x, "DatasetFactory")) {
+ return(dataset___UnionDatasetFactory__Make(x))
+ }
+
+ if (is.character(format)) {
+ format <- FileFormat$create(match.arg(format), ...)
+ } else {
+ assert_is(format, "FileFormat")
+ }
+
+ path_and_fs <- get_paths_and_filesystem(x, filesystem)
+ info <- path_and_fs$fs$GetFileInfo(path_and_fs$path)
+
+ if (length(info) > 1 || info[[1]]$type == FileType$File) {
+ # x looks like a vector of one or more file paths (not a directory path)
+ return(FileSystemDatasetFactory$create(path_and_fs$fs, NULL, path_and_fs$path, format))
+ }
+
+ if (!is.null(partitioning)) {
+ if (inherits(partitioning, "Schema")) {
+ partitioning <- DirectoryPartitioning$create(partitioning)
+ } else if (is.character(partitioning)) {
+ # These are the column/field names, and we should autodetect their types
+ partitioning <- DirectoryPartitioningFactory$create(partitioning)
+ }
+ }
+
+ selector <- FileSelector$create(path_and_fs$path, allow_not_found = FALSE, recursive = TRUE)
+
+ FileSystemDatasetFactory$create(path_and_fs$fs, selector, NULL, format, partitioning)
+}
+
+#' Create a DatasetFactory
+#'
+#' A [Dataset] can be constructed using one or more [DatasetFactory]s.
+#' This function helps you construct a `DatasetFactory` that you can pass to
+#' [open_dataset()].
+#'
+#' If you only need a single `DatasetFactory` (for example, you have a
+#' single directory containing Parquet files), you can call `open_dataset()`
+#' directly. Use `dataset_factory()` when you
+#' want to combine different directories, file systems, or file formats.
+#'
+#' @param x A string path to a directory containing data files, a vector of
+#' one or more string paths to data files, or a list of `DatasetFactory` objects
+#' whose datasets should be combined. If a list of factories is provided, it will be
+#' used to construct a `UnionDatasetFactory` and the other arguments will be
+#' ignored.
+#' @param filesystem A [FileSystem] object; if omitted, the `FileSystem` will
+#' be detected from `x`
+#' @param format A [FileFormat] object, or a string identifier of the format of
+#' the files in `x`. Currently supported values:
+#' * "parquet"
+#' * "ipc"/"arrow"/"feather", all aliases for each other; for Feather, note that
+#' only version 2 files are supported
+#' * "csv"/"text", aliases for the same thing (because comma is the default
+#'   delimiter for text files)
+#' * "tsv", equivalent to passing `format = "text", delimiter = "\t"`
+#'
+#' Default is "parquet", unless a `delimiter` is also specified, in which case
+#' it is assumed to be "text".
+#' @param partitioning One of
+#' * A `Schema`, in which case the file paths relative to `sources` will be
+#' parsed, and path segments will be matched with the schema fields. For
+#' example, `schema(year = int16(), month = int8())` would create partitions
+#' for file paths like "2019/01/file.parquet", "2019/02/file.parquet", etc.
+#' * A character vector that defines the field names corresponding to those
+#' path segments (that is, you're providing the names that would correspond
+#' to a `Schema` but the types will be autodetected)
+#' * A `HivePartitioning` or `HivePartitioningFactory`, as returned
+#' by [hive_partition()] which parses explicit or autodetected fields from
+#' Hive-style path segments
+#' * `NULL` for no partitioning
+#' @param ... Additional format-specific options, passed to
+#' `FileFormat$create()`. For CSV options, note that you can specify them either
+#' with the Arrow C++ library naming ("delimiter", "quoting", etc.) or the
+#' `readr`-style naming used in [read_csv_arrow()] ("delim", "quote", etc.).
+#' Not all `readr` options are currently supported; please file an issue if you
+#' encounter one that `arrow` should support.
+#' @return A `DatasetFactory` object. Pass this to [open_dataset()],
+#' in a list potentially with other `DatasetFactory` objects, to create
+#' a `Dataset`.
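+#' @examplesIf arrow_with_dataset()
+#' # A minimal sketch (the directory paths are hypothetical): combine two
+#' # directories of Parquet files into a single Dataset
+#' # fact1 <- dataset_factory("data/2019", format = "parquet")
+#' # fact2 <- dataset_factory("data/2020", format = "parquet")
+#' # ds <- open_dataset(list(fact1, fact2))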
+#' @export
+dataset_factory <- DatasetFactory$create
+
+#' @usage NULL
+#' @format NULL
+#' @rdname Dataset
+#' @export
+FileSystemDatasetFactory <- R6Class("FileSystemDatasetFactory",
+ inherit = DatasetFactory
+)
+FileSystemDatasetFactory$create <- function(filesystem,
+ selector = NULL,
+ paths = NULL,
+ format,
+ partitioning = NULL) {
+ assert_is(filesystem, "FileSystem")
+ is.null(selector) || assert_is(selector, "FileSelector")
+ is.null(paths) || assert_is(paths, "character")
+ assert_that(
+ xor(is.null(selector), is.null(paths)),
+ msg = "Either selector or paths must be specified"
+ )
+ assert_is(format, "FileFormat")
+ if (!is.null(paths)) {
+ assert_that(is.null(partitioning), msg = "Partitioning not supported with paths")
+ }
+
+ if (!is.null(paths)) {
+ ptr <- dataset___FileSystemDatasetFactory__Make0(filesystem, paths, format)
+ } else if (is.null(partitioning)) {
+ ptr <- dataset___FileSystemDatasetFactory__Make1(filesystem, selector, format)
+ } else if (inherits(partitioning, "PartitioningFactory")) {
+ ptr <- dataset___FileSystemDatasetFactory__Make3(filesystem, selector, format, partitioning)
+ } else if (inherits(partitioning, "Partitioning")) {
+ ptr <- dataset___FileSystemDatasetFactory__Make2(filesystem, selector, format, partitioning)
+ } else {
+ stop(
+ "Expected 'partitioning' to be NULL, PartitioningFactory or Partitioning",
+ call. = FALSE
+ )
+ }
+
+ ptr
+}
diff --git a/src/arrow/r/R/dataset-format.R b/src/arrow/r/R/dataset-format.R
new file mode 100644
index 000000000..b0b93219e
--- /dev/null
+++ b/src/arrow/r/R/dataset-format.R
@@ -0,0 +1,353 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+#' Dataset file formats
+#'
+#' @description
+#' A `FileFormat` holds information about how to read and parse the files
+#' included in a `Dataset`. There are subclasses corresponding to the supported
+#' file formats (`ParquetFileFormat` and `IpcFileFormat`).
+#'
+#' @section Factory:
+#' `FileFormat$create()` takes the following arguments:
+#' * `format`: A string identifier of the file format. Currently supported values:
+#' * "parquet"
+#' * "ipc"/"arrow"/"feather", all aliases for each other; for Feather, note that
+#' only version 2 files are supported
+#' * "csv"/"text", aliases for the same thing (because comma is the default
+#'   delimiter for text files)
+#' * "tsv", equivalent to passing `format = "text", delimiter = "\t"`
+#' * `...`: Additional format-specific options
+#'
+#' `format = "parquet"`:
+#' * `dict_columns`: Names of columns which should be read as dictionaries.
+#' * Any Parquet options from [FragmentScanOptions].
+#'
+#' `format = "text"`: see [CsvParseOptions]. Note that you can specify them either
+#' with the Arrow C++ library naming ("delimiter", "quoting", etc.) or the
+#' `readr`-style naming used in [read_csv_arrow()] ("delim", "quote", etc.).
+#' Not all `readr` options are currently supported; please file an issue if
+#' you encounter one that `arrow` should support. Also, the following options are
+#' supported. From [CsvReadOptions]:
+#' * `skip_rows`
+#' * `column_names`
+#' * `autogenerate_column_names`
+#' From [CsvFragmentScanOptions] (these values can be overridden at scan time):
+#' * `convert_options`: a [CsvConvertOptions]
+#' * `block_size`
+#'
+#' It returns the appropriate subclass of `FileFormat` (e.g. `ParquetFileFormat`)
+#' @rdname FileFormat
+#' @name FileFormat
+#' @examplesIf arrow_with_dataset() && tolower(Sys.info()[["sysname"]]) != "windows"
+#' ## Semi-colon delimited files
+#' # Set up directory for examples
+#' tf <- tempfile()
+#' dir.create(tf)
+#' on.exit(unlink(tf))
+#' write.table(mtcars, file.path(tf, "file1.txt"), sep = ";", row.names = FALSE)
+#'
+#' # Create FileFormat object
+#' format <- FileFormat$create(format = "text", delimiter = ";")
+#'
+#' open_dataset(tf, format = format)
+#' @export
+FileFormat <- R6Class("FileFormat",
+ inherit = ArrowObject,
+ active = list(
+ # @description
+ # Return the `FileFormat`'s type
+ type = function() dataset___FileFormat__type_name(self)
+ )
+)
+FileFormat$create <- function(format, schema = NULL, ...) {
+ opt_names <- names(list(...))
+ if (format %in% c("csv", "text") || any(opt_names %in% c("delim", "delimiter"))) {
+ CsvFileFormat$create(schema = schema, ...)
+ } else if (format == c("tsv")) {
+ CsvFileFormat$create(delimiter = "\t", schema = schema, ...)
+ } else if (format == "parquet") {
+ ParquetFileFormat$create(...)
+ } else if (format %in% c("ipc", "arrow", "feather")) { # These are aliases for the same thing
+ dataset___IpcFileFormat__Make()
+ } else {
+ stop("Unsupported file format: ", format, call. = FALSE)
+ }
+}
+
+#' @export
+as.character.FileFormat <- function(x, ...) {
+ out <- x$type
+ # Slight hack: special case IPC -> feather, otherwise is just the type_name
+ ifelse(out == "ipc", "feather", out)
+}
+
+#' @usage NULL
+#' @format NULL
+#' @rdname FileFormat
+#' @export
+ParquetFileFormat <- R6Class("ParquetFileFormat", inherit = FileFormat)
+ParquetFileFormat$create <- function(...,
+ dict_columns = character(0)) {
+ options <- ParquetFragmentScanOptions$create(...)
+ dataset___ParquetFileFormat__Make(options, dict_columns)
+}
+
+#' @usage NULL
+#' @format NULL
+#' @rdname FileFormat
+#' @export
+IpcFileFormat <- R6Class("IpcFileFormat", inherit = FileFormat)
+
+#' @usage NULL
+#' @format NULL
+#' @rdname FileFormat
+#' @export
+CsvFileFormat <- R6Class("CsvFileFormat", inherit = FileFormat)
+CsvFileFormat$create <- function(...,
+ opts = csv_file_format_parse_options(...),
+ convert_options = csv_file_format_convert_opts(...),
+ read_options = csv_file_format_read_opts(...)) {
+ dataset___CsvFileFormat__Make(opts, convert_options, read_options)
+}
+
+# Support both readr-style option names and Arrow C++ option names
+csv_file_format_parse_options <- function(...) {
+ opts <- list(...)
+ # Filter out arguments meant for CsvConvertOptions/CsvReadOptions
+ convert_opts <- names(formals(CsvConvertOptions$create))
+ read_opts <- names(formals(CsvReadOptions$create))
+ opts[convert_opts] <- NULL
+ opts[read_opts] <- NULL
+ opts[["schema"]] <- NULL
+ opt_names <- names(opts)
+ # Catch any readr-style options specified with full option names that are
+ # supported by read_delim_arrow() (and its wrappers) but are not yet
+ # supported here
+ unsup_readr_opts <- setdiff(
+ names(formals(read_delim_arrow)),
+ names(formals(readr_to_csv_parse_options))
+ )
+ is_unsup_opt <- opt_names %in% unsup_readr_opts
+ unsup_opts <- opt_names[is_unsup_opt]
+ if (length(unsup_opts)) {
+ stop(
+ "The following ",
+ ngettext(length(unsup_opts), "option is ", "options are "),
+ "supported in \"read_delim_arrow\" functions ",
+ "but not yet supported here: ",
+ oxford_paste(unsup_opts),
+ call. = FALSE
+ )
+ }
+ # Catch any options with full or partial names that do not match any of the
+ # recognized Arrow C++ option names or readr-style option names
+ arrow_opts <- names(formals(CsvParseOptions$create))
+ readr_opts <- names(formals(readr_to_csv_parse_options))
+ is_arrow_opt <- !is.na(pmatch(opt_names, arrow_opts))
+ is_readr_opt <- !is.na(pmatch(opt_names, readr_opts))
+ unrec_opts <- opt_names[!is_arrow_opt & !is_readr_opt]
+ if (length(unrec_opts)) {
+ stop(
+ "Unrecognized ",
+ ngettext(length(unrec_opts), "option", "options"),
+ ": ",
+ oxford_paste(unrec_opts),
+ call. = FALSE
+ )
+ }
+ # Catch options with ambiguous partial names (such as "del") that make it
+ # unclear whether the user is specifying Arrow C++ options ("delimiter") or
+ # readr-style options ("delim")
+ is_ambig_opt <- is.na(pmatch(opt_names, c(arrow_opts, readr_opts)))
+ ambig_opts <- opt_names[is_ambig_opt]
+ if (length(ambig_opts)) {
+ stop("Ambiguous ",
+ ngettext(length(ambig_opts), "option", "options"),
+ ": ",
+ oxford_paste(ambig_opts),
+ ". Use full argument names",
+ call. = FALSE
+ )
+ }
+ if (any(is_readr_opt)) {
+ # Catch cases when the user specifies a mix of Arrow C++ options and
+ # readr-style options
+ if (!all(is_readr_opt)) {
+ stop("Use either Arrow parse options or readr parse options, not both",
+ call. = FALSE
+ )
+ }
+ do.call(readr_to_csv_parse_options, opts) # all options have readr-style names
+ } else {
+ do.call(CsvParseOptions$create, opts) # all options have Arrow C++ names
+ }
+}
+
+csv_file_format_convert_opts <- function(...) {
+ opts <- list(...)
+ # Filter out arguments meant for CsvParseOptions/CsvReadOptions
+ arrow_opts <- names(formals(CsvParseOptions$create))
+ readr_opts <- names(formals(readr_to_csv_parse_options))
+ read_opts <- names(formals(CsvReadOptions$create))
+ opts[arrow_opts] <- NULL
+ opts[readr_opts] <- NULL
+ opts[read_opts] <- NULL
+ opts[["schema"]] <- NULL
+ do.call(CsvConvertOptions$create, opts)
+}
+
+csv_file_format_read_opts <- function(schema = NULL, ...) {
+ opts <- list(...)
+ # Filter out arguments meant for CsvParseOptions/CsvConvertOptions
+ arrow_opts <- names(formals(CsvParseOptions$create))
+ readr_opts <- names(formals(readr_to_csv_parse_options))
+ convert_opts <- names(formals(CsvConvertOptions$create))
+ opts[arrow_opts] <- NULL
+ opts[readr_opts] <- NULL
+ opts[convert_opts] <- NULL
+ if (!is.null(schema)) {
+ opts[["column_names"]] <- names(schema)
+ }
+ do.call(CsvReadOptions$create, opts)
+}
+
+#' Format-specific scan options
+#'
+#' @description
+#' A `FragmentScanOptions` holds options specific to a `FileFormat` and a scan
+#' operation.
+#'
+#' @section Factory:
+#' `FragmentScanOptions$create()` takes the following arguments:
+#' * `format`: A string identifier of the file format. Currently supported values:
+#' * "parquet"
+#' * "csv"/"text", aliases for the same format.
+#' * `...`: Additional format-specific options
+#'
+#' `format = "parquet"`:
+#' * `use_buffered_stream`: Read files through buffered input streams rather than
+#' loading entire row groups at once. This may be enabled
+#' to reduce memory overhead. Disabled by default.
+#' * `buffer_size`: Size of buffered stream, if enabled. Default is 8KB.
+#' * `pre_buffer`: Pre-buffer the raw Parquet data. This can improve performance
+#'   on high-latency filesystems. Enabled by default.
+#'
+#' `format = "text"`: see [CsvConvertOptions]. Note that options can only be
+#' specified with the Arrow C++ library naming. Also, "block_size" from
+#' [CsvReadOptions] may be given.
+#'
+#' It returns the appropriate subclass of `FragmentScanOptions`
+#' (e.g. `CsvFragmentScanOptions`).
+#' @rdname FragmentScanOptions
+#' @name FragmentScanOptions
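+#' @examplesIf arrow_with_dataset()
+#' # A minimal sketch: override the CSV block size at scan time
+#' FragmentScanOptions$create("text", block_size = 2048L)
+#'
+#' # Parquet options are set the same way
+#' FragmentScanOptions$create("parquet", pre_buffer = FALSE)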
+#' @export
+FragmentScanOptions <- R6Class("FragmentScanOptions",
+ inherit = ArrowObject,
+ active = list(
+ # @description
+ # Return the `FragmentScanOptions`'s type
+ type = function() dataset___FragmentScanOptions__type_name(self)
+ )
+)
+FragmentScanOptions$create <- function(format, ...) {
+ if (format %in% c("csv", "text", "tsv")) {
+ CsvFragmentScanOptions$create(...)
+ } else if (format == "parquet") {
+ ParquetFragmentScanOptions$create(...)
+ } else {
+ stop("Unsupported file format: ", format, call. = FALSE)
+ }
+}
+
+#' @export
+as.character.FragmentScanOptions <- function(x, ...) {
+ x$type
+}
+
+#' @usage NULL
+#' @format NULL
+#' @rdname FragmentScanOptions
+#' @export
+CsvFragmentScanOptions <- R6Class("CsvFragmentScanOptions", inherit = FragmentScanOptions)
+CsvFragmentScanOptions$create <- function(...,
+ convert_opts = csv_file_format_convert_opts(...),
+ read_opts = csv_file_format_read_opts(...)) {
+ dataset___CsvFragmentScanOptions__Make(convert_opts, read_opts)
+}
+
+#' @usage NULL
+#' @format NULL
+#' @rdname FragmentScanOptions
+#' @export
+ParquetFragmentScanOptions <- R6Class("ParquetFragmentScanOptions", inherit = FragmentScanOptions)
+ParquetFragmentScanOptions$create <- function(use_buffered_stream = FALSE,
+ buffer_size = 8196,
+ pre_buffer = TRUE) {
+ dataset___ParquetFragmentScanOptions__Make(use_buffered_stream, buffer_size, pre_buffer)
+}
+
+#' Format-specific write options
+#'
+#' @description
+#' A `FileWriteOptions` holds write options specific to a `FileFormat`.
+FileWriteOptions <- R6Class("FileWriteOptions",
+ inherit = ArrowObject,
+ public = list(
+ update = function(table, ...) {
+ if (self$type == "parquet") {
+ dataset___ParquetFileWriteOptions__update(
+ self,
+ ParquetWriterProperties$create(table, ...),
+ ParquetArrowWriterProperties$create(...)
+ )
+ } else if (self$type == "ipc") {
+ args <- list(...)
+ if (is.null(args$codec)) {
+ dataset___IpcFileWriteOptions__update1(
+ self,
+ get_ipc_use_legacy_format(args$use_legacy_format),
+ get_ipc_metadata_version(args$metadata_version)
+ )
+ } else {
+ dataset___IpcFileWriteOptions__update2(
+ self,
+ get_ipc_use_legacy_format(args$use_legacy_format),
+ args$codec,
+ get_ipc_metadata_version(args$metadata_version)
+ )
+ }
+ } else if (self$type == "csv") {
+ dataset___CsvFileWriteOptions__update(
+ self,
+ CsvWriteOptions$create(...)
+ )
+ }
+ invisible(self)
+ }
+ ),
+ active = list(
+ type = function() dataset___FileWriteOptions__type_name(self)
+ )
+)
+FileWriteOptions$create <- function(format, ...) {
+ if (!inherits(format, "FileFormat")) {
+ format <- FileFormat$create(format)
+ }
+ options <- dataset___FileFormat__DefaultWriteOptions(format)
+ options$update(...)
+}
diff --git a/src/arrow/r/R/dataset-partition.R b/src/arrow/r/R/dataset-partition.R
new file mode 100644
index 000000000..35d5bc00c
--- /dev/null
+++ b/src/arrow/r/R/dataset-partition.R
@@ -0,0 +1,132 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+#' Define Partitioning for a Dataset
+#'
+#' @description
+#' Pass a `Partitioning` object to a [FileSystemDatasetFactory]'s `$create()`
+#' method to indicate how the file's paths should be interpreted to define
+#' partitioning.
+#'
+#' `DirectoryPartitioning` describes how to interpret raw path segments, in
+#' order. For example, `schema(year = int16(), month = int8())` would define
+#' partitions for file paths like "2019/01/file.parquet",
+#' "2019/02/file.parquet", etc. In this scheme `NULL` values will be skipped. In
+#' the previous example: when writing a dataset if the month was `NA` (or
+#' `NULL`), the files would be placed in "2019/file.parquet". When reading, the
+#' rows in "2019/file.parquet" would return an `NA` for the month column. An
+#' error will be raised if an outer directory is `NULL` and an inner directory
+#' is not.
+#'
+#' `HivePartitioning` is for Hive-style partitioning, which embeds field
+#' names and values in path segments, such as
+#' "/year=2019/month=2/data.parquet". Because fields are named in the path
+#' segments, order does not matter. This partitioning scheme allows `NULL`
+#' values. They will be replaced by a configurable `null_fallback` which
+#' defaults to the string `"__HIVE_DEFAULT_PARTITION__"` when writing. When
+#' reading, the `null_fallback` string will be replaced with `NA`s as
+#' appropriate.
+#'
+#' `PartitioningFactory` subclasses instruct the `DatasetFactory` to detect
+#' partition features from the file paths.
+#' @section Factory:
+#' Both `DirectoryPartitioning$create()` and `HivePartitioning$create()`
+#' methods take a [Schema] as a single input argument. The helper
+#' function [`hive_partition(...)`][hive_partition] is shorthand for
+#' `HivePartitioning$create(schema(...))`.
+#'
+#' With `DirectoryPartitioningFactory$create()`, you can provide just the
+#' names of the path segments (in our example, `c("year", "month")`), and
+#' the `DatasetFactory` will infer the data types for those partition variables.
+#' `HivePartitioningFactory$create()` takes no arguments: both variable names
+#' and their types can be inferred from the file paths. `hive_partition()` with
+#' no arguments returns a `HivePartitioningFactory`.
+#' @name Partitioning
+#' @rdname Partitioning
+#' @export
+Partitioning <- R6Class("Partitioning", inherit = ArrowObject)
+#' @usage NULL
+#' @format NULL
+#' @rdname Partitioning
+#' @export
+DirectoryPartitioning <- R6Class("DirectoryPartitioning", inherit = Partitioning)
+DirectoryPartitioning$create <- function(schm, segment_encoding = "uri") {
+ dataset___DirectoryPartitioning(schm, segment_encoding = segment_encoding)
+}
+
+#' @usage NULL
+#' @format NULL
+#' @rdname Partitioning
+#' @export
+HivePartitioning <- R6Class("HivePartitioning", inherit = Partitioning)
+HivePartitioning$create <- function(schm, null_fallback = NULL, segment_encoding = "uri") {
+ dataset___HivePartitioning(schm,
+ null_fallback = null_fallback_or_default(null_fallback),
+ segment_encoding = segment_encoding
+ )
+}
+
+#' Construct Hive partitioning
+#'
+#' Hive partitioning embeds field names and values in path segments, such as
+#' "/year=2019/month=2/data.parquet".
+#'
+#' Because fields are named in the path segments, order of fields passed to
+#' `hive_partition()` does not matter.
+#' @param ... named list of [data types][data-type], passed to [schema()]
+#' @param null_fallback character to be used in place of missing values (`NA` or `NULL`)
+#' in partition columns. Default is `"__HIVE_DEFAULT_PARTITION__"`,
+#' which is what Hive uses.
+#' @param segment_encoding Decode partition segments after splitting paths.
+#' Default is `"uri"` (URI-decode segments). May also be `"none"` (leave as-is).
+#' @return A [HivePartitioning][Partitioning], or a `HivePartitioningFactory` if
+#' calling `hive_partition()` with no arguments.
+#' @examplesIf arrow_with_dataset()
+#' hive_partition(year = int16(), month = int8())
+#' @export
+hive_partition <- function(..., null_fallback = NULL, segment_encoding = "uri") {
+ schm <- schema(...)
+ if (length(schm) == 0) {
+ HivePartitioningFactory$create(null_fallback, segment_encoding)
+ } else {
+ HivePartitioning$create(schm, null_fallback, segment_encoding)
+ }
+}
+
+PartitioningFactory <- R6Class("PartitioningFactory", inherit = ArrowObject)
+
+#' @usage NULL
+#' @format NULL
+#' @rdname Partitioning
+#' @export
+DirectoryPartitioningFactory <- R6Class("DirectoryPartitioningFactory", inherit = PartitioningFactory)
+DirectoryPartitioningFactory$create <- function(field_names, segment_encoding = "uri") {
+ dataset___DirectoryPartitioning__MakeFactory(field_names, segment_encoding)
+}
+
+#' @usage NULL
+#' @format NULL
+#' @rdname Partitioning
+#' @export
+HivePartitioningFactory <- R6Class("HivePartitioningFactory", inherit = PartitioningFactory)
+HivePartitioningFactory$create <- function(null_fallback = NULL, segment_encoding = "uri") {
+ dataset___HivePartitioning__MakeFactory(null_fallback_or_default(null_fallback), segment_encoding)
+}
+
+null_fallback_or_default <- function(null_fallback) {
+ null_fallback %||% "__HIVE_DEFAULT_PARTITION__"
+}
diff --git a/src/arrow/r/R/dataset-scan.R b/src/arrow/r/R/dataset-scan.R
new file mode 100644
index 000000000..03c926fb4
--- /dev/null
+++ b/src/arrow/r/R/dataset-scan.R
@@ -0,0 +1,262 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+#' Scan the contents of a dataset
+#'
+#' @description
+#' A `Scanner` iterates over a [Dataset]'s fragments and returns data
+#' according to given row filtering and column projection. A `ScannerBuilder`
+#' can help create one.
+#'
+#' @section Factory:
+#' `Scanner$create()` wraps the `ScannerBuilder` interface to make a `Scanner`.
+#' It takes the following arguments:
+#'
+#' * `dataset`: A `Dataset` or `arrow_dplyr_query` object, as returned by the
+#' `dplyr` methods on `Dataset`.
+#' * `projection`: A character vector of column names to select columns or a
+#' named list of expressions
+#' * `filter`: An `Expression` to filter the scanned rows by, or `TRUE` (default)
+#' to keep all rows.
+#' * `use_threads`: logical: should scanning use multithreading? Default `TRUE`
+#' * `use_async`: logical: should the async scanner (performs better on
+#' high-latency/highly parallel filesystems like S3) be used? Default `FALSE`
+#' * `...`: Additional arguments, currently ignored
+#' @section Methods:
+#' `ScannerBuilder` has the following methods:
+#'
+#' - `$Project(cols)`: Indicate that the scan should only return columns given
+#' by `cols`, a character vector of column names
+#' - `$Filter(expr)`: Filter rows by an [Expression].
+#' - `$UseThreads(threads)`: logical: should the scan use multithreading?
+#' The method's argument defaults to `TRUE`, but you must call the method to
+#' enable multithreading because the scanner's own default is `FALSE`.
+#' - `$UseAsync(use_async)`: logical: should the async scanner be used?
+#' - `$BatchSize(batch_size)`: integer: Maximum row count of scanned record
+#' batches, default is 32K. If scanned record batches are overflowing memory
+#' then this method can be called to reduce their size.
+#' - `$schema`: Active binding, returns the [Schema] of the Dataset
+#' - `$Finish()`: Returns a `Scanner`
+#'
+#' `Scanner` has `$ToTable()`, which evaluates the query and returns an Arrow
+#' [Table], as well as `$ScanBatches()`, `$ToRecordBatchReader()`, and
+#' `$CountRows()`.
+#' @rdname Scanner
+#' @name Scanner
+#' @export
+Scanner <- R6Class("Scanner",
+ inherit = ArrowObject,
+ public = list(
+ ToTable = function() dataset___Scanner__ToTable(self),
+ ScanBatches = function() dataset___Scanner__ScanBatches(self),
+ ToRecordBatchReader = function() dataset___Scanner__ToRecordBatchReader(self),
+ CountRows = function() dataset___Scanner__CountRows(self)
+ ),
+ active = list(
+ schema = function() dataset___Scanner__schema(self)
+ )
+)
+Scanner$create <- function(dataset,
+ projection = NULL,
+ filter = TRUE,
+ use_threads = option_use_threads(),
+ use_async = getOption("arrow.use_async", FALSE),
+ batch_size = NULL,
+ fragment_scan_options = NULL,
+ ...) {
+ if (inherits(dataset, "arrow_dplyr_query")) {
+ if (is_collapsed(dataset)) {
+ # TODO: Is there a way to get a RecordBatchReader rather than evaluating?
+ dataset$.data <- as_adq(dplyr::compute(dataset$.data))$.data
+ }
+
+ proj <- c(dataset$selected_columns, dataset$temp_columns)
+
+ if (!is.null(projection)) {
+ if (is.character(projection)) {
+ stopifnot("attempting to project with unknown columns" = all(projection %in% names(proj)))
+ proj <- proj[projection]
+ } else {
+ # TODO: ARROW-13802 accepting lists of Expressions as a projection
+ warning(
+ "Scanner$create(projection = ...) must be a character vector, ",
+ "ignoring the projection argument."
+ )
+ }
+ }
+
+ if (!isTRUE(filter)) {
+ dataset <- set_filters(dataset, filter)
+ }
+
+ return(Scanner$create(
+ dataset$.data,
+ proj,
+ dataset$filtered_rows,
+ use_threads,
+ use_async,
+ batch_size,
+ fragment_scan_options,
+ ...
+ ))
+ }
+
+ scanner_builder <- ScannerBuilder$create(dataset)
+ if (use_threads) {
+ scanner_builder$UseThreads()
+ }
+ if (use_async) {
+ scanner_builder$UseAsync()
+ }
+ if (!is.null(projection)) {
+ scanner_builder$Project(projection)
+ }
+ if (!isTRUE(filter)) {
+ scanner_builder$Filter(filter)
+ }
+ if (is_integerish(batch_size)) {
+ scanner_builder$BatchSize(batch_size)
+ }
+ if (!is.null(fragment_scan_options)) {
+ scanner_builder$FragmentScanOptions(fragment_scan_options)
+ }
+ scanner_builder$Finish()
+}
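+
+# Illustrative sketch (not run; assumes `ds` is a Dataset with columns `cyl`
+# and `mpg`, e.g. from open_dataset()): build a Scanner over a column
+# projection and materialize the result as an Arrow Table:
+#   scan <- Scanner$create(ds, projection = c("cyl", "mpg"), use_threads = TRUE)
+#   tab <- scan$ToTable()
+#   Scanner$create(ds)$CountRows() # row count without materializing columns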
+
+#' @export
+names.Scanner <- function(x) names(x$schema)
+
+#' @export
+head.Scanner <- function(x, n = 6L, ...) {
+ assert_that(n > 0) # For now
+ dataset___Scanner__head(x, n)
+}
+
+#' @export
+tail.Scanner <- function(x, n = 6L, ...) {
+ assert_that(n > 0) # For now
+ result <- list()
+ batch_num <- 0
+ for (batch in rev(dataset___Scanner__ScanBatches(x))) {
+ batch_num <- batch_num + 1
+ result[[batch_num]] <- tail(batch, n)
+ n <- n - nrow(batch)
+ if (n <= 0) break
+ }
+ Table$create(!!!rev(result))
+}
+
+ScanTask <- R6Class("ScanTask",
+ inherit = ArrowObject,
+ public = list(
+ Execute = function() dataset___ScanTask__get_batches(self)
+ )
+)
+
+#' Apply a function to a stream of RecordBatches
+#'
+#' As an alternative to calling `collect()` on a `Dataset` query, you can
+#' use this function to access the stream of `RecordBatch`es in the `Dataset`.
+#' This lets you aggregate on each chunk and pull the intermediate results into
+#' a `data.frame` for further aggregation, even if you couldn't fit the whole
+#' `Dataset` result in memory.
+#'
+#' This is experimental and not recommended for production use.
+#'
+#' @param X A `Dataset` or `arrow_dplyr_query` object, as returned by the
+#' `dplyr` methods on `Dataset`.
+#' @param FUN A function or `purrr`-style lambda expression to apply to each
+#' batch
+#' @param ... Additional arguments passed to `FUN`
+#' @param .data.frame logical: collect the resulting chunks into a single
+#' `data.frame`? Default `TRUE`
+#' @export
+map_batches <- function(X, FUN, ..., .data.frame = TRUE) {
+ if (.data.frame) {
+ lapply <- map_dfr
+ }
+ scanner <- Scanner$create(ensure_group_vars(X))
+ FUN <- as_mapper(FUN)
+ lapply(scanner$ScanBatches(), function(batch) {
+ # TODO: wrap batch in arrow_dplyr_query with X$selected_columns,
+ # X$temp_columns, and X$group_by_vars
+ # if X is arrow_dplyr_query, if some other arg (.dplyr?) == TRUE
+ FUN(batch, ...)
+ })
+}
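+
+# Illustrative sketch (not run; assumes `ds` is a Dataset with a numeric
+# column `x`): compute a per-batch summary and row-bind the chunks into a
+# single data.frame (the default .data.frame = TRUE behavior):
+#   map_batches(ds, ~ data.frame(rows = nrow(.), x_sum = sum(as.data.frame(.)$x)))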
+
+#' @usage NULL
+#' @format NULL
+#' @rdname Scanner
+#' @export
+ScannerBuilder <- R6Class("ScannerBuilder",
+ inherit = ArrowObject,
+ public = list(
+ Project = function(cols) {
+ # cols is either a character vector or a named list of Expressions
+ if (is.character(cols)) {
+ dataset___ScannerBuilder__ProjectNames(self, cols)
+ } else if (length(cols) == 0) {
+ # Empty projection
+ dataset___ScannerBuilder__ProjectNames(self, character(0))
+ } else {
+ # List of Expressions
+ dataset___ScannerBuilder__ProjectExprs(self, cols, names(cols))
+ }
+ self
+ },
+ Filter = function(expr) {
+ assert_is(expr, "Expression")
+ dataset___ScannerBuilder__Filter(self, expr)
+ self
+ },
+ UseThreads = function(threads = option_use_threads()) {
+ dataset___ScannerBuilder__UseThreads(self, threads)
+ self
+ },
+ UseAsync = function(use_async = TRUE) {
+ dataset___ScannerBuilder__UseAsync(self, use_async)
+ self
+ },
+ BatchSize = function(batch_size) {
+ dataset___ScannerBuilder__BatchSize(self, batch_size)
+ self
+ },
+ FragmentScanOptions = function(options) {
+ dataset___ScannerBuilder__FragmentScanOptions(self, options)
+ self
+ },
+ Finish = function() dataset___ScannerBuilder__Finish(self)
+ ),
+ active = list(
+ schema = function() dataset___ScannerBuilder__schema(self)
+ )
+)
+ScannerBuilder$create <- function(dataset) {
+ if (inherits(dataset, "RecordBatchReader")) {
+ return(dataset___ScannerBuilder__FromRecordBatchReader(dataset))
+ }
+
+ if (inherits(dataset, c("data.frame", "ArrowTabular"))) {
+ dataset <- InMemoryDataset$create(dataset)
+ }
+ assert_is(dataset, "Dataset")
+
+ dataset$NewScan()
+}
+
+#' @export
+names.ScannerBuilder <- function(x) names(x$schema)
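+
+# Illustrative sketch (not run; assumes `ds` is a Dataset with columns `cyl`
+# and `mpg`): the builder methods return `self`, so a scan can be configured
+# by chaining and then finished into a Scanner:
+#   tab <- ds$NewScan()$
+#     Project(c("cyl", "mpg"))$
+#     UseThreads()$
+#     Finish()$
+#     ToTable()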
diff --git a/src/arrow/r/R/dataset-write.R b/src/arrow/r/R/dataset-write.R
new file mode 100644
index 000000000..3a98357b0
--- /dev/null
+++ b/src/arrow/r/R/dataset-write.R
@@ -0,0 +1,144 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+#' Write a dataset
+#'
+#' This function allows you to write a dataset. By writing to more efficient
+#' binary storage formats, and by specifying relevant partitioning, you can
+#' make the data much faster to read and query.
+#'
+#' @param dataset [Dataset], [RecordBatch], [Table], `arrow_dplyr_query`, or
+#' `data.frame`. If an `arrow_dplyr_query`, the query will be evaluated and
+#' the result will be written. This means that you can `select()`, `filter()`, `mutate()`,
+#' etc. to transform the data before it is written if you need to.
+#' @param path string path, URI, or `SubTreeFileSystem` referencing a directory
+#' to write to (directory will be created if it does not exist)
+#' @param format a string identifier of the file format. Default is to use
+#' "parquet" (see [FileFormat])
+#' @param partitioning `Partitioning` or a character vector of columns to
+#' use as partition keys (to be written as path segments). Default is to
+#' use the current `group_by()` columns.
+#' @param basename_template string template for the names of files to be written.
+#' Must contain `"{i}"`, which will be replaced with an autoincremented
+#' integer to generate basenames of datafiles. For example, `"part-{i}.feather"`
+#' will yield `"part-0.feather", ...`.
+#' @param hive_style logical: write partition segments as Hive-style
+#' (`key1=value1/key2=value2/file.ext`) or as just bare values. Default is `TRUE`.
+#' @param existing_data_behavior The behavior to use when there is already data
+#' in the destination directory. Must be one of "overwrite", "error", or
+#' "delete_matching".
+#' - "overwrite" (the default) then any new files created will overwrite
+#' existing files
+#' - "error" then the operation will fail if the destination directory is not
+#' empty
+#' - "delete_matching" then the writer will delete any existing partitions
+#' if data is going to be written to those partitions and will leave alone
+#' partitions which data is not written to.
+#' @param ... additional format-specific arguments. For available Parquet
+#' options, see [write_parquet()]. The available Feather options are
+#' - `use_legacy_format` logical: write data formatted so that Arrow libraries
+#' versions 0.14 and lower can read it. Default is `FALSE`. You can also
+#' enable this by setting the environment variable `ARROW_PRE_0_15_IPC_FORMAT=1`.
+#' - `metadata_version`: A string like "V5" or the equivalent integer indicating
+#' the Arrow IPC MetadataVersion. Default (NULL) will use the latest version,
+#' unless the environment variable `ARROW_PRE_1_0_METADATA_VERSION=1`, in
+#' which case it will be V4.
+#' - `codec`: A [Codec] which will be used to compress body buffers of written
+#' files. Default (NULL) will not compress body buffers.
+#' - `null_fallback`: character to be used in place of missing values (`NA` or
+#' `NULL`) when using Hive-style partitioning. See [hive_partition()].
+#' @return The input `dataset`, invisibly
+#' @examplesIf arrow_with_dataset() & arrow_with_parquet() & requireNamespace("dplyr", quietly = TRUE)
+#' # You can write datasets partitioned by the values in a column (here: "cyl").
+#' # This creates a structure of the form cyl=X/part-Z.parquet.
+#' one_level_tree <- tempfile()
+#' write_dataset(mtcars, one_level_tree, partitioning = "cyl")
+#' list.files(one_level_tree, recursive = TRUE)
+#'
+#' # You can also partition by the values in multiple columns
+#' # (here: "cyl" and "gear").
+#' # This creates a structure of the form cyl=X/gear=Y/part-Z.parquet.
+#' two_levels_tree <- tempfile()
+#' write_dataset(mtcars, two_levels_tree, partitioning = c("cyl", "gear"))
+#' list.files(two_levels_tree, recursive = TRUE)
+#'
+#' # In the two previous examples we would have:
+#' # X = {4,6,8}, the number of cylinders.
+#' # Y = {3,4,5}, the number of forward gears.
+#' # Z = {0,1,2}, the number of saved parts, starting from 0.
+#'
+#' # You can obtain the same result as the previous examples using arrow with
+#' # a dplyr pipeline. This will be the same as two_levels_tree above, but the
+#' # output directory will be different.
+#' library(dplyr)
+#' two_levels_tree_2 <- tempfile()
+#' mtcars %>%
+#' group_by(cyl, gear) %>%
+#' write_dataset(two_levels_tree_2)
+#' list.files(two_levels_tree_2, recursive = TRUE)
+#'
+#' # And you can also turn off the Hive-style directory naming where the column
+#' # name is included with the values by using `hive_style = FALSE`.
+#'
+#' # Write a structure X/Y/part-Z.parquet.
+#' two_levels_tree_no_hive <- tempfile()
+#' mtcars %>%
+#' group_by(cyl, gear) %>%
+#' write_dataset(two_levels_tree_no_hive, hive_style = FALSE)
+#' list.files(two_levels_tree_no_hive, recursive = TRUE)
+#' @export
+write_dataset <- function(dataset,
+ path,
+ format = c("parquet", "feather", "arrow", "ipc", "csv"),
+ partitioning = dplyr::group_vars(dataset),
+ basename_template = paste0("part-{i}.", as.character(format)),
+ hive_style = TRUE,
+ existing_data_behavior = c("overwrite", "error", "delete_matching"),
+ ...) {
+ format <- match.arg(format)
+ if (inherits(dataset, "arrow_dplyr_query")) {
+ # partitioning vars need to be in the `select` schema
+ dataset <- ensure_group_vars(dataset)
+ } else if (inherits(dataset, "grouped_df")) {
+ force(partitioning)
+ # Drop the grouping metadata before writing; we've already consumed it
+ # now to construct `partitioning` and don't want it in the metadata$r
+ dataset <- dplyr::ungroup(dataset)
+ }
+
+ scanner <- Scanner$create(dataset, use_async = TRUE)
+ if (!inherits(partitioning, "Partitioning")) {
+ partition_schema <- scanner$schema[partitioning]
+ if (isTRUE(hive_style)) {
+ partitioning <- HivePartitioning$create(partition_schema, null_fallback = list(...)$null_fallback)
+ } else {
+ partitioning <- DirectoryPartitioning$create(partition_schema)
+ }
+ }
+
+ path_and_fs <- get_path_and_filesystem(path)
+ options <- FileWriteOptions$create(format, table = scanner, ...)
+
+ existing_data_behavior_opts <- c("delete_matching", "overwrite", "error")
+ existing_data_behavior <- match(match.arg(existing_data_behavior), existing_data_behavior_opts) - 1L
+
+ dataset___Dataset__Write(
+ options, path_and_fs$fs, path_and_fs$path,
+ partitioning, basename_template, scanner,
+ existing_data_behavior
+ )
+}
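+
+# Illustrative sketch (not run; `out_dir` is a hypothetical directory path):
+# write CSV files partitioned by `cyl`, with a custom basename template, and
+# fail instead of overwriting if the directory already contains data:
+#   write_dataset(mtcars, out_dir,
+#     format = "csv",
+#     partitioning = "cyl",
+#     basename_template = "chunk-{i}.csv",
+#     existing_data_behavior = "error"
+#   )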
diff --git a/src/arrow/r/R/dataset.R b/src/arrow/r/R/dataset.R
new file mode 100644
index 000000000..7207a5543
--- /dev/null
+++ b/src/arrow/r/R/dataset.R
@@ -0,0 +1,367 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+#' Open a multi-file dataset
+#'
+#' Arrow Datasets allow you to query against data that has been split across
+#' multiple files. This sharding of data may indicate partitioning, which
+#' can accelerate queries that only touch some partitions (files). Call
+#' `open_dataset()` to point to a directory of data files and return a
+#' `Dataset`, then use `dplyr` methods to query it.
+#'
+#' @param sources One of:
+#' * a string path or URI to a directory containing data files
+#' * a string path or URI to a single file
+#' * a character vector of paths or URIs to individual data files
+#' * a list of `Dataset` objects as created by this function
+#' * a list of `DatasetFactory` objects as created by [dataset_factory()].
+#'
+#' When `sources` is a vector of file URIs, they must all use the same protocol
+#' and point to files located in the same file system and having the same
+#' format.
+#' @param schema [Schema] for the `Dataset`. If `NULL` (the default), the schema
+#' will be inferred from the data sources.
+#' @param partitioning When `sources` is a directory path/URI, one of:
+#' * a `Schema`, in which case the file paths relative to `sources` will be
+#' parsed, and path segments will be matched with the schema fields. For
+#' example, `schema(year = int16(), month = int8())` would create partitions
+#' for file paths like `"2019/01/file.parquet"`, `"2019/02/file.parquet"`,
+#' etc.
+#' * a character vector that defines the field names corresponding to those
+#' path segments (that is, you're providing the names that would correspond
+#' to a `Schema` but the types will be autodetected)
+#' * a `HivePartitioning` or `HivePartitioningFactory`, as returned
+#' by [hive_partition()] which parses explicit or autodetected fields from
+#' Hive-style path segments
+#' * `NULL` for no partitioning
+#'
+#' The default is to autodetect Hive-style partitions. When `sources` is not a
+#' directory path/URI, `partitioning` is ignored.
+#' @param unify_schemas logical: should all data fragments (files, `Dataset`s)
+#' be scanned in order to create a unified schema from them? If `FALSE`, only
+#' the first fragment will be inspected for its schema. Use this fast path
+#' when you know and trust that all fragments have an identical schema.
+#' The default is `FALSE` when creating a dataset from a directory path/URI or
+#' vector of file paths/URIs (because there may be many files and scanning may
+#' be slow) but `TRUE` when `sources` is a list of `Dataset`s (because there
+#' should be few `Dataset`s in the list and their `Schema`s are already in
+#' memory).
+#' @param format A [FileFormat] object, or a string identifier of the format of
+#' the files in `x`. This argument is ignored when `sources` is a list of `Dataset` objects.
+#' Currently supported values:
+#' * "parquet"
+#' * "ipc"/"arrow"/"feather", all aliases for each other; for Feather, note that
+#' only version 2 files are supported
+#' * "csv"/"text", aliases for the same thing (because comma is the default
+#' delimiter for text files
+#' * "tsv", equivalent to passing `format = "text", delimiter = "\t"`
+#'
+#' Default is "parquet", unless a `delimiter` is also specified, in which case
+#' it is assumed to be "text".
+#' @param ... additional arguments passed to `dataset_factory()` when `sources`
+#' is a directory path/URI or vector of file paths/URIs, otherwise ignored.
+#' These may include `format` to indicate the file format, or other
+#' format-specific options.
+#' @return A [Dataset] R6 object. Use `dplyr` methods on it to query the data,
+#' or call [`$NewScan()`][Scanner] to construct a query directly.
+#' @export
+#' @seealso `vignette("dataset", package = "arrow")`
+#' @include arrow-package.R
+#' @examplesIf arrow_with_dataset() & arrow_with_parquet()
+#' # Set up directory for examples
+#' tf <- tempfile()
+#' dir.create(tf)
+#' on.exit(unlink(tf))
+#'
+#' data <- dplyr::group_by(mtcars, cyl)
+#' write_dataset(data, tf)
+#'
+#' # You can specify a directory containing the files for your dataset and
+#' # open_dataset will scan all files in your directory.
+#' open_dataset(tf)
+#'
+#' # You can also supply a vector of paths
+#' open_dataset(c(file.path(tf, "cyl=4/part-0.parquet"), file.path(tf, "cyl=8/part-0.parquet")))
+#'
+#' ## You must specify the file format if using a format other than parquet.
+#' tf2 <- tempfile()
+#' dir.create(tf2)
+#' on.exit(unlink(tf2))
+#' write_dataset(data, tf2, format = "ipc")
+#' # This line will result in errors when you try to work with the data
+#' \dontrun{
+#' open_dataset(tf2)
+#' }
+#' # This line will work
+#' open_dataset(tf2, format = "ipc")
+#'
+#' ## You can specify file partitioning to include it as a field in your dataset
+#' # Create a temporary directory and write example dataset
+#' tf3 <- tempfile()
+#' dir.create(tf3)
+#' on.exit(unlink(tf3))
+#' write_dataset(airquality, tf3, partitioning = c("Month", "Day"), hive_style = FALSE)
+#'
+#' # View files - you can see the partitioning means that files have been written
+#' # to folders based on Month/Day values
+#' tf3_files <- list.files(tf3, recursive = TRUE)
+#'
+#' # With no partitioning specified, the dataset contains all files but doesn't include
+#' # directory names as field names
+#' open_dataset(tf3)
+#'
+#' # Now that partitioning has been specified, your dataset contains columns for Month and Day
+#' open_dataset(tf3, partitioning = c("Month", "Day"))
+#'
+#' # If you want to specify the data types for your fields, you can pass in a Schema
+#' open_dataset(tf3, partitioning = schema(Month = int8(), Day = int8()))
+open_dataset <- function(sources,
+ schema = NULL,
+ partitioning = hive_partition(),
+ unify_schemas = NULL,
+ format = c("parquet", "arrow", "ipc", "feather", "csv", "tsv", "text"),
+ ...) {
+ if (!arrow_with_dataset()) {
+ stop("This build of the arrow package does not support Datasets", call. = FALSE)
+ }
+ if (is_list_of(sources, "Dataset")) {
+ if (is.null(schema)) {
+ if (is.null(unify_schemas) || isTRUE(unify_schemas)) {
+ # Default is to unify schemas here
+ schema <- unify_schemas(schemas = map(sources, ~ .$schema))
+ } else {
+ # Take the first one.
+ schema <- sources[[1]]$schema
+ }
+ }
+ # Enforce that all datasets have the same schema
+ assert_is(schema, "Schema")
+ sources <- lapply(sources, function(x) {
+ x$schema <- schema
+ x
+ })
+ return(dataset___UnionDataset__create(sources, schema))
+ }
+
+ factory <- DatasetFactory$create(sources, partitioning = partitioning, format = format, schema = schema, ...)
+ tryCatch(
+ # Default is _not_ to inspect/unify schemas
+ factory$Finish(schema, isTRUE(unify_schemas)),
+ error = function(e) {
+ handle_parquet_io_error(e, format)
+ }
+ )
+}
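+
+# Illustrative sketch (not run; assumes `ds1` and `ds2` are Datasets already
+# opened with open_dataset()): combine them into a UnionDataset, scanning all
+# fragments so that a unified schema can be computed:
+#   ds <- open_dataset(list(ds1, ds2), unify_schemas = TRUE)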
+
+#' Multi-file datasets
+#'
+#' @description
+#' Arrow Datasets allow you to query against data that has been split across
+#' multiple files. This sharding of data may indicate partitioning, which
+#' can accelerate queries that only touch some partitions (files).
+#'
+#' A `Dataset` contains one or more `Fragments`, such as files, of potentially
+#' differing type and partitioning.
+#'
+#' For `Dataset$create()`, see [open_dataset()], which is an alias for it.
+#'
+#' `DatasetFactory` is used to provide finer control over the creation of `Dataset`s.
+#'
+#' @section Factory:
+#' `DatasetFactory` is used to create a `Dataset`, inspect the [Schema] of the
+#' fragments contained in it, and declare a partitioning.
+#' `FileSystemDatasetFactory` is a subclass of `DatasetFactory` for
+#' discovering files in the local file system, the only currently supported
+#' file system.
+#'
+#' For the `DatasetFactory$create()` factory method, see [dataset_factory()], an
+#' alias for it. A `DatasetFactory` has:
+#'
+#' - `$Inspect(unify_schemas)`: If `unify_schemas` is `TRUE`, all fragments
+#' will be scanned and a unified [Schema] will be created from them; if `FALSE`
+#' (default), only the first fragment will be inspected for its schema. Use this
+#' fast path when you know and trust that all fragments have an identical schema.
+#' - `$Finish(schema, unify_schemas)`: Returns a `Dataset`. If `schema` is provided,
+#' it will be used for the `Dataset`; if omitted, a `Schema` will be created from
+#' inspecting the fragments (files) in the dataset, following `unify_schemas`
+#' as described above.
+#'
+#' `FileSystemDatasetFactory$create()` is a lower-level factory method and
+#' takes the following arguments:
+#' * `filesystem`: A [FileSystem]
+#' * `selector`: Either a [FileSelector] or `NULL`
+#' * `paths`: Either a character vector of file paths or `NULL`
+#' * `format`: A [FileFormat]
+#' * `partitioning`: Either `Partitioning`, `PartitioningFactory`, or `NULL`
+#' @section Methods:
+#'
+#' A `Dataset` has the following methods:
+#' - `$NewScan()`: Returns a [ScannerBuilder] for building a query
+#' - `$schema`: Active binding that returns the [Schema] of the Dataset; you
+#' may also replace the dataset's schema by using `ds$schema <- new_schema`.
+#' This method currently supports only adding, removing, or reordering
+#' fields in the schema: you cannot alter or cast the field types.
+#'
+#' `FileSystemDataset` has the following methods:
+#' - `$files`: Active binding, returns the files of the `FileSystemDataset`
+#' - `$format`: Active binding, returns the [FileFormat] of the `FileSystemDataset`
+#'
+#' `UnionDataset` has the following methods:
+#' - `$children`: Active binding, returns all child `Dataset`s.
+#'
+#' @export
+#' @seealso [open_dataset()] for a simple interface to creating a `Dataset`
+Dataset <- R6Class("Dataset",
+ inherit = ArrowObject,
+ public = list(
+ # @description
+ # Start a new scan of the data
+ # @return A [ScannerBuilder]
+ NewScan = function() dataset___Dataset__NewScan(self),
+ ToString = function() self$schema$ToString()
+ ),
+ active = list(
+ schema = function(schema) {
+ if (missing(schema)) {
+ dataset___Dataset__schema(self)
+ } else {
+ assert_is(schema, "Schema")
+ invisible(dataset___Dataset__ReplaceSchema(self, schema))
+ }
+ },
+ metadata = function() self$schema$metadata,
+ num_rows = function() self$NewScan()$Finish()$CountRows(),
+ num_cols = function() length(self$schema),
+ # @description
+ # Return the Dataset's type.
+ type = function() dataset___Dataset__type_name(self)
+ )
+)
+Dataset$create <- open_dataset
+
+#' @name FileSystemDataset
+#' @rdname Dataset
+#' @export
+FileSystemDataset <- R6Class("FileSystemDataset",
+ inherit = Dataset,
+ public = list(
+ .class_title = function() {
+ nfiles <- length(self$files)
+ file_type <- self$format$type
+ pretty_file_type <- list(
+ parquet = "Parquet",
+ ipc = "Feather"
+ )[[file_type]]
+
+ paste(
+ class(self)[[1]],
+ "with",
+ nfiles,
+ pretty_file_type %||% file_type,
+ ifelse(nfiles == 1, "file", "files")
+ )
+ }
+ ),
+ active = list(
+ # @description
+ # Return the files contained in this `FileSystemDataset`
+ files = function() dataset___FileSystemDataset__files(self),
+ # @description
+ # Return the format of files in this `Dataset`
+ format = function() {
+ dataset___FileSystemDataset__format(self)
+ },
+ # @description
+ # Return the filesystem of files in this `Dataset`
+ filesystem = function() {
+ dataset___FileSystemDataset__filesystem(self)
+ }
+ )
+)
+
+#' @name UnionDataset
+#' @rdname Dataset
+#' @export
+UnionDataset <- R6Class("UnionDataset",
+ inherit = Dataset,
+ active = list(
+ # @description
+ # Return the UnionDataset's child `Dataset`s
+ children = function() {
+ dataset___UnionDataset__children(self)
+ }
+ )
+)
+
+#' @name InMemoryDataset
+#' @rdname Dataset
+#' @export
+InMemoryDataset <- R6Class("InMemoryDataset", inherit = Dataset)
+InMemoryDataset$create <- function(x) {
+ if (!arrow_with_dataset()) {
+ stop("This build of the arrow package does not support Datasets", call. = FALSE)
+ }
+ if (!inherits(x, "Table")) {
+ x <- Table$create(x)
+ }
+ dataset___InMemoryDataset__create(x)
+}
+
+
+#' @export
+names.Dataset <- function(x) names(x$schema)
+
+#' @export
+dim.Dataset <- function(x) c(x$num_rows, x$num_cols)
+
+#' @export
+c.Dataset <- function(...) Dataset$create(list(...))
+
+#' @export
+head.Dataset <- function(x, n = 6L, ...) {
+ head(Scanner$create(x), n)
+}
+
+#' @export
+tail.Dataset <- function(x, n = 6L, ...) {
+ tail(Scanner$create(x), n)
+}
+
+#' @export
+`[.Dataset` <- function(x, i, j, ..., drop = FALSE) {
+ if (nargs() == 2L) {
+ # List-like column extraction (x[i])
+ return(x[, i])
+ }
+ if (!missing(j)) {
+ x <- select.Dataset(x, all_of(j))
+ }
+
+ if (!missing(i)) {
+ x <- take_dataset_rows(x, i)
+ }
+ x
+}
+
+take_dataset_rows <- function(x, i) {
+ if (!is.numeric(i) || any(i < 0)) {
+ stop("Only slicing with positive indices is supported", call. = FALSE)
+ }
+ scanner <- Scanner$create(x)
+ i <- Array$create(i - 1)
+ dataset___Scanner__TakeRows(scanner, i)
+}
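+
+# Illustrative sketch (not run; assumes `ds` is a Dataset with columns `cyl`
+# and `mpg`): `[` supports column selection and positive row indices only:
+#   ds[, c("cyl", "mpg")]   # column selection via select.Dataset()
+#   ds[1:10, "mpg"]         # first 10 rows of mpg, via take_dataset_rows()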
diff --git a/src/arrow/r/R/deprecated.R b/src/arrow/r/R/deprecated.R
new file mode 100644
index 000000000..e8848c4aa
--- /dev/null
+++ b/src/arrow/r/R/deprecated.R
@@ -0,0 +1,40 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+#' @rdname read_ipc_stream
+#' @export
+read_arrow <- function(file, ...) {
+ .Deprecated(msg = "Use 'read_ipc_stream' or 'read_feather' instead.")
+ if (inherits(file, "raw")) {
+ read_ipc_stream(file, ...)
+ } else {
+ read_feather(file, ...)
+ }
+}
+
+#' @rdname write_ipc_stream
+#' @export
+write_arrow <- function(x, sink, ...) {
+ .Deprecated(msg = "Use 'write_ipc_stream' or 'write_feather' instead.")
+ if (inherits(sink, "raw")) {
+ # HACK for sparklyr
+ # Note that this returns a new R raw vector, not the one passed as `sink`
+ write_to_raw(x)
+ } else {
+ write_feather(x, sink, ...)
+ }
+}
diff --git a/src/arrow/r/R/dictionary.R b/src/arrow/r/R/dictionary.R
new file mode 100644
index 000000000..b701768d6
--- /dev/null
+++ b/src/arrow/r/R/dictionary.R
@@ -0,0 +1,69 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+#' @include type.R
+
+#' @title class DictionaryType
+#'
+#' @usage NULL
+#' @format NULL
+#' @docType class
+#'
+#' @section Methods:
+#'
+#' TODO
+#'
+#' @rdname DictionaryType
+#' @name DictionaryType
+DictionaryType <- R6Class("DictionaryType",
+ inherit = FixedWidthType,
+ public = list(
+ ToString = function() {
+ prettier_dictionary_type(DataType__ToString(self))
+ }
+ ),
+ active = list(
+ index_type = function() DictionaryType__index_type(self),
+ value_type = function() DictionaryType__value_type(self),
+ name = function() DictionaryType__name(self),
+ ordered = function() DictionaryType__ordered(self)
+ )
+)
+DictionaryType$create <- function(index_type = int32(),
+ value_type = utf8(),
+ ordered = FALSE) {
+ assert_is(index_type, "DataType")
+ assert_is(value_type, "DataType")
+ DictionaryType__initialize(index_type, value_type, ordered)
+}
+
+#' Create a dictionary type
+#'
+#' @param index_type A DataType for the indices (default [int32()])
+#' @param value_type A DataType for the values (default [utf8()])
+#' @param ordered Is this an ordered dictionary (default `FALSE`)?
+#'
+#' @return A [DictionaryType]
+#' @seealso [Other Arrow data types][data-type]
+#' @export
+dictionary <- DictionaryType$create
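+
+# Illustrative sketch (not run): a dictionary type with 8-bit indices and
+# ordered string values, using the documented arguments:
+#   dictionary(index_type = int8(), value_type = utf8(), ordered = TRUE)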
+
+prettier_dictionary_type <- function(x) {
+ # Prettier format the "ordered" attribute
+ x <- sub(", ordered=0", "", x)
+ sub("ordered=1", "ordered", x)
+}
diff --git a/src/arrow/r/R/dplyr-arrange.R b/src/arrow/r/R/dplyr-arrange.R
new file mode 100644
index 000000000..4c8c687a3
--- /dev/null
+++ b/src/arrow/r/R/dplyr-arrange.R
@@ -0,0 +1,98 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+# The following S3 methods are registered on load if dplyr is present
+
+arrange.arrow_dplyr_query <- function(.data, ..., .by_group = FALSE) {
+ call <- match.call()
+ exprs <- quos(...)
+ if (.by_group) {
+    # when the data is grouped and .by_group is TRUE, order the result by
+ # the grouping columns first
+ exprs <- c(quos(!!!dplyr::groups(.data)), exprs)
+ }
+ if (length(exprs) == 0) {
+ # Nothing to do
+ return(.data)
+ }
+ .data <- as_adq(.data)
+ # find and remove any dplyr::desc() and tidy-eval
+ # the arrange expressions inside an Arrow data_mask
+ sorts <- vector("list", length(exprs))
+ descs <- logical(0)
+ mask <- arrow_mask(.data)
+ for (i in seq_along(exprs)) {
+ x <- find_and_remove_desc(exprs[[i]])
+ exprs[[i]] <- x[["quos"]]
+ sorts[[i]] <- arrow_eval(exprs[[i]], mask)
+ names(sorts)[i] <- format_expr(exprs[[i]])
+ if (inherits(sorts[[i]], "try-error")) {
+ msg <- paste("Expression", names(sorts)[i], "not supported in Arrow")
+ return(abandon_ship(call, .data, msg))
+ }
+ descs[i] <- x[["desc"]]
+ }
+ .data$arrange_vars <- c(sorts, .data$arrange_vars)
+ .data$arrange_desc <- c(descs, .data$arrange_desc)
+ .data
+}
+arrange.Dataset <- arrange.ArrowTabular <- arrange.arrow_dplyr_query
+
+# Helper to handle desc() in arrange()
+# * Takes a quosure as input
+# * Returns a list with two elements:
+# 1. The quosure with any wrapping parentheses and desc() removed
+# 2. A logical value indicating whether desc() was found
+# * Performs some other validation
+find_and_remove_desc <- function(quosure) {
+ expr <- quo_get_expr(quosure)
+ descending <- FALSE
+ if (length(all.vars(expr)) < 1L) {
+ stop(
+ "Expression in arrange() does not contain any field names: ",
+ deparse(expr),
+ call. = FALSE
+ )
+ }
+ # Use a while loop to remove any number of nested pairs of enclosing
+ # parentheses and any number of nested desc() calls. In the case of multiple
+ # nested desc() calls, each one toggles the sort order.
+ while (identical(typeof(expr), "language") && is.call(expr)) {
+ if (identical(expr[[1]], quote(`(`))) {
+ # remove enclosing parentheses
+ expr <- expr[[2]]
+ } else if (identical(expr[[1]], quote(desc))) {
+ # ensure desc() has only one argument (when an R expression is a function
+ # call, length == 2 means it has exactly one argument)
+ if (length(expr) > 2) {
+ stop("desc() expects only one argument", call. = FALSE)
+ }
+ # remove desc() and toggle descending
+ expr <- expr[[2]]
+ descending <- !descending
+ } else {
+ break
+ }
+ }
+ return(
+ list(
+ quos = quo_set_expr(quosure, expr),
+ desc = descending
+ )
+ )
+}
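+
+# Illustrative sketch (not run) of the helper's behavior described above:
+#   find_and_remove_desc(quo(desc((x))))       # list(quos = quo(x), desc = TRUE)
+#   find_and_remove_desc(quo(desc(desc(x))))   # nested desc() toggles: desc = FALSE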
diff --git a/src/arrow/r/R/dplyr-collect.R b/src/arrow/r/R/dplyr-collect.R
new file mode 100644
index 000000000..13e68f3f4
--- /dev/null
+++ b/src/arrow/r/R/dplyr-collect.R
@@ -0,0 +1,121 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+# The following S3 methods are registered on load if dplyr is present
+
+collect.arrow_dplyr_query <- function(x, as_data_frame = TRUE, ...) {
+ # head and tail are not ExecNodes, at best we can handle them via sink node
+ # so if there are any steps done after head/tail, we need to
+ # evaluate the query up to then and then do a new query for the rest
+ if (is_collapsed(x) && has_head_tail(x$.data)) {
+ x$.data <- as_adq(dplyr::compute(x$.data))$.data
+ }
+
+ # See query-engine.R for ExecPlan/Nodes
+ tab <- do_exec_plan(x)
+ if (as_data_frame) {
+ df <- as.data.frame(tab)
+ tab$invalidate()
+ restore_dplyr_features(df, x)
+ } else {
+ restore_dplyr_features(tab, x)
+ }
+}
+collect.ArrowTabular <- function(x, as_data_frame = TRUE, ...) {
+ if (as_data_frame) {
+ as.data.frame(x, ...)
+ } else {
+ x
+ }
+}
+collect.Dataset <- function(x, ...) dplyr::collect(as_adq(x), ...)
+
+compute.arrow_dplyr_query <- function(x, ...) dplyr::collect(x, as_data_frame = FALSE)
+compute.ArrowTabular <- function(x, ...) x
+compute.Dataset <- compute.arrow_dplyr_query
+
+pull.arrow_dplyr_query <- function(.data, var = -1) {
+ .data <- as_adq(.data)
+ var <- vars_pull(names(.data), !!enquo(var))
+ .data$selected_columns <- set_names(.data$selected_columns[var], var)
+ dplyr::collect(.data)[[1]]
+}
+pull.Dataset <- pull.ArrowTabular <- pull.arrow_dplyr_query
+
+restore_dplyr_features <- function(df, query) {
+ # An arrow_dplyr_query holds some attributes that Arrow doesn't know about
+ # After calling collect(), make sure these features are carried over
+
+ if (length(query$group_by_vars) > 0) {
+ # Preserve groupings, if present
+ if (is.data.frame(df)) {
+ df <- dplyr::grouped_df(
+ df,
+ dplyr::group_vars(query),
+ drop = dplyr::group_by_drop_default(query)
+ )
+ } else {
+ # This is a Table, via compute() or collect(as_data_frame = FALSE)
+ df <- as_adq(df)
+ df$group_by_vars <- query$group_by_vars
+ df$drop_empty_groups <- query$drop_empty_groups
+ }
+ }
+ df
+}
+
+collapse.arrow_dplyr_query <- function(x, ...) {
+ # Figure out what schema will result from the query
+ x$schema <- implicit_schema(x)
+ # Nest inside a new arrow_dplyr_query (and keep groups)
+ restore_dplyr_features(arrow_dplyr_query(x), x)
+}
+collapse.Dataset <- collapse.ArrowTabular <- function(x, ...) {
+ arrow_dplyr_query(x)
+}
+
+implicit_schema <- function(.data) {
+ .data <- ensure_group_vars(.data)
+ old_schm <- .data$.data$schema
+
+ if (is.null(.data$aggregations)) {
+ new_fields <- map(.data$selected_columns, ~ .$type(old_schm))
+ if (!is.null(.data$join) && !(.data$join$type %in% JoinType[1:4])) {
+ # Add cols from right side, except for semi/anti joins
+ right_cols <- .data$join$right_data$selected_columns
+ new_fields <- c(new_fields, map(
+ right_cols[setdiff(names(right_cols), .data$join$by)],
+ ~ .$type(.data$join$right_data$.data$schema)
+ ))
+ }
+ } else {
+ new_fields <- map(summarize_projection(.data), ~ .$type(old_schm))
+ # * Put group_by_vars first (this can't be done by summarize,
+ # they have to be last per the aggregate node signature,
+ # and they get projected to this order after aggregation)
+ # * Infer the output types from the aggregations
+ group_fields <- new_fields[.data$group_by_vars]
+ hash <- length(.data$group_by_vars) > 0
+ agg_fields <- imap(
+ new_fields[setdiff(names(new_fields), .data$group_by_vars)],
+ ~ output_type(.data$aggregations[[.y]][["fun"]], .x, hash)
+ )
+ new_fields <- c(group_fields, agg_fields)
+ }
+ schema(!!!new_fields)
+}
diff --git a/src/arrow/r/R/dplyr-count.R b/src/arrow/r/R/dplyr-count.R
new file mode 100644
index 000000000..c567c285f
--- /dev/null
+++ b/src/arrow/r/R/dplyr-count.R
@@ -0,0 +1,60 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# The following S3 methods are registered on load if dplyr is present
+
+count.arrow_dplyr_query <- function(x, ..., wt = NULL, sort = FALSE, name = NULL) {
+ if (!missing(...)) {
+ out <- dplyr::group_by(x, ..., .add = TRUE)
+ } else {
+ out <- x
+ }
+ out <- dplyr::tally(out, wt = {{ wt }}, sort = sort, name = name)
+
+ # Restore original group vars
+ gv <- dplyr::group_vars(x)
+ if (length(gv)) {
+ out$group_by_vars <- gv
+ }
+
+ out
+}
+
+count.Dataset <- count.ArrowTabular <- count.arrow_dplyr_query
+
+#' @importFrom rlang sym :=
+tally.arrow_dplyr_query <- function(x, wt = NULL, sort = FALSE, name = NULL) {
+ check_name <- utils::getFromNamespace("check_name", "dplyr")
+ name <- check_name(name, dplyr::group_vars(x))
+
+ if (quo_is_null(enquo(wt))) {
+ out <- dplyr::summarize(x, !!name := n())
+ } else {
+ out <- dplyr::summarize(x, !!name := sum({{ wt }}, na.rm = TRUE))
+ }
+
+ if (sort) {
+ dplyr::arrange(out, desc(!!sym(name)))
+ } else {
+ out
+ }
+}
+
+tally.Dataset <- tally.ArrowTabular <- tally.arrow_dplyr_query
+
+# we don't want to depend on dplyr, but we reference these above
+utils::globalVariables(c("n", "desc"))
diff --git a/src/arrow/r/R/dplyr-distinct.R b/src/arrow/r/R/dplyr-distinct.R
new file mode 100644
index 000000000..5dfcb641f
--- /dev/null
+++ b/src/arrow/r/R/dplyr-distinct.R
@@ -0,0 +1,46 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# The following S3 methods are registered on load if dplyr is present
+
+distinct.arrow_dplyr_query <- function(.data, ..., .keep_all = FALSE) {
+ if (.keep_all == TRUE) {
+ # After ARROW-13993 is merged, we can implement this (ARROW-14045)
+ arrow_not_supported("`distinct()` with `.keep_all = TRUE`")
+ }
+
+ original_gv <- dplyr::group_vars(.data)
+ if (length(quos(...))) {
+ # group_by() calls mutate() if there are any expressions in ...
+ .data <- dplyr::group_by(.data, ..., .add = TRUE)
+ # `data %>% group_by() %>% summarise()` returns cols in order supplied
+ # but distinct() returns cols in dataset order, so sort group vars
+ .data$group_by_vars <- names(.data)[names(.data) %in% .data$group_by_vars]
+ } else {
+ # distinct() with no vars specified means distinct across all cols
+ .data <- dplyr::group_by(.data, !!!syms(names(.data)))
+ }
+
+ out <- dplyr::summarize(.data, .groups = "drop")
+ # distinct() doesn't modify group by vars, so restore the original ones
+ if (length(original_gv)) {
+ out$group_by_vars <- original_gv
+ }
+ out
+}
+
+distinct.Dataset <- distinct.ArrowTabular <- distinct.arrow_dplyr_query
diff --git a/src/arrow/r/R/dplyr-eval.R b/src/arrow/r/R/dplyr-eval.R
new file mode 100644
index 000000000..9d944ab80
--- /dev/null
+++ b/src/arrow/r/R/dplyr-eval.R
@@ -0,0 +1,123 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+arrow_eval <- function(expr, mask) {
+ # filter(), mutate(), etc. work by evaluating the quoted `exprs` to generate Expressions
+ # with references to Arrays (if .data is Table/RecordBatch) or Fields (if
+ # .data is a Dataset).
+
+ # This yields an Expression as long as the `exprs` are implemented in Arrow.
+ # Otherwise, it returns a try-error
+ tryCatch(eval_tidy(expr, mask), error = function(e) {
+ # Look for the cases where bad input was given, i.e. this would fail
+ # in regular dplyr anyway, and let those raise those as errors;
+ # else, for things not supported by Arrow return a "try-error",
+ # which we'll handle differently
+ msg <- conditionMessage(e)
+ if (getOption("arrow.debug", FALSE)) print(msg)
+ patterns <- .cache$i18ized_error_pattern
+ if (is.null(patterns)) {
+ patterns <- i18ize_error_messages()
+ # Memoize it
+ .cache$i18ized_error_pattern <- patterns
+ }
+ if (grepl(patterns, msg)) {
+ stop(e)
+ }
+
+ out <- structure(msg, class = "try-error", condition = e)
+ if (grepl("not supported.*Arrow", msg) || getOption("arrow.debug", FALSE)) {
+ # One of ours. Mark it so that consumers can handle it differently
+ class(out) <- c("arrow-try-error", class(out))
+ }
+ invisible(out)
+ })
+}
+
+handle_arrow_not_supported <- function(err, lab) {
+ # Look for informative message from the Arrow function version (see above)
+ if (inherits(err, "arrow-try-error")) {
+ # Include it if found
+ paste0("In ", lab, ", ", as.character(err))
+ } else {
+ # Otherwise be opaque (the original error is probably not useful)
+ paste("Expression", lab, "not supported in Arrow")
+ }
+}
+
+i18ize_error_messages <- function() {
+ # Figure out what the error messages will be with this LANGUAGE
+ # so that we can look for them
+ out <- list(
+ obj = tryCatch(eval(parse(text = "X_____X")), error = function(e) conditionMessage(e)),
+ fun = tryCatch(eval(parse(text = "X_____X()")), error = function(e) conditionMessage(e))
+ )
+ paste(map(out, ~ sub("X_____X", ".*", .)), collapse = "|")
+}
+
+# Helper to raise a common error
+arrow_not_supported <- function(msg) {
+ # TODO: raise a classed error?
+ stop(paste(msg, "not supported by Arrow"), call. = FALSE)
+}
+
+# Create a data mask for evaluating a dplyr expression
+arrow_mask <- function(.data, aggregation = FALSE) {
+ f_env <- new_environment(.cache$functions)
+
+ # Add functions that need to error hard and clear.
+ # Some R functions will still try to evaluate on an Expression
+ # and return NA with a warning
+ fail <- function(...) stop("Not implemented")
+ for (f in c("mean", "sd")) {
+ f_env[[f]] <- fail
+ }
+
+ if (aggregation) {
+ # This should probably be done with an environment inside an environment
+ # but a first attempt at that had scoping problems (ARROW-13499)
+ for (f in names(agg_funcs)) {
+ f_env[[f]] <- agg_funcs[[f]]
+ }
+ }
+
+ # Assign the schema to the expressions
+ map(.data$selected_columns, ~ (.$schema <- .data$.data$schema))
+
+ # Add the column references and make the mask
+ out <- new_data_mask(
+ new_environment(.data$selected_columns, parent = f_env),
+ f_env
+ )
+ # Then insert the data pronoun
+ # TODO: figure out what rlang::as_data_pronoun does/why we should use it
+ # (because if we do we get `Error: Can't modify the data pronoun` in mutate())
+ out$.data <- .data$selected_columns
+ out
+}
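+
+# Illustrative sketch (not run; assumes `.data` is an arrow_dplyr_query with a
+# column `x`): evaluate a quosure against the mask to get an Expression, or a
+# "try-error" that callers such as filter() and arrange() handle:
+#   mask <- arrow_mask(.data)
+#   arrow_eval(quo(x > 5L), mask)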
+
+format_expr <- function(x) {
+ if (is_quosure(x)) {
+ x <- quo_get_expr(x)
+ }
+ out <- deparse(x)
+ if (length(out) > 1) {
+ # Add ellipses because we are going to truncate
+ out[1] <- paste0(out[1], "...")
+ }
+ head(out, 1)
+}
diff --git a/src/arrow/r/R/dplyr-filter.R b/src/arrow/r/R/dplyr-filter.R
new file mode 100644
index 000000000..3c8c08ea5
--- /dev/null
+++ b/src/arrow/r/R/dplyr-filter.R
@@ -0,0 +1,91 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+# The following S3 methods are registered on load if dplyr is present
+
+filter.arrow_dplyr_query <- function(.data, ..., .preserve = FALSE) {
+ # TODO something with the .preserve argument
+ filts <- quos(...)
+ if (length(filts) == 0) {
+ # Nothing to do
+ return(.data)
+ }
+
+ .data <- as_adq(.data)
+ # tidy-eval the filter expressions inside an Arrow data_mask
+ filters <- lapply(filts, arrow_eval, arrow_mask(.data))
+ bad_filters <- map_lgl(filters, ~ inherits(., "try-error"))
+ if (any(bad_filters)) {
+ # This is similar to abandon_ship() except that the filter eval is
+ # vectorized, and we apply filters that _did_ work before abandoning ship
+ # with the rest
+ expr_labs <- map_chr(filts[bad_filters], format_expr)
+ if (query_on_dataset(.data)) {
+ # Abort. We don't want to auto-collect if this is a Dataset because that
+ # could blow up, too big.
+ stop(
+ "Filter expression not supported for Arrow Datasets: ",
+ oxford_paste(expr_labs, quote = FALSE),
+ "\nCall collect() first to pull data into R.",
+ call. = FALSE
+ )
+ } else {
+ arrow_errors <- map2_chr(
+ filters[bad_filters], expr_labs,
+ handle_arrow_not_supported
+ )
+ if (length(arrow_errors) == 1) {
+ msg <- paste0(arrow_errors, "; ")
+ } else {
+ msg <- paste0("* ", arrow_errors, "\n", collapse = "")
+ }
+ warning(
+ msg, "pulling data into R",
+ immediate. = TRUE,
+ call. = FALSE
+ )
+ # Set any valid filters first, then collect and then apply the invalid ones in R
+ .data <- set_filters(.data, filters[!bad_filters])
+ return(dplyr::filter(dplyr::collect(.data), !!!filts[bad_filters]))
+ }
+ }
+
+ set_filters(.data, filters)
+}
+filter.Dataset <- filter.ArrowTabular <- filter.arrow_dplyr_query
+
+set_filters <- function(.data, expressions) {
+ if (length(expressions)) {
+ if (is_list_of(expressions, "Expression")) {
+ # expressions is a list of Expressions. AND them together and set them on .data
+ new_filter <- Reduce("&", expressions)
+ } else if (inherits(expressions, "Expression")) {
+ new_filter <- expressions
+ } else {
+ stop("filter expressions must be either an expression or a list of expressions", call. = FALSE)
+ }
+
+ if (isTRUE(.data$filtered_rows)) {
+ # TRUE is default (i.e. no filter yet), so we don't need to & with it
+ .data$filtered_rows <- new_filter
+ } else {
+ .data$filtered_rows <- .data$filtered_rows & new_filter
+ }
+ }
+ .data
+}
diff --git a/src/arrow/r/R/dplyr-functions.R b/src/arrow/r/R/dplyr-functions.R
new file mode 100644
index 000000000..717cdae96
--- /dev/null
+++ b/src/arrow/r/R/dplyr-functions.R
@@ -0,0 +1,1087 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+#' @include expression.R
+NULL
+
+# This environment is an internal cache for things including data mask functions
+# We'll populate it at package load time.
+.cache <- NULL
+init_env <- function() {
+ .cache <<- new.env(hash = TRUE)
+}
+init_env()
+
+# nse_funcs is a list of functions that operate on (and return) Expressions
+# These will be the basis for a data_mask inside dplyr methods
+# and will be added to .cache at package load time
+
+# Start with mappings from R function name spellings
+nse_funcs <- lapply(set_names(names(.array_function_map)), function(operator) {
+ force(operator)
+ function(...) build_expr(operator, ...)
+})
+
+# Now add functions to that list where the mapping from R to Arrow isn't 1:1
+# Each of these functions should have the same signature as the R function
+# they're replacing.
+#
+# When to use `build_expr()` vs. `Expression$create()`?
+#
+# Use `build_expr()` if you need to
+# (1) map R function names to Arrow C++ functions
+# (2) wrap R inputs (vectors) as Array/Scalar
+#
+# `Expression$create()` is lower level. Most of the functions below use it
+# because they manage the preparation of the user-provided inputs
+# and don't need to wrap scalars
+
+nse_funcs$cast <- function(x, target_type, safe = TRUE, ...) {
+ opts <- cast_options(safe, ...)
+ opts$to_type <- as_type(target_type)
+ Expression$create("cast", x, options = opts)
+}
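+
+# Illustrative sketch (not run): cast a scalar Expression to int32, allowing
+# float truncation via the cast_options() arguments forwarded through `...`:
+#   nse_funcs$cast(Expression$scalar(1.5), int32(), allow_float_truncate = TRUE)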
+
+nse_funcs$coalesce <- function(...) {
+ args <- list2(...)
+ if (length(args) < 1) {
+ abort("At least one argument must be supplied to coalesce()")
+ }
+
+ # Treat NaN like NA for consistency with dplyr::coalesce(), but if *all*
+ # the values are NaN, we should return NaN, not NA, so don't replace
+ # NaN with NA in the final (or only) argument
+ # TODO: if an option is added to the coalesce kernel to treat NaN as NA,
+ # use that to simplify the code here (ARROW-13389)
+ attr(args[[length(args)]], "last") <- TRUE
+ args <- lapply(args, function(arg) {
+ last_arg <- is.null(attr(arg, "last"))
+ attr(arg, "last") <- NULL
+
+ if (!inherits(arg, "Expression")) {
+ arg <- Expression$scalar(arg)
+ }
+
+ # coalesce doesn't yet support factors/dictionaries
+ # TODO: remove this after ARROW-14167 is merged
+ if (nse_funcs$is.factor(arg)) {
+ warning("Dictionaries (in R: factors) are currently converted to strings (characters) in coalesce", call. = FALSE)
+ }
+
+ if (last_arg && arg$type_id() %in% TYPES_WITH_NAN) {
+      # store the NA_real_ in the same type as arg to avoid casting
+ # smaller float types to larger float types
+ NA_expr <- Expression$scalar(Scalar$create(NA_real_, type = arg$type()))
+ Expression$create("if_else", Expression$create("is_nan", arg), NA_expr, arg)
+ } else {
+ arg
+ }
+ })
+ Expression$create("coalesce", args = args)
+}
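+
+# A small check of the intended semantics (hypothetical sketch): with dplyr on
+# plain R vectors, coalesce() treats NaN as missing, except that an all-NaN
+# input stays NaN - the behavior the binding above tries to match.
+if (FALSE) {
+  dplyr::coalesce(c(NA, NaN, 2), 0) # c(0, 0, 2)
+  dplyr::coalesce(NaN, NaN) # NaN, not NA
+}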
+
+nse_funcs$is.na <- function(x) {
+ build_expr("is_null", x, options = list(nan_is_null = TRUE))
+}
+
+nse_funcs$is.nan <- function(x) {
+ if (is.double(x) || (inherits(x, "Expression") &&
+ x$type_id() %in% TYPES_WITH_NAN)) {
+ # TODO: if an option is added to the is_nan kernel to treat NA as NaN,
+ # use that to simplify the code here (ARROW-13366)
+ build_expr("is_nan", x) & build_expr("is_valid", x)
+ } else {
+ Expression$scalar(FALSE)
+ }
+}
+
+nse_funcs$is <- function(object, class2) {
+ if (is.string(class2)) {
+ switch(class2,
+ # for R data types, pass off to is.*() functions
+ character = nse_funcs$is.character(object),
+ numeric = nse_funcs$is.numeric(object),
+ integer = nse_funcs$is.integer(object),
+ integer64 = nse_funcs$is.integer64(object),
+ logical = nse_funcs$is.logical(object),
+ factor = nse_funcs$is.factor(object),
+ list = nse_funcs$is.list(object),
+ # for Arrow data types, compare class2 with object$type()$ToString(),
+ # but first strip off any parameters to only compare the top-level data
+ # type, and canonicalize class2
+ sub("^([^([<]+).*$", "\\1", object$type()$ToString()) ==
+ canonical_type_str(class2)
+ )
+ } else if (inherits(class2, "DataType")) {
+ object$type() == as_type(class2)
+ } else {
+ stop("Second argument to is() is not a string or DataType", call. = FALSE)
+ }
+}
+
+nse_funcs$dictionary_encode <- function(x,
+ null_encoding_behavior = c("mask", "encode")) {
+ behavior <- toupper(match.arg(null_encoding_behavior))
+ null_encoding_behavior <- NullEncodingBehavior[[behavior]]
+ Expression$create(
+ "dictionary_encode",
+ x,
+ options = list(null_encoding_behavior = null_encoding_behavior)
+ )
+}
+
+nse_funcs$between <- function(x, left, right) {
+ x >= left & x <= right
+}
+
+nse_funcs$is.finite <- function(x) {
+ is_fin <- Expression$create("is_finite", x)
+ # for compatibility with base::is.finite(), return FALSE for NA_real_
+ is_fin & !nse_funcs$is.na(is_fin)
+}
+
+nse_funcs$is.infinite <- function(x) {
+ is_inf <- Expression$create("is_inf", x)
+ # for compatibility with base::is.infinite(), return FALSE for NA_real_
+ is_inf & !nse_funcs$is.na(is_inf)
+}
+
+# as.* type casting functions
+# as.factor() is mapped in expression.R
+nse_funcs$as.character <- function(x) {
+ Expression$create("cast", x, options = cast_options(to_type = string()))
+}
+nse_funcs$as.double <- function(x) {
+ Expression$create("cast", x, options = cast_options(to_type = float64()))
+}
+nse_funcs$as.integer <- function(x) {
+ Expression$create(
+ "cast",
+ x,
+ options = cast_options(
+ to_type = int32(),
+ allow_float_truncate = TRUE,
+ allow_decimal_truncate = TRUE
+ )
+ )
+}
+nse_funcs$as.integer64 <- function(x) {
+ Expression$create(
+ "cast",
+ x,
+ options = cast_options(
+ to_type = int64(),
+ allow_float_truncate = TRUE,
+ allow_decimal_truncate = TRUE
+ )
+ )
+}
+nse_funcs$as.logical <- function(x) {
+ Expression$create("cast", x, options = cast_options(to_type = boolean()))
+}
+nse_funcs$as.numeric <- function(x) {
+ Expression$create("cast", x, options = cast_options(to_type = float64()))
+}
+
+# is.* type functions
+nse_funcs$is.character <- function(x) {
+ is.character(x) || (inherits(x, "Expression") &&
+ x$type_id() %in% Type[c("STRING", "LARGE_STRING")])
+}
+nse_funcs$is.numeric <- function(x) {
+ is.numeric(x) || (inherits(x, "Expression") && x$type_id() %in% Type[c(
+ "UINT8", "INT8", "UINT16", "INT16", "UINT32", "INT32",
+ "UINT64", "INT64", "HALF_FLOAT", "FLOAT", "DOUBLE",
+ "DECIMAL", "DECIMAL256"
+ )])
+}
+nse_funcs$is.double <- function(x) {
+ is.double(x) || (inherits(x, "Expression") && x$type_id() == Type["DOUBLE"])
+}
+nse_funcs$is.integer <- function(x) {
+ is.integer(x) || (inherits(x, "Expression") && x$type_id() %in% Type[c(
+ "UINT8", "INT8", "UINT16", "INT16", "UINT32", "INT32",
+ "UINT64", "INT64"
+ )])
+}
+nse_funcs$is.integer64 <- function(x) {
+ is.integer64(x) || (inherits(x, "Expression") && x$type_id() == Type["INT64"])
+}
+nse_funcs$is.logical <- function(x) {
+ is.logical(x) || (inherits(x, "Expression") && x$type_id() == Type["BOOL"])
+}
+nse_funcs$is.factor <- function(x) {
+ is.factor(x) || (inherits(x, "Expression") && x$type_id() == Type["DICTIONARY"])
+}
+nse_funcs$is.list <- function(x) {
+ is.list(x) || (inherits(x, "Expression") && x$type_id() %in% Type[c(
+ "LIST", "FIXED_SIZE_LIST", "LARGE_LIST"
+ )])
+}
+
+# rlang::is_* type functions
+nse_funcs$is_character <- function(x, n = NULL) {
+ assert_that(is.null(n))
+ nse_funcs$is.character(x)
+}
+nse_funcs$is_double <- function(x, n = NULL, finite = NULL) {
+ assert_that(is.null(n) && is.null(finite))
+ nse_funcs$is.double(x)
+}
+nse_funcs$is_integer <- function(x, n = NULL) {
+ assert_that(is.null(n))
+ nse_funcs$is.integer(x)
+}
+nse_funcs$is_list <- function(x, n = NULL) {
+ assert_that(is.null(n))
+ nse_funcs$is.list(x)
+}
+nse_funcs$is_logical <- function(x, n = NULL) {
+ assert_that(is.null(n))
+ nse_funcs$is.logical(x)
+}
+nse_funcs$is_timestamp <- function(x, n = NULL) {
+ assert_that(is.null(n))
+ inherits(x, "POSIXt") || (inherits(x, "Expression") && x$type_id() %in% Type[c("TIMESTAMP")])
+}
+
+# String functions
+nse_funcs$nchar <- function(x, type = "chars", allowNA = FALSE, keepNA = NA) {
+ if (allowNA) {
+ arrow_not_supported("allowNA = TRUE")
+ }
+ if (is.na(keepNA)) {
+ keepNA <- !identical(type, "width")
+ }
+ if (!keepNA) {
+ # TODO: I think there is a fill_null kernel we could use, set null to 2
+ arrow_not_supported("keepNA = TRUE")
+ }
+ if (identical(type, "bytes")) {
+ Expression$create("binary_length", x)
+ } else {
+ Expression$create("utf8_length", x)
+ }
+}
+
+nse_funcs$paste <- function(..., sep = " ", collapse = NULL, recycle0 = FALSE) {
+ assert_that(
+ is.null(collapse),
+ msg = "paste() with the collapse argument is not yet supported in Arrow"
+ )
+ if (!inherits(sep, "Expression")) {
+ assert_that(!is.na(sep), msg = "Invalid separator")
+ }
+ arrow_string_join_function(NullHandlingBehavior$REPLACE, "NA")(..., sep)
+}
+
+nse_funcs$paste0 <- function(..., collapse = NULL, recycle0 = FALSE) {
+ assert_that(
+ is.null(collapse),
+ msg = "paste0() with the collapse argument is not yet supported in Arrow"
+ )
+ arrow_string_join_function(NullHandlingBehavior$REPLACE, "NA")(..., "")
+}
+
+nse_funcs$str_c <- function(..., sep = "", collapse = NULL) {
+ assert_that(
+ is.null(collapse),
+ msg = "str_c() with the collapse argument is not yet supported in Arrow"
+ )
+ arrow_string_join_function(NullHandlingBehavior$EMIT_NULL)(..., sep)
+}
+
+arrow_string_join_function <- function(null_handling, null_replacement = NULL) {
+ # the `binary_join_element_wise` Arrow C++ compute kernel takes the separator
+ # as the last argument, so pass `sep` as the last dots arg to this function
+ function(...) {
+ args <- lapply(list(...), function(arg) {
+ # handle scalar literal args, and cast all args to string for
+ # consistency with base::paste(), base::paste0(), and stringr::str_c()
+ if (!inherits(arg, "Expression")) {
+ assert_that(
+ length(arg) == 1,
+ msg = "Literal vectors of length != 1 not supported in string concatenation"
+ )
+ Expression$scalar(as.character(arg))
+ } else {
+ nse_funcs$as.character(arg)
+ }
+ })
+ Expression$create(
+ "binary_join_element_wise",
+ args = args,
+ options = list(
+ null_handling = null_handling,
+ null_replacement = null_replacement
+ )
+ )
+ }
+}
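+
+# For example (hypothetical sketch), a call like paste(x, y, sep = "-") reaches
+# this helper as (x, y, "-"), so the separator lands in the last position, which
+# is where binary_join_element_wise expects it. The column names are assumptions.
+if (FALSE) {
+  x <- Expression$field_ref("x")
+  y <- Expression$field_ref("y")
+  arrow_string_join_function(NullHandlingBehavior$REPLACE, "NA")(x, y, "-")
+}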
+
+# Currently, Arrow does not support a locale option for string case conversion
+# functions, in contrast to stringr's API, so the 'locale' argument is only
+# valid with stringr's default value ("en"). The following string functions
+# take a 'locale' option as their second argument:
+# str_to_lower
+# str_to_upper
+# str_to_title
+#
+# Arrow locale will be supported with ARROW-14126
+stop_if_locale_provided <- function(locale) {
+ if (!identical(locale, "en")) {
+ stop("Providing a value for 'locale' other than the default ('en') is not supported by Arrow. ",
+ "To change locale, use 'Sys.setlocale()'",
+ call. = FALSE
+ )
+ }
+}
+
+nse_funcs$str_to_lower <- function(string, locale = "en") {
+ stop_if_locale_provided(locale)
+ Expression$create("utf8_lower", string)
+}
+
+nse_funcs$str_to_upper <- function(string, locale = "en") {
+ stop_if_locale_provided(locale)
+ Expression$create("utf8_upper", string)
+}
+
+nse_funcs$str_to_title <- function(string, locale = "en") {
+ stop_if_locale_provided(locale)
+ Expression$create("utf8_title", string)
+}
+
+nse_funcs$str_trim <- function(string, side = c("both", "left", "right")) {
+ side <- match.arg(side)
+ trim_fun <- switch(side,
+ left = "utf8_ltrim_whitespace",
+ right = "utf8_rtrim_whitespace",
+ both = "utf8_trim_whitespace"
+ )
+ Expression$create(trim_fun, string)
+}
+
+nse_funcs$substr <- function(x, start, stop) {
+ assert_that(
+ length(start) == 1,
+ msg = "`start` must be length 1 - other lengths are not supported in Arrow"
+ )
+ assert_that(
+ length(stop) == 1,
+ msg = "`stop` must be length 1 - other lengths are not supported in Arrow"
+ )
+
+  # substr treats values as if they're on a continuous number line, so a
+  # `start` value of 0 or below effectively points before the first character -
+  # set `start` to 1 here so Arrow mimics this behavior
+ if (start <= 0) {
+ start <- 1
+ }
+
+ # if `stop` is lower than `start`, this is invalid, so set `stop` to
+ # 0 so that an empty string will be returned (consistent with base::substr())
+ if (stop < start) {
+ stop <- 0
+ }
+
+ Expression$create(
+ "utf8_slice_codeunits",
+ x,
+ # we don't need to subtract 1 from `stop` as C++ counts exclusively
+ # which effectively cancels out the difference in indexing between R & C++
+ options = list(start = start - 1L, stop = stop)
+ )
+}
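+
+# Worked example of the index translation (hypothetical sketch): substr(x, 2, 4)
+# selects characters 2 through 4 in R (1-based, inclusive). The options above
+# become start = 1 and stop = 4, and utf8_slice_codeunits (0-based start,
+# exclusive stop) then keeps code units 1, 2, and 3 - the same characters.
+if (FALSE) {
+  nse_funcs$substr(Expression$field_ref("x"), 2, 4) # "x" is an assumed column
+}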
+
+nse_funcs$substring <- function(text, first, last) {
+ nse_funcs$substr(x = text, start = first, stop = last)
+}
+
+nse_funcs$str_sub <- function(string, start = 1L, end = -1L) {
+ assert_that(
+ length(start) == 1,
+ msg = "`start` must be length 1 - other lengths are not supported in Arrow"
+ )
+ assert_that(
+ length(end) == 1,
+ msg = "`end` must be length 1 - other lengths are not supported in Arrow"
+ )
+
+ # In stringr::str_sub, an `end` value of -1 means the end of the string, so
+ # set it to the maximum integer to match this behavior
+ if (end == -1) {
+ end <- .Machine$integer.max
+ }
+
+ # An end value lower than a start value returns an empty string in
+ # stringr::str_sub so set end to 0 here to match this behavior
+ if (end < start) {
+ end <- 0
+ }
+
+ # subtract 1 from `start` because C++ is 0-based and R is 1-based
+ # str_sub treats a `start` value of 0 or 1 as the same thing so don't subtract 1 when `start` == 0
+ # when `start` < 0, both str_sub and utf8_slice_codeunits count backwards from the end
+ if (start > 0) {
+ start <- start - 1L
+ }
+
+ Expression$create(
+ "utf8_slice_codeunits",
+ string,
+ options = list(start = start, stop = end)
+ )
+}
+
+nse_funcs$grepl <- function(pattern, x, ignore.case = FALSE, fixed = FALSE) {
+ arrow_fun <- ifelse(fixed, "match_substring", "match_substring_regex")
+ Expression$create(
+ arrow_fun,
+ x,
+ options = list(pattern = pattern, ignore_case = ignore.case)
+ )
+}
+
+nse_funcs$str_detect <- function(string, pattern, negate = FALSE) {
+ opts <- get_stringr_pattern_options(enexpr(pattern))
+ out <- nse_funcs$grepl(
+ pattern = opts$pattern,
+ x = string,
+ ignore.case = opts$ignore_case,
+ fixed = opts$fixed
+ )
+ if (negate) {
+ out <- !out
+ }
+ out
+}
+
+nse_funcs$str_like <- function(string, pattern, ignore_case = TRUE) {
+ Expression$create(
+ "match_like",
+ string,
+ options = list(pattern = pattern, ignore_case = ignore_case)
+ )
+}
+
+# Encapsulate some common logic for sub/gsub/str_replace/str_replace_all
+arrow_r_string_replace_function <- function(max_replacements) {
+ function(pattern, replacement, x, ignore.case = FALSE, fixed = FALSE) {
+ Expression$create(
+ ifelse(fixed && !ignore.case, "replace_substring", "replace_substring_regex"),
+ x,
+ options = list(
+ pattern = format_string_pattern(pattern, ignore.case, fixed),
+ replacement = format_string_replacement(replacement, ignore.case, fixed),
+ max_replacements = max_replacements
+ )
+ )
+ }
+}
+
+arrow_stringr_string_replace_function <- function(max_replacements) {
+ function(string, pattern, replacement) {
+ opts <- get_stringr_pattern_options(enexpr(pattern))
+ arrow_r_string_replace_function(max_replacements)(
+ pattern = opts$pattern,
+ replacement = replacement,
+ x = string,
+ ignore.case = opts$ignore_case,
+ fixed = opts$fixed
+ )
+ }
+}
+
+nse_funcs$sub <- arrow_r_string_replace_function(1L)
+nse_funcs$gsub <- arrow_r_string_replace_function(-1L)
+nse_funcs$str_replace <- arrow_stringr_string_replace_function(1L)
+nse_funcs$str_replace_all <- arrow_stringr_string_replace_function(-1L)
+
+nse_funcs$strsplit <- function(x,
+ split,
+ fixed = FALSE,
+ perl = FALSE,
+ useBytes = FALSE) {
+ assert_that(is.string(split))
+
+ arrow_fun <- ifelse(fixed, "split_pattern", "split_pattern_regex")
+ # warn when the user specifies both fixed = TRUE and perl = TRUE, for
+ # consistency with the behavior of base::strsplit()
+ if (fixed && perl) {
+ warning("Argument 'perl = TRUE' will be ignored", call. = FALSE)
+ }
+ # since split is not a regex, proceed without any warnings or errors regardless
+ # of the value of perl, for consistency with the behavior of base::strsplit()
+ Expression$create(
+ arrow_fun,
+ x,
+ options = list(pattern = split, reverse = FALSE, max_splits = -1L)
+ )
+}
+
+nse_funcs$str_split <- function(string, pattern, n = Inf, simplify = FALSE) {
+ opts <- get_stringr_pattern_options(enexpr(pattern))
+ arrow_fun <- ifelse(opts$fixed, "split_pattern", "split_pattern_regex")
+ if (opts$ignore_case) {
+ arrow_not_supported("Case-insensitive string splitting")
+ }
+ if (n == 0) {
+ arrow_not_supported("Splitting strings into zero parts")
+ }
+ if (identical(n, Inf)) {
+ n <- 0L
+ }
+ if (simplify) {
+ warning("Argument 'simplify = TRUE' will be ignored", call. = FALSE)
+ }
+ # The max_splits option in the Arrow C++ library controls the maximum number
+ # of places at which the string is split, whereas the argument n to
+ # str_split() controls the maximum number of pieces to return. So we must
+ # subtract 1 from n to get max_splits.
+ Expression$create(
+ arrow_fun,
+ string,
+ options = list(
+ pattern = opts$pattern,
+ reverse = FALSE,
+ max_splits = n - 1L
+ )
+ )
+}
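+
+# For example (hypothetical sketch), str_split(x, ",", n = 3) asks for at most
+# 3 pieces, so the binding passes max_splits = 2 (split in at most 2 places);
+# the default n = Inf becomes 0L above and therefore max_splits = -1, which
+# Arrow treats as "no limit".
+if (FALSE) {
+  nse_funcs$str_split(Expression$field_ref("x"), ",", n = 3) # assumed column "x"
+}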
+
+nse_funcs$pmin <- function(..., na.rm = FALSE) {
+ build_expr(
+ "min_element_wise",
+ ...,
+ options = list(skip_nulls = na.rm)
+ )
+}
+
+nse_funcs$pmax <- function(..., na.rm = FALSE) {
+ build_expr(
+ "max_element_wise",
+ ...,
+ options = list(skip_nulls = na.rm)
+ )
+}
+
+nse_funcs$str_pad <- function(string, width, side = c("left", "right", "both"), pad = " ") {
+ assert_that(is_integerish(width))
+ side <- match.arg(side)
+ assert_that(is.string(pad))
+
+ if (side == "left") {
+ pad_func <- "utf8_lpad"
+ } else if (side == "right") {
+ pad_func <- "utf8_rpad"
+ } else if (side == "both") {
+ pad_func <- "utf8_center"
+ }
+
+ Expression$create(
+ pad_func,
+ string,
+ options = list(width = width, padding = pad)
+ )
+}
+
+nse_funcs$startsWith <- function(x, prefix) {
+ Expression$create(
+ "starts_with",
+ x,
+ options = list(pattern = prefix)
+ )
+}
+
+nse_funcs$endsWith <- function(x, suffix) {
+ Expression$create(
+ "ends_with",
+ x,
+ options = list(pattern = suffix)
+ )
+}
+
+nse_funcs$str_starts <- function(string, pattern, negate = FALSE) {
+ opts <- get_stringr_pattern_options(enexpr(pattern))
+ if (opts$fixed) {
+ out <- nse_funcs$startsWith(x = string, prefix = opts$pattern)
+ } else {
+ out <- nse_funcs$grepl(pattern = paste0("^", opts$pattern), x = string, fixed = FALSE)
+ }
+
+ if (negate) {
+ out <- !out
+ }
+ out
+}
+
+nse_funcs$str_ends <- function(string, pattern, negate = FALSE) {
+ opts <- get_stringr_pattern_options(enexpr(pattern))
+ if (opts$fixed) {
+ out <- nse_funcs$endsWith(x = string, suffix = opts$pattern)
+ } else {
+ out <- nse_funcs$grepl(pattern = paste0(opts$pattern, "$"), x = string, fixed = FALSE)
+ }
+
+ if (negate) {
+ out <- !out
+ }
+ out
+}
+
+nse_funcs$str_count <- function(string, pattern) {
+ opts <- get_stringr_pattern_options(enexpr(pattern))
+ if (!is.string(pattern)) {
+ arrow_not_supported("`pattern` must be a length 1 character vector; other values")
+ }
+ arrow_fun <- ifelse(opts$fixed, "count_substring", "count_substring_regex")
+ Expression$create(
+ arrow_fun,
+ string,
+ options = list(pattern = opts$pattern, ignore_case = opts$ignore_case)
+ )
+}
+
+# String function helpers
+
+# format `pattern` as needed for case insensitivity and literal matching by RE2
+format_string_pattern <- function(pattern, ignore.case, fixed) {
+ # Arrow lacks native support for case-insensitive literal string matching and
+ # replacement, so we use the regular expression engine (RE2) to do this.
+ # https://github.com/google/re2/wiki/Syntax
+ if (ignore.case) {
+ if (fixed) {
+ # Everything between "\Q" and "\E" is treated as literal text.
+ # If the search text contains any literal "\E" strings, make them
+ # lowercase so they won't signal the end of the literal text:
+ pattern <- gsub("\\E", "\\e", pattern, fixed = TRUE)
+ pattern <- paste0("\\Q", pattern, "\\E")
+ }
+ # Prepend "(?i)" for case-insensitive matching
+ pattern <- paste0("(?i)", pattern)
+ }
+ pattern
+}
+
+# format `replacement` as needed for literal replacement by RE2
+format_string_replacement <- function(replacement, ignore.case, fixed) {
+ # Arrow lacks native support for case-insensitive literal string
+ # replacement, so we use the regular expression engine (RE2) to do this.
+ # https://github.com/google/re2/wiki/Syntax
+ if (ignore.case && fixed) {
+ # Escape single backslashes in the regex replacement text so they are
+ # interpreted as literal backslashes:
+ replacement <- gsub("\\", "\\\\", replacement, fixed = TRUE)
+ }
+ replacement
+}
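+
+# Quick illustration of the two helpers above (hypothetical sketch of the
+# expected return values):
+if (FALSE) {
+  format_string_pattern("a.b", ignore.case = TRUE, fixed = TRUE)
+  # "(?i)\\Qa.b\\E" - literal match, made case-insensitive via RE2
+  format_string_pattern("a.b", ignore.case = TRUE, fixed = FALSE)
+  # "(?i)a.b" - still a regex, only the case-insensitivity flag is added
+  format_string_replacement("a\\b", ignore.case = TRUE, fixed = TRUE)
+  # backslash doubled so RE2 inserts it literally (prints as "a\\\\b")
+}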
+
+#' Get `stringr` pattern options
+#'
+#' This function assigns definitions for the `stringr` pattern modifier
+#' functions (`fixed()`, `regex()`, etc.) inside itself, and uses them to
+#' evaluate the quoted expression `pattern`, returning a list that is used
+#' to control pattern matching behavior in internal `arrow` functions.
+#'
+#' @param pattern Unevaluated expression containing a call to a `stringr`
+#' pattern modifier function
+#'
+#' @return List containing elements `pattern`, `fixed`, and `ignore_case`
+#' @keywords internal
+get_stringr_pattern_options <- function(pattern) {
+ fixed <- function(pattern, ignore_case = FALSE, ...) {
+ check_dots(...)
+ list(pattern = pattern, fixed = TRUE, ignore_case = ignore_case)
+ }
+ regex <- function(pattern, ignore_case = FALSE, ...) {
+ check_dots(...)
+ list(pattern = pattern, fixed = FALSE, ignore_case = ignore_case)
+ }
+ coll <- function(...) {
+ arrow_not_supported("Pattern modifier `coll()`")
+ }
+ boundary <- function(...) {
+ arrow_not_supported("Pattern modifier `boundary()`")
+ }
+ check_dots <- function(...) {
+ dots <- list(...)
+ if (length(dots)) {
+ warning(
+ "Ignoring pattern modifier ",
+ ngettext(length(dots), "argument ", "arguments "),
+ "not supported in Arrow: ",
+ oxford_paste(names(dots)),
+ call. = FALSE
+ )
+ }
+ }
+ ensure_opts <- function(opts) {
+ if (is.character(opts)) {
+ opts <- list(pattern = opts, fixed = FALSE, ignore_case = FALSE)
+ }
+ opts
+ }
+ ensure_opts(eval(pattern))
+}
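+
+# For example (hypothetical sketch), the helper above normalizes these inputs:
+if (FALSE) {
+  get_stringr_pattern_options(quote(fixed("a.b", ignore_case = TRUE)))
+  # list(pattern = "a.b", fixed = TRUE, ignore_case = TRUE)
+  get_stringr_pattern_options(quote("a.b"))
+  # list(pattern = "a.b", fixed = FALSE, ignore_case = FALSE)
+}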
+
+#' Does this string contain regex metacharacters?
+#'
+#' @param string String to be tested
+#' @keywords internal
+#' @return Logical: does `string` contain regex metacharacters?
+contains_regex <- function(string) {
+ grepl("[.\\|()[{^$*+?]", string)
+}
+
+nse_funcs$strptime <- function(x, format = "%Y-%m-%d %H:%M:%S", tz = NULL, unit = "ms") {
+  # Arrow uses a unit for time parsing; strptime() does not.
+  # Arrow also has no default options for strptime (format, unit), so we
+  # default to format = "%Y-%m-%d %H:%M:%S" and unit = "ms" (MILLI / 1L)
+  # (ARROW-12809)
+
+ # ParseTimestampStrptime currently ignores the timezone information (ARROW-12820).
+ # Stop if tz is provided.
+ if (is.character(tz)) {
+ arrow_not_supported("Time zone argument")
+ }
+
+ unit <- make_valid_time_unit(unit, c(valid_time64_units, valid_time32_units))
+
+ Expression$create("strptime", x, options = list(format = format, unit = unit))
+}
+
+nse_funcs$strftime <- function(x, format = "", tz = "", usetz = FALSE) {
+ if (usetz) {
+ format <- paste(format, "%Z")
+ }
+ if (tz == "") {
+ tz <- Sys.timezone()
+ }
+  # Arrow's strftime prints in the timezone of the timestamp. To match R's strftime
+  # behavior we first cast the timestamp to the desired timezone. This is a
+  # metadata-only change.
+ if (nse_funcs$is_timestamp(x)) {
+ ts <- Expression$create("cast", x, options = list(to_type = timestamp(x$type()$unit(), tz)))
+ } else {
+ ts <- x
+ }
+ Expression$create("strftime", ts, options = list(format = format, locale = Sys.getlocale("LC_TIME")))
+}
+
+nse_funcs$format_ISO8601 <- function(x, usetz = FALSE, precision = NULL, ...) {
+ ISO8601_precision_map <-
+ list(
+ y = "%Y",
+ ym = "%Y-%m",
+ ymd = "%Y-%m-%d",
+ ymdh = "%Y-%m-%dT%H",
+ ymdhm = "%Y-%m-%dT%H:%M",
+ ymdhms = "%Y-%m-%dT%H:%M:%S"
+ )
+
+ if (is.null(precision)) {
+ precision <- "ymdhms"
+ }
+ if (!precision %in% names(ISO8601_precision_map)) {
+ abort(
+ paste(
+ "`precision` must be one of the following values:",
+ paste(names(ISO8601_precision_map), collapse = ", "),
+ "\nValue supplied was: ",
+ precision
+ )
+ )
+ }
+ format <- ISO8601_precision_map[[precision]]
+ if (usetz) {
+ format <- paste0(format, "%z")
+ }
+ Expression$create("strftime", x, options = list(format = format, locale = "C"))
+}
+
+nse_funcs$second <- function(x) {
+ Expression$create("add", Expression$create("second", x), Expression$create("subsecond", x))
+}
+
+nse_funcs$trunc <- function(x, ...) {
+ # accepts and ignores ... for consistency with base::trunc()
+ build_expr("trunc", x)
+}
+
+nse_funcs$round <- function(x, digits = 0) {
+ build_expr(
+ "round",
+ x,
+ options = list(ndigits = digits, round_mode = RoundMode$HALF_TO_EVEN)
+ )
+}
+
+nse_funcs$wday <- function(x,
+ label = FALSE,
+ abbr = TRUE,
+ week_start = getOption("lubridate.week.start", 7),
+ locale = Sys.getlocale("LC_TIME")) {
+ if (label) {
+ if (abbr) {
+ format <- "%a"
+ } else {
+ format <- "%A"
+ }
+ return(Expression$create("strftime", x, options = list(format = format, locale = locale)))
+ }
+
+ Expression$create("day_of_week", x, options = list(count_from_zero = FALSE, week_start = week_start))
+}
+
+nse_funcs$log <- nse_funcs$logb <- function(x, base = exp(1)) {
+  # Like other binary functions, either `x` or `base` can be an Expression or a length-1 double
+ if (is.numeric(x) && length(x) == 1) {
+ x <- Expression$scalar(x)
+ } else if (!inherits(x, "Expression")) {
+ arrow_not_supported("x must be a column or a length-1 numeric; other values")
+ }
+
+ # handle `base` differently because we use the simpler ln, log2, and log10
+ # functions for specific scalar base values
+ if (inherits(base, "Expression")) {
+ return(Expression$create("logb_checked", x, base))
+ }
+
+ if (!is.numeric(base) || length(base) != 1) {
+ arrow_not_supported("base must be a column or a length-1 numeric; other values")
+ }
+
+ if (base == exp(1)) {
+ return(Expression$create("ln_checked", x))
+ }
+
+ if (base == 2) {
+ return(Expression$create("log2_checked", x))
+ }
+
+ if (base == 10) {
+ return(Expression$create("log10_checked", x))
+ }
+
+ Expression$create("logb_checked", x, Expression$scalar(base))
+}
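+
+# For example (hypothetical sketch), the value of `base` picks the kernel:
+if (FALSE) {
+  x <- Expression$field_ref("x") # assumed numeric column
+  nse_funcs$log(x) # ln_checked
+  nse_funcs$log(x, base = 2) # log2_checked
+  nse_funcs$log(x, base = 7) # logb_checked with a scalar base of 7
+}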
+
+nse_funcs$if_else <- function(condition, true, false, missing = NULL) {
+ if (!is.null(missing)) {
+ return(nse_funcs$if_else(
+ nse_funcs$is.na(condition),
+ missing,
+ nse_funcs$if_else(condition, true, false)
+ ))
+ }
+
+ # if_else doesn't yet support factors/dictionaries
+ # TODO: remove this after ARROW-13358 is merged
+ warn_types <- nse_funcs$is.factor(true) | nse_funcs$is.factor(false)
+ if (warn_types) {
+ warning(
+ "Dictionaries (in R: factors) are currently converted to strings (characters) ",
+ "in if_else and ifelse",
+ call. = FALSE
+ )
+ }
+
+ build_expr("if_else", condition, true, false)
+}
+
+# Although base R ifelse() allows `yes` and `no` to be different classes, this
+# binding simply delegates to if_else(), so Arrow requires them to be of
+# compatible types
+nse_funcs$ifelse <- function(test, yes, no) {
+ nse_funcs$if_else(condition = test, true = yes, false = no)
+}
+
+nse_funcs$case_when <- function(...) {
+ formulas <- list2(...)
+ n <- length(formulas)
+ if (n == 0) {
+ abort("No cases provided in case_when()")
+ }
+ query <- vector("list", n)
+ value <- vector("list", n)
+ mask <- caller_env()
+ for (i in seq_len(n)) {
+ f <- formulas[[i]]
+ if (!inherits(f, "formula")) {
+ abort("Each argument to case_when() must be a two-sided formula")
+ }
+ query[[i]] <- arrow_eval(f[[2]], mask)
+ value[[i]] <- arrow_eval(f[[3]], mask)
+ if (!nse_funcs$is.logical(query[[i]])) {
+ abort("Left side of each formula in case_when() must be a logical expression")
+ }
+ if (inherits(value[[i]], "try-error")) {
+ abort(handle_arrow_not_supported(value[[i]], format_expr(f[[3]])))
+ }
+ }
+ build_expr(
+ "case_when",
+ args = c(
+ build_expr(
+ "make_struct",
+ args = query,
+ options = list(field_names = as.character(seq_along(query)))
+ ),
+ value
+ )
+ )
+}
+
+# Aggregation functions
+# These all return a list of:
+# @param fun string function name
+# @param data Expression (these are all currently a single field)
+# @param options list of function options, as passed to call_function
+# For group-by aggregation, `hash_` gets prepended to the function name.
+# So to see a list of available hash aggregation functions,
+# you can use list_compute_functions("^hash_")
+agg_funcs <- list()
+agg_funcs$sum <- function(..., na.rm = FALSE) {
+ list(
+ fun = "sum",
+ data = ensure_one_arg(list2(...), "sum"),
+ options = list(skip_nulls = na.rm, min_count = 0L)
+ )
+}
+agg_funcs$any <- function(..., na.rm = FALSE) {
+ list(
+ fun = "any",
+ data = ensure_one_arg(list2(...), "any"),
+ options = list(skip_nulls = na.rm, min_count = 0L)
+ )
+}
+agg_funcs$all <- function(..., na.rm = FALSE) {
+ list(
+ fun = "all",
+ data = ensure_one_arg(list2(...), "all"),
+ options = list(skip_nulls = na.rm, min_count = 0L)
+ )
+}
+agg_funcs$mean <- function(x, na.rm = FALSE) {
+ list(
+ fun = "mean",
+ data = x,
+ options = list(skip_nulls = na.rm, min_count = 0L)
+ )
+}
+agg_funcs$sd <- function(x, na.rm = FALSE, ddof = 1) {
+ list(
+ fun = "stddev",
+ data = x,
+ options = list(skip_nulls = na.rm, min_count = 0L, ddof = ddof)
+ )
+}
+agg_funcs$var <- function(x, na.rm = FALSE, ddof = 1) {
+ list(
+ fun = "variance",
+ data = x,
+ options = list(skip_nulls = na.rm, min_count = 0L, ddof = ddof)
+ )
+}
+agg_funcs$quantile <- function(x, probs, na.rm = FALSE) {
+ if (length(probs) != 1) {
+ arrow_not_supported("quantile() with length(probs) != 1")
+ }
+ # TODO: Bind to the Arrow function that returns an exact quantile and remove
+ # this warning (ARROW-14021)
+ warn(
+ "quantile() currently returns an approximate quantile in Arrow",
+ .frequency = ifelse(is_interactive(), "once", "always"),
+ .frequency_id = "arrow.quantile.approximate"
+ )
+ list(
+ fun = "tdigest",
+ data = x,
+ options = list(skip_nulls = na.rm, q = probs)
+ )
+}
+agg_funcs$median <- function(x, na.rm = FALSE) {
+ # TODO: Bind to the Arrow function that returns an exact median and remove
+ # this warning (ARROW-14021)
+ warn(
+ "median() currently returns an approximate median in Arrow",
+ .frequency = ifelse(is_interactive(), "once", "always"),
+ .frequency_id = "arrow.median.approximate"
+ )
+ list(
+ fun = "approximate_median",
+ data = x,
+ options = list(skip_nulls = na.rm)
+ )
+}
+agg_funcs$n_distinct <- function(..., na.rm = FALSE) {
+ list(
+ fun = "count_distinct",
+ data = ensure_one_arg(list2(...), "n_distinct"),
+ options = list(na.rm = na.rm)
+ )
+}
+agg_funcs$n <- function() {
+ list(
+ fun = "sum",
+ data = Expression$scalar(1L),
+ options = list()
+ )
+}
+agg_funcs$min <- function(..., na.rm = FALSE) {
+ list(
+ fun = "min",
+ data = ensure_one_arg(list2(...), "min"),
+ options = list(skip_nulls = na.rm, min_count = 0L)
+ )
+}
+agg_funcs$max <- function(..., na.rm = FALSE) {
+ list(
+ fun = "max",
+ data = ensure_one_arg(list2(...), "max"),
+ options = list(skip_nulls = na.rm, min_count = 0L)
+ )
+}
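+
+# For example (hypothetical sketch), summarize(total = sum(x, na.rm = TRUE))
+# records a list like the one below; in a grouped query the aggregation node
+# then runs the "hash_sum" kernel (per the "hash_" prefix rule noted above).
+if (FALSE) {
+  agg_funcs$sum(Expression$field_ref("x"), na.rm = TRUE)
+  # list(fun = "sum", data = <Expression>, options = list(skip_nulls = TRUE, min_count = 0L))
+}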
+
+ensure_one_arg <- function(args, fun) {
+ if (length(args) == 0) {
+ arrow_not_supported(paste0(fun, "() with 0 arguments"))
+ } else if (length(args) > 1) {
+ arrow_not_supported(paste0("Multiple arguments to ", fun, "()"))
+ }
+ args[[1]]
+}
+
+output_type <- function(fun, input_type, hash) {
+ # These are quick and dirty heuristics.
+ if (fun %in% c("any", "all")) {
+ bool()
+ } else if (fun %in% "sum") {
+ # It may upcast to a bigger type but this is close enough
+ input_type
+ } else if (fun %in% c("mean", "stddev", "variance", "approximate_median")) {
+ float64()
+ } else if (fun %in% "tdigest") {
+ if (hash) {
+ fixed_size_list_of(float64(), 1L)
+ } else {
+ float64()
+ }
+ } else {
+ # Just so things don't error, assume the resulting type is the same
+ input_type
+ }
+}
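+
+# For example (hypothetical sketch of the heuristics above):
+if (FALSE) {
+  output_type("mean", int32(), hash = FALSE) # float64()
+  output_type("tdigest", float64(), hash = TRUE) # fixed_size_list_of(float64(), 1L)
+  output_type("min", utf8(), hash = FALSE) # utf8(), via the fallback branch
+}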
diff --git a/src/arrow/r/R/dplyr-group-by.R b/src/arrow/r/R/dplyr-group-by.R
new file mode 100644
index 000000000..66b867210
--- /dev/null
+++ b/src/arrow/r/R/dplyr-group-by.R
@@ -0,0 +1,86 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+# The following S3 methods are registered on load if dplyr is present
+
+group_by.arrow_dplyr_query <- function(.data,
+ ...,
+ .add = FALSE,
+ add = .add,
+ .drop = dplyr::group_by_drop_default(.data)) {
+ .data <- as_adq(.data)
+ new_groups <- enquos(...)
+ # ... can contain expressions (i.e. can add (or rename?) columns) and so we
+ # need to identify those and add them on to the query with mutate. Specifically,
+ # we want to mark as new:
+ # * expressions (named or otherwise)
+ # * variables that have new names
+ # All others (i.e. simple references to variables) should not be (re)-added
+
+ # Identify any groups with names which aren't in names of .data
+ new_group_ind <- map_lgl(new_groups, ~ !(quo_name(.x) %in% names(.data)))
+  # Identify any groups which were given explicit names
+ named_group_ind <- map_lgl(names(new_groups), nzchar)
+ # Retain any new groups identified above
+ new_groups <- new_groups[new_group_ind | named_group_ind]
+ if (length(new_groups)) {
+ # now either use the name that was given in ... or if that is "" then use the expr
+ names(new_groups) <- imap_chr(new_groups, ~ ifelse(.y == "", quo_name(.x), .y))
+
+ # Add them to the data
+ .data <- dplyr::mutate(.data, !!!new_groups)
+ }
+ if (".add" %in% names(formals(dplyr::group_by))) {
+ # For compatibility with dplyr >= 1.0
+ gv <- dplyr::group_by_prepare(.data, ..., .add = .add)$group_names
+ } else {
+ gv <- dplyr::group_by_prepare(.data, ..., add = add)$group_names
+ }
+ .data$group_by_vars <- gv
+ .data$drop_empty_groups <- ifelse(length(gv), .drop, dplyr::group_by_drop_default(.data))
+ .data
+}
+group_by.Dataset <- group_by.ArrowTabular <- group_by.arrow_dplyr_query
+
+groups.arrow_dplyr_query <- function(x) syms(dplyr::group_vars(x))
+groups.Dataset <- groups.ArrowTabular <- function(x) NULL
+
+group_vars.arrow_dplyr_query <- function(x) x$group_by_vars
+group_vars.Dataset <- function(x) NULL
+group_vars.RecordBatchReader <- function(x) NULL
+group_vars.ArrowTabular <- function(x) {
+ x$r_metadata$attributes$.group_vars
+}
+
+# the logical literal in the two functions below controls the default value of
+# the .drop argument to group_by()
+group_by_drop_default.arrow_dplyr_query <-
+ function(.tbl) .tbl$drop_empty_groups %||% TRUE
+group_by_drop_default.Dataset <- group_by_drop_default.ArrowTabular <-
+ function(.tbl) TRUE
+
+ungroup.arrow_dplyr_query <- function(x, ...) {
+ x$group_by_vars <- character()
+ x$drop_empty_groups <- NULL
+ x
+}
+ungroup.Dataset <- force
+ungroup.ArrowTabular <- function(x) {
+ x$r_metadata$attributes$.group_vars <- NULL
+ x
+}
diff --git a/src/arrow/r/R/dplyr-join.R b/src/arrow/r/R/dplyr-join.R
new file mode 100644
index 000000000..c14b1a8f3
--- /dev/null
+++ b/src/arrow/r/R/dplyr-join.R
@@ -0,0 +1,126 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+# The following S3 methods are registered on load if dplyr is present
+
+do_join <- function(x,
+ y,
+ by = NULL,
+ copy = FALSE,
+ suffix = c(".x", ".y"),
+ ...,
+ keep = FALSE,
+ na_matches,
+ join_type) {
+ # TODO: handle `copy` arg: ignore?
+ # TODO: handle `suffix` arg: Arrow does prefix
+ # TODO: handle `keep` arg: "Should the join keys from both ‘x’ and ‘y’ be preserved in the output?"
+ # TODO: handle `na_matches` arg
+ x <- as_adq(x)
+ y <- as_adq(y)
+ by <- handle_join_by(by, x, y)
+
+ x$join <- list(
+ type = JoinType[[join_type]],
+ right_data = y,
+ by = by
+ )
+ collapse.arrow_dplyr_query(x)
+}
+
+left_join.arrow_dplyr_query <- function(x,
+ y,
+ by = NULL,
+ copy = FALSE,
+ suffix = c(".x", ".y"),
+ ...,
+ keep = FALSE) {
+ do_join(x, y, by, copy, suffix, ..., keep = keep, join_type = "LEFT_OUTER")
+}
+left_join.Dataset <- left_join.ArrowTabular <- left_join.arrow_dplyr_query
+
+right_join.arrow_dplyr_query <- function(x,
+ y,
+ by = NULL,
+ copy = FALSE,
+ suffix = c(".x", ".y"),
+ ...,
+ keep = FALSE) {
+ do_join(x, y, by, copy, suffix, ..., keep = keep, join_type = "RIGHT_OUTER")
+}
+right_join.Dataset <- right_join.ArrowTabular <- right_join.arrow_dplyr_query
+
+inner_join.arrow_dplyr_query <- function(x,
+ y,
+ by = NULL,
+ copy = FALSE,
+ suffix = c(".x", ".y"),
+ ...,
+ keep = FALSE) {
+ do_join(x, y, by, copy, suffix, ..., keep = keep, join_type = "INNER")
+}
+inner_join.Dataset <- inner_join.ArrowTabular <- inner_join.arrow_dplyr_query
+
+full_join.arrow_dplyr_query <- function(x,
+ y,
+ by = NULL,
+ copy = FALSE,
+ suffix = c(".x", ".y"),
+ ...,
+ keep = FALSE) {
+ do_join(x, y, by, copy, suffix, ..., keep = keep, join_type = "FULL_OUTER")
+}
+full_join.Dataset <- full_join.ArrowTabular <- full_join.arrow_dplyr_query
+
+semi_join.arrow_dplyr_query <- function(x,
+ y,
+ by = NULL,
+ copy = FALSE,
+ suffix = c(".x", ".y"),
+ ...,
+ keep = FALSE) {
+ do_join(x, y, by, copy, suffix, ..., keep = keep, join_type = "LEFT_SEMI")
+}
+semi_join.Dataset <- semi_join.ArrowTabular <- semi_join.arrow_dplyr_query
+
+anti_join.arrow_dplyr_query <- function(x,
+ y,
+ by = NULL,
+ copy = FALSE,
+ suffix = c(".x", ".y"),
+ ...,
+ keep = FALSE) {
+ do_join(x, y, by, copy, suffix, ..., keep = keep, join_type = "LEFT_ANTI")
+}
+anti_join.Dataset <- anti_join.ArrowTabular <- anti_join.arrow_dplyr_query
+
+handle_join_by <- function(by, x, y) {
+ if (is.null(by)) {
+ return(set_names(intersect(names(x), names(y))))
+ }
+ stopifnot(is.character(by))
+ if (is.null(names(by))) {
+ by <- set_names(by)
+ }
+ # TODO: nicer messages?
+ stopifnot(
+ all(names(by) %in% names(x)),
+ all(by %in% names(y))
+ )
+ by
+}
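+
+# For example (hypothetical sketch): with by = NULL the join keys default to
+# the shared column names, and a named character vector maps left-hand names
+# to right-hand names. The tables and column names below are made up.
+if (FALSE) {
+  x <- Table$create(id = 1:3, a = 4:6)
+  y <- Table$create(id = 1:3, key = 1:3, b = 7:9)
+  handle_join_by(NULL, x, y) # c(id = "id"): the shared column name
+  handle_join_by(c(id = "key"), x, y) # join x$id to y$key
+}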
diff --git a/src/arrow/r/R/dplyr-mutate.R b/src/arrow/r/R/dplyr-mutate.R
new file mode 100644
index 000000000..2e5239484
--- /dev/null
+++ b/src/arrow/r/R/dplyr-mutate.R
@@ -0,0 +1,140 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+# The following S3 methods are registered on load if dplyr is present
+
+mutate.arrow_dplyr_query <- function(.data,
+ ...,
+ .keep = c("all", "used", "unused", "none"),
+ .before = NULL,
+ .after = NULL) {
+ call <- match.call()
+ exprs <- ensure_named_exprs(quos(...))
+
+ .keep <- match.arg(.keep)
+ .before <- enquo(.before)
+ .after <- enquo(.after)
+
+ if (.keep %in% c("all", "unused") && length(exprs) == 0) {
+ # Nothing to do
+ return(.data)
+ }
+
+ .data <- as_adq(.data)
+
+ # Restrict the cases we support for now
+ has_aggregations <- any(unlist(lapply(exprs, all_funs)) %in% names(agg_funcs))
+ if (has_aggregations) {
+ # ARROW-13926
+ # mutate() on a grouped dataset does calculations within groups
+ # This doesn't matter on scalar ops (arithmetic etc.) but it does
+ # for things with aggregations (e.g. subtracting the mean)
+ return(abandon_ship(call, .data, "window functions not currently supported in Arrow"))
+ }
+
+ mask <- arrow_mask(.data)
+ results <- list()
+ for (i in seq_along(exprs)) {
+ # Iterate over the indices and not the names because names may be repeated
+ # (which overwrites the previous name)
+ new_var <- names(exprs)[i]
+ results[[new_var]] <- arrow_eval(exprs[[i]], mask)
+ if (inherits(results[[new_var]], "try-error")) {
+ msg <- handle_arrow_not_supported(
+ results[[new_var]],
+ format_expr(exprs[[i]])
+ )
+ return(abandon_ship(call, .data, msg))
+ } else if (!inherits(results[[new_var]], "Expression") &&
+ !is.null(results[[new_var]])) {
+ # We need some wrapping to handle literal values
+ if (length(results[[new_var]]) != 1) {
+ msg <- paste0("In ", new_var, " = ", format_expr(exprs[[i]]), ", only values of size one are recycled")
+ return(abandon_ship(call, .data, msg))
+ }
+ results[[new_var]] <- Expression$scalar(results[[new_var]])
+ }
+ # Put it in the data mask too
+ mask[[new_var]] <- mask$.data[[new_var]] <- results[[new_var]]
+ }
+
+ old_vars <- names(.data$selected_columns)
+ # Note that this is names(exprs) not names(results):
+ # if results$new_var is NULL, that means we are supposed to remove it
+ new_vars <- names(exprs)
+
+ # Assign the new columns into the .data$selected_columns
+ for (new_var in new_vars) {
+ .data$selected_columns[[new_var]] <- results[[new_var]]
+ }
+
+ # Deduplicate new_vars and remove NULL columns from new_vars
+ new_vars <- intersect(new_vars, names(.data$selected_columns))
+
+ # Respect .before and .after
+ if (!quo_is_null(.before) || !quo_is_null(.after)) {
+ new <- setdiff(new_vars, old_vars)
+ .data <- dplyr::relocate(.data, all_of(new), .before = !!.before, .after = !!.after)
+ }
+
+ # Respect .keep
+ if (.keep == "none") {
+ .data$selected_columns <- .data$selected_columns[new_vars]
+ } else if (.keep != "all") {
+ # "used" or "unused"
+ used_vars <- unlist(lapply(exprs, all.vars), use.names = FALSE)
+ if (.keep == "used") {
+ .data$selected_columns[setdiff(old_vars, used_vars)] <- NULL
+ } else {
+ # "unused"
+ .data$selected_columns[intersect(old_vars, used_vars)] <- NULL
+ }
+ }
+ # Even if "none", we still keep group vars
+ ensure_group_vars(.data)
+}
+mutate.Dataset <- mutate.ArrowTabular <- mutate.arrow_dplyr_query
+
+transmute.arrow_dplyr_query <- function(.data, ...) {
+ dots <- check_transmute_args(...)
+ dplyr::mutate(.data, !!!dots, .keep = "none")
+}
+transmute.Dataset <- transmute.ArrowTabular <- transmute.arrow_dplyr_query
+
+# This function is a copy of dplyr:::check_transmute_args at
+# https://github.com/tidyverse/dplyr/blob/master/R/mutate.R
+check_transmute_args <- function(..., .keep, .before, .after) {
+ if (!missing(.keep)) {
+ abort("`transmute()` does not support the `.keep` argument")
+ }
+ if (!missing(.before)) {
+ abort("`transmute()` does not support the `.before` argument")
+ }
+ if (!missing(.after)) {
+ abort("`transmute()` does not support the `.after` argument")
+ }
+ enquos(...)
+}
+
+ensure_named_exprs <- function(exprs) {
+ # Check for unnamed expressions and fix if any
+ unnamed <- !nzchar(names(exprs))
+ # Deparse and take the first element in case they're long expressions
+ names(exprs)[unnamed] <- map_chr(exprs[unnamed], format_expr)
+ exprs
+}
diff --git a/src/arrow/r/R/dplyr-select.R b/src/arrow/r/R/dplyr-select.R
new file mode 100644
index 000000000..9a867ced9
--- /dev/null
+++ b/src/arrow/r/R/dplyr-select.R
@@ -0,0 +1,125 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+# The following S3 methods are registered on load if dplyr is present
+
+tbl_vars.arrow_dplyr_query <- function(x) names(x$selected_columns)
+
+select.arrow_dplyr_query <- function(.data, ...) {
+ check_select_helpers(enexprs(...))
+ column_select(as_adq(.data), !!!enquos(...))
+}
+select.Dataset <- select.ArrowTabular <- select.arrow_dplyr_query
+
+rename.arrow_dplyr_query <- function(.data, ...) {
+ check_select_helpers(enexprs(...))
+ column_select(as_adq(.data), !!!enquos(...), .FUN = vars_rename)
+}
+rename.Dataset <- rename.ArrowTabular <- rename.arrow_dplyr_query
+
+column_select <- function(.data, ..., .FUN = vars_select) {
+ # .FUN is either tidyselect::vars_select or tidyselect::vars_rename
+ # It operates on the names() of selected_columns, i.e. the column names
+ # factoring in any renaming that may already have happened
+ out <- .FUN(names(.data), !!!enquos(...))
+ # Make sure that the resulting selected columns map back to the original data,
+ # as in when there are multiple renaming steps
+ .data$selected_columns <- set_names(.data$selected_columns[out], names(out))
+
+ # If we've renamed columns, we need to project that renaming into other
+ # query parameters we've collected
+ renamed <- out[names(out) != out]
+ if (length(renamed)) {
+ # Massage group_by
+ gbv <- .data$group_by_vars
+ renamed_groups <- gbv %in% renamed
+ gbv[renamed_groups] <- names(renamed)[match(gbv[renamed_groups], renamed)]
+ .data$group_by_vars <- gbv
+ # No need to massage filters because those contain references to Arrow objects
+ }
+ .data
+}
+
+relocate.arrow_dplyr_query <- function(.data, ..., .before = NULL, .after = NULL) {
+ # The code in this function is adapted from the code in dplyr::relocate.data.frame
+ # at https://github.com/tidyverse/dplyr/blob/master/R/relocate.R
+ # TODO: revisit this after https://github.com/tidyverse/dplyr/issues/5829
+
+ .data <- as_adq(.data)
+
+ # Assign the schema to the expressions
+ map(.data$selected_columns, ~ (.$schema <- .data$.data$schema))
+
+ # Create a mask for evaluating expressions in tidyselect helpers
+ mask <- new_environment(.cache$functions, parent = caller_env())
+
+ to_move <- eval_select(substitute(c(...)), .data$selected_columns, mask)
+
+ .before <- enquo(.before)
+ .after <- enquo(.after)
+ has_before <- !quo_is_null(.before)
+ has_after <- !quo_is_null(.after)
+
+ if (has_before && has_after) {
+ abort("Must supply only one of `.before` and `.after`.")
+ } else if (has_before) {
+ where <- min(unname(eval_select(quo_get_expr(.before), .data$selected_columns, mask)))
+ if (!where %in% to_move) {
+ to_move <- c(to_move, where)
+ }
+ } else if (has_after) {
+ where <- max(unname(eval_select(quo_get_expr(.after), .data$selected_columns, mask)))
+ if (!where %in% to_move) {
+ to_move <- c(where, to_move)
+ }
+ } else {
+ where <- 1L
+ if (!where %in% to_move) {
+ to_move <- c(to_move, where)
+ }
+ }
+
+ lhs <- setdiff(seq2(1, where - 1), to_move)
+ rhs <- setdiff(seq2(where + 1, length(.data$selected_columns)), to_move)
+
+ pos <- vec_unique(c(lhs, to_move, rhs))
+ new_names <- names(pos)
+ .data$selected_columns <- .data$selected_columns[pos]
+
+ if (!is.null(new_names)) {
+ names(.data$selected_columns)[new_names != ""] <- new_names[new_names != ""]
+ }
+ .data
+}
+relocate.Dataset <- relocate.ArrowTabular <- relocate.arrow_dplyr_query
+
+check_select_helpers <- function(exprs) {
+ # Throw an error if unsupported tidyselect selection helpers in `exprs`
+ exprs <- lapply(exprs, function(x) if (is_quosure(x)) quo_get_expr(x) else x)
+ unsup_select_helpers <- "where"
+ funs_in_exprs <- unlist(lapply(exprs, all_funs))
+ unsup_funs <- funs_in_exprs[funs_in_exprs %in% unsup_select_helpers]
+ if (length(unsup_funs)) {
+ stop(
+ "Unsupported selection ",
+ ngettext(length(unsup_funs), "helper: ", "helpers: "),
+ oxford_paste(paste0(unsup_funs, "()"), quote = FALSE),
+ call. = FALSE
+ )
+ }
+}
diff --git a/src/arrow/r/R/dplyr-summarize.R b/src/arrow/r/R/dplyr-summarize.R
new file mode 100644
index 000000000..a6b7a3592
--- /dev/null
+++ b/src/arrow/r/R/dplyr-summarize.R
@@ -0,0 +1,289 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+# The following S3 methods are registered on load if dplyr is present
+
+summarise.arrow_dplyr_query <- function(.data, ...) {
+ call <- match.call()
+ .data <- as_adq(.data)
+ exprs <- quos(...)
+ # Only retain the columns we need to do our aggregations
+ vars_to_keep <- unique(c(
+ unlist(lapply(exprs, all.vars)), # vars referenced in summarise
+ dplyr::group_vars(.data) # vars needed for grouping
+ ))
+ # If exprs rely on the results of previous exprs
+ # (total = sum(x), mean = total / n())
+ # then not all vars will correspond to columns in the data,
+ # so don't try to select() them (use intersect() to exclude them)
+ # Note that this select() isn't useful for the Arrow summarize implementation
+ # because it will effectively project to keep what it needs anyway,
+ # but the data.frame fallback version does benefit from select here
+ .data <- dplyr::select(.data, intersect(vars_to_keep, names(.data)))
+
+  # Try the Arrow summarize implementation; if it fails, fall back via abandon_ship()
+ out <- try(do_arrow_summarize(.data, ...), silent = TRUE)
+ if (inherits(out, "try-error")) {
+ return(abandon_ship(call, .data, format(out)))
+ } else {
+ return(out)
+ }
+}
+summarise.Dataset <- summarise.ArrowTabular <- summarise.arrow_dplyr_query
+
+# This is the Arrow summarize implementation
+do_arrow_summarize <- function(.data, ..., .groups = NULL) {
+ exprs <- ensure_named_exprs(quos(...))
+
+ # Create a stateful environment for recording our evaluated expressions
+ # It's more complex than other places because a single summarize() expr
+ # may result in multiple query nodes (Aggregate, Project),
+ # and we have to walk through the expressions to disentangle them.
+ ctx <- env(
+ mask = arrow_mask(.data, aggregation = TRUE),
+ aggregations = empty_named_list(),
+ post_mutate = empty_named_list()
+ )
+ for (i in seq_along(exprs)) {
+ # Iterate over the indices and not the names because names may be repeated
+ # (which overwrites the previous name)
+ summarize_eval(
+ names(exprs)[i],
+ exprs[[i]],
+ ctx,
+ length(.data$group_by_vars) > 0
+ )
+ }
+
+ # Apply the results to the .data object.
+ # First, the aggregations
+ .data$aggregations <- ctx$aggregations
+ # Then collapse the query so that the resulting query object can have
+ # additional operations applied to it
+ out <- collapse.arrow_dplyr_query(.data)
+ # The expressions may have been translated into
+ # "first, aggregate, then transform the result further"
+ # nolint start
+ # For example,
+ # summarize(mean = sum(x) / n())
+ # is effectively implemented as
+ # summarize(..temp0 = sum(x), ..temp1 = n()) %>%
+ # mutate(mean = ..temp0 / ..temp1) %>%
+ # select(-starts_with("..temp"))
+ # If this is the case, there will be expressions in post_mutate
+ # nolint end
+ if (length(ctx$post_mutate)) {
+ # Append post_mutate, and make sure order is correct
+ # according to input exprs (also dropping ..temp columns)
+ out$selected_columns <- c(
+ out$selected_columns,
+ ctx$post_mutate
+ )[c(.data$group_by_vars, names(exprs))]
+ }
+
+ # If the object has .drop = FALSE and any group vars are dictionaries,
+ # we can't (currently) preserve the empty rows that dplyr does,
+ # so give a warning about that.
+ if (!dplyr::group_by_drop_default(.data)) {
+ group_by_exprs <- .data$selected_columns[.data$group_by_vars]
+ if (any(map_lgl(group_by_exprs, ~ inherits(.$type(), "DictionaryType")))) {
+ warning(
+ ".drop = FALSE currently not supported in Arrow aggregation",
+ call. = FALSE
+ )
+ }
+ }
+
+ # Handle .groups argument
+ if (length(.data$group_by_vars)) {
+ if (is.null(.groups)) {
+ # dplyr docs say:
+ # When ‘.groups’ is not specified, it is chosen based on the
+ # number of rows of the results:
+ # • If all the results have 1 row, you get "drop_last".
+ # • If the number of rows varies, you get "keep".
+ #
+ # But we don't support anything that returns multiple rows now
+ .groups <- "drop_last"
+ } else {
+ assert_that(is.string(.groups))
+ }
+ if (.groups == "drop_last") {
+ out$group_by_vars <- head(.data$group_by_vars, -1)
+ } else if (.groups == "keep") {
+ out$group_by_vars <- .data$group_by_vars
+ } else if (.groups == "rowwise") {
+ stop(arrow_not_supported('.groups = "rowwise"'))
+ } else if (.groups == "drop") {
+ # collapse() preserves groups so remove them
+ out <- dplyr::ungroup(out)
+ } else {
+ stop(paste("Invalid .groups argument:", .groups))
+ }
+ # TODO: shouldn't we be doing something with `drop_empty_groups` in summarize? (ARROW-14044)
+ out$drop_empty_groups <- .data$drop_empty_groups
+ }
+ out
+}
+
+arrow_eval_or_stop <- function(expr, mask) {
+ # TODO: change arrow_eval error handling behavior?
+ out <- arrow_eval(expr, mask)
+ if (inherits(out, "try-error")) {
+ msg <- handle_arrow_not_supported(out, format_expr(expr))
+ stop(msg, call. = FALSE)
+ }
+ out
+}
+
+summarize_projection <- function(.data) {
+ c(
+ map(.data$aggregations, ~ .$data),
+ .data$selected_columns[.data$group_by_vars]
+ )
+}
+
+format_aggregation <- function(x) {
+ paste0(x$fun, "(", x$data$ToString(), ")")
+}
+
+# This function handles each summarize expression and turns it into the
+# appropriate combination of (1) aggregations (possibly temporary) and
+# (2) post-aggregation transformations (mutate)
+# The function returns nothing: it assigns into the `ctx` environment
+summarize_eval <- function(name, quosure, ctx, hash, recurse = FALSE) {
+ expr <- quo_get_expr(quosure)
+ ctx$quo_env <- quo_get_env(quosure)
+
+ funs_in_expr <- all_funs(expr)
+ if (length(funs_in_expr) == 0) {
+ # If it is a scalar or field ref, no special handling required
+ ctx$aggregations[[name]] <- arrow_eval_or_stop(quosure, ctx$mask)
+ return()
+ }
+
+ # For the quantile() binding in the hash aggregation case, we need to mutate
+ # the list output from the Arrow hash_tdigest kernel to flatten it into a
+ # column of type float64. We do that by modifying the unevaluated expression
+ # to replace quantile(...) with arrow_list_element(quantile(...), 0L)
+ if (hash && "quantile" %in% funs_in_expr) {
+ expr <- wrap_hash_quantile(expr)
+ funs_in_expr <- all_funs(expr)
+ }
+
+ # Start inspecting the expr to see what aggregations it involves
+ agg_funs <- names(agg_funcs)
+ outer_agg <- funs_in_expr[1] %in% agg_funs
+ inner_agg <- funs_in_expr[-1] %in% agg_funs
+
+ # First, pull out any aggregations wrapped in other function calls
+ if (any(inner_agg)) {
+ expr <- extract_aggregations(expr, ctx)
+ }
+
+ # By this point, there are no more aggregation functions in expr
+ # except for possibly the outer function call:
+ # they've all been pulled out to ctx$aggregations, and in their place in expr
+ # there are variable names, which will correspond to field refs in the
+ # query object after aggregation and collapse().
+ # So if we want to know if there are any aggregations inside expr,
+ # we have to look for them by their new var names
+ inner_agg_exprs <- all_vars(expr) %in% names(ctx$aggregations)
+
+ if (outer_agg) {
+    # This is something like agg(fun(x, y))
+ # It just works by normal arrow_eval, unless there's a mix of aggs and
+ # columns in the original data like agg(fun(x, agg(x)))
+ # (but that will have been caught in extract_aggregations())
+ ctx$aggregations[[name]] <- arrow_eval_or_stop(
+ as_quosure(expr, ctx$quo_env),
+ ctx$mask
+ )
+ return()
+ } else if (all(inner_agg_exprs)) {
+ # Something like: fun(agg(x), agg(y))
+ # So based on the aggregations that have been extracted, mutate after
+ mutate_mask <- arrow_mask(
+ list(selected_columns = make_field_refs(names(ctx$aggregations)))
+ )
+ ctx$post_mutate[[name]] <- arrow_eval_or_stop(
+ as_quosure(expr, ctx$quo_env),
+ mutate_mask
+ )
+ return()
+ }
+
+ # Backstop for any other odd cases, like fun(x, y) (i.e. no aggregation),
+ # or aggregation functions that aren't supported in Arrow (not in agg_funcs)
+ stop(
+ handle_arrow_not_supported(quo_get_expr(quosure), format_expr(quosure)),
+ call. = FALSE
+ )
+}
+
+# This function recurses through expr, pulls out any aggregation expressions,
+# and inserts a variable name (field ref) in place of the aggregation
+extract_aggregations <- function(expr, ctx) {
+ # Keep the input in case we need to raise an error message with it
+ original_expr <- expr
+ funs <- all_funs(expr)
+ if (length(funs) == 0) {
+ return(expr)
+ } else if (length(funs) > 1) {
+ # Recurse more
+ expr[-1] <- lapply(expr[-1], extract_aggregations, ctx)
+ }
+ if (funs[1] %in% names(agg_funcs)) {
+ inner_agg_exprs <- all_vars(expr) %in% names(ctx$aggregations)
+ if (any(inner_agg_exprs) & !all(inner_agg_exprs)) {
+ # We can't aggregate over a combination of dataset columns and other
+ # aggregations (e.g. sum(x - mean(x)))
+ # TODO: support in ARROW-13926
+ # TODO: Add "because" arg to explain _why_ it's not supported?
+ # TODO: this message could also say "not supported in summarize()"
+ # since some of these expressions may be legal elsewhere
+ stop(
+ handle_arrow_not_supported(original_expr, format_expr(original_expr)),
+ call. = FALSE
+ )
+ }
+
+ # We have an aggregation expression with no other aggregations inside it,
+ # so arrow_eval the expression on the data and give it a ..temp name prefix,
+ # then insert that name (symbol) back into the expression so that we can
+ # mutate() on the result of the aggregation and reference this field.
+ tmpname <- paste0("..temp", length(ctx$aggregations))
+ ctx$aggregations[[tmpname]] <- arrow_eval_or_stop(as_quosure(expr, ctx$quo_env), ctx$mask)
+ expr <- as.symbol(tmpname)
+ }
+ expr
+}
+
+# This function recurses through expr and wraps each call to quantile() with a
+# call to arrow_list_element()
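+# For example (illustrative): quantile(x, 0.5) becomes
+# arrow_list_element(quantile(x, 0.5), 0L), so the single-element list returned
+# by the hash_tdigest kernel is flattened to a float64 column.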
+wrap_hash_quantile <- function(expr) {
+ if (length(expr) == 1) {
+ return(expr)
+ } else {
+ if (is.call(expr) && expr[[1]] == quote(quantile)) {
+ return(str2lang(paste0("arrow_list_element(", deparse1(expr), ", 0L)")))
+ } else {
+ return(as.call(lapply(expr, wrap_hash_quantile)))
+ }
+ }
+}
diff --git a/src/arrow/r/R/dplyr.R b/src/arrow/r/R/dplyr.R
new file mode 100644
index 000000000..e6f67c066
--- /dev/null
+++ b/src/arrow/r/R/dplyr.R
@@ -0,0 +1,259 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+#' @include expression.R
+#' @include record-batch.R
+#' @include table.R
+
+arrow_dplyr_query <- function(.data) {
+ # An arrow_dplyr_query is a container for an Arrow data object (Table,
+ # RecordBatch, or Dataset) and the state of the user's dplyr query--things
+ # like selected columns, filters, and group vars.
+ # An arrow_dplyr_query can contain another arrow_dplyr_query in .data
+ gv <- dplyr::group_vars(.data) %||% character()
+
+ if (!inherits(.data, c("Dataset", "arrow_dplyr_query", "RecordBatchReader"))) {
+ .data <- InMemoryDataset$create(.data)
+ }
+  # Evaluating expressions on a dataset with duplicated field names will error
+ dupes <- duplicated(names(.data))
+ if (any(dupes)) {
+ abort(c(
+ "Duplicated field names",
+ x = paste0(
+ "The following field names were found more than once in the data: ",
+ oxford_paste(names(.data)[dupes])
+ )
+ ))
+ }
+ structure(
+ list(
+ .data = .data,
+ # selected_columns is a named list:
+ # * contents are references/expressions pointing to the data
+ # * names are the names they should be in the end (i.e. this
+ # records any renaming)
+ selected_columns = make_field_refs(names(.data$schema)),
+ # filtered_rows will be an Expression
+ filtered_rows = TRUE,
+ # group_by_vars is a character vector of columns (as renamed)
+ # in the data. They will be kept when data is pulled into R.
+ group_by_vars = gv,
+ # drop_empty_groups is a logical value indicating whether to drop
+ # groups formed by factor levels that don't appear in the data. It
+ # should be non-null only when the data is grouped.
+ drop_empty_groups = NULL,
+ # arrange_vars will be a list of expressions named by their associated
+ # column names
+ arrange_vars = list(),
+ # arrange_desc will be a logical vector indicating the sort order for each
+ # expression in arrange_vars (FALSE for ascending, TRUE for descending)
+ arrange_desc = logical()
+ ),
+ class = "arrow_dplyr_query"
+ )
+}
+
+# The only difference between `arrow_dplyr_query()` and `as_adq()` is that if
+# `.data` is already an `arrow_dplyr_query`, `as_adq()` will return it as is, but
+# `arrow_dplyr_query()` will nest it inside a new `arrow_dplyr_query`. The only
+# place where `arrow_dplyr_query()` should be called directly is inside
+# `collapse()` methods; everywhere else, call `as_adq()`.
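+# Illustrative sketch (assuming `tab` is an Arrow Table):
+#   q <- as_adq(tab)          # wraps tab in a new arrow_dplyr_query
+#   identical(as_adq(q), q)   # TRUE: already a query, returned as is
+#   arrow_dplyr_query(q)      # nests q inside a new query (as in collapse())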
+as_adq <- function(.data) {
+ # For most dplyr methods,
+ # method.Table == method.RecordBatch == method.Dataset == method.arrow_dplyr_query
+ # This works because the functions all pass .data through as_adq()
+ if (inherits(.data, "arrow_dplyr_query")) {
+ return(.data)
+ }
+ arrow_dplyr_query(.data)
+}
+
+make_field_refs <- function(field_names) {
+ set_names(lapply(field_names, Expression$field_ref), field_names)
+}
+
+#' @export
+print.arrow_dplyr_query <- function(x, ...) {
+ schm <- x$.data$schema
+ types <- map_chr(x$selected_columns, function(expr) {
+ name <- expr$field_name
+ if (nzchar(name)) {
+ # Just a field_ref, so look up in the schema
+ schm$GetFieldByName(name)$type$ToString()
+ } else {
+ # Expression, so get its type and append the expression
+ paste0(
+ expr$type(schm)$ToString(),
+ " (", expr$ToString(), ")"
+ )
+ }
+ })
+ fields <- paste(names(types), types, sep = ": ", collapse = "\n")
+ cat(class(source_data(x))[1], " (query)\n", sep = "")
+ cat(fields, "\n", sep = "")
+ cat("\n")
+ if (length(x$aggregations)) {
+ cat("* Aggregations:\n")
+ aggs <- paste0(names(x$aggregations), ": ", map_chr(x$aggregations, format_aggregation), collapse = "\n")
+ cat(aggs, "\n", sep = "")
+ }
+ if (!isTRUE(x$filtered_rows)) {
+ filter_string <- x$filtered_rows$ToString()
+ cat("* Filter: ", filter_string, "\n", sep = "")
+ }
+ if (length(x$group_by_vars)) {
+ cat("* Grouped by ", paste(x$group_by_vars, collapse = ", "), "\n", sep = "")
+ }
+ if (length(x$arrange_vars)) {
+ arrange_strings <- map_chr(x$arrange_vars, function(x) x$ToString())
+ cat(
+ "* Sorted by ",
+ paste(
+ paste0(
+ arrange_strings,
+ " [", ifelse(x$arrange_desc, "desc", "asc"), "]"
+ ),
+ collapse = ", "
+ ),
+ "\n",
+ sep = ""
+ )
+ }
+ cat("See $.data for the source Arrow object\n")
+ invisible(x)
+}
+
+# These are the names reflecting all select/rename, not what is in Arrow
+#' @export
+names.arrow_dplyr_query <- function(x) names(x$selected_columns)
+
+#' @export
+dim.arrow_dplyr_query <- function(x) {
+ cols <- length(names(x))
+
+ if (is_collapsed(x)) {
+ # Don't evaluate just for nrow
+ rows <- NA_integer_
+ } else if (isTRUE(x$filtered_rows)) {
+ rows <- x$.data$num_rows
+ } else {
+ rows <- Scanner$create(x)$CountRows()
+ }
+ c(rows, cols)
+}
+
+#' @export
+as.data.frame.arrow_dplyr_query <- function(x, row.names = NULL, optional = FALSE, ...) {
+ collect.arrow_dplyr_query(x, as_data_frame = TRUE, ...)
+}
+
+#' @export
+head.arrow_dplyr_query <- function(x, n = 6L, ...) {
+ x$head <- n
+ collapse.arrow_dplyr_query(x)
+}
+
+#' @export
+tail.arrow_dplyr_query <- function(x, n = 6L, ...) {
+ x$tail <- n
+ collapse.arrow_dplyr_query(x)
+}
+
+#' @export
+`[.arrow_dplyr_query` <- function(x, i, j, ..., drop = FALSE) {
+ x <- ensure_group_vars(x)
+ if (nargs() == 2L) {
+ # List-like column extraction (x[i])
+ return(x[, i])
+ }
+ if (!missing(j)) {
+ x <- select.arrow_dplyr_query(x, all_of(j))
+ }
+
+ if (!missing(i)) {
+ out <- take_dataset_rows(x, i)
+ x <- restore_dplyr_features(out, x)
+ }
+ x
+}
+
+ensure_group_vars <- function(x) {
+ if (inherits(x, "arrow_dplyr_query")) {
+ # Before pulling data from Arrow, make sure all group vars are in the projection
+ gv <- set_names(setdiff(dplyr::group_vars(x), names(x)))
+ if (length(gv)) {
+ # Add them back
+ x$selected_columns <- c(
+ x$selected_columns,
+ make_field_refs(gv)
+ )
+ }
+ }
+ x
+}
+
+ensure_arrange_vars <- function(x) {
+ # The arrange() operation is not performed until later, because:
+ # - It must be performed after mutate(), to enable sorting by new columns.
+ # - It should be performed after filter() and select(), for efficiency.
+ # However, we need users to be able to arrange() by columns and expressions
+ # that are *not* returned in the query result. To enable this, we must
+ # *temporarily* include these columns and expressions in the projection. We
+ # use x$temp_columns to store these. Later, after the arrange() operation has
+ # been performed, these are omitted from the result. This differs from the
+ # columns in x$group_by_vars which *are* returned in the result.
+ x$temp_columns <- x$arrange_vars[!names(x$arrange_vars) %in% names(x$selected_columns)]
+ x
+}
+
+# Helper to handle unsupported dplyr features
+# * For Table/RecordBatch, we collect() and then call the dplyr method in R
+# * For Dataset, we just error
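+# Illustrative sketch (hypothetical message text), presumably invoked with the
+# caller's matched call from inside a dplyr method:
+#   abandon_ship(match.call(), .data, "not supported in Arrow")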
+abandon_ship <- function(call, .data, msg) {
+ msg <- trimws(msg)
+ dplyr_fun_name <- sub("^(.*?)\\..*", "\\1", as.character(call[[1]]))
+ if (query_on_dataset(.data)) {
+ stop(msg, "\nCall collect() first to pull data into R.", call. = FALSE)
+ }
+ # else, collect and call dplyr method
+ warning(msg, "; pulling data into R", immediate. = TRUE, call. = FALSE)
+ call$.data <- dplyr::collect(.data)
+ call[[1]] <- get(dplyr_fun_name, envir = asNamespace("dplyr"))
+ eval.parent(call, 2)
+}
+
+query_on_dataset <- function(x) !inherits(source_data(x), "InMemoryDataset")
+
+source_data <- function(x) {
+ if (is_collapsed(x)) {
+ source_data(x$.data)
+ } else {
+ x$.data
+ }
+}
+
+is_collapsed <- function(x) inherits(x$.data, "arrow_dplyr_query")
+
+has_aggregation <- function(x) {
+ # TODO: update with joins (check right side data too)
+ !is.null(x$aggregations) || (is_collapsed(x) && has_aggregation(x$.data))
+}
+
+has_head_tail <- function(x) {
+ !is.null(x$head) || !is.null(x$tail) || (is_collapsed(x) && has_head_tail(x$.data))
+}
diff --git a/src/arrow/r/R/duckdb.R b/src/arrow/r/R/duckdb.R
new file mode 100644
index 000000000..c772d4fbd
--- /dev/null
+++ b/src/arrow/r/R/duckdb.R
@@ -0,0 +1,165 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+#' Create a (virtual) DuckDB table from an Arrow object
+#'
+#' This will do the necessary configuration to create a (virtual) table in DuckDB
+#' that is backed by the Arrow object given. No data is copied or modified until
+#' `collect()` or `compute()` are called or a query is run against the table.
+#'
+#' The result is a dbplyr-compatible object that can be used in d(b)plyr pipelines.
+#'
+#' If `auto_disconnect = TRUE`, the DuckDB table that is created will be configured
+#' to be unregistered when the `tbl` object is garbage collected. This is helpful
+#' if you don't want to have extra table objects in DuckDB after you've finished
+#' using them. Currently, this cleanup can, however, sometimes lead to hangs if
+#' tables are created and deleted in quick succession, hence the default value
+#' of `FALSE`
+#'
+#' @param .data the Arrow object (e.g. Dataset, Table) to use for the DuckDB table
+#' @param con a DuckDB connection to use (default will create one and store it
+#' in `options("arrow_duck_con")`)
+#' @param table_name a name to use in DuckDB for this object. The default is a
+#' unique string `"arrow_"` followed by numbers.
+#' @param auto_disconnect should the table be automatically cleaned up when the
+#' resulting object is removed (and garbage collected)? Default: `FALSE`
+#'
+#' @return A `tbl` of the new table in DuckDB
+#'
+#' @name to_duckdb
+#' @export
+#' @examplesIf getFromNamespace("run_duckdb_examples", "arrow")()
+#' library(dplyr)
+#'
+#' ds <- InMemoryDataset$create(mtcars)
+#'
+#' ds %>%
+#' filter(mpg < 30) %>%
+#' to_duckdb() %>%
+#' group_by(cyl) %>%
+#' summarize(mean_mpg = mean(mpg, na.rm = TRUE))
+to_duckdb <- function(.data,
+ con = arrow_duck_connection(),
+ table_name = unique_arrow_tablename(),
+ auto_disconnect = FALSE) {
+ .data <- as_adq(.data)
+ duckdb::duckdb_register_arrow(con, table_name, .data)
+
+ tbl <- tbl(con, table_name)
+ groups <- dplyr::groups(.data)
+ if (length(groups)) {
+    tbl <- dplyr::group_by(tbl, !!!groups)
+ }
+
+ if (auto_disconnect) {
+    # This registers the disconnector so the connection is cleaned up when the
+    # tbl is garbage collected. We should probably confirm that this use of
+    # src$disco is kosher.
+ tbl$src$disco <- duckdb_disconnector(con, table_name)
+ }
+
+ tbl
+}
+
+arrow_duck_connection <- function() {
+ con <- getOption("arrow_duck_con")
+ if (is.null(con) || !DBI::dbIsValid(con)) {
+ con <- DBI::dbConnect(duckdb::duckdb())
+ # Use the same CPU count that the arrow library is set to
+ DBI::dbExecute(con, paste0("PRAGMA threads=", cpu_count()))
+ options(arrow_duck_con = con)
+ }
+ con
+}
+
+# helper function to determine if duckdb examples should run
+# see: https://github.com/r-lib/roxygen2/issues/1242
+run_duckdb_examples <- function() {
+ arrow_with_dataset() &&
+ requireNamespace("duckdb", quietly = TRUE) &&
+ packageVersion("duckdb") > "0.2.7" &&
+ requireNamespace("dplyr", quietly = TRUE) &&
+ requireNamespace("dbplyr", quietly = TRUE)
+}
+
+# Adapted from dbplyr
+unique_arrow_tablename <- function() {
+ i <- getOption("arrow_table_name", 0) + 1
+ options(arrow_table_name = i)
+ sprintf("arrow_%03i", i)
+}
+
+# Creates an environment that disconnects the database when it's GC'd
+duckdb_disconnector <- function(con, tbl_name) {
+ reg.finalizer(environment(), function(...) {
+    # remove the table we ephemerally created (though only if the connection is
+ # still valid)
+ if (DBI::dbIsValid(con)) {
+ duckdb::duckdb_unregister_arrow(con, tbl_name)
+ }
+
+    # and, if there are no more tables, we can safely shut down the connection
+ if (length(DBI::dbListTables(con)) == 0) {
+ DBI::dbDisconnect(con, shutdown = TRUE)
+ }
+ })
+ environment()
+}
+
+#' Create an Arrow object from others
+#'
+#' This can be used in pipelines that pass data back and forth between Arrow and
+#' other processes (like DuckDB).
+#'
+#' @param .data the object to be converted
+#'
+#' @return an `arrow_dplyr_query` object, to be used in dplyr pipelines.
+#' @export
+#'
+#' @examplesIf getFromNamespace("run_duckdb_examples", "arrow")()
+#' library(dplyr)
+#'
+#' ds <- InMemoryDataset$create(mtcars)
+#'
+#' ds %>%
+#' filter(mpg < 30) %>%
+#' to_duckdb() %>%
+#' group_by(cyl) %>%
+#' summarize(mean_mpg = mean(mpg, na.rm = TRUE)) %>%
+#' to_arrow() %>%
+#' collect()
+to_arrow <- function(.data) {
+ # If this is an Arrow object already, return quickly since we're already Arrow
+ if (inherits(.data, c("arrow_dplyr_query", "ArrowObject"))) {
+ return(.data)
+ }
+
+ # For now, we only handle .data from duckdb, so check that it is that if we've
+ # gotten this far
+ if (!inherits(dbplyr::remote_con(.data), "duckdb_connection")) {
+ stop(
+ "to_arrow() currently only supports Arrow tables, Arrow datasets, ",
+ "Arrow queries, or dbplyr tbls from duckdb connections",
+ call. = FALSE
+ )
+ }
+
+ # Run the query
+ res <- DBI::dbSendQuery(dbplyr::remote_con(.data), dbplyr::remote_query(.data), arrow = TRUE)
+
+ # TODO: we shouldn't need $read_table(), but we get segfaults when we do.
+ arrow_dplyr_query(duckdb::duckdb_fetch_record_batch(res)$read_table())
+}
diff --git a/src/arrow/r/R/enums.R b/src/arrow/r/R/enums.R
new file mode 100644
index 000000000..4e69b7a19
--- /dev/null
+++ b/src/arrow/r/R/enums.R
@@ -0,0 +1,178 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+#' @export
+`print.arrow-enum` <- function(x, ...) {
+ NextMethod()
+}
+
+enum <- function(class, ..., .list = list(...)) {
+ structure(
+ .list,
+ class = c(class, "arrow-enum")
+ )
+}
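+# Illustrative sketch: an enum is a named list of integers with a class attached,
+# so members are read with `$` or `[[`, e.g. TimeUnit$MILLI is 1L and
+# Type[["INT32"]] is 7L (per the definitions below).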
+
+#' Arrow enums
+#' @name enums
+#' @export
+#' @keywords internal
+TimeUnit <- enum("TimeUnit::type",
+ SECOND = 0L, MILLI = 1L, MICRO = 2L, NANO = 3L
+)
+
+#' @rdname enums
+#' @export
+DateUnit <- enum("DateUnit", DAY = 0L, MILLI = 1L)
+
+#' @rdname enums
+#' @export
+Type <- enum("Type::type",
+ "NA" = 0L,
+ BOOL = 1L,
+ UINT8 = 2L,
+ INT8 = 3L,
+ UINT16 = 4L,
+ INT16 = 5L,
+ UINT32 = 6L,
+ INT32 = 7L,
+ UINT64 = 8L,
+ INT64 = 9L,
+ HALF_FLOAT = 10L,
+ FLOAT = 11L,
+ DOUBLE = 12L,
+ STRING = 13L,
+ BINARY = 14L,
+ FIXED_SIZE_BINARY = 15L,
+ DATE32 = 16L,
+ DATE64 = 17L,
+ TIMESTAMP = 18L,
+ TIME32 = 19L,
+ TIME64 = 20L,
+ INTERVAL_MONTHS = 21L,
+ INTERVAL_DAY_TIME = 22L,
+ DECIMAL = 23L,
+ DECIMAL256 = 24L,
+ LIST = 25L,
+ STRUCT = 26L,
+ SPARSE_UNION = 27L,
+ DENSE_UNION = 28L,
+ DICTIONARY = 29L,
+ MAP = 30L,
+ EXTENSION = 31L,
+ FIXED_SIZE_LIST = 32L,
+ DURATION = 33L,
+ LARGE_STRING = 34L,
+ LARGE_BINARY = 35L,
+ LARGE_LIST = 36L
+)
+
+TYPES_WITH_NAN <- Type[c("HALF_FLOAT", "FLOAT", "DOUBLE")]
+
+#' @rdname enums
+#' @export
+StatusCode <- enum("StatusCode",
+ OK = 0L, OutOfMemory = 1L, KeyError = 2L, TypeError = 3L,
+ Invalid = 4L, IOError = 5L, CapacityError = 6L, IndexError = 7L,
+ UnknownError = 9L, NotImplemented = 10L, SerializationError = 11L,
+ PythonError = 12L, RError = 13L,
+ PlasmaObjectExists = 20L, PlasmaObjectNotFound = 21L,
+ PlasmaStoreFull = 22L, PlasmaObjectAlreadySealed = 23L
+)
+
+#' @rdname enums
+#' @export
+FileMode <- enum("FileMode",
+ READ = 0L, WRITE = 1L, READWRITE = 2L
+)
+
+#' @rdname enums
+#' @export
+MessageType <- enum("MessageType",
+ NONE = 0L, SCHEMA = 1L, DICTIONARY_BATCH = 2L, RECORD_BATCH = 3L, TENSOR = 4L
+)
+
+#' @rdname enums
+#' @export
+CompressionType <- enum("Compression::type",
+ UNCOMPRESSED = 0L, SNAPPY = 1L, GZIP = 2L, BROTLI = 3L, ZSTD = 4L, LZ4 = 5L,
+ LZ4_FRAME = 6L, LZO = 7L, BZ2 = 8L
+)
+
+#' @export
+#' @rdname enums
+FileType <- enum("FileType",
+ NotFound = 0L, Unknown = 1L, File = 2L, Directory = 3L
+)
+
+#' @export
+#' @rdname enums
+ParquetVersionType <- enum("ParquetVersionType",
+ PARQUET_1_0 = 0L, PARQUET_2_0 = 1L
+)
+
+#' @export
+#' @rdname enums
+MetadataVersion <- enum("MetadataVersion",
+ V1 = 0L, V2 = 1L, V3 = 2L, V4 = 3L, V5 = 4L
+)
+
+#' @export
+#' @rdname enums
+QuantileInterpolation <- enum("QuantileInterpolation",
+ LINEAR = 0L, LOWER = 1L, HIGHER = 2L, NEAREST = 3L, MIDPOINT = 4L
+)
+
+#' @export
+#' @rdname enums
+NullEncodingBehavior <- enum("NullEncodingBehavior",
+ ENCODE = 0L, MASK = 1L
+)
+
+#' @export
+#' @rdname enums
+NullHandlingBehavior <- enum("NullHandlingBehavior",
+ EMIT_NULL = 0L, SKIP = 1L, REPLACE = 2L
+)
+
+#' @export
+#' @rdname enums
+RoundMode <- enum("RoundMode",
+ DOWN = 0L,
+ UP = 1L,
+ TOWARDS_ZERO = 2L,
+ TOWARDS_INFINITY = 3L,
+ HALF_DOWN = 4L,
+ HALF_UP = 5L,
+ HALF_TOWARDS_ZERO = 6L,
+ HALF_TOWARDS_INFINITY = 7L,
+ HALF_TO_EVEN = 8L,
+ HALF_TO_ODD = 9L
+)
+
+#' @export
+#' @rdname enums
+JoinType <- enum("JoinType",
+ LEFT_SEMI = 0L,
+ RIGHT_SEMI = 1L,
+ LEFT_ANTI = 2L,
+ RIGHT_ANTI = 3L,
+ INNER = 4L,
+ LEFT_OUTER = 5L,
+ RIGHT_OUTER = 6L,
+ FULL_OUTER = 7L
+)
diff --git a/src/arrow/r/R/expression.R b/src/arrow/r/R/expression.R
new file mode 100644
index 000000000..b1b6635f5
--- /dev/null
+++ b/src/arrow/r/R/expression.R
@@ -0,0 +1,240 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+#' @include arrowExports.R
+
+.unary_function_map <- list(
+ # NOTE: Each of the R functions mapped here takes exactly *one* argument, maps
+ # *directly* to an Arrow C++ compute kernel, and does not require any
+ # non-default options to be specified. More complex R function mappings are
+ # defined in dplyr-functions.R.
+
+ # functions are arranged alphabetically by name within categories
+
+ # arithmetic functions
+ "abs" = "abs_checked",
+ "ceiling" = "ceil",
+ "floor" = "floor",
+ "log10" = "log10_checked",
+ "log1p" = "log1p_checked",
+ "log2" = "log2_checked",
+ "sign" = "sign",
+ # trunc is defined in dplyr-functions.R
+
+ # trigonometric functions
+ "acos" = "acos_checked",
+ "asin" = "asin_checked",
+ "cos" = "cos_checked",
+ "sin" = "sin_checked",
+ "tan" = "tan_checked",
+
+ # logical functions
+ "!" = "invert",
+
+ # string functions
+ # nchar is defined in dplyr-functions.R
+ "str_length" = "utf8_length",
+ # str_pad is defined in dplyr-functions.R
+ # str_sub is defined in dplyr-functions.R
+ # str_to_lower is defined in dplyr-functions.R
+ # str_to_title is defined in dplyr-functions.R
+ # str_to_upper is defined in dplyr-functions.R
+ # str_trim is defined in dplyr-functions.R
+ "stri_reverse" = "utf8_reverse",
+ # substr is defined in dplyr-functions.R
+ # substring is defined in dplyr-functions.R
+ "tolower" = "utf8_lower",
+ "toupper" = "utf8_upper",
+
+ # date and time functions
+ "day" = "day",
+ "hour" = "hour",
+ "isoweek" = "iso_week",
+ "epiweek" = "us_week",
+ "isoyear" = "iso_year",
+ "minute" = "minute",
+ "month" = "month",
+ "quarter" = "quarter",
+ # second is defined in dplyr-functions.R
+ # wday is defined in dplyr-functions.R
+ "yday" = "day_of_year",
+ "year" = "year",
+
+ # type conversion functions
+ "as.factor" = "dictionary_encode"
+)
+
+.binary_function_map <- list(
+ # NOTE: Each of the R functions/operators mapped here takes exactly *two*
+ # arguments. Most map *directly* to an Arrow C++ compute kernel and require no
+ # non-default options, but some are modified by build_expr(). More complex R
+ # function/operator mappings are defined in dplyr-functions.R.
+ "==" = "equal",
+ "!=" = "not_equal",
+ ">" = "greater",
+ ">=" = "greater_equal",
+ "<" = "less",
+ "<=" = "less_equal",
+ "&" = "and_kleene",
+ "|" = "or_kleene",
+ "+" = "add_checked",
+ "-" = "subtract_checked",
+ "*" = "multiply_checked",
+ "/" = "divide",
+ "%/%" = "divide_checked",
+  # we don't actually use divide_checked with `%%`; build_expr() rewrites `%%`
+  # in terms of `%/%` (mapped above), so this entry is never dispatched directly
+ "%%" = "divide_checked",
+ "^" = "power_checked",
+ "%in%" = "is_in_meta_binary"
+)
+
+.array_function_map <- c(.unary_function_map, .binary_function_map)
+
+#' Arrow expressions
+#'
+#' @description
+#' `Expression`s are used to define filter logic for passing to a [Dataset]
+#' [Scanner].
+#'
+#' `Expression$scalar(x)` constructs an `Expression` which always evaluates to
+#' the provided scalar (length-1) R value.
+#'
+#' `Expression$field_ref(name)` is used to construct an `Expression` which
+#' evaluates to the named column in the `Dataset` against which it is evaluated.
+#'
+#' `Expression$create(function_name, ..., options)` builds a function-call
+#' `Expression` containing one or more `Expression`s.
+#' @name Expression
+#' @rdname Expression
+#' @export
+Expression <- R6Class("Expression",
+ inherit = ArrowObject,
+ public = list(
+ ToString = function() compute___expr__ToString(self),
+ Equals = function(other, ...) {
+ inherits(other, "Expression") && compute___expr__equals(self, other)
+ },
+ # TODO: Implement type determination without storing
+ # schemas in Expression objects (ARROW-13186)
+ schema = NULL,
+ type = function(schema = self$schema) {
+ assert_that(!is.null(schema))
+ compute___expr__type(self, schema)
+ },
+ type_id = function(schema = self$schema) {
+ assert_that(!is.null(schema))
+ compute___expr__type_id(self, schema)
+ },
+ cast = function(to_type, safe = TRUE, ...) {
+ opts <- list(
+ to_type = to_type,
+ allow_int_overflow = !safe,
+ allow_time_truncate = !safe,
+ allow_float_truncate = !safe
+ )
+ Expression$create("cast", self, options = modifyList(opts, list(...)))
+ }
+ ),
+ active = list(
+ field_name = function() compute___expr__get_field_ref_name(self)
+ )
+)
+Expression$create <- function(function_name,
+ ...,
+ args = list(...),
+ options = empty_named_list()) {
+ assert_that(is.string(function_name))
+ assert_that(is_list_of(args, "Expression"), msg = "Expression arguments must be Expression objects")
+ expr <- compute___expr__call(function_name, args, options)
+ expr$schema <- unify_schemas(schemas = lapply(args, function(x) x$schema))
+ expr
+}
+
+Expression$field_ref <- function(name) {
+ assert_that(is.string(name))
+ compute___expr__field_ref(name)
+}
+Expression$scalar <- function(x) {
+ expr <- compute___expr__scalar(Scalar$create(x))
+ expr$schema <- schema()
+ expr
+}
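+# Illustrative sketch (not run), assuming a field "x" exists in the data the
+# expression is eventually bound to:
+#   Expression$field_ref("x")   # refers to column x
+#   Expression$scalar(1L)       # wraps a length-1 R value
+#   Expression$create("add_checked", Expression$field_ref("x"), Expression$scalar(1L))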
+
+# Wrapper around Expression$create that:
+# (1) maps R function names to Arrow C++ compute ("/" --> "divide")
+# (2) wraps R input args as Array or Scalar
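+# Illustrative sketch (not run): build_expr("+", Expression$field_ref("x"), 1)
+# maps "+" to the "add_checked" kernel and wraps the bare 1 as a scalar Expression.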
+build_expr <- function(FUN,
+ ...,
+ args = list(...),
+ options = empty_named_list()) {
+ if (FUN == "-" && length(args) == 1L) {
+ if (inherits(args[[1]], c("ArrowObject", "Expression"))) {
+ return(build_expr("negate_checked", args[[1]]))
+ } else {
+ return(-args[[1]])
+ }
+ }
+ if (FUN == "%in%") {
+ # Special-case %in%, which is different from the Array function name
+ expr <- Expression$create("is_in", args[[1]],
+ options = list(
+ # If args[[2]] is already an Arrow object (like a scalar),
+ # this wouldn't work
+ value_set = Array$create(args[[2]]),
+ skip_nulls = TRUE
+ )
+ )
+ } else {
+ args <- lapply(args, function(x) {
+ if (!inherits(x, "Expression")) {
+ x <- Expression$scalar(x)
+ }
+ x
+ })
+
+ # In Arrow, "divide" is one function, which does integer division on
+ # integer inputs and floating-point division on floats
+ if (FUN == "/") {
+ # TODO: omg so many ways it's wrong to assume these types
+ args <- lapply(args, function(x) x$cast(float64()))
+ } else if (FUN == "%/%") {
+ # In R, integer division works like floor(float division)
+ out <- build_expr("/", args = args)
+ return(out$cast(int32(), allow_float_truncate = TRUE))
+ } else if (FUN == "%%") {
+ return(args[[1]] - args[[2]] * (args[[1]] %/% args[[2]]))
+ }
+
+ expr <- Expression$create(.array_function_map[[FUN]] %||% FUN, args = args, options = options)
+ }
+ expr
+}
+
+#' @export
+Ops.Expression <- function(e1, e2) {
+ if (.Generic == "!") {
+ build_expr(.Generic, e1)
+ } else {
+ build_expr(.Generic, e1, e2)
+ }
+}
+
+#' @export
+is.na.Expression <- function(x) {
+ Expression$create("is_null", x, options = list(nan_is_null = TRUE))
+}
diff --git a/src/arrow/r/R/feather.R b/src/arrow/r/R/feather.R
new file mode 100644
index 000000000..70a270bbe
--- /dev/null
+++ b/src/arrow/r/R/feather.R
@@ -0,0 +1,219 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+#' Write data in the Feather format
+#'
+#' Feather provides binary columnar serialization for data frames.
+#' It is designed to make reading and writing data frames efficient,
+#' and to make sharing data across data analysis languages easy.
+#' This function writes both the original, limited specification of the format
+#' and the version 2 specification, which is the Apache Arrow IPC file format.
+#'
+#' @param x `data.frame`, [RecordBatch], or [Table]
+#' @param sink A string file path, URI, or [OutputStream], or path in a file
+#' system (`SubTreeFileSystem`)
+#' @param version integer Feather file version. Version 2 is the current.
+#' Version 1 is the more limited legacy format.
+#' @param chunk_size For V2 files, the number of rows that each chunk of data
+#' should have in the file. Use a smaller `chunk_size` when you need faster
+#' random row access. Default is 64K. This option is not supported for V1.
+#' @param compression Name of compression codec to use, if any. Default is
+#' "lz4" if LZ4 is available in your build of the Arrow C++ library, otherwise
+#' "uncompressed". "zstd" is the other available codec and generally has better
+#' compression ratios in exchange for slower read and write performance
+#' See [codec_is_available()]. This option is not supported for V1.
+#' @param compression_level If `compression` is "zstd", you may
+#' specify an integer compression level. If omitted, the compression codec's
+#' default compression level is used.
+#'
+#' @return The input `x`, invisibly. Note that if `sink` is an [OutputStream],
+#' the stream will be left open.
+#' @export
+#' @seealso [RecordBatchWriter] for lower-level access to writing Arrow IPC data.
+#' @seealso [Schema] for information about schemas and metadata handling.
+#' @examplesIf arrow_available()
+#' tf <- tempfile()
+#' on.exit(unlink(tf))
+#' write_feather(mtcars, tf)
+#' @include arrow-package.R
+write_feather <- function(x,
+ sink,
+ version = 2,
+ chunk_size = 65536L,
+ compression = c("default", "lz4", "uncompressed", "zstd"),
+ compression_level = NULL) {
+ # Handle and validate options before touching data
+ version <- as.integer(version)
+ assert_that(version %in% 1:2)
+ compression <- match.arg(compression)
+ chunk_size <- as.integer(chunk_size)
+ assert_that(chunk_size > 0)
+ if (compression == "default") {
+ if (version == 2 && codec_is_available("lz4")) {
+ compression <- "lz4"
+ } else {
+ compression <- "uncompressed"
+ }
+ }
+ if (is.null(compression_level)) {
+    # Use -1 as sentinel for "default"
+ compression_level <- -1L
+ }
+ compression_level <- as.integer(compression_level)
+ # Now make sure that options make sense together
+ if (version == 1) {
+ if (chunk_size != 65536L) {
+ stop("Feather version 1 does not support the 'chunk_size' option", call. = FALSE)
+ }
+ if (compression != "uncompressed") {
+ stop("Feather version 1 does not support the 'compression' option", call. = FALSE)
+ }
+ if (compression_level != -1L) {
+ stop("Feather version 1 does not support the 'compression_level' option", call. = FALSE)
+ }
+ }
+ if (compression != "zstd" && compression_level != -1L) {
+ stop("Can only specify a 'compression_level' when 'compression' is 'zstd'", call. = FALSE)
+ }
+ # Finally, add 1 to version because 2 means V1 and 3 means V2 :shrug:
+ version <- version + 1L
+
+ # "lz4" is the convenience
+ if (compression == "lz4") {
+ compression <- "lz4_frame"
+ }
+
+ compression <- compression_from_name(compression)
+
+ x_out <- x
+ if (is.data.frame(x) || inherits(x, "RecordBatch")) {
+ x <- Table$create(x)
+ }
+
+ assert_that(is_writable_table(x))
+
+ if (!inherits(sink, "OutputStream")) {
+ sink <- make_output_stream(sink)
+ on.exit(sink$close())
+ }
+ ipc___WriteFeather__Table(sink, x, version, chunk_size, compression, compression_level)
+ invisible(x_out)
+}
+
+#' Read a Feather file
+#'
+#' Feather provides binary columnar serialization for data frames.
+#' It is designed to make reading and writing data frames efficient,
+#' and to make sharing data across data analysis languages easy.
+#' This function reads both the original, limited specification of the format
+#' and the version 2 specification, which is the Apache Arrow IPC file format.
+#'
+#' @inheritParams read_ipc_stream
+#' @inheritParams read_delim_arrow
+#' @param ... additional parameters, passed to [make_readable_file()].
+#'
+#' @return A `data.frame` if `as_data_frame` is `TRUE` (the default), or an
+#' Arrow [Table] otherwise
+#'
+#' @export
+#' @seealso [FeatherReader] and [RecordBatchReader] for lower-level access to reading Arrow IPC data.
+#' @examplesIf arrow_available()
+#' tf <- tempfile()
+#' on.exit(unlink(tf))
+#' write_feather(mtcars, tf)
+#' df <- read_feather(tf)
+#' dim(df)
+#' # Can select columns
+#' df <- read_feather(tf, col_select = starts_with("d"))
+read_feather <- function(file, col_select = NULL, as_data_frame = TRUE, ...) {
+ if (!inherits(file, "RandomAccessFile")) {
+ file <- make_readable_file(file, ...)
+ on.exit(file$close())
+ }
+ reader <- FeatherReader$create(file)
+
+ col_select <- enquo(col_select)
+ columns <- if (!quo_is_null(col_select)) {
+ vars_select(names(reader), !!col_select)
+ }
+
+ out <- tryCatch(
+ reader$Read(columns),
+ error = read_compressed_error
+ )
+
+ if (isTRUE(as_data_frame)) {
+ out <- as.data.frame(out)
+ }
+ out
+}
+
+#' @title FeatherReader class
+#' @rdname FeatherReader
+#' @name FeatherReader
+#' @docType class
+#' @usage NULL
+#' @format NULL
+#' @description This class enables you to interact with Feather files. Create
+#' one to connect to a file or other InputStream, and call `Read()` on it to
+#' make an `arrow::Table`. See its usage in [`read_feather()`].
+#'
+#' @section Factory:
+#'
+#' The `FeatherReader$create()` factory method instantiates the object and
+#' takes the following argument:
+#'
+#' - `file` an Arrow file connection object inheriting from `RandomAccessFile`.
+#'
+#' @section Methods:
+#'
+#' - `$Read(columns)`: Returns a `Table` of the selected columns, a vector of
+#' integer indices
+#' - `$column_names`: Active binding, returns the column names in the Feather file
+#' - `$schema`: Active binding, returns the schema of the Feather file
+#' - `$version`: Active binding, returns `1` or `2`, according to the Feather
+#' file version
+#'
+#' @export
+#' @include arrow-package.R
+FeatherReader <- R6Class("FeatherReader",
+ inherit = ArrowObject,
+ public = list(
+ Read = function(columns) {
+ ipc___feather___Reader__Read(self, columns)
+ },
+ print = function(...) {
+ cat("FeatherReader:\n")
+ print(self$schema)
+ invisible(self)
+ }
+ ),
+ active = list(
+ # versions are officially 2 for V1 and 3 for V2 :shrug:
+ version = function() ipc___feather___Reader__version(self) - 1L,
+ column_names = function() names(self$schema),
+ schema = function() ipc___feather___Reader__schema(self)
+ )
+)
+
+#' @export
+names.FeatherReader <- function(x) x$column_names
+
+FeatherReader$create <- function(file) {
+ assert_is(file, "RandomAccessFile")
+ ipc___feather___Reader__Open(file)
+}
diff --git a/src/arrow/r/R/field.R b/src/arrow/r/R/field.R
new file mode 100644
index 000000000..d10ee7818
--- /dev/null
+++ b/src/arrow/r/R/field.R
@@ -0,0 +1,84 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+#' @include arrow-package.R
+#' @title Field class
+#' @usage NULL
+#' @format NULL
+#' @docType class
+#' @description `field()` lets you create an `arrow::Field` that maps a
+#' [DataType][data-type] to a column name. Fields are contained in
+#' [Schemas][Schema].
+#' @section Methods:
+#'
+#' - `f$ToString()`: convert to a string
+#' - `f$Equals(other)`: test for equality. More naturally called as `f == other`
+#'
+#' @rdname Field
+#' @name Field
+#' @export
+Field <- R6Class("Field",
+ inherit = ArrowObject,
+ public = list(
+ ToString = function() {
+ prettier_dictionary_type(Field__ToString(self))
+ },
+ Equals = function(other, ...) {
+ inherits(other, "Field") && Field__Equals(self, other)
+ },
+ export_to_c = function(ptr) ExportField(self, ptr)
+ ),
+ active = list(
+ name = function() {
+ Field__name(self)
+ },
+ nullable = function() {
+ Field__nullable(self)
+ },
+ type = function() {
+ Field__type(self)
+ }
+ )
+)
+Field$create <- function(name, type, metadata, nullable = TRUE) {
+ assert_that(inherits(name, "character"), length(name) == 1L)
+ type <- as_type(type, name)
+ assert_that(missing(metadata), msg = "metadata= is currently ignored")
+ Field__initialize(enc2utf8(name), type, nullable)
+}
+#' @include arrowExports.R
+Field$import_from_c <- ImportField
+
+#' @param name field name
+#' @param type logical type, instance of [DataType]
+#' @param metadata currently ignored
+#' @param nullable TRUE if field is nullable
+#'
+#' @examplesIf arrow_available()
+#' field("x", int32())
+#' @rdname Field
+#' @export
+field <- Field$create
+
+.fields <- function(.list, nullable = TRUE) {
+ if (length(.list)) {
+ assert_that(!is.null(nms <- names(.list)))
+ map2(nms, .list, field)
+ } else {
+ list()
+ }
+}
diff --git a/src/arrow/r/R/filesystem.R b/src/arrow/r/R/filesystem.R
new file mode 100644
index 000000000..a09d0a51d
--- /dev/null
+++ b/src/arrow/r/R/filesystem.R
@@ -0,0 +1,505 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+#' @include arrow-package.R
+#' @title FileSystem entry info
+#' @usage NULL
+#' @format NULL
+#'
+#' @section Methods:
+#'
+#' - `base_name()` : The file base name (component after the last directory
+#' separator).
+#' - `extension()` : The file extension
+#'
+#' @section Active bindings:
+#'
+#' - `$type`: The file type
+#' - `$path`: The full file path in the filesystem
+#' - `$size`: The size in bytes, if available. Only regular files are
+#' guaranteed to have a size.
+#' - `$mtime`: The time of last modification, if available.
+#'
+#' @rdname FileInfo
+#' @export
+FileInfo <- R6Class("FileInfo",
+ inherit = ArrowObject,
+ public = list(
+ base_name = function() fs___FileInfo__base_name(self),
+ extension = function() fs___FileInfo__extension(self)
+ ),
+ active = list(
+ type = function(type) {
+ if (missing(type)) {
+ fs___FileInfo__type(self)
+ } else {
+ fs___FileInfo__set_type(self, type)
+ }
+ },
+ path = function(path) {
+ if (missing(path)) {
+ fs___FileInfo__path(self)
+ } else {
+ invisible(fs___FileInfo__set_path(self))
+ }
+ },
+ size = function(size) {
+ if (missing(size)) {
+ fs___FileInfo__size(self)
+ } else {
+ invisible(fs___FileInfo__set_size(self, size))
+ }
+ },
+ mtime = function(time) {
+ if (missing(time)) {
+ fs___FileInfo__mtime(self)
+ } else {
+        if (!inherits(time, "POSIXct") || length(time) != 1L) {
+ abort("invalid time")
+ }
+ invisible(fs___FileInfo__set_mtime(self, time))
+ }
+ }
+ )
+)
+
+#' @title file selector
+#' @format NULL
+#'
+#' @section Factory:
+#'
+#' The `$create()` factory method instantiates a `FileSelector` given the 3 fields
+#' described below.
+#'
+#' @section Fields:
+#'
+#' - `base_dir`: The directory in which to select files. If the path exists but
+#' doesn't point to a directory, this should be an error.
+#' - `allow_not_found`: The behavior if `base_dir` doesn't exist in the
+#' filesystem. If `FALSE`, an error is returned. If `TRUE`, an empty
+#' selection is returned.
+#' - `recursive`: Whether to recurse into subdirectories.
+#'
+#' @rdname FileSelector
+#' @export
+FileSelector <- R6Class("FileSelector",
+ inherit = ArrowObject,
+ active = list(
+ base_dir = function() fs___FileSelector__base_dir(self),
+ allow_not_found = function() fs___FileSelector__allow_not_found(self),
+ recursive = function() fs___FileSelector__recursive(self)
+ )
+)
+
+FileSelector$create <- function(base_dir, allow_not_found = FALSE, recursive = FALSE) {
+ fs___FileSelector__create(clean_path_rel(base_dir), allow_not_found, recursive)
+}
+
+#' @title FileSystem classes
+#' @description `FileSystem` is an abstract file system API,
+#' `LocalFileSystem` is an implementation accessing files
+#' on the local machine. `SubTreeFileSystem` is an implementation that delegates
+#' to another implementation after prepending a fixed base path
+#'
+#' @section Factory:
+#'
+#' `LocalFileSystem$create()` returns the object and takes no arguments.
+#'
+#' `SubTreeFileSystem$create()` takes the following arguments:
+#'
+#' - `base_path`, a string path
+#' - `base_fs`, a `FileSystem` object
+#'
+#' `S3FileSystem$create()` optionally takes arguments:
+#'
+#' - `anonymous`: logical, default `FALSE`. If true, will not attempt to look up
+#' credentials using standard AWS configuration methods.
+#' - `access_key`, `secret_key`: authentication credentials. If one is provided,
+#' the other must be as well. If both are provided, they will override any
+#' AWS configuration set at the environment level.
+#' - `session_token`: optional string for authentication along with
+#' `access_key` and `secret_key`
+#' - `role_arn`: string AWS ARN of an AccessRole. If provided instead of `access_key` and
+#' `secret_key`, temporary credentials will be fetched by assuming this role.
+#' - `session_name`: optional string identifier for the assumed role session.
+#' - `external_id`: optional unique string identifier that might be required
+#' when you assume a role in another account.
+#' - `load_frequency`: integer, frequency (in seconds) with which temporary
+#' credentials from an assumed role session will be refreshed. Default is
+#' 900 (i.e. 15 minutes)
+#' - `region`: AWS region to connect to. If omitted, the AWS library will
+#' provide a sensible default based on client configuration, falling back
+#' to "us-east-1" if no other alternatives are found.
+#' - `endpoint_override`: If non-empty, override region with a connect string
+#' such as "localhost:9000". This is useful for connecting to file systems
+#' that emulate S3.
+#' - `scheme`: S3 connection transport (default "https")
+#' - `background_writes`: logical, whether `OutputStream` writes will be issued
+#' in the background, without blocking (default `TRUE`)
+#'
+#' @section Methods:
+#'
+#' - `$GetFileInfo(x)`: `x` may be a [FileSelector][FileSelector] or a character
+#' vector of paths. Returns a list of [FileInfo][FileInfo]
+#' - `$CreateDir(path, recursive = TRUE)`: Create a directory and subdirectories.
+#' - `$DeleteDir(path)`: Delete a directory and its contents, recursively.
+#' - `$DeleteDirContents(path)`: Delete a directory's contents, recursively.
+#' Like `$DeleteDir()`,
+#' but doesn't delete the directory itself. Passing an empty path (`""`) will
+#' wipe the entire filesystem tree.
+#' - `$DeleteFile(path)` : Delete a file.
+#' - `$DeleteFiles(paths)` : Delete many files. The default implementation
+#' issues individual delete operations in sequence.
+#' - `$Move(src, dest)`: Move / rename a file or directory. If the destination
+#' exists:
+#'   if it is a non-empty directory, an error is returned;
+#'   otherwise, if it has the same type as the source, it is replaced;
+#' otherwise, behavior is unspecified (implementation-dependent).
+#' - `$CopyFile(src, dest)`: Copy a file. If the destination exists and is a
+#' directory, an error is returned. Otherwise, it is replaced.
+#' - `$OpenInputStream(path)`: Open an [input stream][InputStream] for
+#' sequential reading.
+#' - `$OpenInputFile(path)`: Open an [input file][RandomAccessFile] for random
+#' access reading.
+#' - `$OpenOutputStream(path)`: Open an [output stream][OutputStream] for
+#' sequential writing.
+#' - `$OpenAppendStream(path)`: Open an [output stream][OutputStream] for
+#' appending.
+#'
+#' @section Active bindings:
+#'
+#' - `$type_name`: string filesystem type name, such as "local", "s3", etc.
+#' - `$region`: string AWS region, for `S3FileSystem` and `SubTreeFileSystem`
+#' containing a `S3FileSystem`
+#' - `$base_fs`: for `SubTreeFileSystem`, the `FileSystem` it contains
+#' - `$base_path`: for `SubTreeFileSystem`, the path in `$base_fs` which is considered
+#' root in this `SubTreeFileSystem`.
+#'
+#' @usage NULL
+#' @format NULL
+#' @docType class
+#'
+#' @rdname FileSystem
+#' @name FileSystem
+#' @export
+FileSystem <- R6Class("FileSystem",
+ inherit = ArrowObject,
+ public = list(
+ GetFileInfo = function(x) {
+ if (inherits(x, "FileSelector")) {
+ fs___FileSystem__GetTargetInfos_FileSelector(self, x)
+ } else if (is.character(x)) {
+ fs___FileSystem__GetTargetInfos_Paths(self, clean_path_rel(x))
+ } else {
+ abort("incompatible type for FileSystem$GetFileInfo()")
+ }
+ },
+ CreateDir = function(path, recursive = TRUE) {
+ fs___FileSystem__CreateDir(self, clean_path_rel(path), isTRUE(recursive))
+ },
+ DeleteDir = function(path) {
+ fs___FileSystem__DeleteDir(self, clean_path_rel(path))
+ },
+ DeleteDirContents = function(path) {
+ fs___FileSystem__DeleteDirContents(self, clean_path_rel(path))
+ },
+ DeleteFile = function(path) {
+ fs___FileSystem__DeleteFile(self, clean_path_rel(path))
+ },
+ DeleteFiles = function(paths) {
+ fs___FileSystem__DeleteFiles(self, clean_path_rel(paths))
+ },
+ Move = function(src, dest) {
+ fs___FileSystem__Move(self, clean_path_rel(src), clean_path_rel(dest))
+ },
+ CopyFile = function(src, dest) {
+ fs___FileSystem__CopyFile(self, clean_path_rel(src), clean_path_rel(dest))
+ },
+ OpenInputStream = function(path) {
+ fs___FileSystem__OpenInputStream(self, clean_path_rel(path))
+ },
+ OpenInputFile = function(path) {
+ fs___FileSystem__OpenInputFile(self, clean_path_rel(path))
+ },
+ OpenOutputStream = function(path) {
+ fs___FileSystem__OpenOutputStream(self, clean_path_rel(path))
+ },
+ OpenAppendStream = function(path) {
+ fs___FileSystem__OpenAppendStream(self, clean_path_rel(path))
+ },
+
+ # Friendlier R user interface
+ path = function(x) SubTreeFileSystem$create(x, self),
+ cd = function(x) SubTreeFileSystem$create(x, self),
+ ls = function(path = "", ...) {
+ selector <- FileSelector$create(path, ...) # ... for recursive = TRUE
+ infos <- self$GetFileInfo(selector)
+ map_chr(infos, ~ .$path)
+ # TODO: add full.names argument like base::dir() (default right now is TRUE)
+ # TODO: see fs package for glob/regexp filtering
+ # TODO: verbose method that shows other attributes as df
+ # TODO: print methods for FileInfo, SubTreeFileSystem, S3FileSystem
+ }
+ ),
+ active = list(
+ type_name = function() fs___FileSystem__type_name(self)
+ )
+)
+FileSystem$from_uri <- function(uri) {
+ assert_that(is.string(uri))
+ fs___FileSystemFromUri(uri)
+}
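+# Illustrative sketch (assumes S3 support is built in and the URI is reachable):
+#   out <- FileSystem$from_uri("s3://ursa-labs-taxi-data/2019")
+#   out$fs    # an S3FileSystem
+#   out$path  # the path within that filesystem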
+
+get_paths_and_filesystem <- function(x, filesystem = NULL) {
+ # Wrapper around FileSystem$from_uri that handles local paths
+ # and an optional explicit filesystem
+ if (inherits(x, "SubTreeFileSystem")) {
+ return(list(fs = x$base_fs, path = x$base_path))
+ }
+ assert_that(is.character(x))
+ are_urls <- are_urls(x)
+ if (any(are_urls)) {
+ if (!all(are_urls)) {
+ stop("Vectors of mixed paths and URIs are not supported", call. = FALSE)
+ }
+ if (!is.null(filesystem)) {
+ # Stop? Can't have URL (which yields a fs) and another fs
+ }
+ x <- lapply(x, FileSystem$from_uri)
+ if (length(unique(map(x, ~ class(.$fs)))) > 1) {
+ stop(
+ "Vectors of URIs for different file systems are not supported",
+ call. = FALSE
+ )
+ }
+ fs <- x[[1]]$fs
+ path <- map_chr(x, ~ .$path) # singular name "path" used for compatibility
+ } else {
+ fs <- filesystem %||% LocalFileSystem$create()
+ if (inherits(fs, "LocalFileSystem")) {
+ path <- clean_path_abs(x)
+ } else {
+ path <- clean_path_rel(x)
+ }
+ }
+ list(
+ fs = fs,
+ path = path
+ )
+}
+
+# variant of the above function that asserts that x is either a scalar string
+# or a SubTreeFileSystem
+get_path_and_filesystem <- function(x, filesystem = NULL) {
+ assert_that(is.string(x) || inherits(x, "SubTreeFileSystem"))
+ get_paths_and_filesystem(x, filesystem)
+}
+
+is_url <- function(x) is.string(x) && grepl("://", x)
+are_urls <- function(x) if (!is.character(x)) FALSE else grepl("://", x)
+
+#' @usage NULL
+#' @format NULL
+#' @rdname FileSystem
+#' @export
+LocalFileSystem <- R6Class("LocalFileSystem", inherit = FileSystem)
+LocalFileSystem$create <- function() {
+ fs___LocalFileSystem__create()
+}
+
+#' @usage NULL
+#' @format NULL
+#' @rdname FileSystem
+#' @importFrom utils modifyList
+#' @export
+S3FileSystem <- R6Class("S3FileSystem",
+ inherit = FileSystem,
+ active = list(
+ region = function() fs___S3FileSystem__region(self)
+ )
+)
+S3FileSystem$create <- function(anonymous = FALSE, ...) {
+ args <- list2(...)
+ if (anonymous) {
+ invalid_args <- intersect(
+ c(
+ "access_key", "secret_key", "session_token", "role_arn", "session_name",
+ "external_id", "load_frequency"
+ ),
+ names(args)
+ )
+ if (length(invalid_args)) {
+ stop("Cannot specify ", oxford_paste(invalid_args), " when anonymous = TRUE", call. = FALSE)
+ }
+ } else {
+ keys_present <- length(intersect(c("access_key", "secret_key"), names(args)))
+ if (keys_present == 1) {
+ stop("Key authentication requires both access_key and secret_key", call. = FALSE)
+ }
+ if ("session_token" %in% names(args) && keys_present != 2) {
+ stop(
+ "In order to initialize a session with temporary credentials, ",
+ "both secret_key and access_key must be provided ",
+ "in addition to session_token.",
+ call. = FALSE
+ )
+ }
+ arn <- "role_arn" %in% names(args)
+ if (keys_present == 2 && arn) {
+ stop("Cannot provide both key authentication and role_arn", call. = FALSE)
+ }
+ arn_extras <- intersect(c("session_name", "external_id", "load_frequency"), names(args))
+ if (length(arn_extras) > 0 && !arn) {
+ stop("Cannot specify ", oxford_paste(arn_extras), " without providing a role_arn string", call. = FALSE)
+ }
+ }
+ args <- c(modifyList(default_s3_options, args), anonymous = anonymous)
+ exec(fs___S3FileSystem__create, !!!args)
+}
+
+default_s3_options <- list(
+ access_key = "",
+ secret_key = "",
+ session_token = "",
+ role_arn = "",
+ session_name = "",
+ external_id = "",
+ load_frequency = 900L,
+ region = "",
+ endpoint_override = "",
+ scheme = "",
+ background_writes = TRUE
+)
+
+#' Connect to an AWS S3 bucket
+#'
+#' `s3_bucket()` is a convenience function to create an `S3FileSystem` object
+#' that automatically detects the bucket's AWS region and holds onto its
+#' relative path.
+#'
+#' @param bucket string S3 bucket name or path
+#' @param ... Additional connection options, passed to `S3FileSystem$create()`
+#' @return A `SubTreeFileSystem` containing an `S3FileSystem` and the bucket's
+#' relative path. Note that this function's success does not guarantee that you
+#' are authorized to access the bucket's contents.
+#' @examplesIf arrow_with_s3()
+#' bucket <- s3_bucket("ursa-labs-taxi-data")
+#' @export
+s3_bucket <- function(bucket, ...) {
+ assert_that(is.string(bucket))
+ args <- list2(...)
+
+ # Use FileSystemFromUri to detect the bucket's region
+ if (!is_url(bucket)) {
+ bucket <- paste0("s3://", bucket)
+ }
+ fs_and_path <- FileSystem$from_uri(bucket)
+ fs <- fs_and_path$fs
+ # If there are no additional S3Options, we can use that filesystem
+ # Otherwise, take the region that was detected and make a new fs with the args
+ if (length(args)) {
+ args$region <- fs$region
+ fs <- exec(S3FileSystem$create, !!!args)
+ }
+ # Return a subtree pointing at that bucket path
+ SubTreeFileSystem$create(fs_and_path$path, fs)
+}
+
+#' @usage NULL
+#' @format NULL
+#' @rdname FileSystem
+#' @export
+SubTreeFileSystem <- R6Class("SubTreeFileSystem",
+ inherit = FileSystem,
+ public = list(
+ print = function(...) {
+ if (inherits(self$base_fs, "LocalFileSystem")) {
+ cat("SubTreeFileSystem: ", "file://", self$base_path, "\n", sep = "")
+ } else if (inherits(self$base_fs, "S3FileSystem")) {
+ cat("SubTreeFileSystem: ", "s3://", self$base_path, "\n", sep = "")
+ } else {
+ cat("SubTreeFileSystem", "\n", sep = "")
+ }
+ invisible(self)
+ }
+ ),
+ active = list(
+ base_fs = function() {
+ fs___SubTreeFileSystem__base_fs(self)
+ },
+ base_path = function() fs___SubTreeFileSystem__base_path(self)
+ )
+)
+SubTreeFileSystem$create <- function(base_path, base_fs = NULL) {
+ fs_and_path <- get_path_and_filesystem(base_path, base_fs)
+ fs___SubTreeFileSystem__create(fs_and_path$path, fs_and_path$fs)
+}
+
+#' @export
+`$.SubTreeFileSystem` <- function(x, name, ...) {
+ # This is to allow delegating methods/properties to the base_fs
+ assert_that(is.string(name))
+ if (name %in% ls(envir = x)) {
+ get(name, x)
+ } else if (name %in% ls(envir = x$base_fs)) {
+ get(name, x$base_fs)
+ } else {
+ NULL
+ }
+}
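+# Illustrative sketch: this delegation lets a SubTreeFileSystem answer for its
+# base filesystem, e.g. (assuming S3 access) s3_bucket("ursa-labs-taxi-data")$region
+# is forwarded to the underlying S3FileSystem's $region binding.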
+
+#' Copy files between FileSystems
+#'
+#' @param from A string path to a local directory or file, a URI, or a
+#' `SubTreeFileSystem`. Files will be copied recursively from this path.
+#' @param to A string path to a local directory or file, a URI, or a
+#' `SubTreeFileSystem`. Directories will be created as necessary
+#' @param chunk_size The maximum size of block to read before flushing
+#' to the destination file. A larger chunk_size will use more memory while
+#' copying but may help accommodate high latency FileSystems.
+#' @return Nothing: called for side effects in the file system
+#' @export
+#' @examplesIf FALSE
+#' # Copy an S3 bucket's files to a local directory:
+#' copy_files("s3://your-bucket-name", "local-directory")
+#' # Using a FileSystem object
+#' copy_files(s3_bucket("your-bucket-name"), "local-directory")
+#' # Or go the other way, from local to S3
+#' copy_files("local-directory", s3_bucket("your-bucket-name"))
+copy_files <- function(from, to, chunk_size = 1024L * 1024L) {
+ from <- get_path_and_filesystem(from)
+ to <- get_path_and_filesystem(to)
+ invisible(fs___CopyFiles(
+ from$fs,
+ FileSelector$create(from$path, recursive = TRUE),
+ to$fs,
+ to$path,
+ chunk_size,
+ option_use_threads()
+ ))
+}
+
+clean_path_abs <- function(path) {
+ # Make sure we have a valid, absolute, forward-slashed path for passing to Arrow
+ normalizePath(path, winslash = "/", mustWork = FALSE)
+}
+
+clean_path_rel <- function(path) {
+ # Make sure all path separators are "/", not "\" as on Windows
+ path_sep <- ifelse(tolower(Sys.info()[["sysname"]]) == "windows", "\\\\", "/")
+ gsub(path_sep, "/", path)
+}
diff --git a/src/arrow/r/R/flight.R b/src/arrow/r/R/flight.R
new file mode 100644
index 000000000..cde297853
--- /dev/null
+++ b/src/arrow/r/R/flight.R
@@ -0,0 +1,124 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+#' Load a Python Flight server
+#'
+#' @param name string Python module name
+#' @param path file system path where the Python module is found. Default is
+#' to look in the `inst/` directory for included modules.
+#' @export
+#' @examplesIf FALSE
+#' load_flight_server("demo_flight_server")
+load_flight_server <- function(name, path = system.file(package = "arrow")) {
+ reticulate::import_from_path(name, path)
+}
+
+#' Connect to a Flight server
+#'
+#' @param host string hostname to connect to
+#' @param port integer port to connect on
+#' @param scheme URL scheme, default is "grpc+tcp"
+#' @return A `pyarrow.flight.FlightClient`.
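+#' @examplesIf FALSE
+#' # A sketch, assuming a Flight server is already listening on a
+#' # hypothetical port 8089:
+#' client <- flight_connect(port = 8089)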
+#' @export
+flight_connect <- function(host = "localhost", port, scheme = "grpc+tcp") {
+ pa <- reticulate::import("pyarrow")
+ location <- paste0(scheme, "://", host, ":", port)
+ pa$flight$FlightClient(location)
+}
+
+#' Send data to a Flight server
+#'
+#' @param client `pyarrow.flight.FlightClient`, as returned by [flight_connect()]
+#' @param data `data.frame`, [RecordBatch], or [Table] to upload
+#' @param path string identifier to store the data under
+#' @param overwrite logical: if `path` exists on `client` already, should we
+#' replace it with the contents of `data`? Default is `TRUE`; if `FALSE` and
+#' `path` exists, the function will error.
+#' @return `client`, invisibly.
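+#' @examplesIf FALSE
+#' # A sketch, assuming a Flight server is listening on a hypothetical port 8089:
+#' client <- flight_connect(port = 8089)
+#' flight_put(client, mtcars, path = "mtcars_table")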
+#' @export
+flight_put <- function(client, data, path, overwrite = TRUE) {
+ if (!overwrite && flight_path_exists(client, path)) {
+ stop(path, " exists.", call. = FALSE)
+ }
+ if (is.data.frame(data)) {
+ data <- Table$create(data)
+ }
+ py_data <- reticulate::r_to_py(data)
+ writer <- client$do_put(descriptor_for_path(path), py_data$schema)[[1]]
+ if (inherits(data, "RecordBatch")) {
+ writer$write_batch(py_data)
+ } else {
+ writer$write_table(py_data)
+ }
+ writer$close()
+ invisible(client)
+}
+
+#' Get data from a Flight server
+#'
+#' @param client `pyarrow.flight.FlightClient`, as returned by [flight_connect()]
+#' @param path string identifier under which data is stored
+#' @return A [Table]
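+#' @examplesIf FALSE
+#' # A sketch, retrieving data previously uploaded with flight_put():
+#' client <- flight_connect(port = 8089)
+#' flight_get(client, "mtcars_table")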
+#' @export
+flight_get <- function(client, path) {
+ reader <- flight_reader(client, path)
+ reader$read_all()
+}
+
+# TODO: could use this as a RecordBatch iterator, call $read_chunk() on this
+flight_reader <- function(client, path) {
+ info <- client$get_flight_info(descriptor_for_path(path))
+ # Hack: assume a single ticket, on the same server as client is already connected
+ ticket <- info$endpoints[[1]]$ticket
+ client$do_get(ticket)
+}
+
+descriptor_for_path <- function(path) {
+ pa <- reticulate::import("pyarrow")
+ pa$flight$FlightDescriptor$for_path(path)
+}
+
+#' See available resources on a Flight server
+#'
+#' @inheritParams flight_get
+#' @return `list_flights()` returns a character vector of paths.
+#' `flight_path_exists()` returns a logical value, the equivalent of `path %in% list_flights()`
+#' @export
+list_flights <- function(client) {
+ generator <- client$list_flights()
+ out <- reticulate::iterate(generator, function(x) as.character(x$descriptor$path[[1]]))
+ out
+}
+
+#' @rdname list_flights
+#' @export
+flight_path_exists <- function(client, path) {
+  tryCatch(
+ expr = {
+ client$get_flight_info(descriptor_for_path(path))
+ TRUE
+ },
+ error = function(e) {
+ msg <- conditionMessage(e)
+ if (!any(grepl("ArrowKeyError", msg))) {
+ # Raise an error if this fails for any reason other than not found
+ stop(e)
+ }
+ FALSE
+ }
+ )
+}
diff --git a/src/arrow/r/R/install-arrow.R b/src/arrow/r/R/install-arrow.R
new file mode 100644
index 000000000..3e295c543
--- /dev/null
+++ b/src/arrow/r/R/install-arrow.R
@@ -0,0 +1,239 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+#' Install or upgrade the Arrow library
+#'
+#' Use this function to install the latest release of `arrow`, to switch to or
+#' from a nightly development version, or on Linux to try reinstalling with
+#' all necessary C++ dependencies.
+#'
+#' Note that, unlike packages like `tensorflow`, `blogdown`, and others that
+#' require external dependencies, you do not need to run `install_arrow()`
+#' after a successful `arrow` installation.
+#'
+#' @param nightly logical: Should we install a development version of the
+#' package, or should we install from CRAN (the default).
+#' @param binary On Linux, value to set for the environment variable
+#' `LIBARROW_BINARY`, which governs how C++ binaries are used, if at all.
+#' The default value, `TRUE`, tells the installation script to detect the
+#' Linux distribution and version and find an appropriate C++ library. `FALSE`
+#' would tell the script not to retrieve a binary and instead build Arrow C++
+#' from source. Other valid values are strings corresponding to a Linux
+#' distribution-version, to override the value that would be detected.
+#' See `vignette("install", package = "arrow")` for further details.
+#' @param use_system logical: Should we use `pkg-config` to look for Arrow
+#' system packages? Default is `FALSE`. If `TRUE`, source installation may be
+#' faster, but there is a risk of version mismatch. This sets the
+#' `ARROW_USE_PKG_CONFIG` environment variable.
+#' @param minimal logical: If building from source, should we build without
+#' optional dependencies (compression libraries, for example)? Default is
+#' `FALSE`. This sets the `LIBARROW_MINIMAL` environment variable.
+#' @param verbose logical: Print more debugging output when installing? Default
+#' is `FALSE`. This sets the `ARROW_R_DEV` environment variable.
+#' @param repos character vector of base URLs of the repositories to install
+#' from (passed to `install.packages()`)
+#' @param ... Additional arguments passed to `install.packages()`
+#' @export
+#' @importFrom utils install.packages
+#' @seealso [arrow_available()] to see if the package was configured with
+#' necessary C++ dependencies. `vignette("install", package = "arrow")` for
+#' more ways to tune installation on Linux.
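+#' @examplesIf FALSE
+#' # Typical calls (not run): reinstall the latest release, or switch to a
+#' # nightly development build
+#' install_arrow()
+#' install_arrow(nightly = TRUE)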
+install_arrow <- function(nightly = FALSE,
+ binary = Sys.getenv("LIBARROW_BINARY", TRUE),
+ use_system = Sys.getenv("ARROW_USE_PKG_CONFIG", FALSE),
+ minimal = Sys.getenv("LIBARROW_MINIMAL", FALSE),
+ verbose = Sys.getenv("ARROW_R_DEV", FALSE),
+ repos = getOption("repos"),
+ ...) {
+ sysname <- tolower(Sys.info()[["sysname"]])
+ conda <- isTRUE(grepl("conda", R.Version()$platform))
+
+ if (conda) {
+ if (nightly) {
+ system("conda install -y -c arrow-nightlies -c conda-forge --strict-channel-priority r-arrow")
+ } else {
+ system("conda install -y -c conda-forge --strict-channel-priority r-arrow")
+ }
+ } else {
+ Sys.setenv(
+ LIBARROW_BINARY = binary,
+ LIBARROW_MINIMAL = minimal,
+ ARROW_R_DEV = verbose,
+ ARROW_USE_PKG_CONFIG = use_system
+ )
+ # On the M1, we can't use the usual autobrew, which pulls Intel dependencies
+ apple_m1 <- grepl("arm-apple|aarch64.*darwin", R.Version()$platform)
+ # On Rosetta, we have to build without JEMALLOC, so we also can't autobrew
+ rosetta <- identical(sysname, "darwin") && identical(system("sysctl -n sysctl.proc_translated", intern = TRUE), "1")
+ if (rosetta) {
+ Sys.setenv(ARROW_JEMALLOC = "OFF")
+ }
+ if (apple_m1 || rosetta) {
+ Sys.setenv(FORCE_BUNDLED_BUILD = "true")
+ }
+
+ opts <- list()
+ if (apple_m1 || rosetta) {
+ # Skip binaries (esp. for rosetta)
+ opts$pkgType <- "source"
+ } else if (isTRUE(binary)) {
+ # Unless otherwise directed, don't consider newer source packages when
+ # options(pkgType) == "both" (default on win/mac)
+ opts$install.packages.check.source <- "no"
+ opts$install.packages.compile.from.source <- "never"
+ }
+ if (length(opts)) {
+ old <- options(opts)
+ on.exit(options(old))
+ }
+ install.packages("arrow", repos = arrow_repos(repos, nightly), ...)
+ }
+ if ("arrow" %in% loadedNamespaces()) {
+ # If you've just sourced this file, "arrow" won't be (re)loaded
+ reload_arrow()
+ }
+}
+
+arrow_repos <- function(repos = getOption("repos"), nightly = FALSE) {
+ if (length(repos) == 0 || identical(repos, c(CRAN = "@CRAN@"))) {
+ # Set the default/CDN
+ repos <- "https://cloud.r-project.org/"
+ }
+ dev_repo <- getOption("arrow.dev_repo", "https://arrow-r-nightly.s3.amazonaws.com")
+ # Remove it if it's there (so nightly=FALSE won't accidentally pull from it)
+ repos <- setdiff(repos, dev_repo)
+ if (nightly) {
+ # Add it first
+ repos <- c(dev_repo, repos)
+ }
+ repos
+}
+
+reload_arrow <- function() {
+ if (requireNamespace("pkgload", quietly = TRUE)) {
+ is_attached <- "package:arrow" %in% search()
+ pkgload::unload("arrow")
+ if (is_attached) {
+ require("arrow", character.only = TRUE, quietly = TRUE)
+ } else {
+ requireNamespace("arrow", quietly = TRUE)
+ }
+ } else {
+ message("Please restart R to use the 'arrow' package.")
+ }
+}
+
+
+#' Create a source bundle that includes all thirdparty dependencies
+#'
+#' @param dest_file File path for the new tar.gz package. Defaults to
+#' `arrow_V.V.V_with_deps.tar.gz` in the current directory (`V.V.V` is the version)
+#' @param source_file File path for the input tar.gz package. Defaults to
+#' downloading the package from CRAN (or whatever you have set as the first in
+#' `getOption("repos")`)
+#' @return The full path to `dest_file`, invisibly
+#'
+#' This function is used for setting up an offline build. If it's possible to
+#' download at build time, don't use this function. Instead, let `cmake`
+#' download the required dependencies for you.
+#' These downloaded dependencies are only used in the build if
+#' `ARROW_DEPENDENCY_SOURCE` is unset, `BUNDLED`, or `AUTO`.
+#' https://arrow.apache.org/docs/developers/cpp/building.html#offline-builds
+#'
+#' If you're using binary packages you shouldn't need to use this function. You
+#' should download the appropriate binary from your package repository, transfer
+#' that to the offline computer, and install that. Any OS can create the source
+#' bundle, but it cannot be installed on Windows. (Instead, use a standard
+#' Windows binary package.)
+#'
+#' Note if you're using RStudio Package Manager on Linux: If you still want to
+#' make a source bundle with this function, make sure to set the first repo in
+#' `options("repos")` to be a mirror that contains source packages (that is:
+#' something other than the RSPM binary mirror URLs).
+#'
+#' ## Steps for an offline install with optional dependencies:
+#'
+#' ### Using a computer with internet access, pre-download the dependencies:
+#' * Install the `arrow` package _or_ run
+#' `source("https://raw.githubusercontent.com/apache/arrow/master/r/R/install-arrow.R")`
+#' * Run `create_package_with_all_dependencies("my_arrow_pkg.tar.gz")`
+#' * Copy the newly created `my_arrow_pkg.tar.gz` to the computer without internet access
+#'
+#' ### On the computer without internet access, install the prepared package:
+#' * Install the `arrow` package from the copied file
+#' * `install.packages("my_arrow_pkg.tar.gz", dependencies = c("Depends", "Imports", "LinkingTo"))`
+#' * This installation will build from source, so `cmake` must be available
+#' * Run [arrow_info()] to check installed capabilities
+#'
+#'
+#' @examples
+#' \dontrun{
+#' new_pkg <- create_package_with_all_dependencies()
+#' # Note: this works when run in the same R session, but it's meant to be
+#' # copied to a different computer.
+#' install.packages(new_pkg, dependencies = c("Depends", "Imports", "LinkingTo"))
+#' }
+#' @export
+create_package_with_all_dependencies <- function(dest_file = NULL, source_file = NULL) {
+ if (is.null(source_file)) {
+ pkg_download_dir <- tempfile()
+ dir.create(pkg_download_dir)
+ on.exit(unlink(pkg_download_dir, recursive = TRUE), add = TRUE)
+ message("Downloading Arrow source file")
+ downloaded <- utils::download.packages("arrow", destdir = pkg_download_dir, type = "source")
+ source_file <- downloaded[1, 2, drop = TRUE]
+ }
+ if (!file.exists(source_file) || !endsWith(source_file, "tar.gz")) {
+ stop("Arrow package .tar.gz file not found")
+ }
+ if (is.null(dest_file)) {
+ # e.g. convert /path/to/arrow_5.0.0.tar.gz to ./arrow_5.0.0_with_deps.tar.gz
+ # (add 'with_deps' for clarity if the file was downloaded locally)
+ dest_file <- paste0(gsub(".tar.gz$", "", basename(source_file)), "_with_deps.tar.gz")
+ }
+ untar_dir <- tempfile()
+ on.exit(unlink(untar_dir, recursive = TRUE), add = TRUE)
+ utils::untar(source_file, exdir = untar_dir)
+ tools_dir <- file.path(untar_dir, "arrow/tools")
+ download_dependencies_sh <- file.path(tools_dir, "cpp/thirdparty/download_dependencies.sh")
+ # If you change this path, also need to edit nixlibs.R
+ download_dir <- file.path(tools_dir, "thirdparty_dependencies")
+ dir.create(download_dir)
+
+ message("Downloading files to ", download_dir)
+ download_successful <- system2(download_dependencies_sh, download_dir, stdout = FALSE) == 0
+ if (!download_successful) {
+ stop("Failed to download thirdparty dependencies")
+ }
+ # Need to change directory to untar_dir so tar() will use relative paths. That
+ # means we'll need a full, non-relative path for dest_file. (extra_flags="-C"
+ # doesn't work with R's internal tar)
+ orig_wd <- getwd()
+ on.exit(setwd(orig_wd), add = TRUE)
+ # normalizePath() may return the input unchanged if dest_file doesn't exist,
+ # so create it first.
+ file.create(dest_file)
+ dest_file <- normalizePath(dest_file, mustWork = TRUE)
+ setwd(untar_dir)
+
+ message("Repacking tar.gz file to ", dest_file)
+ tar_successful <- utils::tar(dest_file, compression = "gz") == 0
+ if (!tar_successful) {
+ stop("Failed to create new tar.gz file")
+ }
+ invisible(dest_file)
+}
diff --git a/src/arrow/r/R/io.R b/src/arrow/r/R/io.R
new file mode 100644
index 000000000..898b306a3
--- /dev/null
+++ b/src/arrow/r/R/io.R
@@ -0,0 +1,295 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+#' @include arrow-package.R
+#' @include enums.R
+#' @include buffer.R
+
+# OutputStream ------------------------------------------------------------
+
+Writable <- R6Class("Writable",
+ inherit = ArrowObject,
+ public = list(
+ write = function(x) io___Writable__write(self, buffer(x))
+ )
+)
+
+#' @title OutputStream classes
+#' @description `FileOutputStream` is for writing to a file;
+#' `BufferOutputStream` writes to a buffer.
+#' You can create one and pass it to any of the table writers, for example.
+#' @usage NULL
+#' @format NULL
+#' @docType class
+#' @section Factory:
+#'
+#' The `$create()` factory methods instantiate the `OutputStream` object and
+#' take the following arguments, depending on the subclass:
+#'
+#' - `path` For `FileOutputStream`, a character file name
+#' - `initial_capacity` For `BufferOutputStream`, the size in bytes of the
+#' buffer.
+#'
+#' @section Methods:
+#'
+#' - `$tell()`: return the position in the stream
+#' - `$close()`: close the stream
+#' - `$write(x)`: send `x` to the stream
+#' - `$capacity()`: for `BufferOutputStream`
+#' - `$finish()`: for `BufferOutputStream`
+#' - `$GetExtentBytesWritten()`: for `MockOutputStream`, report how many bytes
+#' were sent.
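+#'
+#' @examplesIf arrow_available()
+#' # For example, serialize a data frame to an in-memory buffer and then
+#' # retrieve the written bytes as a Buffer:
+#' sink <- BufferOutputStream$create()
+#' write_ipc_stream(mtcars, sink)
+#' buf <- sink$finish()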
+#'
+#' @rdname OutputStream
+#' @name OutputStream
+OutputStream <- R6Class("OutputStream",
+ inherit = Writable,
+ public = list(
+ close = function() io___OutputStream__Close(self),
+ tell = function() io___OutputStream__Tell(self)
+ )
+)
+
+#' @usage NULL
+#' @format NULL
+#' @rdname OutputStream
+#' @export
+FileOutputStream <- R6Class("FileOutputStream", inherit = OutputStream)
+FileOutputStream$create <- function(path) {
+ io___FileOutputStream__Open(clean_path_abs(path))
+}
+
+#' @usage NULL
+#' @format NULL
+#' @rdname OutputStream
+#' @export
+BufferOutputStream <- R6Class("BufferOutputStream",
+ inherit = OutputStream,
+ public = list(
+ capacity = function() io___BufferOutputStream__capacity(self),
+ finish = function() io___BufferOutputStream__Finish(self),
+ write = function(bytes) io___BufferOutputStream__Write(self, bytes),
+ tell = function() io___BufferOutputStream__Tell(self)
+ )
+)
+BufferOutputStream$create <- function(initial_capacity = 0L) {
+ io___BufferOutputStream__Create(initial_capacity)
+}
+
+# InputStream -------------------------------------------------------------
+
+
+Readable <- R6Class("Readable",
+ inherit = ArrowObject,
+ public = list(
+ Read = function(nbytes) io___Readable__Read(self, nbytes)
+ )
+)
+
+#' @title InputStream classes
+#' @description `RandomAccessFile` inherits from `InputStream` and is a base
+#' class for: `ReadableFile` for reading from a file; `MemoryMappedFile` for
+#' the same but with memory mapping; and `BufferReader` for reading from a
+#' buffer. Use these with the various table readers.
+#' @usage NULL
+#' @format NULL
+#' @docType class
+#' @section Factory:
+#'
+#' The `$create()` factory methods instantiate the `InputStream` object and
+#' take the following arguments, depending on the subclass:
+#'
+#' - `path` For `ReadableFile`, a character file name
+#' - `x` For `BufferReader`, a [Buffer] or an object that can be
+#' made into a buffer via `buffer()`.
+#'
+#' To instantiate a `MemoryMappedFile`, call [mmap_open()].
+#'
+#' @section Methods:
+#'
+#' - `$GetSize()`: return the size of the file or buffer, in bytes
+#' - `$supports_zero_copy()`: Logical
+#' - `$seek(position)`: go to that position in the stream
+#' - `$tell()`: return the position in the stream
+#' - `$close()`: close the stream
+#' - `$Read(nbytes)`: read data from the stream, either a specified `nbytes` or
+#' all, if `nbytes` is not provided
+#' - `$ReadAt(position, nbytes)`: similar to `$seek(position)$Read(nbytes)`
+#' - `$Resize(size)`: for a `MemoryMappedFile` that is writeable
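+#'
+#' @examplesIf arrow_available()
+#' # For example, wrap a raw vector so it can be read like a file:
+#' reader <- BufferReader$create(as.raw(1:16))
+#' reader$Read(4)
+#' reader$tell()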
+#'
+#' @rdname InputStream
+#' @name InputStream
+InputStream <- R6Class("InputStream",
+ inherit = Readable,
+ public = list(
+ close = function() io___InputStream__Close(self)
+ )
+)
+
+#' @usage NULL
+#' @format NULL
+#' @rdname InputStream
+#' @export
+RandomAccessFile <- R6Class("RandomAccessFile",
+ inherit = InputStream,
+ public = list(
+ GetSize = function() io___RandomAccessFile__GetSize(self),
+ supports_zero_copy = function() io___RandomAccessFile__supports_zero_copy(self),
+ seek = function(position) io___RandomAccessFile__Seek(self, position),
+ tell = function() io___RandomAccessFile__Tell(self),
+ Read = function(nbytes = NULL) {
+ if (is.null(nbytes)) {
+ io___RandomAccessFile__Read0(self)
+ } else {
+ io___Readable__Read(self, nbytes)
+ }
+ },
+ ReadAt = function(position, nbytes = NULL) {
+ if (is.null(nbytes)) {
+ nbytes <- self$GetSize() - position
+ }
+ io___RandomAccessFile__ReadAt(self, position, nbytes)
+ }
+ )
+)
+
+#' @usage NULL
+#' @format NULL
+#' @rdname InputStream
+#' @export
+MemoryMappedFile <- R6Class("MemoryMappedFile",
+ inherit = RandomAccessFile,
+ public = list(
+ Resize = function(size) io___MemoryMappedFile__Resize(self, size)
+ )
+)
+
+#' @usage NULL
+#' @format NULL
+#' @rdname InputStream
+#' @export
+ReadableFile <- R6Class("ReadableFile", inherit = RandomAccessFile)
+ReadableFile$create <- function(path) {
+ io___ReadableFile__Open(clean_path_abs(path))
+}
+
+#' @usage NULL
+#' @format NULL
+#' @rdname InputStream
+#' @export
+BufferReader <- R6Class("BufferReader", inherit = RandomAccessFile)
+BufferReader$create <- function(x) {
+ x <- buffer(x)
+ io___BufferReader__initialize(x)
+}
+
+#' Create a new read/write memory mapped file of a given size
+#'
+#' @param path file path
+#' @param size size in bytes
+#'
+#' @return an [arrow::io::MemoryMappedFile][MemoryMappedFile]
+#'
+#' @export
+mmap_create <- function(path, size) {
+ path <- clean_path_abs(path)
+ io___MemoryMappedFile__Create(path, size)
+}
+
+#' Open a memory mapped file
+#'
+#' @param path file path
+#' @param mode file mode (read/write/readwrite)
+#'
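+#' @examplesIf FALSE
+#' # An illustrative sketch: create a 1 kB memory-mapped file, then reopen it
+#' tf <- tempfile()
+#' mm <- mmap_create(tf, 1024)
+#' mm$close()
+#' mm <- mmap_open(tf)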
+#' @export
+mmap_open <- function(path, mode = c("read", "write", "readwrite")) {
+ mode <- match(match.arg(mode), c("read", "write", "readwrite")) - 1L
+ path <- clean_path_abs(path)
+ io___MemoryMappedFile__Open(path, mode)
+}
+
+#' Handle a range of possible input sources
+#' @param file A character file name, `raw` vector, or an Arrow input stream
+#' @param mmap Logical: whether to memory-map the file (default `TRUE`)
+#' @param compression If the file is compressed, create a [CompressedInputStream]
+#' with this compression codec, either a [Codec] or the string name of one.
+#' If `NULL` (default) and `file` is a string file name, the function will try
+#' to infer compression from the file extension.
+#' @param filesystem If not `NULL`, `file` will be opened via the
+#' `filesystem$OpenInputFile()` filesystem method, rather than the `io` module's
+#' `MemoryMappedFile` or `ReadableFile` constructors.
+#' @return An `InputStream` or a subclass of one.
+#' @keywords internal
+make_readable_file <- function(file, mmap = TRUE, compression = NULL, filesystem = NULL) {
+ if (inherits(file, "SubTreeFileSystem")) {
+ filesystem <- file$base_fs
+ file <- file$base_path
+ }
+ if (is.string(file)) {
+ if (is_url(file)) {
+ fs_and_path <- FileSystem$from_uri(file)
+ filesystem <- fs_and_path$fs
+ file <- fs_and_path$path
+ }
+ if (is.null(compression)) {
+ # Infer compression from the file path
+ compression <- detect_compression(file)
+ }
+ if (!is.null(filesystem)) {
+ file <- filesystem$OpenInputFile(file)
+ } else if (isTRUE(mmap)) {
+ file <- mmap_open(file)
+ } else {
+ file <- ReadableFile$create(file)
+ }
+ if (!identical(compression, "uncompressed")) {
+ file <- CompressedInputStream$create(file, compression)
+ }
+ } else if (inherits(file, c("raw", "Buffer"))) {
+ file <- BufferReader$create(file)
+ }
+ assert_is(file, "InputStream")
+ file
+}
+
+make_output_stream <- function(x, filesystem = NULL) {
+ if (inherits(x, "SubTreeFileSystem")) {
+ filesystem <- x$base_fs
+ x <- x$base_path
+ } else if (is_url(x)) {
+ fs_and_path <- FileSystem$from_uri(x)
+ filesystem <- fs_and_path$fs
+ x <- fs_and_path$path
+ }
+ assert_that(is.string(x))
+ if (is.null(filesystem)) {
+ FileOutputStream$create(x)
+ } else {
+ filesystem$OpenOutputStream(x)
+ }
+}
+
+detect_compression <- function(path) {
+ assert_that(is.string(path))
+ switch(tools::file_ext(path),
+ bz2 = "bz2",
+ gz = "gzip",
+ lz4 = "lz4",
+ zst = "zstd",
+ "uncompressed"
+ )
+}
diff --git a/src/arrow/r/R/ipc_stream.R b/src/arrow/r/R/ipc_stream.R
new file mode 100644
index 000000000..c45d1de6e
--- /dev/null
+++ b/src/arrow/r/R/ipc_stream.R
@@ -0,0 +1,123 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+#' Write Arrow IPC stream format
+#'
+#' Apache Arrow defines two formats for [serializing data for interprocess
+#' communication
+#' (IPC)](https://arrow.apache.org/docs/format/Columnar.html#serialization-and-interprocess-communication-ipc):
+#' a "stream" format and a "file" format, known as Feather. `write_ipc_stream()`
+#' and [write_feather()] write those formats, respectively.
+#'
+#' `write_arrow()`, a wrapper around `write_ipc_stream()` and `write_feather()`
+#' with some nonstandard behavior, is deprecated. You should explicitly choose
+#' the function that will write the desired IPC format (stream or file) since
+#' either can be written to a file or `OutputStream`.
+#'
+#' @inheritParams write_feather
+#' @param ... extra parameters passed to `write_feather()`.
+#'
+#' @return `x`, invisibly.
+#' @seealso [write_feather()] for writing IPC files. [write_to_raw()] to
+#' serialize data to a buffer.
+#' [RecordBatchWriter] for a lower-level interface.
+#' @export
+#' @examplesIf arrow_available()
+#' tf <- tempfile()
+#' on.exit(unlink(tf))
+#' write_ipc_stream(mtcars, tf)
+write_ipc_stream <- function(x, sink, ...) {
+ x_out <- x # So we can return the data we got
+ if (is.data.frame(x)) {
+ x <- Table$create(x)
+ }
+ assert_that(is_writable_table(x))
+ if (!inherits(sink, "OutputStream")) {
+ sink <- make_output_stream(sink)
+ on.exit(sink$close())
+ }
+
+ writer <- RecordBatchStreamWriter$create(sink, x$schema)
+ writer$write(x)
+ writer$close()
+ invisible(x_out)
+}
+
+#' Write Arrow data to a raw vector
+#'
+#' [write_ipc_stream()] and [write_feather()] write data to a sink and return
+#' the data (`data.frame`, `RecordBatch`, or `Table`) they were given.
+#' This function wraps those so that you can serialize data to a buffer and
+#' access that buffer as a `raw` vector in R.
+#' @inheritParams write_feather
+#' @param format one of `c("stream", "file")`, indicating the IPC format to use
+#' @return A `raw` vector containing the bytes of the IPC serialized data.
+#' @examplesIf arrow_available()
+#' # The default format is "stream"
+#' mtcars_raw <- write_to_raw(mtcars)
+#' @export
+write_to_raw <- function(x, format = c("stream", "file")) {
+ sink <- BufferOutputStream$create()
+ if (match.arg(format) == "stream") {
+ write_ipc_stream(x, sink)
+ } else {
+ write_feather(x, sink)
+ }
+ as.raw(buffer(sink))
+}
+
+#' Read Arrow IPC stream format
+#'
+#' Apache Arrow defines two formats for [serializing data for interprocess
+#' communication
+#' (IPC)](https://arrow.apache.org/docs/format/Columnar.html#serialization-and-interprocess-communication-ipc):
+#' a "stream" format and a "file" format, known as Feather. `read_ipc_stream()`
+#' and [read_feather()] read those formats, respectively.
+#'
+#' `read_arrow()`, a wrapper around `read_ipc_stream()` and `read_feather()`,
+#' is deprecated. You should explicitly choose
+#' the function that will read the desired IPC format (stream or file) since
+#' a file or `InputStream` may contain either.
+#'
+#' @param file A character file name or URI, `raw` vector, an Arrow input stream,
+#' or a `FileSystem` with path (`SubTreeFileSystem`).
+#' If a file name or URI, an Arrow [InputStream] will be opened and
+#' closed when finished. If an input stream is provided, it will be left
+#' open.
+#' @param as_data_frame Should the function return a `data.frame` (default) or
+#' an Arrow [Table]?
+#' @param ... extra parameters passed to `read_feather()`.
+#'
+#' @return A `data.frame` if `as_data_frame` is `TRUE` (the default), or an
+#' Arrow [Table] otherwise
+#' @seealso [read_feather()] for reading IPC files. [RecordBatchReader] for a
+#' lower-level interface.
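+#' @examplesIf arrow_available()
+#' tf <- tempfile()
+#' on.exit(unlink(tf))
+#' write_ipc_stream(mtcars, tf)
+#' df <- read_ipc_stream(tf)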
+#' @export
+read_ipc_stream <- function(file, as_data_frame = TRUE, ...) {
+ if (!inherits(file, "InputStream")) {
+ file <- make_readable_file(file)
+ on.exit(file$close())
+ }
+
+ # TODO: this could take col_select, like the other readers
+ # https://issues.apache.org/jira/browse/ARROW-6830
+ out <- RecordBatchStreamReader$create(file)$read_table()
+ if (as_data_frame) {
+ out <- as.data.frame(out)
+ }
+ out
+}
diff --git a/src/arrow/r/R/json.R b/src/arrow/r/R/json.R
new file mode 100644
index 000000000..0d54c8a8a
--- /dev/null
+++ b/src/arrow/r/R/json.R
@@ -0,0 +1,102 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+#' Read a JSON file
+#'
+#' Using [JsonTableReader]
+#'
+#' @inheritParams read_delim_arrow
+#' @param schema [Schema] that describes the table.
+#' @param ... Additional options passed to `JsonTableReader$create()`
+#'
+#' @return A `data.frame`, or a Table if `as_data_frame = FALSE`.
+#' @export
+#' @examplesIf arrow_with_json()
+#' tf <- tempfile()
+#' on.exit(unlink(tf))
+#' writeLines('
+#' { "hello": 3.5, "world": false, "yo": "thing" }
+#' { "hello": 3.25, "world": null }
+#' { "hello": 0.0, "world": true, "yo": null }
+#' ', tf, useBytes = TRUE)
+#' df <- read_json_arrow(tf)
+read_json_arrow <- function(file,
+ col_select = NULL,
+ as_data_frame = TRUE,
+ schema = NULL,
+ ...) {
+ if (!inherits(file, "InputStream")) {
+ file <- make_readable_file(file)
+ on.exit(file$close())
+ }
+ tab <- JsonTableReader$create(file, schema = schema, ...)$Read()
+
+ col_select <- enquo(col_select)
+ if (!quo_is_null(col_select)) {
+ tab <- tab[vars_select(names(tab), !!col_select)]
+ }
+
+ if (isTRUE(as_data_frame)) {
+ tab <- as.data.frame(tab)
+ }
+ tab
+}
+
+#' @include arrow-package.R
+#' @rdname CsvTableReader
+#' @usage NULL
+#' @format NULL
+#' @docType class
+#' @export
+JsonTableReader <- R6Class("JsonTableReader",
+ inherit = ArrowObject,
+ public = list(
+ Read = function() json___TableReader__Read(self)
+ )
+)
+JsonTableReader$create <- function(file,
+ read_options = JsonReadOptions$create(),
+ parse_options = JsonParseOptions$create(schema = schema),
+ schema = NULL,
+ ...) {
+ assert_is(file, "InputStream")
+ json___TableReader__Make(file, read_options, parse_options)
+}
+
+#' @rdname CsvReadOptions
+#' @usage NULL
+#' @format NULL
+#' @docType class
+#' @export
+JsonReadOptions <- R6Class("JsonReadOptions", inherit = ArrowObject)
+JsonReadOptions$create <- function(use_threads = option_use_threads(), block_size = 1048576L) {
+ json___ReadOptions__initialize(use_threads, block_size)
+}
+
+#' @rdname CsvReadOptions
+#' @usage NULL
+#' @format NULL
+#' @docType class
+#' @export
+JsonParseOptions <- R6Class("JsonParseOptions", inherit = ArrowObject)
+JsonParseOptions$create <- function(newlines_in_values = FALSE, schema = NULL) {
+ if (is.null(schema)) {
+ json___ParseOptions__initialize1(newlines_in_values)
+ } else {
+ json___ParseOptions__initialize2(newlines_in_values, schema)
+ }
+}
diff --git a/src/arrow/r/R/memory-pool.R b/src/arrow/r/R/memory-pool.R
new file mode 100644
index 000000000..2207ed6be
--- /dev/null
+++ b/src/arrow/r/R/memory-pool.R
@@ -0,0 +1,61 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+#' @include arrow-package.R
+#'
+#' @title class arrow::MemoryPool
+#'
+#' @usage NULL
+#' @format NULL
+#' @docType class
+#'
+#' @section Methods:
+#'
+#' - `backend_name`: one of "jemalloc", "mimalloc", or "system". Alternative
+#' memory allocators are optionally enabled at build time. Windows builds
+#' generally have `mimalloc`, and most others have both `jemalloc` (used by
+#' default) and `mimalloc`. To change memory allocators at runtime, set the
+#' environment variable `ARROW_DEFAULT_MEMORY_POOL` to one of those strings
+#' prior to loading the `arrow` library.
+#' - `bytes_allocated`
+#' - `max_memory`
+#'
+#' @rdname MemoryPool
+#' @name MemoryPool
+#' @keywords internal
+MemoryPool <- R6Class("MemoryPool",
+ inherit = ArrowObject,
+ public = list(
+ # TODO: Allocate
+ # TODO: Reallocate
+ # TODO: Free
+ ),
+ active = list(
+ backend_name = function() MemoryPool__backend_name(self),
+ bytes_allocated = function() MemoryPool__bytes_allocated(self),
+ max_memory = function() MemoryPool__max_memory(self)
+ )
+)
+
+#' Arrow's default [MemoryPool]
+#'
+#' @return the default [MemoryPool]
+#' @export
+#' @keywords internal
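+#' @examplesIf arrow_available()
+#' pool <- default_memory_pool()
+#' pool$backend_name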
+default_memory_pool <- function() {
+ MemoryPool__default()
+}
diff --git a/src/arrow/r/R/message.R b/src/arrow/r/R/message.R
new file mode 100644
index 000000000..ef33f1623
--- /dev/null
+++ b/src/arrow/r/R/message.R
@@ -0,0 +1,97 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+#' @include arrow-package.R
+
+#' @title class arrow::Message
+#'
+#' @usage NULL
+#' @format NULL
+#' @docType class
+#'
+#' @section Methods:
+#'
+#' TODO
+#'
+#' @rdname Message
+#' @name Message
+Message <- R6Class("Message",
+ inherit = ArrowObject,
+ public = list(
+ Equals = function(other, ...) {
+ inherits(other, "Message") && ipc___Message__Equals(self, other)
+ },
+ body_length = function() ipc___Message__body_length(self),
+ Verify = function() ipc___Message__Verify(self)
+ ),
+ active = list(
+ type = function() ipc___Message__type(self),
+ metadata = function() ipc___Message__metadata(self),
+ body = function() ipc___Message__body(self)
+ )
+)
+
+#' @title class arrow::MessageReader
+#'
+#' @usage NULL
+#' @format NULL
+#' @docType class
+#'
+#' @section Methods:
+#'
+#' TODO
+#'
+#' @rdname MessageReader
+#' @name MessageReader
+#' @export
+MessageReader <- R6Class("MessageReader",
+ inherit = ArrowObject,
+ public = list(
+ ReadNextMessage = function() ipc___MessageReader__ReadNextMessage(self)
+ )
+)
+
+MessageReader$create <- function(stream) {
+ if (!inherits(stream, "InputStream")) {
+ stream <- BufferReader$create(stream)
+ }
+ ipc___MessageReader__Open(stream)
+}
+
+#' Read a Message from a stream
+#'
+#' @param stream an InputStream
+#'
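+#' @examplesIf arrow_available()
+#' # For example, read the schema message at the head of an IPC stream:
+#' bytes <- write_to_raw(data.frame(x = 1:3))
+#' msg <- read_message(BufferReader$create(bytes))
+#'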
+#' @export
+read_message <- function(stream) {
+ UseMethod("read_message")
+}
+
+#' @export
+read_message.default <- function(stream) {
+ read_message(BufferReader$create(stream))
+}
+
+#' @export
+read_message.InputStream <- function(stream) {
+ ipc___ReadMessage(stream)
+}
+
+#' @export
+read_message.MessageReader <- function(stream) {
+ stream$ReadNextMessage()
+}
diff --git a/src/arrow/r/R/metadata.R b/src/arrow/r/R/metadata.R
new file mode 100644
index 000000000..768abeda7
--- /dev/null
+++ b/src/arrow/r/R/metadata.R
@@ -0,0 +1,210 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+#' @importFrom utils object.size
+.serialize_arrow_r_metadata <- function(x) {
+ assert_is(x, "list")
+
+ # drop problems attributes (most likely from readr)
+ x[["attributes"]][["problems"]] <- NULL
+
+ out <- serialize(x, NULL, ascii = TRUE)
+
+ # if the metadata is over 100 kB, compress
+ if (option_compress_metadata() && object.size(out) > 100000) {
+ out_comp <- serialize(memCompress(out, type = "gzip"), NULL, ascii = TRUE)
+
+ # but ensure that the compression+serialization is effective.
+ if (object.size(out) > object.size(out_comp)) out <- out_comp
+ }
+
+ rawToChar(out)
+}
+
+.unserialize_arrow_r_metadata <- function(x) {
+ tryCatch(
+ expr = {
+ out <- unserialize(charToRaw(x))
+
+ # if this is still raw, try decompressing
+ if (is.raw(out)) {
+ out <- unserialize(memDecompress(out, type = "gzip"))
+ }
+ out
+ },
+ error = function(e) {
+ warning("Invalid metadata$r", call. = FALSE)
+ NULL
+ }
+ )
+}
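+
+# Illustrative round trip for the helpers above (not run):
+#   meta <- .serialize_arrow_r_metadata(list(attributes = list(foo = "bar")))
+#   .unserialize_arrow_r_metadata(meta)$attributes$foo  # "bar"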
+
+#' @importFrom rlang trace_back
+apply_arrow_r_metadata <- function(x, r_metadata) {
+ tryCatch(
+ expr = {
+ columns_metadata <- r_metadata$columns
+ if (is.data.frame(x)) {
+ if (length(names(x)) && !is.null(columns_metadata)) {
+ for (name in intersect(names(columns_metadata), names(x))) {
+ x[[name]] <- apply_arrow_r_metadata(x[[name]], columns_metadata[[name]])
+ }
+ }
+ } else if (is.list(x) && !inherits(x, "POSIXlt") && !is.null(columns_metadata)) {
+ # If we have a list and "columns_metadata" this applies row-level metadata
+ # inside of a column in a dataframe.
+
+ # However, if we are inside of a dplyr collection (including all datasets),
+ # we cannot apply this row-level metadata, since the order of the rows is
+ # not guaranteed to be the same, so don't even try, but warn what's going on
+ trace <- trace_back()
+ # TODO: remove `trace$calls %||% trace$call` once rlang > 0.4.11 is released
+ in_dplyr_collect <- any(map_lgl(trace$calls %||% trace$call, function(x) {
+ grepl("collect.arrow_dplyr_query", x, fixed = TRUE)[[1]]
+ }))
+ if (in_dplyr_collect) {
+ warning(
+ "Row-level metadata is not compatible with this operation and has ",
+ "been ignored",
+ call. = FALSE
+ )
+ } else {
+ x <- map2(x, columns_metadata, function(.x, .y) {
+ apply_arrow_r_metadata(.x, .y)
+ })
+ }
+ x
+ }
+
+ if (!is.null(r_metadata$attributes)) {
+ attributes(x)[names(r_metadata$attributes)] <- r_metadata$attributes
+ if (inherits(x, "POSIXlt")) {
+ # We store POSIXlt as a StructArray, which is translated back to R
+ # as a data.frame, but while data frames have a row.names = c(NA, nrow(x))
+ # attribute, POSIXlt does not, so since this is now no longer an object
+ # of class data.frame, remove the extraneous attribute
+ attr(x, "row.names") <- NULL
+ }
+ if (!is.null(attr(x, ".group_vars")) && requireNamespace("dplyr", quietly = TRUE)) {
+ x <- dplyr::group_by(x, !!!syms(attr(x, ".group_vars")))
+ attr(x, ".group_vars") <- NULL
+ }
+ }
+ },
+ error = function(e) {
+ warning("Invalid metadata$r", call. = FALSE)
+ }
+ )
+ x
+}
+
+remove_attributes <- function(x) {
+ removed_attributes <- character()
+ if (identical(class(x), c("tbl_df", "tbl", "data.frame"))) {
+ removed_attributes <- c("class", "row.names", "names")
+ } else if (inherits(x, "data.frame")) {
+ removed_attributes <- c("row.names", "names")
+ } else if (inherits(x, "factor")) {
+ removed_attributes <- c("class", "levels")
+ } else if (inherits(x, c("integer64", "Date", "arrow_binary", "arrow_large_binary"))) {
+ removed_attributes <- c("class")
+ } else if (inherits(x, "arrow_fixed_size_binary")) {
+ removed_attributes <- c("class", "byte_width")
+ } else if (inherits(x, "POSIXct")) {
+ removed_attributes <- c("class", "tzone")
+ } else if (inherits(x, "hms") || inherits(x, "difftime")) {
+ removed_attributes <- c("class", "units")
+ }
+ removed_attributes
+}
+
+arrow_attributes <- function(x, only_top_level = FALSE) {
+ if (inherits(x, "grouped_df")) {
+ # Keep only the group var names, not the rest of the cached data that dplyr
+ # uses, which may be large
+ if (requireNamespace("dplyr", quietly = TRUE)) {
+ gv <- dplyr::group_vars(x)
+ x <- dplyr::ungroup(x)
+ # ungroup() first, then set attribute, bc ungroup() would erase it
+ attr(x, ".group_vars") <- gv
+ } else {
+ # Regardless, we shouldn't keep groups around
+ attr(x, "groups") <- NULL
+ }
+ }
+ att <- attributes(x)
+
+ removed_attributes <- remove_attributes(x)
+
+ att <- att[setdiff(names(att), removed_attributes)]
+ if (isTRUE(only_top_level)) {
+ return(att)
+ }
+
+ if (is.data.frame(x)) {
+ columns <- map(x, arrow_attributes)
+ out <- if (length(att) || !all(map_lgl(columns, is.null))) {
+ list(attributes = att, columns = columns)
+ }
+ return(out)
+ }
+
+ columns <- NULL
+ attempt_to_save_row_level <- getOption("arrow.preserve_row_level_metadata", FALSE) &&
+ is.list(x) && !inherits(x, "POSIXlt")
+ if (attempt_to_save_row_level) {
+ # However, if we are inside of a dplyr collection (including all datasets),
+ # we cannot apply this row-level metadata, since the order of the rows is
+ # not guaranteed to be the same, so don't even try, but warn what's going on
+ trace <- trace_back()
+ # TODO: remove `trace$calls %||% trace$call` once rlang > 0.4.11 is released
+ in_dataset_write <- any(map_lgl(trace$calls %||% trace$call, function(x) {
+ grepl("write_dataset", x, fixed = TRUE)[[1]]
+ }))
+ if (in_dataset_write) {
+ warning(
+ "Row-level metadata is not compatible with datasets and will be discarded",
+ call. = FALSE
+ )
+ } else {
+ # for list columns, we also keep attributes of each
+ # element in columns
+ columns <- map(x, arrow_attributes)
+ }
+ if (all(map_lgl(columns, is.null))) {
+ columns <- NULL
+ }
+ } else if (inherits(x, c("sfc", "sf"))) {
+ # Check if there are any columns that look like sf columns, warn that we will
+ # not be saving this data for now (but only if arrow.preserve_row_level_metadata
+ # is set to FALSE)
+ warning(
+ "One of the columns given appears to be an `sfc` SF column. Due to their unique ",
+ "nature, these columns do not convert to Arrow well. We are working on ",
+ "better ways to do this, but in the interim we recommend converting any `sfc` ",
+ "columns to WKB (well-known binary) columns before using them with Arrow ",
+ "(for example, with `sf::st_as_binary(col)`).",
+ call. = FALSE
+ )
+ }
+
+ if (length(att) || !is.null(columns)) {
+ list(attributes = att, columns = columns)
+ } else {
+ NULL
+ }
+}
diff --git a/src/arrow/r/R/parquet.R b/src/arrow/r/R/parquet.R
new file mode 100644
index 000000000..ee2ed57de
--- /dev/null
+++ b/src/arrow/r/R/parquet.R
@@ -0,0 +1,585 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+#' Read a Parquet file
+#'
+#' '[Parquet](https://parquet.apache.org/)' is a columnar storage file format.
+#' This function enables you to read Parquet files into R.
+#'
+#' @inheritParams read_feather
+#' @param props [ParquetArrowReaderProperties]
+#' @param ... Additional arguments passed to `ParquetFileReader$create()`
+#'
+#' @return A [arrow::Table][Table], or a `data.frame` if `as_data_frame` is
+#' `TRUE` (the default).
+#' @examplesIf arrow_with_parquet()
+#' tf <- tempfile()
+#' on.exit(unlink(tf))
+#' write_parquet(mtcars, tf)
+#' df <- read_parquet(tf, col_select = starts_with("d"))
+#' head(df)
+#' @export
+read_parquet <- function(file,
+ col_select = NULL,
+ as_data_frame = TRUE,
+ props = ParquetArrowReaderProperties$create(),
+ ...) {
+ if (is.string(file)) {
+ file <- make_readable_file(file)
+ on.exit(file$close())
+ }
+ reader <- ParquetFileReader$create(file, props = props, ...)
+
+ col_select <- enquo(col_select)
+ if (!quo_is_null(col_select)) {
+ # infer which columns to keep from schema
+ schema <- reader$GetSchema()
+ names <- names(schema)
+ indices <- match(vars_select(names, !!col_select), names) - 1L
+ tab <- tryCatch(
+ reader$ReadTable(indices),
+ error = read_compressed_error
+ )
+ } else {
+ # read all columns
+ tab <- tryCatch(
+ reader$ReadTable(),
+ error = read_compressed_error
+ )
+ }
+
+ if (as_data_frame) {
+ tab <- as.data.frame(tab)
+ }
+ tab
+}
+
+#' Write Parquet file to disk
+#'
+#' [Parquet](https://parquet.apache.org/) is a columnar storage file format.
+#' This function enables you to write Parquet files from R.
+#'
+#' Due to features of the format, Parquet files cannot be appended to.
+#' If you want to use the Parquet format but also want the ability to extend
+#' your dataset, you can write to additional Parquet files and then treat
+#' the whole directory of files as a [Dataset] you can query.
+#' See `vignette("dataset", package = "arrow")` for examples of this.
+#'
+#' @param x `data.frame`, [RecordBatch], or [Table]
+#' @param sink A string file path, URI, or [OutputStream], or path in a file
+#' system (`SubTreeFileSystem`)
+#' @param chunk_size chunk size in number of rows. If NULL, the total number of rows is used.
+#' @param version parquet version, "1.0" or "2.0". Default "1.0". Numeric values
+#' are coerced to character.
+#' @param compression compression algorithm. Default "snappy". See details.
+#' @param compression_level compression level. Meaning depends on compression algorithm
+#' @param use_dictionary Specify if we should use dictionary encoding. Default `TRUE`
+#' @param write_statistics Specify if we should write statistics. Default `TRUE`
+#' @param data_page_size Set a target threshold for the approximate encoded
+#' size of data pages within a column chunk (in bytes). Default 1 MiB.
+#' @param use_deprecated_int96_timestamps Write timestamps to INT96 Parquet format. Default `FALSE`.
+#' @param coerce_timestamps Cast timestamps a particular resolution. Can be
+#' `NULL`, "ms" or "us". Default `NULL` (no casting)
+#' @param allow_truncated_timestamps Allow loss of data when coercing timestamps to a
+#' particular resolution. E.g. if microsecond or nanosecond data is lost when coercing
+#' to "ms", do not raise an exception
+#' @param properties A `ParquetWriterProperties` object, used instead of the options
+#' enumerated in this function's signature. Providing `properties` as an argument
+#' is deprecated; if you need to assemble `ParquetWriterProperties` outside
+#' of `write_parquet()`, use `ParquetFileWriter` instead.
+#' @param arrow_properties A `ParquetArrowWriterProperties` object. Like
+#' `properties`, this argument is deprecated.
+#'
+#' @details The parameters `compression`, `compression_level`, `use_dictionary` and
+#' `write_statistics` support various patterns:
+#'
+#' - The default `NULL` leaves the parameter unspecified, and the C++ library
+#' uses an appropriate default for each column (defaults listed above)
+#' - A single, unnamed, value (e.g. a single string for `compression`) applies to all columns
+#' - An unnamed vector, of the same size as the number of columns, to specify a
+#' value for each column, in positional order
+#' - A named vector, to specify the value for the named columns, the default
+#' value for the setting is used when not supplied
+#'
+#' The `compression` argument can be any of the following (case insensitive):
+#' "uncompressed", "snappy", "gzip", "brotli", "zstd", "lz4", "lzo" or "bz2".
+#' Only "uncompressed" is guaranteed to be available, but "snappy" and "gzip"
+#' are almost always included. See [codec_is_available()].
+#' The default "snappy" is used if available, otherwise "uncompressed". To
+#' disable compression, set `compression = "uncompressed"`.
+#' Note that "uncompressed" columns may still have dictionary encoding.
+#'
+#' @return the input `x` invisibly.
+#'
+#' @examplesIf arrow_with_parquet()
+#' tf1 <- tempfile(fileext = ".parquet")
+#' write_parquet(data.frame(x = 1:5), tf1)
+#'
+#' # using compression
+#' if (codec_is_available("gzip")) {
+#' tf2 <- tempfile(fileext = ".gz.parquet")
+#' write_parquet(data.frame(x = 1:5), tf2, compression = "gzip", compression_level = 5)
+#' }
+#' @export
+write_parquet <- function(x,
+ sink,
+ chunk_size = NULL,
+ # writer properties
+ version = NULL,
+ compression = default_parquet_compression(),
+ compression_level = NULL,
+ use_dictionary = NULL,
+ write_statistics = NULL,
+ data_page_size = NULL,
+ # arrow writer properties
+ use_deprecated_int96_timestamps = FALSE,
+ coerce_timestamps = NULL,
+ allow_truncated_timestamps = FALSE,
+ properties = NULL,
+ arrow_properties = NULL) {
+ x_out <- x
+
+ if (is.data.frame(x) || inherits(x, "RecordBatch")) {
+ x <- Table$create(x)
+ }
+
+ assert_that(is_writable_table(x))
+
+ if (!inherits(sink, "OutputStream")) {
+ sink <- make_output_stream(sink)
+ on.exit(sink$close())
+ }
+
+ # Deprecation warnings
+ if (!is.null(properties)) {
+ warning(
+ "Providing 'properties' is deprecated. If you need to assemble properties outside ",
+ "this function, use ParquetFileWriter instead."
+ )
+ }
+ if (!is.null(arrow_properties)) {
+ warning(
+ "Providing 'arrow_properties' is deprecated. If you need to assemble arrow_properties ",
+ "outside this function, use ParquetFileWriter instead."
+ )
+ }
+
+ writer <- ParquetFileWriter$create(
+ x$schema,
+ sink,
+ properties = properties %||% ParquetWriterProperties$create(
+ x,
+ version = version,
+ compression = compression,
+ compression_level = compression_level,
+ use_dictionary = use_dictionary,
+ write_statistics = write_statistics,
+ data_page_size = data_page_size
+ ),
+ arrow_properties = arrow_properties %||% ParquetArrowWriterProperties$create(
+ use_deprecated_int96_timestamps = use_deprecated_int96_timestamps,
+ coerce_timestamps = coerce_timestamps,
+ allow_truncated_timestamps = allow_truncated_timestamps
+ )
+ )
+ writer$WriteTable(x, chunk_size = chunk_size %||% x$num_rows)
+ writer$Close()
+
+ invisible(x_out)
+}
+
+default_parquet_compression <- function() {
+ # Match the pyarrow default (overriding the C++ default)
+ if (codec_is_available("snappy")) {
+ "snappy"
+ } else {
+ NULL
+ }
+}
+
+ParquetArrowWriterProperties <- R6Class("ParquetArrowWriterProperties", inherit = ArrowObject)
+ParquetArrowWriterProperties$create <- function(use_deprecated_int96_timestamps = FALSE,
+ coerce_timestamps = NULL,
+ allow_truncated_timestamps = FALSE,
+ ...) {
+ if (is.null(coerce_timestamps)) {
+ timestamp_unit <- -1L # null sentinel value
+ } else {
+ timestamp_unit <- make_valid_time_unit(
+ coerce_timestamps,
+ c("ms" = TimeUnit$MILLI, "us" = TimeUnit$MICRO)
+ )
+ }
+ parquet___ArrowWriterProperties___create(
+ use_deprecated_int96_timestamps = isTRUE(use_deprecated_int96_timestamps),
+ timestamp_unit = timestamp_unit,
+ allow_truncated_timestamps = isTRUE(allow_truncated_timestamps)
+ )
+}
+
+valid_parquet_version <- c(
+ "1.0" = ParquetVersionType$PARQUET_1_0,
+ "2.0" = ParquetVersionType$PARQUET_2_0
+)
+
+make_valid_version <- function(version, valid_versions = valid_parquet_version) {
+ if (is_integerish(version)) {
+ version <- as.character(version)
+ }
+ tryCatch(
+ valid_versions[[match.arg(version, choices = names(valid_versions))]],
+ error = function(cond) {
+ stop('"version" should be one of ', oxford_paste(names(valid_versions), "or"), call. = FALSE)
+ }
+ )
+}
+
+#' @title ParquetWriterProperties class
+#' @rdname ParquetWriterProperties
+#' @name ParquetWriterProperties
+#' @docType class
+#' @usage NULL
+#' @format NULL
+#' @description This class holds settings to control how a Parquet file is
+#' written by [ParquetFileWriter].
+#'
+#' @section Factory:
+#'
+#' The `ParquetWriterProperties$create()` factory method instantiates the object
+#' and takes the following arguments:
+#'
+#' - `table`: table to write (required)
+#' - `version`: Parquet version, "1.0" or "2.0". Default "1.0"
+#' - `compression`: Compression type. Default `"uncompressed"`
+#' - `compression_level`: Compression level; meaning depends on compression algorithm
+#' - `use_dictionary`: Specify if we should use dictionary encoding. Default `TRUE`
+#' - `write_statistics`: Specify if we should write statistics. Default `TRUE`
+#' - `data_page_size`: Set a target threshold for the approximate encoded
+#' size of data pages within a column chunk (in bytes). Default 1 MiB.
+#'
+#' @details The parameters `compression`, `compression_level`, `use_dictionary`
+#' and `write_statistics` support various patterns:
+#'
+#' - The default `NULL` leaves the parameter unspecified, and the C++ library
+#' uses an appropriate default for each column (defaults listed above)
+#' - A single, unnamed, value (e.g. a single string for `compression`) applies to all columns
+#' - An unnamed vector, of the same size as the number of columns, to specify a
+#' value for each column, in positional order
+#' - A named vector, to specify the value for the named columns, the default
+#' value for the setting is used when not supplied
+#'
+#' Unlike the high-level [write_parquet], `ParquetWriterProperties` arguments
+#' use the C++ defaults. Currently this means "uncompressed" rather than
+#' "snappy" for the `compression` argument.
+#'
+#' @seealso [write_parquet]
+#' @seealso [Schema] for information about schemas and metadata handling.
+#'
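+#' @examplesIf arrow_with_parquet()
+#' # For example, enable dictionary encoding for the "y" column only:
+#' tbl <- Table$create(data.frame(x = 1:5, y = c("a", "b", "a", "b", "a")))
+#' props <- ParquetWriterProperties$create(tbl, use_dictionary = c(y = TRUE))
+#'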
+#' @export
+ParquetWriterProperties <- R6Class("ParquetWriterProperties", inherit = ArrowObject)
+ParquetWriterPropertiesBuilder <- R6Class("ParquetWriterPropertiesBuilder",
+ inherit = ArrowObject,
+ public = list(
+ set_version = function(version) {
+ parquet___WriterProperties___Builder__version(self, make_valid_version(version))
+ },
+ set_compression = function(table, compression) {
+ compression <- compression_from_name(compression)
+ assert_that(is.integer(compression))
+ private$.set(
+ table, compression,
+ parquet___ArrowWriterProperties___Builder__set_compressions
+ )
+ },
+ set_compression_level = function(table, compression_level) {
+ # cast to integer but keep names
+ compression_level <- set_names(as.integer(compression_level), names(compression_level))
+ private$.set(
+ table, compression_level,
+ parquet___ArrowWriterProperties___Builder__set_compression_levels
+ )
+ },
+ set_dictionary = function(table, use_dictionary) {
+ assert_that(is.logical(use_dictionary))
+ private$.set(
+ table, use_dictionary,
+ parquet___ArrowWriterProperties___Builder__set_use_dictionary
+ )
+ },
+ set_write_statistics = function(table, write_statistics) {
+ assert_that(is.logical(write_statistics))
+ private$.set(
+ table, write_statistics,
+ parquet___ArrowWriterProperties___Builder__set_write_statistics
+ )
+ },
+ set_data_page_size = function(data_page_size) {
+ parquet___ArrowWriterProperties___Builder__data_page_size(self, data_page_size)
+ }
+ ),
+ private = list(
+ .set = function(table, value, FUN) {
+ msg <- paste0("unsupported ", substitute(value), "= specification")
+ column_names <- names(table)
+ given_names <- names(value)
+ if (is.null(given_names)) {
+ if (length(value) %in% c(1L, length(column_names))) {
+ # If there's a single, unnamed value, FUN will set it globally
+ # If there are values for all columns, send them along with the names
+ FUN(self, column_names, value)
+ } else {
+ abort(msg)
+ }
+ } else if (all(given_names %in% column_names)) {
+ # Use the given names
+ FUN(self, given_names, value)
+ } else {
+ abort(msg)
+ }
+ }
+ )
+)
+
+ParquetWriterProperties$create <- function(table,
+ version = NULL,
+ compression = default_parquet_compression(),
+ compression_level = NULL,
+ use_dictionary = NULL,
+ write_statistics = NULL,
+ data_page_size = NULL,
+ ...) {
+ builder <- parquet___WriterProperties___Builder__create()
+ if (!is.null(version)) {
+ builder$set_version(version)
+ }
+ if (!is.null(compression)) {
+ builder$set_compression(table, compression = compression)
+ }
+ if (!is.null(compression_level)) {
+ builder$set_compression_level(table, compression_level = compression_level)
+ }
+ if (!is.null(use_dictionary)) {
+ builder$set_dictionary(table, use_dictionary)
+ }
+ if (!is.null(write_statistics)) {
+ builder$set_write_statistics(table, write_statistics)
+ }
+ if (!is.null(data_page_size)) {
+ builder$set_data_page_size(data_page_size)
+ }
+ parquet___WriterProperties___Builder__build(builder)
+}
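+
+# A minimal sketch (kept as a comment so nothing runs at load time) of the
+# per-column specification patterns described above; `tbl` and its columns
+# `x` and `y` are hypothetical, and the gzip codec is assumed available:
+#
+#   props <- ParquetWriterProperties$create(
+#     tbl,
+#     compression = "gzip",                 # single unnamed value: all columns
+#     compression_level = c(x = 9, y = 1),  # named: per-column levels
+#     use_dictionary = FALSE                # single value: all columns
+#   )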
+
+#' @title ParquetFileWriter class
+#' @rdname ParquetFileWriter
+#' @name ParquetFileWriter
+#' @docType class
+#' @usage NULL
+#' @format NULL
+#' @description This class enables you to write data to Parquet files.
+#'
+#' @section Factory:
+#'
+#' The `ParquetFileWriter$create()` factory method instantiates the object and
+#' takes the following arguments:
+#'
+#' - `schema` A [Schema]
+#' - `sink` An [arrow::io::OutputStream][OutputStream]
+#' - `properties` An instance of [ParquetWriterProperties]
+#' - `arrow_properties` An instance of `ParquetArrowWriterProperties`
+#'
+#' @section Methods:
+#'
+#' - `WriteTable` Write a [Table] to `sink`
+#' - `Close` Close the writer. Note: does not close the `sink`.
+#' [arrow::io::OutputStream][OutputStream] has its own `close()` method.
+#'
+#' @export
+#' @include arrow-package.R
+ParquetFileWriter <- R6Class("ParquetFileWriter",
+ inherit = ArrowObject,
+ public = list(
+ WriteTable = function(table, chunk_size) {
+ parquet___arrow___FileWriter__WriteTable(self, table, chunk_size)
+ },
+ Close = function() parquet___arrow___FileWriter__Close(self)
+ )
+)
+ParquetFileWriter$create <- function(schema,
+ sink,
+ properties = ParquetWriterProperties$create(),
+ arrow_properties = ParquetArrowWriterProperties$create()) {
+ assert_is(sink, "OutputStream")
+ parquet___arrow___ParquetFileWriter__Open(schema, sink, properties, arrow_properties)
+}
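+
+# A low-level writing sketch (assuming Arrow was built with Parquet support);
+# the table, chunk size, and temp file are illustrative only:
+#
+#   tab <- Table$create(x = 1:5)
+#   sink <- FileOutputStream$create(tempfile(fileext = ".parquet"))
+#   writer <- ParquetFileWriter$create(tab$schema, sink)
+#   writer$WriteTable(tab, chunk_size = 5L)
+#   writer$Close()   # closes the writer...
+#   sink$close()     # ...but the OutputStream must be closed separately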
+
+
+#' @title ParquetFileReader class
+#' @rdname ParquetFileReader
+#' @name ParquetFileReader
+#' @docType class
+#' @usage NULL
+#' @format NULL
+#' @description This class enables you to read data from Parquet files.
+#'
+#' @section Factory:
+#'
+#' The `ParquetFileReader$create()` factory method instantiates the object and
+#' takes the following arguments:
+#'
+#' - `file` A character file name, raw vector, or Arrow file connection object
+#' (e.g. `RandomAccessFile`).
+#' - `props` Optional [ParquetArrowReaderProperties]
+#' - `mmap` Logical: whether to memory-map the file (default `TRUE`)
+#' - `...` Additional arguments, currently ignored
+#'
+#' @section Methods:
+#'
+#' - `$ReadTable(column_indices)`: get an `arrow::Table` from the file. The optional
+#' `column_indices=` argument is a 0-based integer vector indicating which columns to retain.
+#' - `$ReadRowGroup(i, column_indices)`: get an `arrow::Table` by reading the `i`th row group (0-based).
+#' The optional `column_indices=` argument is a 0-based integer vector indicating which columns to retain.
+#' - `$ReadRowGroups(row_groups, column_indices)`: get an `arrow::Table` by reading several row
+#' groups (0-based integers).
+#' The optional `column_indices=` argument is a 0-based integer vector indicating which columns to retain.
+#' - `$GetSchema()`: get the `arrow::Schema` of the data in the file
+#' - `$ReadColumn(i)`: read the `i`th column (0-based) as a [ChunkedArray].
+#'
+#' @section Active bindings:
+#'
+#' - `$num_rows`: number of rows.
+#' - `$num_columns`: number of columns.
+#' - `$num_row_groups`: number of row groups.
+#'
+#' @export
+#' @examplesIf arrow_with_parquet()
+#' f <- system.file("v0.7.1.parquet", package = "arrow")
+#' pq <- ParquetFileReader$create(f)
+#' pq$GetSchema()
+#' if (codec_is_available("snappy")) {
+#' # This file has compressed data columns
+#' tab <- pq$ReadTable()
+#' tab$schema
+#' }
+#' @include arrow-package.R
+ParquetFileReader <- R6Class("ParquetFileReader",
+ inherit = ArrowObject,
+ active = list(
+ num_rows = function() {
+ as.integer(parquet___arrow___FileReader__num_rows(self))
+ },
+ num_columns = function() {
+ parquet___arrow___FileReader__num_columns(self)
+ },
+ num_row_groups = function() {
+ parquet___arrow___FileReader__num_row_groups(self)
+ }
+ ),
+ public = list(
+ ReadTable = function(column_indices = NULL) {
+ if (is.null(column_indices)) {
+ parquet___arrow___FileReader__ReadTable1(self)
+ } else {
+ column_indices <- vec_cast(column_indices, integer())
+ parquet___arrow___FileReader__ReadTable2(self, column_indices)
+ }
+ },
+ ReadRowGroup = function(i, column_indices = NULL) {
+ i <- vec_cast(i, integer())
+ if (is.null(column_indices)) {
+ parquet___arrow___FileReader__ReadRowGroup1(self, i)
+ } else {
+ column_indices <- vec_cast(column_indices, integer())
+ parquet___arrow___FileReader__ReadRowGroup2(self, i, column_indices)
+ }
+ },
+ ReadRowGroups = function(row_groups, column_indices = NULL) {
+ row_groups <- vec_cast(row_groups, integer())
+ if (is.null(column_indices)) {
+ parquet___arrow___FileReader__ReadRowGroups1(self, row_groups)
+ } else {
+ column_indices <- vec_cast(column_indices, integer())
+ parquet___arrow___FileReader__ReadRowGroups2(self, row_groups, column_indices)
+ }
+ },
+ ReadColumn = function(i) {
+ i <- vec_cast(i, integer())
+ parquet___arrow___FileReader__ReadColumn(self, i)
+ },
+ GetSchema = function() {
+ parquet___arrow___FileReader__GetSchema(self)
+ }
+ )
+)
+
+ParquetFileReader$create <- function(file,
+ props = ParquetArrowReaderProperties$create(),
+ mmap = TRUE,
+ ...) {
+ file <- make_readable_file(file, mmap)
+ assert_is(props, "ParquetArrowReaderProperties")
+
+ parquet___arrow___FileReader__OpenFile(file, props)
+}
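+
+# A reading sketch building on the roxygen example above; row group and column
+# indices are 0-based, and reading this file's data columns assumes the snappy
+# codec is available:
+#
+#   f <- system.file("v0.7.1.parquet", package = "arrow")
+#   reader <- ParquetFileReader$create(f)
+#   reader$num_row_groups
+#   two_cols <- reader$ReadTable(column_indices = c(0L, 1L))
+#   first_col <- reader$ReadColumn(0L)  # a ChunkedArray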
+
+#' @title ParquetArrowReaderProperties class
+#' @rdname ParquetArrowReaderProperties
+#' @name ParquetArrowReaderProperties
+#' @docType class
+#' @usage NULL
+#' @format NULL
+#' @description This class holds settings to control how a Parquet file is read
+#' by [ParquetFileReader].
+#'
+#' @section Factory:
+#'
+#' The `ParquetArrowReaderProperties$create()` factory method instantiates the object
+#' and takes the following arguments:
+#'
+#' - `use_threads` Logical: whether to use multithreading (default `TRUE`)
+#'
+#' @section Methods:
+#'
+#' - `$read_dictionary(column_index)`
+#' - `$set_read_dictionary(column_index, read_dict)`
+#' - `$use_threads(use_threads)`
+#'
+#' @export
+ParquetArrowReaderProperties <- R6Class("ParquetArrowReaderProperties",
+ inherit = ArrowObject,
+ public = list(
+ read_dictionary = function(column_index) {
+ parquet___arrow___ArrowReaderProperties__get_read_dictionary(self, column_index)
+ },
+ set_read_dictionary = function(column_index, read_dict) {
+ parquet___arrow___ArrowReaderProperties__set_read_dictionary(self, column_index, read_dict)
+ }
+ ),
+ active = list(
+ use_threads = function(use_threads) {
+ if (missing(use_threads)) {
+ parquet___arrow___ArrowReaderProperties__get_use_threads(self)
+ } else {
+ parquet___arrow___ArrowReaderProperties__set_use_threads(self, use_threads)
+ }
+ }
+ )
+)
+
+ParquetArrowReaderProperties$create <- function(use_threads = option_use_threads()) {
+ parquet___arrow___ArrowReaderProperties__Make(isTRUE(use_threads))
+}
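+
+# A sketch of tuning reader properties before opening a file; `f` is a Parquet
+# file path as in the sketch above, and column index 0 is illustrative:
+#
+#   props <- ParquetArrowReaderProperties$create(use_threads = FALSE)
+#   props$set_read_dictionary(0L, TRUE)  # read column 0 as dictionary-encoded
+#   reader <- ParquetFileReader$create(f, props = props)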
diff --git a/src/arrow/r/R/python.R b/src/arrow/r/R/python.R
new file mode 100644
index 000000000..07cd4456b
--- /dev/null
+++ b/src/arrow/r/R/python.R
@@ -0,0 +1,225 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+py_to_r.pyarrow.lib.Array <- function(x, ...) {
+ schema_ptr <- allocate_arrow_schema()
+ array_ptr <- allocate_arrow_array()
+ on.exit({
+ delete_arrow_schema(schema_ptr)
+ delete_arrow_array(array_ptr)
+ })
+
+ x$`_export_to_c`(array_ptr, schema_ptr)
+ Array$import_from_c(array_ptr, schema_ptr)
+}
+
+r_to_py.Array <- function(x, convert = FALSE) {
+ schema_ptr <- allocate_arrow_schema()
+ array_ptr <- allocate_arrow_array()
+ on.exit({
+ delete_arrow_schema(schema_ptr)
+ delete_arrow_array(array_ptr)
+ })
+
+ # Import with convert = FALSE so that `_import_from_c` returns a Python object
+ pa <- reticulate::import("pyarrow", convert = FALSE)
+ x$export_to_c(array_ptr, schema_ptr)
+ out <- pa$Array$`_import_from_c`(array_ptr, schema_ptr)
+ # But set the convert attribute on the return object to the requested value
+ assign("convert", convert, out)
+ out
+}
+
+py_to_r.pyarrow.lib.RecordBatch <- function(x, ...) {
+ schema_ptr <- allocate_arrow_schema()
+ array_ptr <- allocate_arrow_array()
+ on.exit({
+ delete_arrow_schema(schema_ptr)
+ delete_arrow_array(array_ptr)
+ })
+
+ x$`_export_to_c`(array_ptr, schema_ptr)
+
+ RecordBatch$import_from_c(array_ptr, schema_ptr)
+}
+
+r_to_py.RecordBatch <- function(x, convert = FALSE) {
+ schema_ptr <- allocate_arrow_schema()
+ array_ptr <- allocate_arrow_array()
+ on.exit({
+ delete_arrow_schema(schema_ptr)
+ delete_arrow_array(array_ptr)
+ })
+
+ # Import with convert = FALSE so that `_import_from_c` returns a Python object
+ pa <- reticulate::import("pyarrow", convert = FALSE)
+ x$export_to_c(array_ptr, schema_ptr)
+ out <- pa$RecordBatch$`_import_from_c`(array_ptr, schema_ptr)
+ # But set the convert attribute on the return object to the requested value
+ assign("convert", convert, out)
+ out
+}
+
+r_to_py.ChunkedArray <- function(x, convert = FALSE) {
+ # Import with convert = FALSE so that `_import_from_c` returns a Python object
+ pa <- reticulate::import("pyarrow", convert = FALSE)
+ out <- pa$chunked_array(x$chunks)
+ # But set the convert attribute on the return object to the requested value
+ assign("convert", convert, out)
+ out
+}
+
+py_to_r.pyarrow.lib.ChunkedArray <- function(x, ...) {
+ ChunkedArray$create(!!!maybe_py_to_r(x$chunks))
+}
+
+r_to_py.Table <- function(x, convert = FALSE) {
+ # Import with convert = FALSE so that `_import_from_c` returns a Python object
+ pa <- reticulate::import("pyarrow", convert = FALSE)
+ out <- pa$Table$from_arrays(x$columns, schema = x$schema)
+ # But set the convert attribute on the return object to the requested value
+ assign("convert", convert, out)
+ out
+}
+
+py_to_r.pyarrow.lib.Table <- function(x, ...) {
+ colnames <- maybe_py_to_r(x$column_names)
+ r_cols <- maybe_py_to_r(x$columns)
+ names(r_cols) <- colnames
+ Table$create(!!!r_cols, schema = maybe_py_to_r(x$schema))
+}
+
+py_to_r.pyarrow.lib.Schema <- function(x, ...) {
+ schema_ptr <- allocate_arrow_schema()
+ on.exit(delete_arrow_schema(schema_ptr))
+
+ x$`_export_to_c`(schema_ptr)
+ Schema$import_from_c(schema_ptr)
+}
+
+r_to_py.Schema <- function(x, convert = FALSE) {
+ schema_ptr <- allocate_arrow_schema()
+ on.exit(delete_arrow_schema(schema_ptr))
+
+ # Import with convert = FALSE so that `_import_from_c` returns a Python object
+ pa <- reticulate::import("pyarrow", convert = FALSE)
+ x$export_to_c(schema_ptr)
+ out <- pa$Schema$`_import_from_c`(schema_ptr)
+ # But set the convert attribute on the return object to the requested value
+ assign("convert", convert, out)
+ out
+}
+
+py_to_r.pyarrow.lib.Field <- function(x, ...) {
+ schema_ptr <- allocate_arrow_schema()
+ on.exit(delete_arrow_schema(schema_ptr))
+
+ x$`_export_to_c`(schema_ptr)
+ Field$import_from_c(schema_ptr)
+}
+
+r_to_py.Field <- function(x, convert = FALSE) {
+ schema_ptr <- allocate_arrow_schema()
+ on.exit(delete_arrow_schema(schema_ptr))
+
+ # Import with convert = FALSE so that `_import_from_c` returns a Python object
+ pa <- reticulate::import("pyarrow", convert = FALSE)
+ x$export_to_c(schema_ptr)
+ out <- pa$Field$`_import_from_c`(schema_ptr)
+ # But set the convert attribute on the return object to the requested value
+ assign("convert", convert, out)
+ out
+}
+
+py_to_r.pyarrow.lib.DataType <- function(x, ...) {
+ schema_ptr <- allocate_arrow_schema()
+ on.exit(delete_arrow_schema(schema_ptr))
+
+ x$`_export_to_c`(schema_ptr)
+ DataType$import_from_c(schema_ptr)
+}
+
+r_to_py.DataType <- function(x, convert = FALSE) {
+ schema_ptr <- allocate_arrow_schema()
+ on.exit(delete_arrow_schema(schema_ptr))
+
+ # Import with convert = FALSE so that `_import_from_c` returns a Python object
+ pa <- reticulate::import("pyarrow", convert = FALSE)
+ x$export_to_c(schema_ptr)
+ out <- pa$DataType$`_import_from_c`(schema_ptr)
+ # But set the convert attribute on the return object to the requested value
+ assign("convert", convert, out)
+ out
+}
+
+py_to_r.pyarrow.lib.RecordBatchReader <- function(x, ...) {
+ stream_ptr <- allocate_arrow_array_stream()
+ on.exit(delete_arrow_array_stream(stream_ptr))
+
+ x$`_export_to_c`(stream_ptr)
+ RecordBatchReader$import_from_c(stream_ptr)
+}
+
+r_to_py.RecordBatchReader <- function(x, convert = FALSE) {
+ stream_ptr <- allocate_arrow_array_stream()
+ on.exit(delete_arrow_array_stream(stream_ptr))
+
+ # Import with convert = FALSE so that `_import_from_c` returns a Python object
+ pa <- reticulate::import("pyarrow", convert = FALSE)
+ x$export_to_c(stream_ptr)
+ # TODO: handle subclasses of RecordBatchReader?
+ out <- pa$lib$RecordBatchReader$`_import_from_c`(stream_ptr)
+ # But set the convert attribute on the return object to the requested value
+ assign("convert", convert, out)
+ out
+}
+
+
+maybe_py_to_r <- function(x) {
+ if (inherits(x, "python.builtin.object")) {
+ # Depending on some auto-convert behavior, x may already be converted
+ # or it may still be a Python object
+ x <- reticulate::py_to_r(x)
+ }
+ x
+}
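+
+# A round-trip sketch, assuming reticulate and pyarrow are installed and that
+# these conversion functions have been registered as S3 methods (arrow
+# registers them when reticulate is available):
+#
+#   a <- Array$create(1:3)
+#   py_a <- reticulate::r_to_py(a)   # zero-copy export to a pyarrow Array
+#   reticulate::py_to_r(py_a)        # ...and back to an arrow Array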
+
+#' Install pyarrow for use with reticulate
+#'
+#' `pyarrow` is the Python package for Apache Arrow. This function helps with
+#' installing it for use with `reticulate`.
+#'
+#' @param envname The name or full path of the Python environment to install
+#' into. This can be a virtualenv or conda environment created by `reticulate`.
+#' See `reticulate::py_install()`.
+#' @param nightly logical: Should we install a development version of the
+#' package? Default is to use the official release version.
+#' @param ... additional arguments passed to `reticulate::py_install()`.
+#' @export
+install_pyarrow <- function(envname = NULL, nightly = FALSE, ...) {
+ if (nightly) {
+ reticulate::py_install("pyarrow",
+ envname = envname, ...,
+ # Nightly for pip
+ pip_options = "--extra-index-url https://repo.fury.io/arrow-nightlies/ --pre --upgrade",
+ # Nightly for conda
+ channel = "arrow-nightlies"
+ )
+ } else {
+ reticulate::py_install("pyarrow", envname = envname, ...)
+ }
+}
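+
+# For example, to install a nightly pyarrow build into an environment named
+# "r-arrow" (the environment name is illustrative):
+#
+#   install_pyarrow("r-arrow", nightly = TRUE)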
diff --git a/src/arrow/r/R/query-engine.R b/src/arrow/r/R/query-engine.R
new file mode 100644
index 000000000..234aaf569
--- /dev/null
+++ b/src/arrow/r/R/query-engine.R
@@ -0,0 +1,298 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+do_exec_plan <- function(.data) {
+ plan <- ExecPlan$create()
+ final_node <- plan$Build(.data)
+ tab <- plan$Run(final_node)
+
+ # TODO (ARROW-14289): make the head/tail methods return RBR not Table
+ if (inherits(tab, "RecordBatchReader")) {
+ tab <- tab$read_table()
+ }
+
+ # If arrange() created $temp_columns, make sure to omit them from the result
+ # We can't currently handle this in the ExecPlan itself because sorting
+  # happens at the end (in the SinkNode), so nothing comes after it.
+ if (length(final_node$sort$temp_columns) > 0) {
+ tab <- tab[, setdiff(names(tab), final_node$sort$temp_columns), drop = FALSE]
+ }
+
+ if (ncol(tab)) {
+ # Apply any column metadata from the original schema, where appropriate
+ original_schema <- source_data(.data)$schema
+ # TODO: do we care about other (non-R) metadata preservation?
+ # How would we know if it were meaningful?
+ r_meta <- original_schema$r_metadata
+ if (!is.null(r_meta)) {
+ # Filter r_metadata$columns on columns with name _and_ type match
+ new_schema <- tab$schema
+ common_names <- intersect(names(r_meta$columns), names(tab))
+ keep <- common_names[
+ map_lgl(common_names, ~ original_schema[[.]] == new_schema[[.]])
+ ]
+ r_meta$columns <- r_meta$columns[keep]
+ if (has_aggregation(.data)) {
+ # dplyr drops top-level attributes if you do summarize
+ r_meta$attributes <- NULL
+ }
+ tab$r_metadata <- r_meta
+ }
+ }
+
+ tab
+}
+
+ExecPlan <- R6Class("ExecPlan",
+ inherit = ArrowObject,
+ public = list(
+ Scan = function(dataset) {
+ # Handle arrow_dplyr_query
+ if (inherits(dataset, "arrow_dplyr_query")) {
+ if (inherits(dataset$.data, "RecordBatchReader")) {
+ return(ExecNode_ReadFromRecordBatchReader(self, dataset$.data))
+ }
+
+ filter <- dataset$filtered_rows
+ if (isTRUE(filter)) {
+ filter <- Expression$scalar(TRUE)
+ }
+ # Use FieldsInExpression to find all from dataset$selected_columns
+ colnames <- unique(unlist(map(
+ dataset$selected_columns,
+ field_names_in_expression
+ )))
+ dataset <- dataset$.data
+ assert_is(dataset, "Dataset")
+ } else {
+ if (inherits(dataset, "ArrowTabular")) {
+ dataset <- InMemoryDataset$create(dataset)
+ }
+ assert_is(dataset, "Dataset")
+ # Set some defaults
+ filter <- Expression$scalar(TRUE)
+ colnames <- names(dataset)
+ }
+ # ScanNode needs the filter to do predicate pushdown and skip partitions,
+ # and it needs to know which fields to materialize (and which are unnecessary)
+ ExecNode_Scan(self, dataset, filter, colnames %||% character(0))
+ },
+ Build = function(.data) {
+ # This method takes an arrow_dplyr_query and chains together the
+ # ExecNodes that they produce. It does not evaluate them--that is Run().
+ group_vars <- dplyr::group_vars(.data)
+ grouped <- length(group_vars) > 0
+
+ # Collect the target names first because we have to add back the group vars
+ target_names <- names(.data)
+ .data <- ensure_group_vars(.data)
+ .data <- ensure_arrange_vars(.data) # this sets .data$temp_columns
+
+ if (inherits(.data$.data, "arrow_dplyr_query")) {
+ # We have a nested query. Recurse.
+ node <- self$Build(.data$.data)
+ } else {
+ node <- self$Scan(.data)
+ }
+
+ # ARROW-13498: Even though Scan takes the filter, apparently we have to do it again
+ if (inherits(.data$filtered_rows, "Expression")) {
+ node <- node$Filter(.data$filtered_rows)
+ }
+
+ if (!is.null(.data$aggregations)) {
+ # Project to include just the data required for each aggregation,
+ # plus group_by_vars (last)
+ # TODO: validate that none of names(aggregations) are the same as names(group_by_vars)
+ # dplyr does not error on this but the result it gives isn't great
+ node <- node$Project(summarize_projection(.data))
+
+ if (grouped) {
+ # We need to prefix all of the aggregation function names with "hash_"
+ .data$aggregations <- lapply(.data$aggregations, function(x) {
+ x[["fun"]] <- paste0("hash_", x[["fun"]])
+ x
+ })
+ }
+
+ node <- node$Aggregate(
+ options = map(.data$aggregations, ~ .[c("fun", "options")]),
+ target_names = names(.data$aggregations),
+ out_field_names = names(.data$aggregations),
+ key_names = group_vars
+ )
+
+ if (grouped) {
+ # The result will have result columns first then the grouping cols.
+ # dplyr orders group cols first, so adapt the result to meet that expectation.
+ node <- node$Project(
+ make_field_refs(c(group_vars, names(.data$aggregations)))
+ )
+ if (getOption("arrow.summarise.sort", FALSE)) {
+ # Add sorting instructions for the rows too to match dplyr
+ # (see below about why sorting isn't itself a Node)
+ node$sort <- list(
+ names = group_vars,
+ orders = rep(0L, length(group_vars))
+ )
+ }
+ }
+ } else {
+ # If any columns are derived, reordered, or renamed we need to Project
+ # If there are aggregations, the projection was already handled above
+ # We have to project at least once to eliminate some junk columns
+ # that the ExecPlan adds:
+ # __fragment_index, __batch_index, __last_in_fragment
+ # Presumably extraneous repeated projection of the same thing
+ # (as when we've done collapse() and not projected after) is cheap/no-op
+ projection <- c(.data$selected_columns, .data$temp_columns)
+ node <- node$Project(projection)
+
+ if (!is.null(.data$join)) {
+ node <- node$Join(
+ type = .data$join$type,
+ right_node = self$Build(.data$join$right_data),
+ by = .data$join$by,
+ left_output = names(.data),
+ right_output = setdiff(names(.data$join$right_data), .data$join$by)
+ )
+ }
+ }
+
+ # Apply sorting: this is currently not an ExecNode itself, it is a
+ # sink node option.
+ # TODO: handle some cases:
+ # (1) arrange > summarize > arrange
+ # (2) ARROW-13779: arrange then operation where order matters (e.g. cumsum)
+ if (length(.data$arrange_vars)) {
+ node$sort <- list(
+ names = names(.data$arrange_vars),
+ orders = .data$arrange_desc,
+ temp_columns = names(.data$temp_columns)
+ )
+ }
+
+ # This is only safe because we are going to evaluate queries that end
+ # with head/tail first, then evaluate any subsequent query as a new query
+ if (!is.null(.data$head)) {
+ node$head <- .data$head
+ }
+ if (!is.null(.data$tail)) {
+ node$tail <- .data$tail
+ }
+
+ node
+ },
+ Run = function(node) {
+ assert_is(node, "ExecNode")
+
+ # Sorting and head/tail (if sorted) are handled in the SinkNode,
+ # created in ExecPlan_run
+ sorting <- node$sort %||% list()
+ select_k <- node$head %||% -1L
+ has_sorting <- length(sorting) > 0
+ if (has_sorting) {
+ if (!is.null(node$tail)) {
+ # Reverse the sort order and take the top K, then after we'll reverse
+ # the resulting rows so that it is ordered as expected
+ sorting$orders <- !sorting$orders
+ select_k <- node$tail
+ }
+ sorting$orders <- as.integer(sorting$orders)
+ }
+
+ out <- ExecPlan_run(self, node, sorting, select_k)
+
+ if (!has_sorting) {
+ # Since ExecPlans don't scan in deterministic order, head/tail are both
+ # essentially taking a random slice from somewhere in the dataset.
+ # And since the head() implementation is way more efficient than tail(),
+ # just use it to take the random slice
+ slice_size <- node$head %||% node$tail
+ if (!is.null(slice_size)) {
+ # TODO (ARROW-14289): make the head methods return RBR not Table
+ out <- head(out, slice_size)
+ }
+ # Can we now tell `self$Stop()` to StopProducing? We already have
+ # everything we need for the head (but it seems to segfault: ARROW-14329)
+ } else if (!is.null(node$tail)) {
+ # Reverse the row order to get back what we expect
+ # TODO: don't return Table, return RecordBatchReader
+ out <- out$read_table()
+ out <- out[rev(seq_len(nrow(out))), , drop = FALSE]
+ }
+
+ out
+ },
+ Stop = function() ExecPlan_StopProducing(self)
+ )
+)
+ExecPlan$create <- function(use_threads = option_use_threads()) {
+ ExecPlan_create(use_threads)
+}
+
+ExecNode <- R6Class("ExecNode",
+ inherit = ArrowObject,
+ public = list(
+ # `sort` is a slight hack to be able to keep around arrange() params,
+ # which don't currently yield their own ExecNode but rather are consumed
+ # in the SinkNode (in ExecPlan$run())
+ sort = NULL,
+ # Similar hacks for head and tail
+ head = NULL,
+ tail = NULL,
+ preserve_sort = function(new_node) {
+ new_node$sort <- self$sort
+ new_node$head <- self$head
+ new_node$tail <- self$tail
+ new_node
+ },
+ Project = function(cols) {
+ if (length(cols)) {
+ assert_is_list_of(cols, "Expression")
+ self$preserve_sort(ExecNode_Project(self, cols, names(cols)))
+ } else {
+ self$preserve_sort(ExecNode_Project(self, character(0), character(0)))
+ }
+ },
+ Filter = function(expr) {
+ assert_is(expr, "Expression")
+ self$preserve_sort(ExecNode_Filter(self, expr))
+ },
+ Aggregate = function(options, target_names, out_field_names, key_names) {
+ self$preserve_sort(
+ ExecNode_Aggregate(self, options, target_names, out_field_names, key_names)
+ )
+ },
+ Join = function(type, right_node, by, left_output, right_output) {
+ self$preserve_sort(
+ ExecNode_Join(
+ self,
+ type,
+ right_node,
+ left_keys = names(by),
+ right_keys = by,
+ left_output = left_output,
+ right_output = right_output
+ )
+ )
+ }
+ ),
+ active = list(
+ schema = function() ExecNode_output_schema(self)
+ )
+)
diff --git a/src/arrow/r/R/record-batch-reader.R b/src/arrow/r/R/record-batch-reader.R
new file mode 100644
index 000000000..1542e3649
--- /dev/null
+++ b/src/arrow/r/R/record-batch-reader.R
@@ -0,0 +1,164 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+#' @title RecordBatchReader classes
+#' @description Apache Arrow defines two formats for [serializing data for interprocess
+#' communication
+#' (IPC)](https://arrow.apache.org/docs/format/Columnar.html#serialization-and-interprocess-communication-ipc):
+#' a "stream" format and a "file" format, known as Feather.
+#' `RecordBatchStreamReader` and `RecordBatchFileReader` are
+#' interfaces for accessing record batches from input sources in those formats,
+#' respectively.
+#'
+#' For guidance on how to use these classes, see the examples section.
+#'
+#' @seealso [read_ipc_stream()] and [read_feather()] provide a much simpler interface
+#' for reading data from these formats and are sufficient for many use cases.
+#' @usage NULL
+#' @format NULL
+#' @docType class
+#' @section Factory:
+#'
+#' The `RecordBatchFileReader$create()` and `RecordBatchStreamReader$create()`
+#' factory methods instantiate the object and
+#' take a single argument, named according to the class:
+#'
+#' - `file` A character file name, raw vector, or Arrow file connection object
+#' (e.g. [RandomAccessFile]).
+#' - `stream` A raw vector, [Buffer], or [InputStream].
+#'
+#' @section Methods:
+#'
+#' - `$read_next_batch()`: Returns a `RecordBatch`, iterating through the
+#' Reader. If there are no further batches in the Reader, it returns `NULL`.
+#' - `$schema`: Returns a [Schema] (active binding)
+#' - `$batches()`: Returns a list of `RecordBatch`es
+#' - `$read_table()`: Collects the reader's `RecordBatch`es into a [Table]
+#' - `$get_batch(i)`: For `RecordBatchFileReader`, return a particular batch
+#' by an integer index.
+#' - `$num_record_batches`: For `RecordBatchFileReader`, an active binding giving
+#' the number of batches in the file.
+#'
+#' @rdname RecordBatchReader
+#' @name RecordBatchReader
+#' @include arrow-package.R
+#' @examplesIf arrow_available()
+#' tf <- tempfile()
+#' on.exit(unlink(tf))
+#'
+#' batch <- record_batch(chickwts)
+#'
+#' # This opens a connection to the file in Arrow
+#' file_obj <- FileOutputStream$create(tf)
+#' # Pass that to a RecordBatchWriter to write data conforming to a schema
+#' writer <- RecordBatchFileWriter$create(file_obj, batch$schema)
+#' writer$write(batch)
+#' # You may write additional batches to the stream, provided that they have
+#' # the same schema.
+#' # Call "close" on the writer to indicate end-of-file/stream
+#' writer$close()
+#' # Then, close the connection--closing the IPC message does not close the file
+#' file_obj$close()
+#'
+#' # Now, we have a file we can read from. Same pattern: open file connection,
+#' # then pass it to a RecordBatchReader
+#' read_file_obj <- ReadableFile$create(tf)
+#' reader <- RecordBatchFileReader$create(read_file_obj)
+#' # RecordBatchFileReader knows how many batches it has (StreamReader does not)
+#' reader$num_record_batches
+#' # We could consume the Reader by calling $read_next_batch() until all are
+#' # consumed, or we can call $read_table() to pull them all into a Table
+#' tab <- reader$read_table()
+#' # Call as.data.frame to turn that Table into an R data.frame
+#' df <- as.data.frame(tab)
+#' # This should be the same data we sent
+#' all.equal(df, chickwts, check.attributes = FALSE)
+#' # Unlike the Writers, we don't have to close RecordBatchReaders,
+#' # but we do still need to close the file connection
+#' read_file_obj$close()
+RecordBatchReader <- R6Class("RecordBatchReader",
+ inherit = ArrowObject,
+ public = list(
+ read_next_batch = function() RecordBatchReader__ReadNext(self),
+ batches = function() RecordBatchReader__batches(self),
+ read_table = function() Table__from_RecordBatchReader(self),
+ export_to_c = function(stream_ptr) ExportRecordBatchReader(self, stream_ptr)
+ ),
+ active = list(
+ schema = function() RecordBatchReader__schema(self)
+ )
+)
+
+#' @export
+head.RecordBatchReader <- function(x, n = 6L, ...) {
+ head(Scanner$create(x), n)
+}
+
+#' @export
+tail.RecordBatchReader <- function(x, n = 6L, ...) {
+ tail(Scanner$create(x), n)
+}
+
+#' @rdname RecordBatchReader
+#' @usage NULL
+#' @format NULL
+#' @export
+RecordBatchStreamReader <- R6Class("RecordBatchStreamReader", inherit = RecordBatchReader)
+RecordBatchStreamReader$create <- function(stream) {
+ if (inherits(stream, c("raw", "Buffer"))) {
+ # TODO: deprecate this because it doesn't close the connection to the Buffer
+ # (that's a problem, right?)
+ stream <- BufferReader$create(stream)
+ }
+ assert_is(stream, "InputStream")
+ ipc___RecordBatchStreamReader__Open(stream)
+}
+#' @include arrowExports.R
+RecordBatchReader$import_from_c <- RecordBatchStreamReader$import_from_c <- ImportRecordBatchReader
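+
+# A buffer-based sketch: serialize a data frame to the IPC stream format as a
+# raw vector with write_to_raw(), then read it back through the stream reader:
+#
+#   buf <- write_to_raw(data.frame(x = 1:3))
+#   reader <- RecordBatchStreamReader$create(buf)
+#   reader$read_table()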
+
+#' @rdname RecordBatchReader
+#' @usage NULL
+#' @format NULL
+#' @export
+RecordBatchFileReader <- R6Class("RecordBatchFileReader",
+ inherit = ArrowObject,
+ # Why doesn't this inherit from RecordBatchReader in C++?
+ # Origin: https://github.com/apache/arrow/pull/679
+ public = list(
+ get_batch = function(i) {
+ ipc___RecordBatchFileReader__ReadRecordBatch(self, i)
+ },
+ batches = function() {
+ ipc___RecordBatchFileReader__batches(self)
+ },
+ read_table = function() Table__from_RecordBatchFileReader(self)
+ ),
+ active = list(
+ num_record_batches = function() ipc___RecordBatchFileReader__num_record_batches(self),
+ schema = function() ipc___RecordBatchFileReader__schema(self)
+ )
+)
+RecordBatchFileReader$create <- function(file) {
+ if (inherits(file, c("raw", "Buffer"))) {
+ # TODO: deprecate this because it doesn't close the connection to the Buffer
+ # (that's a problem, right?)
+ file <- BufferReader$create(file)
+ }
+ assert_is(file, "InputStream")
+ ipc___RecordBatchFileReader__Open(file)
+}
diff --git a/src/arrow/r/R/record-batch-writer.R b/src/arrow/r/R/record-batch-writer.R
new file mode 100644
index 000000000..8675e785a
--- /dev/null
+++ b/src/arrow/r/R/record-batch-writer.R
@@ -0,0 +1,194 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+#' @title RecordBatchWriter classes
+#' @description Apache Arrow defines two formats for [serializing data for interprocess
+#' communication
+#' (IPC)](https://arrow.apache.org/docs/format/Columnar.html#serialization-and-interprocess-communication-ipc):
+#' a "stream" format and a "file" format, known as Feather.
+#' `RecordBatchStreamWriter` and `RecordBatchFileWriter` are
+#' interfaces for writing record batches to those formats, respectively.
+#'
+#' For guidance on how to use these classes, see the examples section.
+#'
+#' @seealso [write_ipc_stream()] and [write_feather()] provide a much simpler
+#' interface for writing data to these formats and are sufficient for many use
+#' cases. [write_to_raw()] is a version that serializes data to a buffer.
+#' @usage NULL
+#' @format NULL
+#' @docType class
+#' @section Factory:
+#'
+#' The `RecordBatchFileWriter$create()` and `RecordBatchStreamWriter$create()`
+#' factory methods instantiate the object and take the following arguments:
+#'
+#' - `sink` An `OutputStream`
+#' - `schema` A [Schema] for the data to be written
+#' - `use_legacy_format` logical: write data formatted so that Arrow libraries
+#' versions 0.14 and lower can read it. Default is `FALSE`. You can also
+#' enable this by setting the environment variable `ARROW_PRE_0_15_IPC_FORMAT=1`.
+#' - `metadata_version`: A string like "V5" or the equivalent integer indicating
+#' the Arrow IPC MetadataVersion. Default (NULL) will use the latest version,
+#' unless the environment variable `ARROW_PRE_1_0_METADATA_VERSION=1`, in
+#' which case it will be V4.
+#'
+#' @section Methods:
+#'
+#' - `$write(x)`: Write a [RecordBatch], [Table], or `data.frame`, dispatching
+#' to the methods below appropriately
+#' - `$write_batch(batch)`: Write a `RecordBatch` to stream
+#' - `$write_table(table)`: Write a `Table` to stream
+#' - `$close()`: close stream. Note that this indicates end-of-file or
+#' end-of-stream--it does not close the connection to the `sink`. That needs
+#' to be closed separately.
+#'
+#' @rdname RecordBatchWriter
+#' @name RecordBatchWriter
+#' @include arrow-package.R
+#' @examplesIf arrow_available()
+#' tf <- tempfile()
+#' on.exit(unlink(tf))
+#'
+#' batch <- record_batch(chickwts)
+#'
+#' # This opens a connection to the file in Arrow
+#' file_obj <- FileOutputStream$create(tf)
+#' # Pass that to a RecordBatchWriter to write data conforming to a schema
+#' writer <- RecordBatchFileWriter$create(file_obj, batch$schema)
+#' writer$write(batch)
+#' # You may write additional batches to the stream, provided that they have
+#' # the same schema.
+#' # Call "close" on the writer to indicate end-of-file/stream
+#' writer$close()
+#' # Then, close the connection--closing the IPC message does not close the file
+#' file_obj$close()
+#'
+#' # Now, we have a file we can read from. Same pattern: open file connection,
+#' # then pass it to a RecordBatchReader
+#' read_file_obj <- ReadableFile$create(tf)
+#' reader <- RecordBatchFileReader$create(read_file_obj)
+#' # RecordBatchFileReader knows how many batches it has (StreamReader does not)
+#' reader$num_record_batches
+#' # We could consume the Reader by calling $read_next_batch() until all are
+#' # consumed, or we can call $read_table() to pull them all into a Table
+#' tab <- reader$read_table()
+#' # Call as.data.frame to turn that Table into an R data.frame
+#' df <- as.data.frame(tab)
+#' # This should be the same data we sent
+#' all.equal(df, chickwts, check.attributes = FALSE)
+#' # Unlike the Writers, we don't have to close RecordBatchReaders,
+#' # but we do still need to close the file connection
+#' read_file_obj$close()
+RecordBatchWriter <- R6Class("RecordBatchWriter",
+ inherit = ArrowObject,
+ public = list(
+ write_batch = function(batch) ipc___RecordBatchWriter__WriteRecordBatch(self, batch),
+ write_table = function(table) ipc___RecordBatchWriter__WriteTable(self, table),
+ write = function(x) {
+ if (inherits(x, "RecordBatch")) {
+ self$write_batch(x)
+ } else if (inherits(x, "Table")) {
+ self$write_table(x)
+ } else {
+ self$write_table(Table$create(x))
+ }
+ },
+ close = function() ipc___RecordBatchWriter__Close(self)
+ )
+)
+
+#' @usage NULL
+#' @format NULL
+#' @rdname RecordBatchWriter
+#' @export
+RecordBatchStreamWriter <- R6Class("RecordBatchStreamWriter", inherit = RecordBatchWriter)
+RecordBatchStreamWriter$create <- function(sink,
+ schema,
+ use_legacy_format = NULL,
+ metadata_version = NULL) {
+ if (is.string(sink)) {
+ stop(
+ "RecordBatchStreamWriter$create() requires an Arrow InputStream. ",
+ "Try providing FileOutputStream$create(", substitute(sink), ")",
+ call. = FALSE
+ )
+ }
+ assert_is(sink, "OutputStream")
+ assert_is(schema, "Schema")
+
+ ipc___RecordBatchStreamWriter__Open(
+ sink,
+ schema,
+ get_ipc_use_legacy_format(use_legacy_format),
+ get_ipc_metadata_version(metadata_version)
+ )
+}
+
+#' @usage NULL
+#' @format NULL
+#' @rdname RecordBatchWriter
+#' @export
+RecordBatchFileWriter <- R6Class("RecordBatchFileWriter", inherit = RecordBatchStreamWriter)
+RecordBatchFileWriter$create <- function(sink,
+ schema,
+ use_legacy_format = NULL,
+ metadata_version = NULL) {
+ if (is.string(sink)) {
+ stop(
+ "RecordBatchFileWriter$create() requires an Arrow InputStream. ",
+ "Try providing FileOutputStream$create(", substitute(sink), ")",
+ call. = FALSE
+ )
+ }
+ assert_is(sink, "OutputStream")
+ assert_is(schema, "Schema")
+
+ ipc___RecordBatchFileWriter__Open(
+ sink,
+ schema,
+ get_ipc_use_legacy_format(use_legacy_format),
+ get_ipc_metadata_version(metadata_version)
+ )
+}
+
+get_ipc_metadata_version <- function(x) {
+ input <- x
+ if (is_integerish(x)) {
+ # 4 means "V4", which actually happens to be 3L
+ x <- paste0("V", x)
+ } else if (is.null(x)) {
+ if (identical(Sys.getenv("ARROW_PRE_1_0_METADATA_VERSION"), "1") ||
+ identical(Sys.getenv("ARROW_PRE_0_15_IPC_FORMAT"), "1")) {
+ # PRE_1_0 is specific for this;
+ # if you already set PRE_0_15, PRE_1_0 should be implied
+ x <- "V4"
+ } else {
+ # Take the latest
+ x <- length(MetadataVersion)
+ }
+ }
+ out <- MetadataVersion[[x]]
+ if (is.null(out)) {
+ stop(deparse(input), " is not a valid IPC MetadataVersion", call. = FALSE)
+ }
+ out
+}
+
+get_ipc_use_legacy_format <- function(x) {
+ isTRUE(x %||% identical(Sys.getenv("ARROW_PRE_0_15_IPC_FORMAT"), "1"))
+}
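+
+# A quick illustration of how these helpers resolve their inputs:
+#
+#   get_ipc_metadata_version(4)      # same as MetadataVersion[["V4"]]
+#   get_ipc_metadata_version(NULL)   # latest version, unless the env vars above are set
+#   get_ipc_use_legacy_format(NULL)  # FALSE unless ARROW_PRE_0_15_IPC_FORMAT=1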
diff --git a/src/arrow/r/R/record-batch.R b/src/arrow/r/R/record-batch.R
new file mode 100644
index 000000000..c66ff7fb0
--- /dev/null
+++ b/src/arrow/r/R/record-batch.R
@@ -0,0 +1,193 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+#' @include arrow-package.R
+#' @include array.R
+#' @title RecordBatch class
+#' @description A record batch is a collection of equal-length arrays matching
+#' a particular [Schema]. It is a table-like data structure that is semantically
+#' a sequence of [fields][Field], each a contiguous Arrow [Array].
+#' @usage NULL
+#' @format NULL
+#' @docType class
+#'
+#' @section S3 Methods and Usage:
+#' Record batches are data-frame-like, and many methods you expect to work on
+#' a `data.frame` are implemented for `RecordBatch`. This includes `[`, `[[`,
+#' `$`, `names`, `dim`, `nrow`, `ncol`, `head`, and `tail`. You can also pull
+#' the data from an Arrow record batch into R with `as.data.frame()`. See the
+#' examples.
+#'
+#' A caveat about the `$` method: because `RecordBatch` is an `R6` object,
+#' `$` is also used to access the object's methods (see below). Methods take
+#' precedence over the batch's columns. So, `batch$Slice` would return the
+#' "Slice" method function even if there were a column in the batch called
+#' "Slice".
+#'
+#' @section R6 Methods:
+#' In addition to the more R-friendly S3 methods, a `RecordBatch` object has
+#' the following R6 methods that map onto the underlying C++ methods:
+#'
+#' - `$Equals(other)`: Returns `TRUE` if the `other` record batch is equal
+#' - `$column(i)`: Extract an `Array` by integer position from the batch
+#' - `$column_name(i)`: Get a column's name by integer position
+#' - `$names()`: Get all column names (called by `names(batch)`)
+#' - `$RenameColumns(value)`: Set all column names (called by `names(batch) <- value`)
+#' - `$GetColumnByName(name)`: Extract an `Array` by string name
+#' - `$RemoveColumn(i)`: Drops a column from the batch by integer position
+#' - `$SelectColumns(indices)`: Return a new record batch with a selection of columns, expressed as 0-based integers.
+#' - `$Slice(offset, length = NULL)`: Create a zero-copy view starting at the
+#' indicated integer offset and going for the given length, or to the end
+#' of the table if `NULL`, the default.
+#' - `$Take(i)`: return a `RecordBatch` with rows at positions given by
+#' integers (R vector or Arrow Array) `i`.
+#' - `$Filter(i, keep_na = TRUE)`: return a `RecordBatch` with rows at positions where logical
+#' vector (or Arrow boolean Array) `i` is `TRUE`.
+#' - `$SortIndices(names, descending = FALSE)`: return an `Array` of integer row
+#' positions that can be used to rearrange the `RecordBatch` in ascending or
+#' descending order by the first named column, breaking ties with further named
+#' columns. `descending` can be a logical vector of length one or of the same
+#' length as `names`.
+#' - `$serialize()`: Returns a raw vector suitable for interprocess communication
+#' - `$cast(target_schema, safe = TRUE, options = cast_options(safe))`: Alter
+#' the schema of the record batch.
+#'
+#' There are also some active bindings
+#' - `$num_columns`
+#' - `$num_rows`
+#' - `$schema`
+#' - `$metadata`: Returns the key-value metadata of the `Schema` as a named list.
+#' Modify or replace by assigning in (`batch$metadata <- new_metadata`).
+#' All list elements are coerced to string. See `schema()` for more information.
+#' - `$columns`: Returns a list of `Array`s
+#' @rdname RecordBatch
+#' @name RecordBatch
+#' @export
+RecordBatch <- R6Class("RecordBatch",
+ inherit = ArrowTabular,
+ public = list(
+ column = function(i) RecordBatch__column(self, i),
+ column_name = function(i) RecordBatch__column_name(self, i),
+ names = function() RecordBatch__names(self),
+ RenameColumns = function(value) RecordBatch__RenameColumns(self, value),
+ Equals = function(other, check_metadata = FALSE, ...) {
+ inherits(other, "RecordBatch") && RecordBatch__Equals(self, other, isTRUE(check_metadata))
+ },
+ GetColumnByName = function(name) {
+ assert_that(is.string(name))
+ RecordBatch__GetColumnByName(self, name)
+ },
+ SelectColumns = function(indices) RecordBatch__SelectColumns(self, indices),
+ AddColumn = function(i, new_field, value) {
+ RecordBatch__AddColumn(self, i, new_field, value)
+ },
+ SetColumn = function(i, new_field, value) {
+ RecordBatch__SetColumn(self, i, new_field, value)
+ },
+ RemoveColumn = function(i) RecordBatch__RemoveColumn(self, i),
+ ReplaceSchemaMetadata = function(new) {
+ RecordBatch__ReplaceSchemaMetadata(self, new)
+ },
+ Slice = function(offset, length = NULL) {
+ if (is.null(length)) {
+ RecordBatch__Slice1(self, offset)
+ } else {
+ RecordBatch__Slice2(self, offset, length)
+ }
+ },
+ # Take, Filter, and SortIndices are methods on ArrowTabular
+ serialize = function() ipc___SerializeRecordBatch__Raw(self),
+ to_data_frame = function() {
+ RecordBatch__to_dataframe(self, use_threads = option_use_threads())
+ },
+ cast = function(target_schema, safe = TRUE, ..., options = cast_options(safe, ...)) {
+ assert_is(target_schema, "Schema")
+ assert_that(identical(self$schema$names, target_schema$names), msg = "incompatible schemas")
+ RecordBatch__cast(self, target_schema, options)
+ },
+ invalidate = function() {
+ .Call(`_arrow_RecordBatch__Reset`, self)
+ super$invalidate()
+ },
+ export_to_c = function(array_ptr, schema_ptr) {
+ ExportRecordBatch(self, array_ptr, schema_ptr)
+ }
+ ),
+ active = list(
+ num_columns = function() RecordBatch__num_columns(self),
+ num_rows = function() RecordBatch__num_rows(self),
+ schema = function() RecordBatch__schema(self),
+ columns = function() RecordBatch__columns(self)
+ )
+)
+
+RecordBatch$create <- function(..., schema = NULL) {
+ arrays <- list2(...)
+ if (length(arrays) == 1 && inherits(arrays[[1]], c("raw", "Buffer", "InputStream", "Message"))) {
+ return(RecordBatch$from_message(arrays[[1]], schema))
+ }
+
+ # Else, a list of arrays or data.frames
+ # making sure there are always names
+ if (is.null(names(arrays))) {
+ names(arrays) <- rep_len("", length(arrays))
+ }
+ stopifnot(length(arrays) > 0)
+
+ # If any arrays are length 1, recycle them
+ arrays <- recycle_scalars(arrays)
+
+ # TODO: should this also assert that they're all Arrays?
+ RecordBatch__from_arrays(schema, arrays)
+}
+
+RecordBatch$from_message <- function(obj, schema) {
+ # Message/Buffer readers, previously in read_record_batch()
+ assert_is(schema, "Schema")
+ if (inherits(obj, c("raw", "Buffer"))) {
+ obj <- BufferReader$create(obj)
+ on.exit(obj$close())
+ }
+ if (inherits(obj, "InputStream")) {
+ ipc___ReadRecordBatch__InputStream__Schema(obj, schema)
+ } else {
+ ipc___ReadRecordBatch__Message__Schema(obj, schema)
+ }
+}
+#' @include arrowExports.R
+RecordBatch$import_from_c <- ImportRecordBatch
+
+#' @param ... A `data.frame` or a named set of Arrays or vectors. If given a
+#' mixture of data.frames and vectors, the inputs will be autospliced together
+#' (see examples). Alternatively, you can provide a single Arrow IPC
+#' `InputStream`, `Message`, `Buffer`, or R `raw` object containing a `Buffer`.
+#' @param schema a [Schema], or `NULL` (the default) to infer the schema from
+#' the data in `...`. When providing an Arrow IPC buffer, `schema` is required.
+#' @rdname RecordBatch
+#' @examplesIf arrow_available()
+#' batch <- record_batch(name = rownames(mtcars), mtcars)
+#' dim(batch)
+#' dim(head(batch))
+#' names(batch)
+#' batch$mpg
+#' batch[["cyl"]]
+#' as.data.frame(batch[4:8, c("gear", "hp", "wt")])
+#' @export
+record_batch <- RecordBatch$create
+
+#' @export
+names.RecordBatch <- function(x) x$names()
diff --git a/src/arrow/r/R/reexports-bit64.R b/src/arrow/r/R/reexports-bit64.R
new file mode 100644
index 000000000..c89d2b150
--- /dev/null
+++ b/src/arrow/r/R/reexports-bit64.R
@@ -0,0 +1,22 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+#' @importFrom bit64 print.integer64
+bit64::print.integer64
+
+#' @importFrom bit64 str.integer64
+bit64::str.integer64
diff --git a/src/arrow/r/R/reexports-tidyselect.R b/src/arrow/r/R/reexports-tidyselect.R
new file mode 100644
index 000000000..cd0de2849
--- /dev/null
+++ b/src/arrow/r/R/reexports-tidyselect.R
@@ -0,0 +1,46 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Alias required for help links in downstream packages
+#' @aliases select_helpers
+#' @importFrom tidyselect contains
+#' @export
+tidyselect::contains
+#' @importFrom tidyselect ends_with
+#' @export
+tidyselect::ends_with
+#' @importFrom tidyselect everything
+#' @export
+tidyselect::everything
+#' @importFrom tidyselect matches
+#' @export
+tidyselect::matches
+#' @importFrom tidyselect num_range
+#' @export
+tidyselect::num_range
+#' @importFrom tidyselect one_of
+#' @export
+tidyselect::one_of
+#' @importFrom tidyselect starts_with
+#' @export
+tidyselect::starts_with
+#' @importFrom tidyselect last_col
+#' @export
+tidyselect::last_col
+#' @importFrom tidyselect all_of
+#' @export
+tidyselect::all_of
diff --git a/src/arrow/r/R/scalar.R b/src/arrow/r/R/scalar.R
new file mode 100644
index 000000000..4dedc6c12
--- /dev/null
+++ b/src/arrow/r/R/scalar.R
@@ -0,0 +1,101 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+#' @include arrow-datum.R
+
+#' @title Arrow scalars
+#' @usage NULL
+#' @format NULL
+#' @docType class
+#'
+#' @description A `Scalar` holds a single value of an Arrow type.
+#'
+#' @section Methods:
+#' - `$ToString()`: convert to a string
+#' - `$as_vector()`: convert to an R vector
+#' - `$as_array()`: convert to an Arrow `Array`
+#' - `$Equals(other)`: is this Scalar equal to `other`
+#' - `$ApproxEquals(other)`: is this Scalar approximately equal to `other`
+#' - `$is_valid`: is this Scalar valid
+#' - `$null_count`: number of invalid values (1 or 0)
+#' - `$type`: Scalar type
+#'
+#' @name Scalar
+#' @rdname Scalar
+#' @examplesIf arrow_available()
+#' Scalar$create(pi)
+#' Scalar$create(404)
+#' # If you pass a vector into Scalar$create, you get a list containing your items
+#' Scalar$create(c(1, 2, 3))
+#'
+#' # Comparisons
+#' my_scalar <- Scalar$create(99)
+#' my_scalar$ApproxEquals(Scalar$create(99.00001)) # FALSE
+#' my_scalar$ApproxEquals(Scalar$create(99.000009)) # TRUE
+#' my_scalar$Equals(Scalar$create(99.000009)) # FALSE
+#' my_scalar$Equals(Scalar$create(99L)) # FALSE (types don't match)
+#'
+#' my_scalar$ToString()
+#' @export
+Scalar <- R6Class("Scalar",
+ inherit = ArrowDatum,
+ # TODO: document the methods
+ public = list(
+ ToString = function() Scalar__ToString(self),
+ type_id = function() Scalar__type(self)$id,
+ as_vector = function() Scalar__as_vector(self),
+ as_array = function(length = 1L) MakeArrayFromScalar(self, as.integer(length)),
+ Equals = function(other, ...) {
+ inherits(other, "Scalar") && Scalar__Equals(self, other)
+ },
+ ApproxEquals = function(other, ...) {
+ inherits(other, "Scalar") && Scalar__ApproxEquals(self, other)
+ }
+ ),
+ active = list(
+ is_valid = function() Scalar__is_valid(self),
+ null_count = function() sum(!self$is_valid),
+ type = function() Scalar__type(self)
+ )
+)
+Scalar$create <- function(x, type = NULL) {
+ if (is.null(x)) {
+ x <- vctrs::unspecified(1)
+ } else if (length(x) != 1 && !is.data.frame(x)) {
+ # Wrap in a list type
+ x <- list(x)
+ }
+ Array__GetScalar(Array$create(x, type = type), 0)
+}
+
+#' @rdname array
+#' @usage NULL
+#' @format NULL
+#' @export
+StructScalar <- R6Class("StructScalar",
+ inherit = Scalar,
+ public = list(
+ field = function(i) StructScalar__field(self, i),
+ GetFieldByName = function(name) StructScalar__GetFieldByName(self, name)
+ )
+)
+
+#' @export
+length.Scalar <- function(x) 1L
+
+#' @export
+sort.Scalar <- function(x, decreasing = FALSE, ...) x
diff --git a/src/arrow/r/R/schema.R b/src/arrow/r/R/schema.R
new file mode 100644
index 000000000..c3dfee5f9
--- /dev/null
+++ b/src/arrow/r/R/schema.R
@@ -0,0 +1,330 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+#' @include arrow-package.R
+#' @title Schema class
+#'
+#' @description A `Schema` is a list of [Field]s, which map names to
+#' Arrow [data types][data-type]. Create a `Schema` when you
+#' want to convert an R `data.frame` to Arrow but don't want to rely on the
+#' default mapping of R types to Arrow types, such as when you want to choose a
+#' specific numeric precision, or when creating a [Dataset] and you want to
+#' ensure a specific schema rather than inferring it from the various files.
+#'
+#' Many Arrow objects, including [Table] and [Dataset], have a `$schema` method
+#' (active binding) that lets you access their schema.
+#'
+#' @usage NULL
+#' @format NULL
+#' @docType class
+#' @section Methods:
+#'
+#' - `$ToString()`: convert to a string
+#' - `$field(i)`: returns the field at index `i` (0-based)
+#' - `$GetFieldByName(x)`: returns the field with name `x`
+#' - `$WithMetadata(metadata)`: returns a new `Schema` with the key-value
+#' `metadata` set. Note that all list elements in `metadata` will be coerced
+#' to `character`.
+#'
+#' @section Active bindings:
+#'
+#' - `$names`: returns the field names (called in `names(Schema)`)
+#' - `$num_fields`: returns the number of fields (called in `length(Schema)`)
+#' - `$fields`: returns the list of `Field`s in the `Schema`, suitable for
+#' iterating over
+#' - `$HasMetadata`: logical: does this `Schema` have extra metadata?
+#' - `$metadata`: returns the key-value metadata as a named list.
+#' Modify or replace by assigning in (`sch$metadata <- new_metadata`).
+#' All list elements are coerced to string.
+#'
+#' @section R Metadata:
+#'
+#' When converting a data.frame to an Arrow Table or RecordBatch, attributes
+#' from the `data.frame` are saved alongside tables so that the object can be
+#' reconstructed faithfully in R (e.g. with `as.data.frame()`). This metadata
+#' can exist at the top level of the `data.frame` (e.g. `attributes(df)`), at
+#' the column level (e.g. `attributes(df$col_a)`), or, for list columns only,
+#' at the element level (e.g. `attributes(df[1, "col_a"])`). For example, this
+#' allows for storing `haven` columns in a table and being able to faithfully
+#' re-create them when pulled back into R. This metadata is separate from the
+#' schema (column names and types) which is compatible with other Arrow
+#' clients. The R metadata is only read by R and is ignored by other clients
+#' (e.g. Pandas has its own custom metadata). This metadata is stored in
+#' `$metadata$r`.
+#'
+#' Since Schema metadata keys and values must be strings, this metadata is
+#' saved by serializing R's attribute list structure to a string. Starting in
+#' version 3.0.0, if the serialized metadata exceeds 100Kb in size, it is
+#' compressed by default. To disable this compression (e.g. to keep tables
+#' with large amounts of metadata compatible with Arrow versions before
+#' 3.0.0), set the option `arrow.compress_metadata` to `FALSE`. Files with
+#' compressed metadata are still readable by older versions of arrow, but the
+#' R metadata is dropped.
+#'
+#' @rdname Schema
+#' @name Schema
+#' @examplesIf arrow_available()
+#' df <- data.frame(col1 = 2:4, col2 = c(0.1, 0.3, 0.5))
+#' tab1 <- arrow_table(df)
+#' tab1$schema
+#' tab2 <- arrow_table(df, schema = schema(col1 = int8(), col2 = float32()))
+#' tab2$schema
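+#'
+#' # Illustrative: extra R attributes are preserved in the schema's key-value
+#' # metadata (see the "R Metadata" section)
+#' df2 <- df
+#' attr(df2, "mykey") <- "myvalue"
+#' arrow_table(df2)$schema$metadata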
+#' @export
+Schema <- R6Class("Schema",
+ inherit = ArrowObject,
+ public = list(
+ ToString = function() {
+ fields <- print_schema_fields(self)
+ if (self$HasMetadata) {
+ fields <- paste0(fields, "\n\nSee $metadata for additional Schema metadata")
+ }
+ fields
+ },
+ field = function(i) Schema__field(self, i),
+ GetFieldByName = function(x) Schema__GetFieldByName(self, x),
+ AddField = function(i, field) {
+ assert_is(field, "Field")
+ Schema__AddField(self, i, field)
+ },
+ SetField = function(i, field) {
+ assert_is(field, "Field")
+ Schema__SetField(self, i, field)
+ },
+ RemoveField = function(i) Schema__RemoveField(self, i),
+ serialize = function() Schema__serialize(self),
+ WithMetadata = function(metadata = NULL) {
+ metadata <- prepare_key_value_metadata(metadata)
+ Schema__WithMetadata(self, metadata)
+ },
+ Equals = function(other, check_metadata = FALSE, ...) {
+ inherits(other, "Schema") && Schema__Equals(self, other, isTRUE(check_metadata))
+ },
+ export_to_c = function(ptr) ExportSchema(self, ptr)
+ ),
+ active = list(
+ names = function() {
+ Schema__field_names(self)
+ },
+ num_fields = function() Schema__num_fields(self),
+ fields = function() Schema__fields(self),
+ HasMetadata = function() Schema__HasMetadata(self),
+ metadata = function(new_metadata) {
+ if (missing(new_metadata)) {
+ Schema__metadata(self)
+ } else {
+ # Set the metadata
+ out <- self$WithMetadata(new_metadata)
+ # $WithMetadata returns a new object but we're modifying in place,
+ # so swap in that new C++ object pointer into our R6 object
+ self$set_pointer(out$pointer())
+ self
+ }
+ },
+ r_metadata = function(new) {
+ # Helper for the R metadata that handles the serialization
+ # See also method on ArrowTabular
+ if (missing(new)) {
+ out <- self$metadata$r
+ if (!is.null(out)) {
+ # Can't unserialize NULL
+ out <- .unserialize_arrow_r_metadata(out)
+ }
+ # Returns either NULL or a named list
+ out
+ } else {
+ # Set the R metadata
+ self$metadata$r <- .serialize_arrow_r_metadata(new)
+ self
+ }
+ }
+ )
+)
+Schema$create <- function(...) {
+ .list <- list2(...)
+ if (all(map_lgl(.list, ~ inherits(., "Field")))) {
+ schema_(.list)
+ } else {
+ schema_(.fields(.list))
+ }
+}
+#' @include arrowExports.R
+Schema$import_from_c <- ImportSchema
+
+prepare_key_value_metadata <- function(metadata) {
+ # key-value-metadata must be a named character vector;
+ # this function validates and coerces
+ if (is.null(metadata)) {
+ # NULL to remove metadata, so equivalent to setting an empty list
+ metadata <- empty_named_list()
+ }
+ if (is.null(names(metadata))) {
+ stop(
+ "Key-value metadata must be a named list or character vector",
+ call. = FALSE
+ )
+ }
+ map_chr(metadata, as.character)
+}
+
+print_schema_fields <- function(s) {
+ # Alternative to Schema__ToString that doesn't print metadata
+ paste(map_chr(s$fields, ~ .$ToString()), collapse = "\n")
+}
+
+#' @param ... named list of [data types][data-type], or a list of
+#' [fields][field] defining the fields for the schema
+#' @export
+#' @rdname Schema
+schema <- Schema$create
+
+#' @export
+names.Schema <- function(x) x$names
+
+#' @export
+length.Schema <- function(x) x$num_fields
+
+#' @export
+`[[.Schema` <- function(x, i, ...) {
+ if (is.character(i)) {
+ x$GetFieldByName(i)
+ } else if (is.numeric(i)) {
+ x$field(i - 1)
+ } else {
+ stop("'i' must be character or numeric, not ", class(i), call. = FALSE)
+ }
+}
+
+#' @export
+`[[<-.Schema` <- function(x, i, value) {
+ assert_that(length(i) == 1)
+ if (is.character(i)) {
+ field_names <- names(x)
+ if (anyDuplicated(field_names)) {
+ stop("Cannot update field by name with duplicates", call. = FALSE)
+ }
+
+ # If i is character, it's the field name
+ if (!is.null(value) && !inherits(value, "Field")) {
+ value <- field(i, as_type(value, "value"))
+ }
+
+ # No match means we're adding to the end
+ i <- match(i, field_names, nomatch = length(field_names) + 1L)
+ } else {
+ assert_that(is.numeric(i), !is.na(i), i > 0)
+ # If i is numeric and we have a type,
+ # we need to grab the existing field name for the new one
+ if (!is.null(value) && !inherits(value, "Field")) {
+ value <- field(names(x)[i], as_type(value, "value"))
+ }
+ }
+
+ i <- as.integer(i - 1L)
+ if (i >= length(x)) {
+ if (!is.null(value)) {
+ x <- x$AddField(i, value)
+ }
+ } else if (is.null(value)) {
+ x <- x$RemoveField(i)
+ } else {
+ x <- x$SetField(i, value)
+ }
+ x
+}
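+
+# Illustrative usage (sketch) of the replacement method above:
+#   sch <- schema(a = int32(), b = utf8())
+#   sch[["b"]] <- float64()  # replace the type of field "b"
+#   sch[["c"]] <- bool()     # no matching name, so a new field is appended
+#   sch[["a"]] <- NULL       # remove field "a"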
+
+#' @export
+`$<-.Schema` <- `$<-.ArrowTabular`
+
+#' @export
+`[.Schema` <- function(x, i, ...) {
+ if (is.logical(i)) {
+ i <- rep_len(i, length(x)) # For R recycling behavior
+ i <- which(i)
+ }
+ if (is.numeric(i)) {
+ if (all(i < 0)) {
+ # in R, negative i means "everything but i"
+ i <- setdiff(seq_len(length(x)), -1 * i)
+ }
+ }
+ fields <- map(i, ~ x[[.]])
+ invalid <- map_lgl(fields, is.null)
+ if (any(invalid)) {
+ stop(
+ "Invalid field name", ifelse(sum(invalid) > 1, "s: ", ": "),
+ oxford_paste(i[invalid]),
+ call. = FALSE
+ )
+ }
+ schema_(fields)
+}
+
+#' @export
+`$.Schema` <- function(x, name, ...) {
+ assert_that(is.string(name))
+ if (name %in% ls(x)) {
+ get(name, x)
+ } else {
+ x$GetFieldByName(name)
+ }
+}
+
+#' @export
+as.list.Schema <- function(x, ...) x$fields
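+
+# Illustrative usage (sketch) of the extraction methods above:
+#   sch <- schema(a = int32(), b = utf8())
+#   sch$a             # the Field "a: int32", via GetFieldByName()
+#   sch[["b"]]        # the Field "b: string"
+#   sch[c("a", "b")]  # a new Schema containing both fields
+#   sch[-1]           # a new Schema without the first field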
+
+#' read a Schema from a stream
+#'
+#' @param stream a `Message`, `InputStream`, or `Buffer`
+#' @param ... currently ignored
+#' @return A [Schema]
+#' @export
+read_schema <- function(stream, ...) {
+ if (inherits(stream, "Message")) {
+ return(ipc___ReadSchema_Message(stream))
+ } else {
+ if (!inherits(stream, "InputStream")) {
+ stream <- BufferReader$create(stream)
+ on.exit(stream$close())
+ }
+ return(ipc___ReadSchema_InputStream(stream))
+ }
+}
+
+#' Combine and harmonize schemas
+#'
+#' @param ... [Schema]s to unify
+#' @param schemas Alternatively, a list of schemas
+#' @return A `Schema` with the union of fields contained in the inputs, or
+#' `NULL` if any of `schemas` is `NULL`
+#' @export
+#' @examplesIf arrow_available()
+#' a <- schema(b = double(), c = bool())
+#' z <- schema(b = double(), k = utf8())
+#' unify_schemas(a, z)
+unify_schemas <- function(..., schemas = list(...)) {
+ if (any(vapply(schemas, is.null, TRUE))) {
+ return(NULL)
+ }
+ arrow__UnifySchemas(schemas)
+}
+
+#' @export
+print.arrow_r_metadata <- function(x, ...) {
+ utils::str(x)
+ utils::str(.unserialize_arrow_r_metadata(x))
+ invisible(x)
+}
diff --git a/src/arrow/r/R/table.R b/src/arrow/r/R/table.R
new file mode 100644
index 000000000..5ae87f7e3
--- /dev/null
+++ b/src/arrow/r/R/table.R
@@ -0,0 +1,170 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+#' @include record-batch.R
+#' @title Table class
+#' @description A Table is a sequence of [chunked arrays][ChunkedArray]. Tables
+#' have a similar interface to [record batches][RecordBatch], but they can be
+#' composed from multiple record batches or chunked arrays.
+#' @usage NULL
+#' @format NULL
+#' @docType class
+#'
+#' @section S3 Methods and Usage:
+#' Tables are data-frame-like, and many methods you expect to work on
+#' a `data.frame` are implemented for `Table`. This includes `[`, `[[`,
+#' `$`, `names`, `dim`, `nrow`, `ncol`, `head`, and `tail`. You can also pull
+#' the data from an Arrow table into R with `as.data.frame()`. See the
+#' examples.
+#'
+#' A caveat about the `$` method: because `Table` is an `R6` object,
+#' `$` is also used to access the object's methods (see below). Methods take
+#' precedence over the table's columns. So, `tab$Slice` would return the
+#' "Slice" method function even if there were a column in the table called
+#' "Slice".
+#'
+#' @section R6 Methods:
+#' In addition to the more R-friendly S3 methods, a `Table` object has
+#' the following R6 methods that map onto the underlying C++ methods:
+#'
+#' - `$column(i)`: Extract a `ChunkedArray` by integer position from the table
+#' - `$ColumnNames()`: Get all column names (called by `names(tab)`)
+#' - `$RenameColumns(value)`: Set all column names (called by `names(tab) <- value`)
+#' - `$GetColumnByName(name)`: Extract a `ChunkedArray` by string name
+#' - `$field(i)`: Extract a `Field` from the table schema by integer position
+#' - `$SelectColumns(indices)`: Return a new `Table` with the specified columns, expressed as 0-based integers.
+#' - `$Slice(offset, length = NULL)`: Create a zero-copy view starting at the
+#' indicated integer offset and going for the given length, or to the end
+#' of the table if `NULL`, the default.
+#' - `$Take(i)`: return a `Table` with rows at positions given by
+#' integers `i`. If `i` is an Arrow `Array` or `ChunkedArray`, it will be
+#' coerced to an R vector before taking.
+#' - `$Filter(i, keep_na = TRUE)`: return a `Table` with rows at positions where the
+#' logical vector or Arrow boolean-type `(Chunked)Array` `i` is `TRUE`.
+#' - `$SortIndices(names, descending = FALSE)`: return an `Array` of integer row
+#' positions that can be used to rearrange the `Table` in ascending or descending
+#' order by the first named column, breaking ties with further named columns.
+#' `descending` can be a logical vector of length one or of the same length as
+#' `names`.
+#' - `$serialize(output_stream, ...)`: Write the table to the given
+#' [OutputStream]
+#' - `$cast(target_schema, safe = TRUE, options = cast_options(safe))`: Alter
+#' the schema of the table.
+#'
+#' There are also some active bindings:
+#' - `$num_columns`
+#' - `$num_rows`
+#' - `$schema`
+#' - `$metadata`: Returns the key-value metadata of the `Schema` as a named list.
+#' Modify or replace by assigning in (`tab$metadata <- new_metadata`).
+#' All list elements are coerced to string. See `schema()` for more information.
+#' - `$columns`: Returns a list of `ChunkedArray`s
+#' @rdname Table
+#' @name Table
+#' @export
+Table <- R6Class("Table",
+ inherit = ArrowTabular,
+ public = list(
+ column = function(i) Table__column(self, i),
+ ColumnNames = function() Table__ColumnNames(self),
+ RenameColumns = function(value) Table__RenameColumns(self, value),
+ GetColumnByName = function(name) {
+ assert_is(name, "character")
+ assert_that(length(name) == 1)
+ Table__GetColumnByName(self, name)
+ },
+ RemoveColumn = function(i) Table__RemoveColumn(self, i),
+ AddColumn = function(i, new_field, value) Table__AddColumn(self, i, new_field, value),
+ SetColumn = function(i, new_field, value) Table__SetColumn(self, i, new_field, value),
+ ReplaceSchemaMetadata = function(new) {
+ Table__ReplaceSchemaMetadata(self, new)
+ },
+ field = function(i) Table__field(self, i),
+ serialize = function(output_stream, ...) write_table(self, output_stream, ...),
+ to_data_frame = function() {
+ Table__to_dataframe(self, use_threads = option_use_threads())
+ },
+ cast = function(target_schema, safe = TRUE, ..., options = cast_options(safe, ...)) {
+ assert_is(target_schema, "Schema")
+ assert_that(identical(self$schema$names, target_schema$names), msg = "incompatible schemas")
+ Table__cast(self, target_schema, options)
+ },
+ SelectColumns = function(indices) Table__SelectColumns(self, indices),
+ Slice = function(offset, length = NULL) {
+ if (is.null(length)) {
+ Table__Slice1(self, offset)
+ } else {
+ Table__Slice2(self, offset, length)
+ }
+ },
+ # Take, Filter, and SortIndices are methods on ArrowTabular
+ Equals = function(other, check_metadata = FALSE, ...) {
+ inherits(other, "Table") && Table__Equals(self, other, isTRUE(check_metadata))
+ },
+ Validate = function() Table__Validate(self),
+ ValidateFull = function() Table__ValidateFull(self),
+ invalidate = function() {
+ .Call(`_arrow_Table__Reset`, self)
+ super$invalidate()
+ }
+ ),
+ active = list(
+ num_columns = function() Table__num_columns(self),
+ num_rows = function() Table__num_rows(self),
+ schema = function() Table__schema(self),
+ columns = function() Table__columns(self)
+ )
+)
+
+Table$create <- function(..., schema = NULL) {
+ dots <- list2(...)
+ # making sure there are always names
+ if (is.null(names(dots))) {
+ names(dots) <- rep_len("", length(dots))
+ }
+ stopifnot(length(dots) > 0)
+
+ if (all_record_batches(dots)) {
+ return(Table__from_record_batches(dots, schema))
+ }
+
+ # If any arrays are length 1, recycle them
+ dots <- recycle_scalars(dots)
+
+ Table__from_dots(dots, schema, option_use_threads())
+}
+
+#' @export
+names.Table <- function(x) x$ColumnNames()
+
+#' @param ... A `data.frame` or a named set of Arrays or vectors. If given a
+#' mixture of data.frames and named vectors, the inputs will be autospliced together
+#' (see examples). Alternatively, you can provide a single Arrow IPC
+#' `InputStream`, `Message`, `Buffer`, or R `raw` object containing a `Buffer`.
+#' @param schema a [Schema], or `NULL` (the default) to infer the schema from
+#' the data in `...`. When providing an Arrow IPC buffer, `schema` is required.
+#' @rdname Table
+#' @examplesIf arrow_available()
+#' tbl <- arrow_table(name = rownames(mtcars), mtcars)
+#' dim(tbl)
+#' dim(head(tbl))
+#' names(tbl)
+#' tbl$mpg
+#' tbl[["cyl"]]
+#' as.data.frame(tbl[4:8, c("gear", "hp", "wt")])
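+#'
+#' # Further illustration: zero-copy slicing and 0-based column selection
+#' tbl$Slice(0, 5)$num_rows
+#' tbl$SelectColumns(c(0L, 2L))$ColumnNames()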
+#' @export
+arrow_table <- Table$create
diff --git a/src/arrow/r/R/type.R b/src/arrow/r/R/type.R
new file mode 100644
index 000000000..4ef7cefb5
--- /dev/null
+++ b/src/arrow/r/R/type.R
@@ -0,0 +1,541 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+#' @include arrow-package.R
+#' @title class arrow::DataType
+#'
+#' @usage NULL
+#' @format NULL
+#' @docType class
+#'
+#' @section Methods:
+#'
+#' TODO
+#'
+#' @rdname DataType
+#' @name DataType
+DataType <- R6Class("DataType",
+ inherit = ArrowObject,
+ public = list(
+ ToString = function() {
+ DataType__ToString(self)
+ },
+ Equals = function(other, ...) {
+ inherits(other, "DataType") && DataType__Equals(self, other)
+ },
+ fields = function() {
+ DataType__fields(self)
+ },
+ export_to_c = function(ptr) ExportType(self, ptr)
+ ),
+ active = list(
+ id = function() DataType__id(self),
+ name = function() DataType__name(self),
+ num_fields = function() DataType__num_fields(self)
+ )
+)
+
+#' @include arrowExports.R
+DataType$import_from_c <- ImportType
+
+INTEGER_TYPES <- as.character(outer(c("uint", "int"), c(8, 16, 32, 64), paste0))
+FLOAT_TYPES <- c("float16", "float32", "float64", "halffloat", "float", "double")
+
+#' infer the arrow Array type from an R vector
+#'
+#' @param x an R vector
+#'
+#' @return an arrow logical type
+#' @examplesIf arrow_available()
+#' type(1:10)
+#' type(1L:10L)
+#' type(c(1, 1.5, 2))
+#' type(c("A", "B", "C"))
+#' type(mtcars)
+#' type(Sys.Date())
+#' @export
+type <- function(x) UseMethod("type")
+
+#' @export
+type.default <- function(x) Array__infer_type(x)
+
+#' @export
+type.ArrowDatum <- function(x) x$type
+
+#----- metadata
+
+#' @title class arrow::FixedWidthType
+#'
+#' @usage NULL
+#' @format NULL
+#' @docType class
+#'
+#' @section Methods:
+#'
+#' TODO
+#'
+#' @rdname FixedWidthType
+#' @name FixedWidthType
+FixedWidthType <- R6Class("FixedWidthType",
+ inherit = DataType,
+ active = list(
+ bit_width = function() FixedWidthType__bit_width(self)
+ )
+)
+
+Int8 <- R6Class("Int8", inherit = FixedWidthType)
+Int16 <- R6Class("Int16", inherit = FixedWidthType)
+Int32 <- R6Class("Int32", inherit = FixedWidthType)
+Int64 <- R6Class("Int64", inherit = FixedWidthType)
+UInt8 <- R6Class("UInt8", inherit = FixedWidthType)
+UInt16 <- R6Class("UInt16", inherit = FixedWidthType)
+UInt32 <- R6Class("UInt32", inherit = FixedWidthType)
+UInt64 <- R6Class("UInt64", inherit = FixedWidthType)
+Float16 <- R6Class("Float16", inherit = FixedWidthType)
+Float32 <- R6Class("Float32", inherit = FixedWidthType)
+Float64 <- R6Class("Float64", inherit = FixedWidthType)
+Boolean <- R6Class("Boolean", inherit = FixedWidthType)
+Utf8 <- R6Class("Utf8", inherit = DataType)
+LargeUtf8 <- R6Class("LargeUtf8", inherit = DataType)
+Binary <- R6Class("Binary", inherit = DataType)
+FixedSizeBinary <- R6Class("FixedSizeBinary", inherit = FixedWidthType)
+LargeBinary <- R6Class("LargeBinary", inherit = DataType)
+
+DateType <- R6Class("DateType",
+ inherit = FixedWidthType,
+ public = list(
+ unit = function() DateType__unit(self)
+ )
+)
+Date32 <- R6Class("Date32", inherit = DateType)
+Date64 <- R6Class("Date64", inherit = DateType)
+
+TimeType <- R6Class("TimeType",
+ inherit = FixedWidthType,
+ public = list(
+ unit = function() TimeType__unit(self)
+ )
+)
+Time32 <- R6Class("Time32", inherit = TimeType)
+Time64 <- R6Class("Time64", inherit = TimeType)
+
+Null <- R6Class("Null", inherit = DataType)
+
+Timestamp <- R6Class("Timestamp",
+ inherit = FixedWidthType,
+ public = list(
+ timezone = function() TimestampType__timezone(self),
+ unit = function() TimestampType__unit(self)
+ )
+)
+
+DecimalType <- R6Class("DecimalType",
+ inherit = FixedWidthType,
+ public = list(
+ precision = function() DecimalType__precision(self),
+ scale = function() DecimalType__scale(self)
+ )
+)
+Decimal128Type <- R6Class("Decimal128Type", inherit = DecimalType)
+
+NestedType <- R6Class("NestedType", inherit = DataType)
+
+#' Apache Arrow data types
+#'
+#' These functions create type objects corresponding to Arrow types. Use them
+#' when defining a [schema()] or as inputs to other types, like `struct`. Most
+#' of these functions don't take arguments, but a few do.
+#'
+#' A few functions have aliases:
+#'
+#' * `utf8()` and `string()`
+#' * `float16()` and `halffloat()`
+#' * `float32()` and `float()`
+#' * `bool()` and `boolean()`
+#' * When called inside an `arrow` function, such as `schema()` or `cast()`,
+#' `double()` is also supported as a way of creating a `float64()`
+#'
+#' `date32()` creates a date type with a "day" unit, like the R `Date`
+#' class. `date64()` is also a date type but has a "ms" unit.
+#'
+#' `uint32` (32-bit unsigned integer), `uint64` (64-bit unsigned integer), and
+#' `int64` (64-bit signed integer) types may contain values that exceed the
+#' range of R's `integer` type (32-bit signed integer). When these arrow objects
+#' are translated to R objects, `uint32` and `uint64` are converted to `double`
+#' ("numeric") and `int64` is converted to `bit64::integer64`. For `int64`
+#' types, this conversion can be disabled (so that `int64` always yields a
+#' `bit64::integer64` object) by setting `options(arrow.int64_downcast =
+#' FALSE)`.
+#'
+#' @param unit For time/timestamp types, the time unit. `time32()` can take
+#' either "s" or "ms", while `time64()` can be "us" or "ns". `timestamp()` can
+#' take any of those four values.
+#' @param timezone For `timestamp()`, an optional time zone string.
+#' @param byte_width byte width for `FixedSizeBinary` type.
+#' @param list_size list size for `FixedSizeList` type.
+#' @param precision For `decimal()`, precision
+#' @param scale For `decimal()`, scale
+#' @param type For `list_of()`, a data type to make a list-of-type
+#' @param ... For `struct()`, a named list of types to define the struct columns
+#'
+#' @name data-type
+#' @return An Arrow type object inheriting from DataType.
+#' @export
+#' @seealso [dictionary()] for creating a dictionary (factor-like) type.
+#' @examplesIf arrow_available()
+#' bool()
+#' struct(a = int32(), b = double())
+#' timestamp("ms", timezone = "CEST")
+#' time64("ns")
+int8 <- function() Int8__initialize()
+
+#' @rdname data-type
+#' @export
+int16 <- function() Int16__initialize()
+
+#' @rdname data-type
+#' @export
+int32 <- function() Int32__initialize()
+
+#' @rdname data-type
+#' @export
+int64 <- function() Int64__initialize()
+
+#' @rdname data-type
+#' @export
+uint8 <- function() UInt8__initialize()
+
+#' @rdname data-type
+#' @export
+uint16 <- function() UInt16__initialize()
+
+#' @rdname data-type
+#' @export
+uint32 <- function() UInt32__initialize()
+
+#' @rdname data-type
+#' @export
+uint64 <- function() UInt64__initialize()
+
+#' @rdname data-type
+#' @export
+float16 <- function() Float16__initialize()
+
+#' @rdname data-type
+#' @export
+halffloat <- float16
+
+#' @rdname data-type
+#' @export
+float32 <- function() Float32__initialize()
+
+#' @rdname data-type
+#' @export
+float <- float32
+
+#' @rdname data-type
+#' @export
+float64 <- function() Float64__initialize()
+
+#' @rdname data-type
+#' @export
+boolean <- function() Boolean__initialize()
+
+#' @rdname data-type
+#' @export
+bool <- boolean
+
+#' @rdname data-type
+#' @export
+utf8 <- function() Utf8__initialize()
+
+#' @rdname data-type
+#' @export
+large_utf8 <- function() LargeUtf8__initialize()
+
+#' @rdname data-type
+#' @export
+binary <- function() Binary__initialize()
+
+#' @rdname data-type
+#' @export
+large_binary <- function() LargeBinary__initialize()
+
+#' @rdname data-type
+#' @export
+fixed_size_binary <- function(byte_width) FixedSizeBinary__initialize(byte_width)
+
+#' @rdname data-type
+#' @export
+string <- utf8
+
+#' @rdname data-type
+#' @export
+date32 <- function() Date32__initialize()
+
+#' @rdname data-type
+#' @export
+date64 <- function() Date64__initialize()
+
+#' @rdname data-type
+#' @export
+time32 <- function(unit = c("ms", "s")) {
+ if (is.character(unit)) {
+ unit <- match.arg(unit)
+ }
+ unit <- make_valid_time_unit(unit, valid_time32_units)
+ Time32__initialize(unit)
+}
+
+valid_time32_units <- c(
+ "ms" = TimeUnit$MILLI,
+ "s" = TimeUnit$SECOND
+)
+
+valid_time64_units <- c(
+ "ns" = TimeUnit$NANO,
+ "us" = TimeUnit$MICRO
+)
+
+make_valid_time_unit <- function(unit, valid_units) {
+ if (is.character(unit)) {
+ unit <- valid_units[match.arg(unit, choices = names(valid_units))]
+ }
+ if (is.numeric(unit)) {
+ # Allow non-integer input for convenience
+ unit <- as.integer(unit)
+ } else {
+ stop('"unit" should be one of ', oxford_paste(names(valid_units), "or"), call. = FALSE)
+ }
+ if (!(unit %in% valid_units)) {
+ stop('"unit" should be one of ', oxford_paste(valid_units, "or"), call. = FALSE)
+ }
+ unit
+}
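+
+# Illustrative (sketch): make_valid_time_unit("ms", valid_time32_units) returns
+# the integer code TimeUnit$MILLI, while an unrecognized unit such as
+# make_valid_time_unit("ns", valid_time32_units) raises an error.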
+
+#' @rdname data-type
+#' @export
+time64 <- function(unit = c("ns", "us")) {
+ if (is.character(unit)) {
+ unit <- match.arg(unit)
+ }
+ unit <- make_valid_time_unit(unit, valid_time64_units)
+ Time64__initialize(unit)
+}
+
+#' @rdname data-type
+#' @export
+null <- function() Null__initialize()
+
+#' @rdname data-type
+#' @export
+timestamp <- function(unit = c("s", "ms", "us", "ns"), timezone = "") {
+ if (is.character(unit)) {
+ unit <- match.arg(unit)
+ }
+ unit <- make_valid_time_unit(unit, c(valid_time64_units, valid_time32_units))
+ assert_that(is.string(timezone))
+ Timestamp__initialize(unit, timezone)
+}
+
+#' @rdname data-type
+#' @export
+decimal <- function(precision, scale) {
+ if (is.numeric(precision)) {
+ precision <- as.integer(precision)
+ } else {
+ stop('"precision" must be an integer', call. = FALSE)
+ }
+ if (is.numeric(scale)) {
+ scale <- as.integer(scale)
+ } else {
+ stop('"scale" must be an integer', call. = FALSE)
+ }
+ Decimal128Type__initialize(precision, scale)
+}
+
+StructType <- R6Class("StructType",
+ inherit = NestedType,
+ public = list(
+ GetFieldByName = function(name) StructType__GetFieldByName(self, name),
+ GetFieldIndex = function(name) StructType__GetFieldIndex(self, name)
+ )
+)
+StructType$create <- function(...) struct__(.fields(list(...)))
+
+#' @rdname data-type
+#' @export
+struct <- StructType$create
+
+ListType <- R6Class("ListType",
+ inherit = NestedType,
+ active = list(
+ value_field = function() ListType__value_field(self),
+ value_type = function() ListType__value_type(self)
+ )
+)
+
+#' @rdname data-type
+#' @export
+list_of <- function(type) list__(type)
+
+LargeListType <- R6Class("LargeListType",
+ inherit = NestedType,
+ active = list(
+ value_field = function() LargeListType__value_field(self),
+ value_type = function() LargeListType__value_type(self)
+ )
+)
+
+#' @rdname data-type
+#' @export
+large_list_of <- function(type) large_list__(type)
+
+#' @rdname data-type
+#' @export
+FixedSizeListType <- R6Class("FixedSizeListType",
+ inherit = NestedType,
+ active = list(
+ value_field = function() FixedSizeListType__value_field(self),
+ value_type = function() FixedSizeListType__value_type(self),
+ list_size = function() FixedSizeListType__list_size(self)
+ )
+)
+
+#' @rdname data-type
+#' @export
+fixed_size_list_of <- function(type, list_size) fixed_size_list__(type, list_size)
+
+as_type <- function(type, name = "type") {
+ # magic so we don't have to mask base::double()
+ if (identical(type, double())) {
+ type <- float64()
+ }
+ if (!inherits(type, "DataType")) {
+ stop(name, " must be a DataType, not ", class(type), call. = FALSE)
+ }
+ type
+}
+
+canonical_type_str <- function(type_str) {
+ # canonicalizes data type strings, converting data type function names and
+ # aliases to match the strings returned by DataType$ToString()
+ assert_that(is.string(type_str))
+ if (grepl("[([<]", type_str)) {
+ stop("Cannot interpret string representations of data types that have parameters", call. = FALSE)
+ }
+ switch(type_str,
+ int8 = "int8",
+ int16 = "int16",
+ int32 = "int32",
+ int64 = "int64",
+ uint8 = "uint8",
+ uint16 = "uint16",
+ uint32 = "uint32",
+ uint64 = "uint64",
+ float16 = "halffloat",
+ halffloat = "halffloat",
+ float32 = "float",
+ float = "float",
+ float64 = "double",
+ double = "double",
+ boolean = "bool",
+ bool = "bool",
+ utf8 = "string",
+ large_utf8 = "large_string",
+ large_string = "large_string",
+ binary = "binary",
+ large_binary = "large_binary",
+ fixed_size_binary = "fixed_size_binary",
+ string = "string",
+ date32 = "date32",
+ date64 = "date64",
+ time32 = "time32",
+ time64 = "time64",
+ null = "null",
+ timestamp = "timestamp",
+ decimal = "decimal128",
+ struct = "struct",
+ list_of = "list",
+ list = "list",
+ large_list_of = "large_list",
+ large_list = "large_list",
+ fixed_size_list_of = "fixed_size_list",
+ fixed_size_list = "fixed_size_list",
+ stop("Unrecognized string representation of data type", call. = FALSE)
+ )
+}
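+
+# Illustrative usage (sketch):
+#   canonical_type_str("float64")        # "double"
+#   canonical_type_str("utf8")           # "string"
+#   canonical_type_str("decimal(10, 2)") # error: parameterized strings are rejected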
+
+# vctrs support -----------------------------------------------------------
+str_dup <- function(x, times) {
+ paste0(rep(x, times = times), collapse = "")
+}
+
+indent <- function(x, n) {
+ pad <- str_dup(" ", n)
+ sapply(x, gsub, pattern = "(\n+)", replacement = paste0("\\1", pad))
+}
+
+#' @importFrom vctrs vec_ptype_full vec_ptype_abbr
+#' @export
+vec_ptype_full.arrow_fixed_size_binary <- function(x, ...) {
+ paste0("fixed_size_binary<", attr(x, "byte_width"), ">")
+}
+
+#' @export
+vec_ptype_full.arrow_list <- function(x, ...) {
+ param <- vec_ptype_full(attr(x, "ptype"))
+ if (grepl("\n", param)) {
+ param <- paste0(indent(paste0("\n", param), 2), "\n")
+ }
+ paste0("list<", param, ">")
+}
+
+#' @export
+vec_ptype_full.arrow_large_list <- function(x, ...) {
+ param <- vec_ptype_full(attr(x, "ptype"))
+ if (grepl("\n", param)) {
+ param <- paste0(indent(paste0("\n", param), 2), "\n")
+ }
+ paste0("large_list<", param, ">")
+}
+
+#' @export
+vec_ptype_full.arrow_fixed_size_list <- function(x, ...) {
+ param <- vec_ptype_full(attr(x, "ptype"))
+ if (grepl("\n", param)) {
+ param <- paste0(indent(paste0("\n", param), 2), "\n")
+ }
+ paste0("fixed_size_list<", param, ", ", attr(x, "list_size"), ">")
+}
+
+#' @export
+vec_ptype_abbr.arrow_fixed_size_binary <- function(x, ...) {
+ vec_ptype_full(x, ...)
+}
+#' @export
+vec_ptype_abbr.arrow_list <- function(x, ...) {
+ vec_ptype_full(x, ...)
+}
+#' @export
+vec_ptype_abbr.arrow_large_list <- function(x, ...) {
+ vec_ptype_full(x, ...)
+}
+#' @export
+vec_ptype_abbr.arrow_fixed_size_list <- function(x, ...) {
+ vec_ptype_full(x, ...)
+}
diff --git a/src/arrow/r/R/util.R b/src/arrow/r/R/util.R
new file mode 100644
index 000000000..9e3ade6a9
--- /dev/null
+++ b/src/arrow/r/R/util.R
@@ -0,0 +1,195 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# for compatibility with R versions earlier than 4.0.0
+if (!exists("deparse1")) {
+ deparse1 <- function(expr, collapse = " ", width.cutoff = 500L, ...) {
+ paste(deparse(expr, width.cutoff, ...), collapse = collapse)
+ }
+}
+
+# for compatibility with R versions earlier than 3.6.0
+if (!exists("str2lang")) {
+ str2lang <- function(s) {
+ parse(text = s, keep.source = FALSE)[[1]]
+ }
+}
+
+oxford_paste <- function(x, conjunction = "and", quote = TRUE) {
+ if (quote && is.character(x)) {
+ x <- paste0('"', x, '"')
+ }
+ if (length(x) < 2) {
+ return(x)
+ }
+ x[length(x)] <- paste(conjunction, x[length(x)])
+ if (length(x) > 2) {
+ return(paste(x, collapse = ", "))
+ } else {
+ return(paste(x, collapse = " "))
+ }
+}
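+
+# Illustrative usage (sketch):
+#   oxford_paste(c("a", "b", "c"))  # '"a", "b", and "c"'
+#   oxford_paste(c("x", "y"), "or") # '"x" or "y"'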
+
+assert_is <- function(object, class) {
+ msg <- paste(substitute(object), "must be a", oxford_paste(class, "or"))
+ assert_that(inherits(object, class), msg = msg)
+}
+
+assert_is_list_of <- function(object, class) {
+ msg <- paste(substitute(object), "must be a list of", oxford_paste(class, "or"))
+ assert_that(is_list_of(object, class), msg = msg)
+}
+
+is_list_of <- function(object, class) {
+ is.list(object) && all(map_lgl(object, ~ inherits(., class)))
+}
+
+empty_named_list <- function() structure(list(), .Names = character(0))
+
+r_symbolic_constants <- c(
+ "pi", "TRUE", "FALSE", "NULL", "Inf", "NA", "NaN",
+ "NA_integer_", "NA_real_", "NA_complex_", "NA_character_"
+)
+
+is_function <- function(expr, name) {
+ # We could have a quosure here if we have an expression like `sum({{ var }})`
+ if (is_quosure(expr)) {
+ expr <- quo_get_expr(expr)
+ }
+ if (!is.call(expr)) {
+ return(FALSE)
+ } else {
+ if (deparse(expr[[1]]) == name) {
+ return(TRUE)
+ }
+ out <- lapply(expr, is_function, name)
+ }
+ any(map_lgl(out, isTRUE))
+}
+
+all_funs <- function(expr) {
+ # It is not sufficient to simply do: setdiff(all.names, all.vars)
+ # here because that would fail to return the names of functions that
+ # share names with variables.
+ # To preserve duplicates, call `all.names()` not `all_names()` here.
+ if (is_quosure(expr)) {
+ expr <- quo_get_expr(expr)
+ }
+ names <- all.names(expr)
+ names[map_lgl(names, ~ is_function(expr, .))]
+}
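+
+# Illustrative (sketch): all_funs(quote(sum(x) + mean(y))) returns
+# c("+", "sum", "mean"), keeping a function's name even when a variable
+# shares that name.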
+
+all_vars <- function(expr) {
+ setdiff(all.vars(expr), r_symbolic_constants)
+}
+
+all_names <- function(expr) {
+ setdiff(all.names(expr), r_symbolic_constants)
+}
+
+is_constant <- function(expr) {
+ length(all_vars(expr)) == 0
+}
+
+read_compressed_error <- function(e) {
+ msg <- conditionMessage(e)
+ if (grepl(" codec ", msg)) {
+ compression <- sub(".*Support for codec '(.*)'.*", "\\1", msg)
+ e$message <- paste0(
+ msg,
+ "\nIn order to read this file, you will need to reinstall arrow with additional features enabled.",
+ "\nSet one of these environment variables before installing:",
+ sprintf("\n\n * LIBARROW_MINIMAL=false (for all optional features, including '%s')", compression),
+ sprintf("\n * ARROW_WITH_%s=ON (for just '%s')", toupper(compression), compression),
+ "\n\nSee https://arrow.apache.org/docs/r/articles/install.html for details"
+ )
+ }
+ stop(e)
+}
+
+handle_parquet_io_error <- function(e, format) {
+ msg <- conditionMessage(e)
+ if (grepl("Parquet magic bytes not found in footer", msg) && length(format) > 1 && is_character(format)) {
+ # If length(format) > 1, that means it is (almost certainly) the default/not specified value
+ # so let the user know that they should specify the actual (not parquet) format
+ abort(c(
+ msg,
+ i = "Did you mean to specify a 'format' other than the default (parquet)?"
+ ))
+ }
+ stop(e)
+}
+
+is_writable_table <- function(x) {
+ inherits(x, c("data.frame", "ArrowTabular"))
+}
+
+# This attribute is used when is_writable is passed into assert_that, and allows
+# the call to form part of the error message when is_writable is FALSE
+attr(is_writable_table, "fail") <- function(call, env) {
+ paste0(
+ deparse(call$x),
+ " must be an object of class 'data.frame', 'RecordBatch', or 'Table', not '",
+ class(env[[deparse(call$x)]])[[1]],
+ "'."
+ )
+}
+
+#' Recycle scalar values in a list of arrays
+#'
+#' @param arrays List of arrays
+#' @return List of arrays with any vector/Scalar/Array/ChunkedArray values of length 1 recycled
+#' @keywords internal
+recycle_scalars <- function(arrays) {
+ # Get lengths of items in arrays
+ arr_lens <- map_int(arrays, NROW)
+
+ is_scalar <- arr_lens == 1
+
+ if (length(arrays) > 1 && any(is_scalar) && !all(is_scalar)) {
+
+ # Recycling not supported for tibbles and data.frames
+ if (all(map_lgl(arrays, ~ inherits(.x, "data.frame")))) {
+ abort(c(
+ "All input tibbles or data.frames must have the same number of rows",
+ x = paste(
+ "Number of rows in longest and shortest inputs:",
+ oxford_paste(c(max(arr_lens), min(arr_lens)))
+ )
+ ))
+ }
+
+ max_array_len <- max(arr_lens)
+ arrays[is_scalar] <- lapply(arrays[is_scalar], repeat_value_as_array, max_array_len)
+ }
+ arrays
+}
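+
+# Illustrative (sketch): in Table$create(x = 1:3, y = 0), recycle_scalars()
+# repeats the length-1 value so that y becomes an Array of length 3 to match x.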
+
+#' Take an object of length 1 and repeat it.
+#'
+#' @param object Object of length 1 to be repeated - vector, `Scalar`, `Array`, or `ChunkedArray`
+#' @param n Number of repetitions
+#'
+#' @return `Array` of length `n`
+#'
+#' @keywords internal
+repeat_value_as_array <- function(object, n) {
+ if (inherits(object, "ChunkedArray")) {
+ return(Scalar$create(object$chunks[[1]])$as_array(n))
+ }
+ return(Scalar$create(object)$as_array(n))
+}