1 files changed, 690 insertions, 0 deletions
diff --git a/src/arrow/r/tests/testthat/test-RecordBatch.R b/src/arrow/r/tests/testthat/test-RecordBatch.R
new file mode 100644
index 000000000..d280754a3
--- /dev/null
+++ b/src/arrow/r/tests/testthat/test-RecordBatch.R
@@ -0,0 +1,690 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+test_that("RecordBatch", {
+  # Note that we're reusing `tbl` and `batch` throughout the tests in this file
+  tbl <- tibble::tibble(
+    int = 1:10,
+    dbl = as.numeric(1:10),
+    lgl = sample(c(TRUE, FALSE, NA), 10, replace = TRUE),
+    chr = letters[1:10],
+    fct = factor(letters[1:10])
+  )
+  batch <- record_batch(tbl)
+
+  expect_equal(batch, batch)
+  expect_equal(
+    batch$schema,
+    schema(
+      int = int32(), dbl = float64(),
+      lgl = boolean(), chr = utf8(),
+      fct = dictionary(int8(), utf8())
+    )
+  )
+  expect_equal(batch$num_columns, 5L)
+  expect_equal(batch$num_rows, 10L)
+  expect_equal(batch$column_name(0), "int")
+  expect_equal(batch$column_name(1), "dbl")
+  expect_equal(batch$column_name(2), "lgl")
+  expect_equal(batch$column_name(3), "chr")
+  expect_equal(batch$column_name(4), "fct")
+  expect_equal(names(batch), c("int", "dbl", "lgl", "chr", "fct"))
+
+  # input validation
+  expect_error(batch$column_name(NA), "'i' cannot be NA")
+  expect_error(batch$column_name(-1), "subscript out of bounds")
+  expect_error(batch$column_name(1000), "subscript out of bounds")
+  expect_error(batch$column_name(1:2))
+  expect_error(batch$column_name("one"))
+
+  col_int <- batch$column(0)
+  expect_true(inherits(col_int, "Array"))
+  expect_equal(col_int$as_vector(), tbl$int)
+  expect_equal(col_int$type, int32())
+
+  col_dbl <- batch$column(1)
+  expect_true(inherits(col_dbl, "Array"))
+  expect_equal(col_dbl$as_vector(), tbl$dbl)
+  expect_equal(col_dbl$type, float64())
+
+  col_lgl <- batch$column(2)
+  expect_true(inherits(col_dbl, "Array"))
+  expect_equal(col_lgl$as_vector(), tbl$lgl)
+  expect_equal(col_lgl$type, boolean())
+
+  col_chr <- batch$column(3)
+  expect_true(inherits(col_chr, "Array"))
+  expect_equal(col_chr$as_vector(), tbl$chr)
+  expect_equal(col_chr$type, utf8())
+
+  col_fct <- batch$column(4)
+  expect_true(inherits(col_fct, "Array"))
+  expect_equal(col_fct$as_vector(), tbl$fct)
+  expect_equal(col_fct$type, dictionary(int8(), utf8()))
+
+  # input validation
+  expect_error(batch$column(NA), "'i' cannot be NA")
+  expect_error(batch$column(-1), "subscript out of bounds")
+  expect_error(batch$column(1000), "subscript out of bounds")
+  expect_error(batch$column(1:2))
+  expect_error(batch$column("one"))
+
+  batch2 <- batch$RemoveColumn(0)
+  expect_equal(
+    batch2$schema,
+    schema(dbl = float64(), lgl = boolean(), chr = utf8(), fct = dictionary(int8(), utf8()))
+  )
+  expect_equal(batch2$column(0), batch$column(1))
+  expect_data_frame(batch2, tbl[, -1])
+
+  # input validation
+  expect_error(batch$RemoveColumn(NA), "'i' cannot be NA")
+  expect_error(batch$RemoveColumn(-1), "subscript out of bounds")
+  expect_error(batch$RemoveColumn(1000), "subscript out of bounds")
+  expect_error(batch$RemoveColumn(1:2))
+  expect_error(batch$RemoveColumn("one"))
+})
+
+test_that("RecordBatch S3 methods", {
+  tab <- RecordBatch$create(example_data)
+  for (f in c("dim", "nrow", "ncol", "dimnames", "colnames", "row.names", "as.list")) {
+    fun <- get(f)
+    expect_identical(fun(tab), fun(example_data), info = f)
+  }
+})
+
+test_that("RecordBatch$Slice", {
+  batch3 <- batch$Slice(5)
+  expect_data_frame(batch3, tbl[6:10, ])
+
+  batch4 <- batch$Slice(5, 2)
+  expect_data_frame(batch4, tbl[6:7, ])
+
+  # Input validation
+  expect_error(batch$Slice("ten"))
+  expect_error(batch$Slice(NA_integer_), "Slice 'offset' cannot be NA")
+  expect_error(batch$Slice(NA), "Slice 'offset' cannot be NA")
+  expect_error(batch$Slice(10, "ten"))
+  expect_error(batch$Slice(10, NA_integer_), "Slice 'length' cannot be NA")
+  expect_error(batch$Slice(NA_integer_, NA_integer_), "Slice 'offset' cannot be NA")
+  expect_error(batch$Slice(c(10, 10)))
+  expect_error(batch$Slice(10, c(10, 10)))
+  expect_error(batch$Slice(1000), "Slice 'offset' greater than array length")
+  expect_error(batch$Slice(-1), "Slice 'offset' cannot be negative")
+  expect_error(batch4$Slice(10, 10), "Slice 'offset' greater than array length")
+  expect_error(batch$Slice(10, -1), "Slice 'length' cannot be negative")
+  expect_error(batch$Slice(-1, 10), "Slice 'offset' cannot be negative")
+})
+
+test_that("[ on RecordBatch", {
+  expect_data_frame(batch[6:7, ], tbl[6:7, ])
+  expect_data_frame(batch[c(6, 7), ], tbl[6:7, ])
+  expect_data_frame(batch[6:7, 2:4], tbl[6:7, 2:4])
+  expect_data_frame(batch[, c("dbl", "fct")], tbl[, c(2, 5)])
+  expect_identical(as.vector(batch[, "chr", drop = TRUE]), tbl$chr)
+  expect_data_frame(batch[c(7, 3, 5), 2:4], tbl[c(7, 3, 5), 2:4])
+  expect_data_frame(
+    batch[rep(c(FALSE, TRUE), 5), ],
+    tbl[c(2, 4, 6, 8, 10), ]
+  )
+  # bool Array
+  expect_data_frame(batch[batch$lgl, ], tbl[tbl$lgl, ])
+  # int Array
+  expect_data_frame(batch[Array$create(5:6), 2:4], tbl[6:7, 2:4])
+
+  # input validation
+  expect_error(batch[, c("dbl", "NOTACOLUMN")], 'Column not found: "NOTACOLUMN"')
+  expect_error(batch[, c(6, NA)], "Column indices cannot be NA")
+  expect_error(batch[, c(2, -2)], "Invalid column index")
+})
+
+test_that("[[ and $ on RecordBatch", {
+  expect_as_vector(batch[["int"]], tbl$int)
+  expect_as_vector(batch$int, tbl$int)
+  expect_as_vector(batch[[4]], tbl$chr)
+  expect_null(batch$qwerty)
+  expect_null(batch[["asdf"]])
+  expect_error(batch[[c(4, 3)]])
+  expect_error(batch[[NA]], "'i' must be character or numeric, not logical")
+  expect_error(batch[[NULL]], "'i' must be character or numeric, not NULL")
+  expect_error(batch[[c("asdf", "jkl;")]], "name is not a string", fixed = TRUE)
+})
+
+test_that("[[<- assignment", {
+  tbl <- tibble::tibble(
+    int = 1:10,
+    dbl = as.numeric(1:10),
+    lgl = sample(c(TRUE, FALSE, NA), 10, replace = TRUE),
+    chr = letters[1:10],
+    fct = factor(letters[1:10])
+  )
+  batch <- RecordBatch$create(tbl)
+
+  # can remove a column
+  batch[["chr"]] <- NULL
+  expect_data_frame(batch, tbl[-4])
+
+  # can remove a column by index
+  batch[[4]] <- NULL
+  expect_data_frame(batch, tbl[1:3])
+
+  # can add a named column
+  batch[["new"]] <- letters[10:1]
+  expect_data_frame(batch, dplyr::bind_cols(tbl[1:3], new = letters[10:1]))
+
+  # can replace a column by index
+  batch[[2]] <- as.numeric(10:1)
+  expect_as_vector(batch[[2]], as.numeric(10:1))
+
+  # can add a column by index
+  batch[[5]] <- as.numeric(10:1)
+  expect_as_vector(batch[[5]], as.numeric(10:1))
+  expect_as_vector(batch[["5"]], as.numeric(10:1))
+
+  # can replace a column
+  batch[["int"]] <- 10:1
+  expect_as_vector(batch[["int"]], 10:1)
+
+  # can use $
+  batch$new <- NULL
+  expect_null(as.vector(batch$new))
+  expect_identical(dim(batch), c(10L, 4L))
+
+  batch$int <- 1:10
+  expect_as_vector(batch$int, 1:10)
+
+  # recycling
+  batch[["atom"]] <- 1L
+  expect_as_vector(batch[["atom"]], rep(1L, 10))
+
+  expect_error(
+    batch[["atom"]] <- 1:6,
+    "Can't recycle input of size 6 to size 10."
+  )
+
+  # assign Arrow array
+  array <- Array$create(c(10:1))
+  batch$array <- array
+  expect_as_vector(batch$array, 10:1)
+
+  # nonsense indexes
+  expect_error(batch[[NA]] <- letters[10:1], "'i' must be character or numeric, not logical")
+  expect_error(batch[[NULL]] <- letters[10:1], "'i' must be character or numeric, not NULL")
+  expect_error(batch[[NA_integer_]] <- letters[10:1], "!is.na(i) is not TRUE", fixed = TRUE)
+  expect_error(batch[[NA_real_]] <- letters[10:1], "!is.na(i) is not TRUE", fixed = TRUE)
+  expect_error(batch[[NA_character_]] <- letters[10:1], "!is.na(i) is not TRUE", fixed = TRUE)
+  expect_error(batch[[c(1, 4)]] <- letters[10:1], "length(i) not equal to 1", fixed = TRUE)
+})
+
+test_that("head and tail on RecordBatch", {
+  tbl <- tibble::tibble(
+    int = 1:10,
+    dbl = as.numeric(1:10),
+    lgl = sample(c(TRUE, FALSE, NA), 10, replace = TRUE),
+    chr = letters[1:10],
+    fct = factor(letters[1:10])
+  )
+  batch <- RecordBatch$create(tbl)
+  expect_data_frame(head(batch), head(tbl))
+  expect_data_frame(head(batch, 4), head(tbl, 4))
+  expect_data_frame(head(batch, 40), head(tbl, 40))
+  expect_data_frame(head(batch, -4), head(tbl, -4))
+  expect_data_frame(head(batch, -40), head(tbl, -40))
+  expect_data_frame(tail(batch), tail(tbl))
+  expect_data_frame(tail(batch, 4), tail(tbl, 4))
+  expect_data_frame(tail(batch, 40), tail(tbl, 40))
+  expect_data_frame(tail(batch, -4), tail(tbl, -4))
+  expect_data_frame(tail(batch, -40), tail(tbl, -40))
+})
+
+test_that("RecordBatch print method", {
+  expect_output(
+    print(batch),
+    paste(
+      "RecordBatch",
+      "10 rows x 5 columns",
+      "$int <int32>",
+      "$dbl <double>",
+      "$lgl <bool>",
+      "$chr <string>",
+      "$fct <dictionary<values=string, indices=int8>>",
+      sep = "\n"
+    ),
+    fixed = TRUE
+  )
+})
+
+test_that("RecordBatch with 0 rows are supported", {
+  tbl <- tibble::tibble(
+    int = integer(),
+    dbl = numeric(),
+    lgl = logical(),
+    chr = character(),
+    fct = factor(character(), levels = c("a", "b"))
+  )
+
+  batch <- record_batch(tbl)
+  expect_equal(batch$num_columns, 5L)
+  expect_equal(batch$num_rows, 0L)
+  expect_equal(
+    batch$schema,
+    schema(
+      int = int32(),
+      dbl = float64(),
+      lgl = boolean(),
+      chr = utf8(),
+      fct = dictionary(int8(), utf8())
+    )
+  )
+})
+
+test_that("RecordBatch cast (ARROW-3741)", {
+  batch <- record_batch(x = 1:10, y = 1:10)
+
+  expect_error(batch$cast(schema(x = int32())))
+  expect_error(batch$cast(schema(x = int32(), z = int32())))
+
+  s2 <- schema(x = int16(), y = int64())
+  batch2 <- batch$cast(s2)
+  expect_equal(batch2$schema, s2)
+  expect_equal(batch2$column(0L)$type, int16())
+  expect_equal(batch2$column(1L)$type, int64())
+})
+
+test_that("record_batch() handles schema= argument", {
+  s <- schema(x = int32(), y = int32())
+  batch <- record_batch(x = 1:10, y = 1:10, schema = s)
+  expect_equal(s, batch$schema)
+
+  s <- schema(x = int32(), y = float64())
+  batch <- record_batch(x = 1:10, y = 1:10, schema = s)
+  expect_equal(s, batch$schema)
+
+  s <- schema(x = int32(), y = utf8())
+  expect_error(record_batch(x = 1:10, y = 1:10, schema = s))
+})
+
+test_that("record_batch(schema=) does some basic consistency checking of the schema", {
+  s <- schema(x = int32())
+  expect_error(record_batch(x = 1:10, y = 1:10, schema = s))
+  expect_error(record_batch(z = 1:10, schema = s))
+})
+
+test_that("RecordBatch dim() and nrow() (ARROW-3816)", {
+  batch <- record_batch(x = 1:10, y = 1:10)
+  expect_equal(dim(batch), c(10L, 2L))
+  expect_equal(nrow(batch), 10L)
+})
+
+test_that("record_batch() handles Array", {
+  batch <- record_batch(x = 1:10, y = Array$create(1:10))
+  expect_equal(batch$schema, schema(x = int32(), y = int32()))
+})
+
+test_that("record_batch() handles data frame columns", {
+  tib <- tibble::tibble(x = 1:10, y = 1:10)
+  # because tib is named here, this becomes a struct array
+  batch <- record_batch(a = 1:10, b = tib)
+  expect_equal(
+    batch$schema,
+    schema(
+      a = int32(),
+      b = struct(x = int32(), y = int32())
+    )
+  )
+  out <- as.data.frame(batch)
+  expect_equal(out, tibble::tibble(a = 1:10, b = tib))
+
+  # if not named, columns from tib are auto spliced
+  batch2 <- record_batch(a = 1:10, tib)
+  expect_equal(
+    batch2$schema,
+    schema(a = int32(), x = int32(), y = int32())
+  )
+  out <- as.data.frame(batch2)
+  expect_equal(out, tibble::tibble(a = 1:10, !!!tib))
+})
+
+test_that("record_batch() handles data frame columns with schema spec", {
+  tib <- tibble::tibble(x = 1:10, y = 1:10)
+  tib_float <- tib
+  tib_float$y <- as.numeric(tib_float$y)
+  schema <- schema(a = int32(), b = struct(x = int16(), y = float64()))
+  batch <- record_batch(a = 1:10, b = tib, schema = schema)
+  expect_equal(batch$schema, schema)
+  out <- as.data.frame(batch)
+  expect_equal(out, tibble::tibble(a = 1:10, b = tib_float))
+
+  schema <- schema(a = int32(), b = struct(x = int16(), y = utf8()))
+  expect_error(record_batch(a = 1:10, b = tib, schema = schema))
+})
+
+test_that("record_batch() auto splices (ARROW-5718)", {
+  df <- tibble::tibble(x = 1:10, y = letters[1:10])
+  batch1 <- record_batch(df)
+  batch2 <- record_batch(!!!df)
+  expect_equal(batch1, batch2)
+  expect_equal(batch1$schema, schema(x = int32(), y = utf8()))
+  expect_data_frame(batch1, df)
+
+  batch3 <- record_batch(df, z = 1:10)
+  batch4 <- record_batch(!!!df, z = 1:10)
+  expect_equal(batch3, batch4)
+  expect_equal(batch3$schema, schema(x = int32(), y = utf8(), z = int32()))
+  expect_equal(
+    as.data.frame(batch3),
+    tibble::as_tibble(cbind(df, data.frame(z = 1:10)))
+  )
+
+  s <- schema(x = float64(), y = utf8())
+  batch5 <- record_batch(df, schema = s)
+  batch6 <- record_batch(!!!df, schema = s)
+  expect_equal(batch5, batch6)
+  expect_equal(batch5$schema, s)
+  expect_equal(as.data.frame(batch5), df)
+
+  s2 <- schema(x = float64(), y = utf8(), z = int16())
+  batch7 <- record_batch(df, z = 1:10, schema = s2)
+  batch8 <- record_batch(!!!df, z = 1:10, schema = s2)
+  expect_equal(batch7, batch8)
+  expect_equal(batch7$schema, s2)
+  expect_equal(
+    as.data.frame(batch7),
+    tibble::as_tibble(cbind(df, data.frame(z = 1:10)))
+  )
+})
+
+test_that("record_batch() only auto splice data frames", {
+  expect_error(
+    record_batch(1:10),
+    regexp = "only data frames are allowed as unnamed arguments to be auto spliced"
+  )
+})
+
+test_that("record_batch() handles null type (ARROW-7064)", {
+  batch <- record_batch(a = 1:10, n = vctrs::unspecified(10))
+  expect_equal(
+    batch$schema,
+    schema(a = int32(), n = null()),
+    ignore_attr = TRUE
+  )
+})
+
+test_that("record_batch() scalar recycling with vectors", {
+  expect_data_frame(
+    record_batch(a = 1:10, b = 5),
+    tibble::tibble(a = 1:10, b = 5)
+  )
+})
+
+test_that("record_batch() scalar recycling with Scalars, Arrays, and ChunkedArrays", {
+  expect_data_frame(
+    record_batch(a = Array$create(1:10), b = Scalar$create(5)),
+    tibble::tibble(a = 1:10, b = 5)
+  )
+
+  expect_data_frame(
+    record_batch(a = Array$create(1:10), b = Array$create(5)),
+    tibble::tibble(a = 1:10, b = 5)
+  )
+
+  expect_data_frame(
+    record_batch(a = Array$create(1:10), b = ChunkedArray$create(5)),
+    tibble::tibble(a = 1:10, b = 5)
+  )
+})
+
+test_that("record_batch() no recycling with tibbles", {
+  expect_error(
+    record_batch(
+      tibble::tibble(a = 1:10),
+      tibble::tibble(a = 1, b = 5)
+    ),
+    regexp = "All input tibbles or data.frames must have the same number of rows"
+  )
+
+  expect_error(
+    record_batch(
+      tibble::tibble(a = 1:10),
+      tibble::tibble(a = 1)
+    ),
+    regexp = "All input tibbles or data.frames must have the same number of rows"
+  )
+})
+
+test_that("RecordBatch$Equals", {
+  df <- tibble::tibble(x = 1:10, y = letters[1:10])
+  a <- record_batch(df)
+  b <- record_batch(df)
+  expect_equal(a, b)
+  expect_true(a$Equals(b))
+  expect_false(a$Equals(df))
+})
+
+test_that("RecordBatch$Equals(check_metadata)", {
+  df <- tibble::tibble(x = 1:2, y = c("a", "b"))
+  rb1 <- record_batch(df)
+  rb2 <- record_batch(df, schema = rb1$schema$WithMetadata(list(some = "metadata")))
+
+  expect_r6_class(rb1, "RecordBatch")
+  expect_r6_class(rb2, "RecordBatch")
+  expect_false(rb1$schema$HasMetadata)
+  expect_true(rb2$schema$HasMetadata)
+  expect_identical(rb2$schema$metadata, list(some = "metadata"))
+
+  expect_true(rb1 == rb2)
+  expect_true(rb1$Equals(rb2))
+  expect_false(rb1$Equals(rb2, check_metadata = TRUE))
+
+  expect_failure(expect_equal(rb1, rb2)) # expect_equal has check_metadata=TRUE
+  expect_equal(rb1, rb2, ignore_attr = TRUE) # this passes check_metadata=FALSE
+
+  expect_false(rb1$Equals(24)) # Not a RecordBatch
+})
+
+test_that("RecordBatch name assignment", {
+  rb <- record_batch(x = 1:10, y = 1:10)
+  expect_identical(names(rb), c("x", "y"))
+  names(rb) <- c("a", "b")
+  expect_identical(names(rb), c("a", "b"))
+  expect_error(names(rb) <- "f")
+  expect_error(names(rb) <- letters)
+  expect_error(names(rb) <- character(0))
+  expect_error(names(rb) <- NULL)
+  expect_error(names(rb) <- c(TRUE, FALSE))
+})
+
+test_that("record_batch() with different length arrays", {
+  msg <- "All arrays must have the same length"
+  expect_error(record_batch(a = 1:5, b = 1:6), msg)
+})
+
+test_that("Handling string data with embedded nuls", {
+  raws <- Array$create(structure(list(
+    as.raw(c(0x70, 0x65, 0x72, 0x73, 0x6f, 0x6e)),
+    as.raw(c(0x77, 0x6f, 0x6d, 0x61, 0x6e)),
+    as.raw(c(0x6d, 0x61, 0x00, 0x6e)), # <-- there's your nul, 0x00
+    as.raw(c(0x63, 0x61, 0x6d, 0x65, 0x72, 0x61)),
+    as.raw(c(0x74, 0x76))
+  ),
+  class = c("arrow_binary", "vctrs_vctr", "list")
+  ))
+  batch_with_nul <- record_batch(a = 1:5, b = raws)
+  batch_with_nul$b <- batch_with_nul$b$cast(utf8())
+
+  # The behavior of the warnings/errors is slightly different with and without
+  # altrep. Without it (i.e. 3.5.0 and below, the error would trigger immediately
+  # on `as.vector()` where as with it, the error only happens on materialization)
+  skip_if_r_version("3.5.0")
+  df <- as.data.frame(batch_with_nul)
+
+  expect_error(
+    df$b[],
+    paste0(
+      "embedded nul in string: 'ma\\0n'; to strip nuls when converting from Arrow to R, ",
+      "set options(arrow.skip_nul = TRUE)"
+    ),
+    fixed = TRUE
+  )
+
+  batch_with_nul <- record_batch(a = 1:5, b = raws)
+  batch_with_nul$b <- batch_with_nul$b$cast(utf8())
+
+  withr::with_options(list(arrow.skip_nul = TRUE), {
+    expect_warning(
+      expect_equal(
+        as.data.frame(batch_with_nul)$b,
+        c("person", "woman", "man", "camera", "tv"),
+        ignore_attr = TRUE
+      ),
+      "Stripping '\\0' (nul) from character vector",
+      fixed = TRUE
+    )
+  })
+})
+
+test_that("ARROW-11769/ARROW-13860 - grouping preserved in record batch creation", {
+  skip_if_not_available("dataset")
+  library(dplyr, warn.conflicts = FALSE)
+
+  tbl <- tibble::tibble(
+    int = 1:10,
+    fct = factor(rep(c("A", "B"), 5)),
+    fct2 = factor(rep(c("C", "D"), each = 5)),
+  )
+
+  expect_r6_class(
+    tbl %>%
+      group_by(fct, fct2) %>%
+      record_batch(),
+    "RecordBatch"
+  )
+  expect_identical(
+    tbl %>%
+      group_by(fct, fct2) %>%
+      record_batch() %>%
+      group_vars(),
+    c("fct", "fct2")
+  )
+  expect_identical(
+    tbl %>%
+      group_by(fct, fct2) %>%
+      record_batch() %>%
+      ungroup() %>%
+      group_vars(),
+    NULL
+  )
+  expect_identical(
+    tbl %>%
+      group_by(fct, fct2) %>%
+      record_batch() %>%
+      select(-int) %>%
+      group_vars(),
+    c("fct", "fct2")
+  )
+})
+
+test_that("ARROW-12729 - length returns number of columns in RecordBatch", {
+  tbl <- tibble::tibble(
+    int = 1:10,
+    fct = factor(rep(c("A", "B"), 5)),
+    fct2 = factor(rep(c("C", "D"), each = 5)),
+  )
+
+  rb <- record_batch(!!!tbl)
+
+  expect_identical(length(rb), 3L)
+})
+
+test_that("RecordBatchReader to C-interface", {
+  skip_if_not_available("dataset")
+
+  tab <- Table$create(example_data)
+
+  # export the RecordBatchReader via the C-interface
+  stream_ptr <- allocate_arrow_array_stream()
+  scan <- Scanner$create(tab)
+  reader <- scan$ToRecordBatchReader()
+  reader$export_to_c(stream_ptr)
+
+  # then import it and check that the roundtripped value is the same
+  circle <- RecordBatchStreamReader$import_from_c(stream_ptr)
+  tab_from_c_new <- circle$read_table()
+  expect_equal(tab, tab_from_c_new)
+
+  # must clean up the pointer or we leak
+  delete_arrow_array_stream(stream_ptr)
+
+  # export the RecordBatchStreamReader via the C-interface
+  stream_ptr_new <- allocate_arrow_array_stream()
+  bytes <- write_to_raw(example_data)
+  expect_type(bytes, "raw")
+  reader_new <- RecordBatchStreamReader$create(bytes)
+  reader_new$export_to_c(stream_ptr_new)
+
+  # then import it and check that the roundtripped value is the same
+  circle_new <- RecordBatchStreamReader$import_from_c(stream_ptr_new)
+  tab_from_c_new <- circle_new$read_table()
+  expect_equal(tab, tab_from_c_new)
+
+  # must clean up the pointer or we leak
+  delete_arrow_array_stream(stream_ptr_new)
+})
+
+test_that("RecordBatch to C-interface", {
+  batch <- RecordBatch$create(example_data)
+
+  # export the RecordBatch via the C-interface
+  schema_ptr <- allocate_arrow_schema()
+  array_ptr <- allocate_arrow_array()
+  batch$export_to_c(array_ptr, schema_ptr)
+
+  # then import it and check that the roundtripped value is the same
+  circle <- RecordBatch$import_from_c(array_ptr, schema_ptr)
+  expect_equal
+
+  # must clean up the pointers or we leak
+  delete_arrow_schema(schema_ptr)
+  delete_arrow_array(array_ptr)
+})
+
+
+
+test_that("RecordBatchReader to C-interface to arrow_dplyr_query", {
+  skip_if_not_available("dataset")
+
+  tab <- Table$create(example_data)
+
+  # export the RecordBatchReader via the C-interface
+  stream_ptr <- allocate_arrow_array_stream()
+  scan <- Scanner$create(tab)
+  reader <- scan$ToRecordBatchReader()
+  reader$export_to_c(stream_ptr)
+
+  # then import it and check that the roundtripped value is the same
+  circle <- RecordBatchStreamReader$import_from_c(stream_ptr)
+
+  # create an arrow_dplyr_query() from the recordbatch reader
+  reader_adq <- arrow_dplyr_query(circle)
+
+  tab_from_c_new <- reader_adq %>%
+    dplyr::compute()
+  expect_equal(tab_from_c_new, tab)
+
+  # must clean up the pointer or we leak
+  delete_arrow_array_stream(stream_ptr)
+})