summaryrefslogtreecommitdiffstats
path: root/src/arrow/r/tests/testthat/test-RecordBatch.R
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/arrow/r/tests/testthat/test-RecordBatch.R690
1 files changed, 690 insertions, 0 deletions
diff --git a/src/arrow/r/tests/testthat/test-RecordBatch.R b/src/arrow/r/tests/testthat/test-RecordBatch.R
new file mode 100644
index 000000000..d280754a3
--- /dev/null
+++ b/src/arrow/r/tests/testthat/test-RecordBatch.R
@@ -0,0 +1,690 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+test_that("RecordBatch", {
+ # Note that we're reusing `tbl` and `batch` throughout the tests in this file
+ tbl <- tibble::tibble(
+ int = 1:10,
+ dbl = as.numeric(1:10),
+ lgl = sample(c(TRUE, FALSE, NA), 10, replace = TRUE),
+ chr = letters[1:10],
+ fct = factor(letters[1:10])
+ )
+ batch <- record_batch(tbl)
+
+ expect_equal(batch, batch)
+ expect_equal(
+ batch$schema,
+ schema(
+ int = int32(), dbl = float64(),
+ lgl = boolean(), chr = utf8(),
+ fct = dictionary(int8(), utf8())
+ )
+ )
+ expect_equal(batch$num_columns, 5L)
+ expect_equal(batch$num_rows, 10L)
+ expect_equal(batch$column_name(0), "int")
+ expect_equal(batch$column_name(1), "dbl")
+ expect_equal(batch$column_name(2), "lgl")
+ expect_equal(batch$column_name(3), "chr")
+ expect_equal(batch$column_name(4), "fct")
+ expect_equal(names(batch), c("int", "dbl", "lgl", "chr", "fct"))
+
+ # input validation
+ expect_error(batch$column_name(NA), "'i' cannot be NA")
+ expect_error(batch$column_name(-1), "subscript out of bounds")
+ expect_error(batch$column_name(1000), "subscript out of bounds")
+ expect_error(batch$column_name(1:2))
+ expect_error(batch$column_name("one"))
+
+ col_int <- batch$column(0)
+ expect_true(inherits(col_int, "Array"))
+ expect_equal(col_int$as_vector(), tbl$int)
+ expect_equal(col_int$type, int32())
+
+ col_dbl <- batch$column(1)
+ expect_true(inherits(col_dbl, "Array"))
+ expect_equal(col_dbl$as_vector(), tbl$dbl)
+ expect_equal(col_dbl$type, float64())
+
+ col_lgl <- batch$column(2)
+ expect_true(inherits(col_dbl, "Array"))
+ expect_equal(col_lgl$as_vector(), tbl$lgl)
+ expect_equal(col_lgl$type, boolean())
+
+ col_chr <- batch$column(3)
+ expect_true(inherits(col_chr, "Array"))
+ expect_equal(col_chr$as_vector(), tbl$chr)
+ expect_equal(col_chr$type, utf8())
+
+ col_fct <- batch$column(4)
+ expect_true(inherits(col_fct, "Array"))
+ expect_equal(col_fct$as_vector(), tbl$fct)
+ expect_equal(col_fct$type, dictionary(int8(), utf8()))
+
+ # input validation
+ expect_error(batch$column(NA), "'i' cannot be NA")
+ expect_error(batch$column(-1), "subscript out of bounds")
+ expect_error(batch$column(1000), "subscript out of bounds")
+ expect_error(batch$column(1:2))
+ expect_error(batch$column("one"))
+
+ batch2 <- batch$RemoveColumn(0)
+ expect_equal(
+ batch2$schema,
+ schema(dbl = float64(), lgl = boolean(), chr = utf8(), fct = dictionary(int8(), utf8()))
+ )
+ expect_equal(batch2$column(0), batch$column(1))
+ expect_data_frame(batch2, tbl[, -1])
+
+ # input validation
+ expect_error(batch$RemoveColumn(NA), "'i' cannot be NA")
+ expect_error(batch$RemoveColumn(-1), "subscript out of bounds")
+ expect_error(batch$RemoveColumn(1000), "subscript out of bounds")
+ expect_error(batch$RemoveColumn(1:2))
+ expect_error(batch$RemoveColumn("one"))
+})
+
+test_that("RecordBatch S3 methods", {
+ tab <- RecordBatch$create(example_data)
+ for (f in c("dim", "nrow", "ncol", "dimnames", "colnames", "row.names", "as.list")) {
+ fun <- get(f)
+ expect_identical(fun(tab), fun(example_data), info = f)
+ }
+})
+
+test_that("RecordBatch$Slice", {
+ batch3 <- batch$Slice(5)
+ expect_data_frame(batch3, tbl[6:10, ])
+
+ batch4 <- batch$Slice(5, 2)
+ expect_data_frame(batch4, tbl[6:7, ])
+
+ # Input validation
+ expect_error(batch$Slice("ten"))
+ expect_error(batch$Slice(NA_integer_), "Slice 'offset' cannot be NA")
+ expect_error(batch$Slice(NA), "Slice 'offset' cannot be NA")
+ expect_error(batch$Slice(10, "ten"))
+ expect_error(batch$Slice(10, NA_integer_), "Slice 'length' cannot be NA")
+ expect_error(batch$Slice(NA_integer_, NA_integer_), "Slice 'offset' cannot be NA")
+ expect_error(batch$Slice(c(10, 10)))
+ expect_error(batch$Slice(10, c(10, 10)))
+ expect_error(batch$Slice(1000), "Slice 'offset' greater than array length")
+ expect_error(batch$Slice(-1), "Slice 'offset' cannot be negative")
+ expect_error(batch4$Slice(10, 10), "Slice 'offset' greater than array length")
+ expect_error(batch$Slice(10, -1), "Slice 'length' cannot be negative")
+ expect_error(batch$Slice(-1, 10), "Slice 'offset' cannot be negative")
+})
+
+test_that("[ on RecordBatch", {
+ expect_data_frame(batch[6:7, ], tbl[6:7, ])
+ expect_data_frame(batch[c(6, 7), ], tbl[6:7, ])
+ expect_data_frame(batch[6:7, 2:4], tbl[6:7, 2:4])
+ expect_data_frame(batch[, c("dbl", "fct")], tbl[, c(2, 5)])
+ expect_identical(as.vector(batch[, "chr", drop = TRUE]), tbl$chr)
+ expect_data_frame(batch[c(7, 3, 5), 2:4], tbl[c(7, 3, 5), 2:4])
+ expect_data_frame(
+ batch[rep(c(FALSE, TRUE), 5), ],
+ tbl[c(2, 4, 6, 8, 10), ]
+ )
+ # bool Array
+ expect_data_frame(batch[batch$lgl, ], tbl[tbl$lgl, ])
+ # int Array
+ expect_data_frame(batch[Array$create(5:6), 2:4], tbl[6:7, 2:4])
+
+ # input validation
+ expect_error(batch[, c("dbl", "NOTACOLUMN")], 'Column not found: "NOTACOLUMN"')
+ expect_error(batch[, c(6, NA)], "Column indices cannot be NA")
+ expect_error(batch[, c(2, -2)], "Invalid column index")
+})
+
+test_that("[[ and $ on RecordBatch", {
+ expect_as_vector(batch[["int"]], tbl$int)
+ expect_as_vector(batch$int, tbl$int)
+ expect_as_vector(batch[[4]], tbl$chr)
+ expect_null(batch$qwerty)
+ expect_null(batch[["asdf"]])
+ expect_error(batch[[c(4, 3)]])
+ expect_error(batch[[NA]], "'i' must be character or numeric, not logical")
+ expect_error(batch[[NULL]], "'i' must be character or numeric, not NULL")
+ expect_error(batch[[c("asdf", "jkl;")]], "name is not a string", fixed = TRUE)
+})
+
+test_that("[[<- assignment", {
+ tbl <- tibble::tibble(
+ int = 1:10,
+ dbl = as.numeric(1:10),
+ lgl = sample(c(TRUE, FALSE, NA), 10, replace = TRUE),
+ chr = letters[1:10],
+ fct = factor(letters[1:10])
+ )
+ batch <- RecordBatch$create(tbl)
+
+ # can remove a column
+ batch[["chr"]] <- NULL
+ expect_data_frame(batch, tbl[-4])
+
+ # can remove a column by index
+ batch[[4]] <- NULL
+ expect_data_frame(batch, tbl[1:3])
+
+ # can add a named column
+ batch[["new"]] <- letters[10:1]
+ expect_data_frame(batch, dplyr::bind_cols(tbl[1:3], new = letters[10:1]))
+
+ # can replace a column by index
+ batch[[2]] <- as.numeric(10:1)
+ expect_as_vector(batch[[2]], as.numeric(10:1))
+
+ # can add a column by index
+ batch[[5]] <- as.numeric(10:1)
+ expect_as_vector(batch[[5]], as.numeric(10:1))
+ expect_as_vector(batch[["5"]], as.numeric(10:1))
+
+ # can replace a column
+ batch[["int"]] <- 10:1
+ expect_as_vector(batch[["int"]], 10:1)
+
+ # can use $
+ batch$new <- NULL
+ expect_null(as.vector(batch$new))
+ expect_identical(dim(batch), c(10L, 4L))
+
+ batch$int <- 1:10
+ expect_as_vector(batch$int, 1:10)
+
+ # recycling
+ batch[["atom"]] <- 1L
+ expect_as_vector(batch[["atom"]], rep(1L, 10))
+
+ expect_error(
+ batch[["atom"]] <- 1:6,
+ "Can't recycle input of size 6 to size 10."
+ )
+
+ # assign Arrow array
+ array <- Array$create(c(10:1))
+ batch$array <- array
+ expect_as_vector(batch$array, 10:1)
+
+ # nonsense indexes
+ expect_error(batch[[NA]] <- letters[10:1], "'i' must be character or numeric, not logical")
+ expect_error(batch[[NULL]] <- letters[10:1], "'i' must be character or numeric, not NULL")
+ expect_error(batch[[NA_integer_]] <- letters[10:1], "!is.na(i) is not TRUE", fixed = TRUE)
+ expect_error(batch[[NA_real_]] <- letters[10:1], "!is.na(i) is not TRUE", fixed = TRUE)
+ expect_error(batch[[NA_character_]] <- letters[10:1], "!is.na(i) is not TRUE", fixed = TRUE)
+ expect_error(batch[[c(1, 4)]] <- letters[10:1], "length(i) not equal to 1", fixed = TRUE)
+})
+
+test_that("head and tail on RecordBatch", {
+ tbl <- tibble::tibble(
+ int = 1:10,
+ dbl = as.numeric(1:10),
+ lgl = sample(c(TRUE, FALSE, NA), 10, replace = TRUE),
+ chr = letters[1:10],
+ fct = factor(letters[1:10])
+ )
+ batch <- RecordBatch$create(tbl)
+ expect_data_frame(head(batch), head(tbl))
+ expect_data_frame(head(batch, 4), head(tbl, 4))
+ expect_data_frame(head(batch, 40), head(tbl, 40))
+ expect_data_frame(head(batch, -4), head(tbl, -4))
+ expect_data_frame(head(batch, -40), head(tbl, -40))
+ expect_data_frame(tail(batch), tail(tbl))
+ expect_data_frame(tail(batch, 4), tail(tbl, 4))
+ expect_data_frame(tail(batch, 40), tail(tbl, 40))
+ expect_data_frame(tail(batch, -4), tail(tbl, -4))
+ expect_data_frame(tail(batch, -40), tail(tbl, -40))
+})
+
+test_that("RecordBatch print method", {
+ expect_output(
+ print(batch),
+ paste(
+ "RecordBatch",
+ "10 rows x 5 columns",
+ "$int <int32>",
+ "$dbl <double>",
+ "$lgl <bool>",
+ "$chr <string>",
+ "$fct <dictionary<values=string, indices=int8>>",
+ sep = "\n"
+ ),
+ fixed = TRUE
+ )
+})
+
+test_that("RecordBatch with 0 rows are supported", {
+ tbl <- tibble::tibble(
+ int = integer(),
+ dbl = numeric(),
+ lgl = logical(),
+ chr = character(),
+ fct = factor(character(), levels = c("a", "b"))
+ )
+
+ batch <- record_batch(tbl)
+ expect_equal(batch$num_columns, 5L)
+ expect_equal(batch$num_rows, 0L)
+ expect_equal(
+ batch$schema,
+ schema(
+ int = int32(),
+ dbl = float64(),
+ lgl = boolean(),
+ chr = utf8(),
+ fct = dictionary(int8(), utf8())
+ )
+ )
+})
+
+test_that("RecordBatch cast (ARROW-3741)", {
+ batch <- record_batch(x = 1:10, y = 1:10)
+
+ expect_error(batch$cast(schema(x = int32())))
+ expect_error(batch$cast(schema(x = int32(), z = int32())))
+
+ s2 <- schema(x = int16(), y = int64())
+ batch2 <- batch$cast(s2)
+ expect_equal(batch2$schema, s2)
+ expect_equal(batch2$column(0L)$type, int16())
+ expect_equal(batch2$column(1L)$type, int64())
+})
+
+test_that("record_batch() handles schema= argument", {
+ s <- schema(x = int32(), y = int32())
+ batch <- record_batch(x = 1:10, y = 1:10, schema = s)
+ expect_equal(s, batch$schema)
+
+ s <- schema(x = int32(), y = float64())
+ batch <- record_batch(x = 1:10, y = 1:10, schema = s)
+ expect_equal(s, batch$schema)
+
+ s <- schema(x = int32(), y = utf8())
+ expect_error(record_batch(x = 1:10, y = 1:10, schema = s))
+})
+
+test_that("record_batch(schema=) does some basic consistency checking of the schema", {
+ s <- schema(x = int32())
+ expect_error(record_batch(x = 1:10, y = 1:10, schema = s))
+ expect_error(record_batch(z = 1:10, schema = s))
+})
+
+test_that("RecordBatch dim() and nrow() (ARROW-3816)", {
+ batch <- record_batch(x = 1:10, y = 1:10)
+ expect_equal(dim(batch), c(10L, 2L))
+ expect_equal(nrow(batch), 10L)
+})
+
+test_that("record_batch() handles Array", {
+ batch <- record_batch(x = 1:10, y = Array$create(1:10))
+ expect_equal(batch$schema, schema(x = int32(), y = int32()))
+})
+
+test_that("record_batch() handles data frame columns", {
+ tib <- tibble::tibble(x = 1:10, y = 1:10)
+ # because tib is named here, this becomes a struct array
+ batch <- record_batch(a = 1:10, b = tib)
+ expect_equal(
+ batch$schema,
+ schema(
+ a = int32(),
+ b = struct(x = int32(), y = int32())
+ )
+ )
+ out <- as.data.frame(batch)
+ expect_equal(out, tibble::tibble(a = 1:10, b = tib))
+
+ # if not named, columns from tib are auto spliced
+ batch2 <- record_batch(a = 1:10, tib)
+ expect_equal(
+ batch2$schema,
+ schema(a = int32(), x = int32(), y = int32())
+ )
+ out <- as.data.frame(batch2)
+ expect_equal(out, tibble::tibble(a = 1:10, !!!tib))
+})
+
+test_that("record_batch() handles data frame columns with schema spec", {
+ tib <- tibble::tibble(x = 1:10, y = 1:10)
+ tib_float <- tib
+ tib_float$y <- as.numeric(tib_float$y)
+ schema <- schema(a = int32(), b = struct(x = int16(), y = float64()))
+ batch <- record_batch(a = 1:10, b = tib, schema = schema)
+ expect_equal(batch$schema, schema)
+ out <- as.data.frame(batch)
+ expect_equal(out, tibble::tibble(a = 1:10, b = tib_float))
+
+ schema <- schema(a = int32(), b = struct(x = int16(), y = utf8()))
+ expect_error(record_batch(a = 1:10, b = tib, schema = schema))
+})
+
+test_that("record_batch() auto splices (ARROW-5718)", {
+ df <- tibble::tibble(x = 1:10, y = letters[1:10])
+ batch1 <- record_batch(df)
+ batch2 <- record_batch(!!!df)
+ expect_equal(batch1, batch2)
+ expect_equal(batch1$schema, schema(x = int32(), y = utf8()))
+ expect_data_frame(batch1, df)
+
+ batch3 <- record_batch(df, z = 1:10)
+ batch4 <- record_batch(!!!df, z = 1:10)
+ expect_equal(batch3, batch4)
+ expect_equal(batch3$schema, schema(x = int32(), y = utf8(), z = int32()))
+ expect_equal(
+ as.data.frame(batch3),
+ tibble::as_tibble(cbind(df, data.frame(z = 1:10)))
+ )
+
+ s <- schema(x = float64(), y = utf8())
+ batch5 <- record_batch(df, schema = s)
+ batch6 <- record_batch(!!!df, schema = s)
+ expect_equal(batch5, batch6)
+ expect_equal(batch5$schema, s)
+ expect_equal(as.data.frame(batch5), df)
+
+ s2 <- schema(x = float64(), y = utf8(), z = int16())
+ batch7 <- record_batch(df, z = 1:10, schema = s2)
+ batch8 <- record_batch(!!!df, z = 1:10, schema = s2)
+ expect_equal(batch7, batch8)
+ expect_equal(batch7$schema, s2)
+ expect_equal(
+ as.data.frame(batch7),
+ tibble::as_tibble(cbind(df, data.frame(z = 1:10)))
+ )
+})
+
+test_that("record_batch() only auto splice data frames", {
+ expect_error(
+ record_batch(1:10),
+ regexp = "only data frames are allowed as unnamed arguments to be auto spliced"
+ )
+})
+
+test_that("record_batch() handles null type (ARROW-7064)", {
+ batch <- record_batch(a = 1:10, n = vctrs::unspecified(10))
+ expect_equal(
+ batch$schema,
+ schema(a = int32(), n = null()),
+ ignore_attr = TRUE
+ )
+})
+
+test_that("record_batch() scalar recycling with vectors", {
+ expect_data_frame(
+ record_batch(a = 1:10, b = 5),
+ tibble::tibble(a = 1:10, b = 5)
+ )
+})
+
+test_that("record_batch() scalar recycling with Scalars, Arrays, and ChunkedArrays", {
+ expect_data_frame(
+ record_batch(a = Array$create(1:10), b = Scalar$create(5)),
+ tibble::tibble(a = 1:10, b = 5)
+ )
+
+ expect_data_frame(
+ record_batch(a = Array$create(1:10), b = Array$create(5)),
+ tibble::tibble(a = 1:10, b = 5)
+ )
+
+ expect_data_frame(
+ record_batch(a = Array$create(1:10), b = ChunkedArray$create(5)),
+ tibble::tibble(a = 1:10, b = 5)
+ )
+})
+
+test_that("record_batch() no recycling with tibbles", {
+ expect_error(
+ record_batch(
+ tibble::tibble(a = 1:10),
+ tibble::tibble(a = 1, b = 5)
+ ),
+ regexp = "All input tibbles or data.frames must have the same number of rows"
+ )
+
+ expect_error(
+ record_batch(
+ tibble::tibble(a = 1:10),
+ tibble::tibble(a = 1)
+ ),
+ regexp = "All input tibbles or data.frames must have the same number of rows"
+ )
+})
+
+test_that("RecordBatch$Equals", {
+ df <- tibble::tibble(x = 1:10, y = letters[1:10])
+ a <- record_batch(df)
+ b <- record_batch(df)
+ expect_equal(a, b)
+ expect_true(a$Equals(b))
+ expect_false(a$Equals(df))
+})
+
+test_that("RecordBatch$Equals(check_metadata)", {
+ df <- tibble::tibble(x = 1:2, y = c("a", "b"))
+ rb1 <- record_batch(df)
+ rb2 <- record_batch(df, schema = rb1$schema$WithMetadata(list(some = "metadata")))
+
+ expect_r6_class(rb1, "RecordBatch")
+ expect_r6_class(rb2, "RecordBatch")
+ expect_false(rb1$schema$HasMetadata)
+ expect_true(rb2$schema$HasMetadata)
+ expect_identical(rb2$schema$metadata, list(some = "metadata"))
+
+ expect_true(rb1 == rb2)
+ expect_true(rb1$Equals(rb2))
+ expect_false(rb1$Equals(rb2, check_metadata = TRUE))
+
+ expect_failure(expect_equal(rb1, rb2)) # expect_equal has check_metadata=TRUE
+ expect_equal(rb1, rb2, ignore_attr = TRUE) # this passes check_metadata=FALSE
+
+ expect_false(rb1$Equals(24)) # Not a RecordBatch
+})
+
+test_that("RecordBatch name assignment", {
+ rb <- record_batch(x = 1:10, y = 1:10)
+ expect_identical(names(rb), c("x", "y"))
+ names(rb) <- c("a", "b")
+ expect_identical(names(rb), c("a", "b"))
+ expect_error(names(rb) <- "f")
+ expect_error(names(rb) <- letters)
+ expect_error(names(rb) <- character(0))
+ expect_error(names(rb) <- NULL)
+ expect_error(names(rb) <- c(TRUE, FALSE))
+})
+
+test_that("record_batch() with different length arrays", {
+ msg <- "All arrays must have the same length"
+ expect_error(record_batch(a = 1:5, b = 1:6), msg)
+})
+
+test_that("Handling string data with embedded nuls", {
+ raws <- Array$create(structure(list(
+ as.raw(c(0x70, 0x65, 0x72, 0x73, 0x6f, 0x6e)),
+ as.raw(c(0x77, 0x6f, 0x6d, 0x61, 0x6e)),
+ as.raw(c(0x6d, 0x61, 0x00, 0x6e)), # <-- there's your nul, 0x00
+ as.raw(c(0x63, 0x61, 0x6d, 0x65, 0x72, 0x61)),
+ as.raw(c(0x74, 0x76))
+ ),
+ class = c("arrow_binary", "vctrs_vctr", "list")
+ ))
+ batch_with_nul <- record_batch(a = 1:5, b = raws)
+ batch_with_nul$b <- batch_with_nul$b$cast(utf8())
+
+ # The behavior of the warnings/errors is slightly different with and without
+ # altrep. Without it (i.e. 3.5.0 and below, the error would trigger immediately
+ # on `as.vector()` where as with it, the error only happens on materialization)
+ skip_if_r_version("3.5.0")
+ df <- as.data.frame(batch_with_nul)
+
+ expect_error(
+ df$b[],
+ paste0(
+ "embedded nul in string: 'ma\\0n'; to strip nuls when converting from Arrow to R, ",
+ "set options(arrow.skip_nul = TRUE)"
+ ),
+ fixed = TRUE
+ )
+
+ batch_with_nul <- record_batch(a = 1:5, b = raws)
+ batch_with_nul$b <- batch_with_nul$b$cast(utf8())
+
+ withr::with_options(list(arrow.skip_nul = TRUE), {
+ expect_warning(
+ expect_equal(
+ as.data.frame(batch_with_nul)$b,
+ c("person", "woman", "man", "camera", "tv"),
+ ignore_attr = TRUE
+ ),
+ "Stripping '\\0' (nul) from character vector",
+ fixed = TRUE
+ )
+ })
+})
+
+test_that("ARROW-11769/ARROW-13860 - grouping preserved in record batch creation", {
+ skip_if_not_available("dataset")
+ library(dplyr, warn.conflicts = FALSE)
+
+ tbl <- tibble::tibble(
+ int = 1:10,
+ fct = factor(rep(c("A", "B"), 5)),
+ fct2 = factor(rep(c("C", "D"), each = 5)),
+ )
+
+ expect_r6_class(
+ tbl %>%
+ group_by(fct, fct2) %>%
+ record_batch(),
+ "RecordBatch"
+ )
+ expect_identical(
+ tbl %>%
+ group_by(fct, fct2) %>%
+ record_batch() %>%
+ group_vars(),
+ c("fct", "fct2")
+ )
+ expect_identical(
+ tbl %>%
+ group_by(fct, fct2) %>%
+ record_batch() %>%
+ ungroup() %>%
+ group_vars(),
+ NULL
+ )
+ expect_identical(
+ tbl %>%
+ group_by(fct, fct2) %>%
+ record_batch() %>%
+ select(-int) %>%
+ group_vars(),
+ c("fct", "fct2")
+ )
+})
+
+test_that("ARROW-12729 - length returns number of columns in RecordBatch", {
+ tbl <- tibble::tibble(
+ int = 1:10,
+ fct = factor(rep(c("A", "B"), 5)),
+ fct2 = factor(rep(c("C", "D"), each = 5)),
+ )
+
+ rb <- record_batch(!!!tbl)
+
+ expect_identical(length(rb), 3L)
+})
+
+test_that("RecordBatchReader to C-interface", {
+ skip_if_not_available("dataset")
+
+ tab <- Table$create(example_data)
+
+ # export the RecordBatchReader via the C-interface
+ stream_ptr <- allocate_arrow_array_stream()
+ scan <- Scanner$create(tab)
+ reader <- scan$ToRecordBatchReader()
+ reader$export_to_c(stream_ptr)
+
+ # then import it and check that the roundtripped value is the same
+ circle <- RecordBatchStreamReader$import_from_c(stream_ptr)
+ tab_from_c_new <- circle$read_table()
+ expect_equal(tab, tab_from_c_new)
+
+ # must clean up the pointer or we leak
+ delete_arrow_array_stream(stream_ptr)
+
+ # export the RecordBatchStreamReader via the C-interface
+ stream_ptr_new <- allocate_arrow_array_stream()
+ bytes <- write_to_raw(example_data)
+ expect_type(bytes, "raw")
+ reader_new <- RecordBatchStreamReader$create(bytes)
+ reader_new$export_to_c(stream_ptr_new)
+
+ # then import it and check that the roundtripped value is the same
+ circle_new <- RecordBatchStreamReader$import_from_c(stream_ptr_new)
+ tab_from_c_new <- circle_new$read_table()
+ expect_equal(tab, tab_from_c_new)
+
+ # must clean up the pointer or we leak
+ delete_arrow_array_stream(stream_ptr_new)
+})
+
+test_that("RecordBatch to C-interface", {
+ batch <- RecordBatch$create(example_data)
+
+ # export the RecordBatch via the C-interface
+ schema_ptr <- allocate_arrow_schema()
+ array_ptr <- allocate_arrow_array()
+ batch$export_to_c(array_ptr, schema_ptr)
+
+ # then import it and check that the roundtripped value is the same
+ circle <- RecordBatch$import_from_c(array_ptr, schema_ptr)
+ expect_equal
+
+ # must clean up the pointers or we leak
+ delete_arrow_schema(schema_ptr)
+ delete_arrow_array(array_ptr)
+})
+
+
+
+test_that("RecordBatchReader to C-interface to arrow_dplyr_query", {
+ skip_if_not_available("dataset")
+
+ tab <- Table$create(example_data)
+
+ # export the RecordBatchReader via the C-interface
+ stream_ptr <- allocate_arrow_array_stream()
+ scan <- Scanner$create(tab)
+ reader <- scan$ToRecordBatchReader()
+ reader$export_to_c(stream_ptr)
+
+ # then import it and check that the roundtripped value is the same
+ circle <- RecordBatchStreamReader$import_from_c(stream_ptr)
+
+ # create an arrow_dplyr_query() from the recordbatch reader
+ reader_adq <- arrow_dplyr_query(circle)
+
+ tab_from_c_new <- reader_adq %>%
+ dplyr::compute()
+ expect_equal(tab_from_c_new, tab)
+
+ # must clean up the pointer or we leak
+ delete_arrow_array_stream(stream_ptr)
+})