From e6918187568dbd01842d8d1d2c808ce16a894239 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sun, 21 Apr 2024 13:54:28 +0200 Subject: Adding upstream version 18.2.2. Signed-off-by: Daniel Baumann --- src/arrow/r/tests/testthat/test-schema.R | 220 +++++++++++++++++++++++++++++++ 1 file changed, 220 insertions(+) create mode 100644 src/arrow/r/tests/testthat/test-schema.R (limited to 'src/arrow/r/tests/testthat/test-schema.R') diff --git a/src/arrow/r/tests/testthat/test-schema.R b/src/arrow/r/tests/testthat/test-schema.R new file mode 100644 index 000000000..8473550df --- /dev/null +++ b/src/arrow/r/tests/testthat/test-schema.R @@ -0,0 +1,220 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + + +test_that("Alternate type names are supported", { + expect_equal( + schema(b = double(), c = bool(), d = string(), e = float(), f = halffloat()), + schema(b = float64(), c = boolean(), d = utf8(), e = float32(), f = float16()) + ) + expect_equal(names(schema(b = double(), c = bool(), d = string())), c("b", "c", "d")) +}) + +test_that("Schema print method", { + expect_output( + print(schema(b = double(), c = bool(), d = string())), + paste( + "Schema", + "b: double", + "c: bool", + "d: string", + sep = "\n" + ), + fixed = TRUE + ) +}) + +test_that("Schema with non-nullable fields", { + expect_output( + print(schema(field("b", double()), + field("c", bool(), nullable = FALSE), + field("d", string()))), + paste( + "Schema", + "b: double", + "c: bool not null", + "d: string", + sep = "\n" + ), + fixed = TRUE + ) +}) + +test_that("Schema $GetFieldByName", { + schm <- schema(b = double(), c = string()) + expect_equal(schm$GetFieldByName("b"), field("b", double())) + expect_null(schm$GetFieldByName("f")) + # TODO: schema(b = double(), b = string())$GetFieldByName("b") # nolint + # also returns NULL and probably should error bc duplicated names +}) + +test_that("Schema extract (returns Field)", { + # TODO: should this return a Field or the Type? + # I think of Schema like list(name = type, name = type, ...) + # but in practice it is more like list(list(name, type), list(name, type), ...) + # -> Field names in a Schema may be duplicated + # -> Fields may have metadata (though we don't really handle that in R) + schm <- schema(b = double(), c = string()) + expect_equal(schm$b, field("b", double())) + expect_equal(schm[["b"]], field("b", double())) + expect_equal(schm[[1]], field("b", double())) + + expect_null(schm[["ZZZ"]]) + expect_error(schm[[42]]) # Should have better error message +}) + +test_that("Schema slicing", { + schm <- schema(b = double(), c = string(), d = int8()) + expect_equal(schm[2:3], schema(c = string(), d = int8())) + expect_equal(schm[-1], schema(c = string(), d = int8())) + expect_equal(schm[c("d", "c")], schema(d = int8(), c = string())) + expect_equal(schm[c(FALSE, TRUE, TRUE)], schema(c = string(), d = int8())) + expect_error(schm[c("c", "ZZZ")], 'Invalid field name: "ZZZ"') + expect_error(schm[c("XXX", "c", "ZZZ")], 'Invalid field names: "XXX" and "ZZZ"') +}) + +test_that("Schema modification", { + schm <- schema(b = double(), c = string(), d = int8()) + schm$c <- boolean() + expect_equal(schm, schema(b = double(), c = boolean(), d = int8())) + schm[["d"]] <- int16() + expect_equal(schm, schema(b = double(), c = boolean(), d = int16())) + schm$b <- NULL + expect_equal(schm, schema(c = boolean(), d = int16())) + # NULL assigning something that doesn't exist doesn't modify + schm$zzzz <- NULL + expect_equal(schm, schema(c = boolean(), d = int16())) + # Adding a field + schm$fff <- int32() + expect_equal(schm, schema(c = boolean(), d = int16(), fff = int32())) + + # By index + schm <- schema(b = double(), c = string(), d = int8()) + schm[[2]] <- int32() + expect_equal(schm, schema(b = double(), c = int32(), d = int8())) + + # Adding actual Fields + # If assigning by name, note that this can modify the resulting name + schm <- schema(b = double(), c = string(), d = int8()) + schm$c <- field("x", int32()) + expect_equal(schm, schema(b = double(), x = int32(), d = int8())) + schm[[2]] <- field("y", int64()) + expect_equal(schm, schema(b = double(), y = int64(), d = int8())) + + # Error handling + expect_error(schm$c <- 4, "value must be a DataType") + expect_error(schm[[-3]] <- int32(), "i not greater than 0") + expect_error(schm[[0]] <- int32(), "i not greater than 0") + expect_error(schm[[NA_integer_]] <- int32(), "!is.na(i) is not TRUE", fixed = TRUE) + expect_error(schm[[TRUE]] <- int32(), "i is not a numeric or integer vector") + expect_error(schm[[c(2, 4)]] <- int32(), "length(i) not equal to 1", fixed = TRUE) +}) + +test_that("Metadata is preserved when modifying Schema", { + schm <- schema(b = double(), c = string(), d = int8()) + schm$metadata$foo <- "bar" + expect_identical(schm$metadata, list(foo = "bar")) + schm$c <- field("x", int32()) + expect_identical(schm$metadata, list(foo = "bar")) +}) + +test_that("reading schema from Buffer", { + # TODO: this uses the streaming format, i.e. from RecordBatchStreamWriter + # maybe there is an easier way to serialize a schema + batch <- record_batch(x = 1:10) + expect_r6_class(batch, "RecordBatch") + + stream <- BufferOutputStream$create() + writer <- RecordBatchStreamWriter$create(stream, batch$schema) + expect_r6_class(writer, "RecordBatchWriter") + writer$close() + + buffer <- stream$finish() + expect_r6_class(buffer, "Buffer") + + reader <- MessageReader$create(buffer) + expect_r6_class(reader, "MessageReader") + + message <- reader$ReadNextMessage() + expect_r6_class(message, "Message") + expect_equal(message$type, MessageType$SCHEMA) + + stream <- BufferReader$create(buffer) + expect_r6_class(stream, "BufferReader") + message <- read_message(stream) + expect_r6_class(message, "Message") + expect_equal(message$type, MessageType$SCHEMA) +}) + +test_that("Input validation when creating a table with a schema", { + expect_error( + Table$create(b = 1, schema = c(b = float64())), # list not Schema + "`schema` must be an arrow::Schema or NULL" + ) +}) + +test_that("Schema$Equals", { + a <- schema(b = double(), c = bool()) + b <- a$WithMetadata(list(some = "metadata")) + + # different metadata + expect_failure(expect_equal(a, b)) + expect_false(a$Equals(b, check_metadata = TRUE)) + + # Metadata not checked + expect_equal(a, b, ignore_attr = TRUE) + + # Non-schema object + expect_false(a$Equals(42)) +}) + +test_that("unify_schemas", { + a <- schema(b = double(), c = bool()) + z <- schema(b = double(), k = utf8()) + expect_equal( + unify_schemas(a, z), + schema(b = double(), c = bool(), k = utf8()) + ) + # returns NULL when any arg is NULL + expect_null( + unify_schemas(a, NULL, z) + ) + # returns NULL when all args are NULL + expect_null( + unify_schemas(NULL, NULL) + ) + # errors when no args + expect_error( + unify_schemas(), + "Must provide at least one schema to unify" + ) +}) + +test_that("Schema to C-interface", { + schema <- schema(b = double(), c = bool()) + + # export the schema via the C-interface + ptr <- allocate_arrow_schema() + schema$export_to_c(ptr) + + # then import it and check that the roundtripped value is the same + circle <- Schema$import_from_c(ptr) + expect_equal(circle, schema) + + # must clean up the pointer or we leak + delete_arrow_schema(ptr) +}) -- cgit v1.2.3