Diffstat
 src/arrow/r/tests/testthat/test-backwards-compatibility.R | 121
 1 file changed, 121 insertions(+), 0 deletions(-)
diff --git a/src/arrow/r/tests/testthat/test-backwards-compatibility.R b/src/arrow/r/tests/testthat/test-backwards-compatibility.R
new file mode 100644
index 000000000..32e86d5f6
--- /dev/null
+++ b/src/arrow/r/tests/testthat/test-backwards-compatibility.R
@@ -0,0 +1,121 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# nolint start
+# To write a new version of a test file for the current version:
+# write_parquet(example_with_metadata, test_path("golden-files/data-arrow_2.0.0.parquet"))
+
+# To write a new version of a test file for an old version, use docker(-compose)
+# to set up a Linux distribution and use RStudio's public package manager binary
+# repo to install the old version. The following commands should be run at the
+# root of the arrow repo directory and might need slight adjustments.
+# R_ORG=rstudio R_IMAGE=r-base R_TAG=4.0-focal docker-compose build --no-cache r
+# R_ORG=rstudio R_IMAGE=r-base R_TAG=4.0-focal docker-compose run r /bin/bash
+# R
+# options(repos = "https://packagemanager.rstudio.com/all/__linux__/focal/latest")
+# remotes::install_version("arrow", version = "1.0.1")
+# # get example data into the global env
+# write_parquet(example_with_metadata, "arrow/r/tests/testthat/golden-files/data-arrow_1.0.1.parquet")
+# quit()/exit
+# nolint end
+
+skip_if(getRversion() < "3.5.0", "The serialization format changed in 3.5")
+
+expect_identical_with_metadata <- function(object, expected, ..., top_level = TRUE) {
+  attrs_to_keep <- c("names", "class", "row.names")
+  if (!top_level) {
+    # drop attributes that aren't part of a plain tbl/data.frame
+    for (attribute in names(attributes(expected))) {
+      if (attribute %in% attrs_to_keep) next
+      attributes(expected)[[attribute]] <- NULL
+    }
+  }
+  expect_identical(object, expected, ...)
+}
+
+test_that("reading a known Parquet file to dataframe with 3.0.0", {
+  skip_if_not_available("parquet")
+  skip_if_not_available("snappy")
+  pq_file <- test_path("golden-files/data-arrow-extra-meta_3.0.0.parquet")
+
+  df <- read_parquet(pq_file)
+  # this is equivalent to `expect_identical()`
+  expect_identical_with_metadata(df, example_with_extra_metadata)
+})
+
+test_that("reading a known Parquet file to dataframe with 2.0.0", {
+  skip_if_not_available("parquet")
+  skip_if_not_available("snappy")
+  pq_file <- test_path("golden-files/data-arrow_2.0.0.parquet")
+
+  df <- read_parquet(pq_file)
+  # this is equivalent to `expect_identical()`
+  expect_identical_with_metadata(df, example_with_metadata)
+})
+
+test_that("reading a known Parquet file to dataframe with 1.0.1", {
+  skip_if_not_available("parquet")
+  skip_if_not_available("snappy")
+  pq_file <- test_path("golden-files/data-arrow_1.0.1.parquet")
+
+  df <- read_parquet(pq_file)
+  # 1.0.1 didn't save top-level metadata, so we need to remove it.
+  expect_identical_with_metadata(df, example_with_metadata, top_level = FALSE)
+})
+
+for (comp in c("lz4", "uncompressed", "zstd")) {
+  # nolint start
+  # write_feather(example_with_metadata, test_path("golden-files/data-arrow_2.0.0_lz4.feather"), compression = "lz4")
+  # write_feather(example_with_metadata, test_path("golden-files/data-arrow_2.0.0_uncompressed.feather"), compression = "uncompressed")
+  # write_feather(example_with_metadata, test_path("golden-files/data-arrow_2.0.0_zstd.feather"), compression = "zstd")
+  # nolint end
+  test_that("reading a known Feather file to dataframe with 2.0.0", {
+    skip_if_not_available("parquet")
+    skip_if_not_available(comp)
+    feather_file <- test_path(paste0("golden-files/data-arrow_2.0.0_", comp, ".feather"))
+
+    df <- read_feather(feather_file)
+    expect_identical_with_metadata(df, example_with_metadata)
+  })
+
+  test_that("reading a known Feather file to dataframe with 1.0.1", {
+    skip_if_not_available("parquet")
+    skip_if_not_available(comp)
+    feather_file <- test_path(paste0("golden-files/data-arrow_1.0.1_", comp, ".feather"))
+
+    df <- read_feather(feather_file)
+    # 1.0.1 didn't save top-level metadata, so we need to remove it.
+    expect_identical_with_metadata(df, example_with_metadata, top_level = FALSE)
+  })
+
+  test_that("reading a known Feather file to dataframe with 0.17.0", {
+    skip_if_not_available("parquet")
+    skip_if_not_available(comp)
+    feather_file <- test_path(paste0("golden-files/data-arrow_0.17.0_", comp, ".feather"))
+
+    df <- read_feather(feather_file)
+    # The 0.17.0 file has no top-level metadata, the special class is not
+    # preserved, and the embedded tibble's attributes come back in a different
+    # order. Since this predates 1.0.0 we punt on checking attributes, but
+    # classes are always checked, so the special class must be removed before
+    # comparing.
+    example_with_metadata_sans_special_class <- example_with_metadata
+    example_with_metadata_sans_special_class$a <- unclass(example_with_metadata_sans_special_class$a)
+    expect_equal(df, example_with_metadata_sans_special_class, ignore_attr = TRUE)
+  })
+}
+
+# TODO: streams(?)
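
A minimal sketch of how the expect_identical_with_metadata() helper defined in the diff above behaves. The data frame and the attribute name "extra_metadata" below are made up for illustration and are not what example_with_metadata or the golden files actually contain; it assumes testthat is attached and the helper is already defined.

library(testthat)

# stand-in for example_with_metadata: the same data plus one hypothetical
# top-level attribute that an older writer would not have stored
expected <- data.frame(x = 1:3)
attr(expected, "extra_metadata") <- "only written by newer versions"

# what reading an older file back might yield: identical data, no extra attribute
object <- data.frame(x = 1:3)

# with the default top_level = TRUE this comparison would fail, because
# `expected` still carries the extra attribute; with top_level = FALSE the
# helper first strips every attribute of `expected` other than
# names/class/row.names, so the comparison passes
expect_identical_with_metadata(object, expected, top_level = FALSE)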
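
One way the "TODO: streams(?)" note could be filled in, sketched here as an assumption rather than a committed plan: golden IPC stream files written by older versions could be checked the same way as the Feather files, using arrow's write_ipc_stream()/read_ipc_stream(). The file name "data-arrow_2.0.0.stream" is hypothetical; no such golden file exists in the repo yet, hence the skip.

# nolint start
# write_ipc_stream(example_with_metadata, test_path("golden-files/data-arrow_2.0.0.stream"))
# nolint end
test_that("reading a known IPC stream file to dataframe with 2.0.0", {
  stream_file <- test_path("golden-files/data-arrow_2.0.0.stream")
  # skip until a golden stream file has actually been generated and committed
  skip_if(!file.exists(stream_file), "golden IPC stream file not generated yet")

  df <- read_ipc_stream(stream_file)
  expect_identical_with_metadata(df, example_with_metadata)
})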