diff options
Diffstat (limited to 'src/arrow/r/tests/testthat/test-dplyr-join.R')
-rw-r--r-- | src/arrow/r/tests/testthat/test-dplyr-join.R | 175 |
1 files changed, 175 insertions, 0 deletions
diff --git a/src/arrow/r/tests/testthat/test-dplyr-join.R b/src/arrow/r/tests/testthat/test-dplyr-join.R new file mode 100644 index 000000000..d8239f810 --- /dev/null +++ b/src/arrow/r/tests/testthat/test-dplyr-join.R @@ -0,0 +1,175 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 

# Skip every test in this file unless the "dataset" feature is available.
skip_if_not_available("dataset")

library(dplyr, warn.conflicts = FALSE)

# Shared fixtures ----

# Left-hand side of the joins: the example data plus an alternating 1/2 key
# column named `some_grouping`.
left <- example_data
left$some_grouping <- rep(c(1, 2), times = 5)

# Arrow Table version of `left`.
left_tab <- Table$create(left)

# Right-hand side of the joins: one row per key value, carrying two extra
# columns that a join can bring across.
to_join <- tibble::tibble(
  some_grouping = c(1, 2),
  capital_letters = c("A", "B"),
  another_column = TRUE
)
# Arrow Table version of `to_join`.
to_join_tab <- Table$create(to_join)


# left_join ----

test_that("left_join", {
  # No `by` is supplied here, so the join is expected to announce the key
  # column it picked.
  expect_message(
    compare_dplyr_binding(
      .input %>%
        left_join(to_join) %>%
        collect(),
      left
    ),
    regexp = 'Joining, by = "some_grouping"'
  )
})

test_that("left_join `by` args", {
  # Same key name on both sides.
  compare_dplyr_binding(
    .input %>%
      left_join(to_join, by = "some_grouping") %>%
      collect(),
    left
  )

  # Key renamed on the right-hand side only.
  compare_dplyr_binding(
    .input %>%
      left_join(
        to_join %>%
          rename(the_grouping = some_grouping),
        by = c(some_grouping = "the_grouping")
      ) %>%
      collect(),
    left
  )

  # Key renamed on the left-hand side only.
  compare_dplyr_binding(
    .input %>%
      rename(the_grouping = some_grouping) %>%
      left_join(
        to_join,
        by = c(the_grouping = "some_grouping")
      ) %>%
      collect(),
    left
  )
})

test_that("join two tables", {
  # Table-to-Table join, collected back into R.
  via_arrow <- left_tab %>%
    left_join(to_join_tab, by = "some_grouping") %>%
    collect()

  # The same join on the plain R data.
  via_dplyr <- left %>%
    left_join(to_join, by = "some_grouping") %>%
    collect()

  expect_identical(via_arrow, via_dplyr)
})

test_that("Error handling", {
  # `by` names a column that is not in the left-hand table; the message is
  # matched literally (fixed = TRUE) rather than as a regular expression.
  expect_error(
    left_tab %>%
      left_join(to_join, by = "not_a_col") %>%
      collect(),
    regexp = "all(names(by) %in% names(x)) is not TRUE",
    fixed = TRUE
  )
})

# TODO: test duplicate col names
# TODO: casting: int and float columns?

# Remaining join verbs ----
# Each test below runs one join verb against `to_join`, keyed on
# `some_grouping`, through the compare_dplyr_binding() helper with `left` as
# the input data.

test_that("right_join", {
  compare_dplyr_binding(
    .input %>%
      right_join(to_join, by = "some_grouping") %>%
      collect(),
    left
  )
})

test_that("inner_join", {
  compare_dplyr_binding(
    .input %>%
      inner_join(to_join, by = "some_grouping") %>%
      collect(),
    left
  )
})

test_that("full_join", {
  compare_dplyr_binding(
    .input %>%
      full_join(to_join, by = "some_grouping") %>%
      collect(),
    left
  )
})

test_that("semi_join", {
  compare_dplyr_binding(
    .input %>%
      semi_join(to_join, by = "some_grouping") %>%
      collect(),
    left
  )
})

test_that("anti_join", {
  compare_dplyr_binding(
    .input %>%
      # The `fct` column is dropped first: factor levels don't match when
      # the data has no rows.
      # TODO: use better anti_join test data
      select(-fct) %>%
      anti_join(to_join, by = "some_grouping") %>%
      collect(),
    left
  )
})

test_that("mutate then join", {
  # Small local Tables, named so they do not shadow the file-level fixtures.
  lhs <- Table$create(
    one = c("a", "b"),
    two = 1:2
  )
  rhs <- Table$create(
    three = TRUE,
    dos = 2L
  )

  # Rename and mutate the left side, negate `three` on the right side, then
  # join without an explicit `by` and sort by `dos` before collecting.
  joined <- lhs %>%
    rename(dos = two) %>%
    mutate(one = toupper(one)) %>%
    left_join(
      rhs %>%
        mutate(three = !three)
    ) %>%
    arrange(dos) %>%
    collect()

  expected <- tibble(
    one = c("A", "B"),
    dos = 1:2,
    three = c(NA, FALSE)
  )

  expect_equal(joined, expected)
})