diff options
Diffstat (limited to 'src/arrow/r/tests/testthat/test-json.R')
-rw-r--r-- | src/arrow/r/tests/testthat/test-json.R | 255 |
1 files changed, 255 insertions, 0 deletions
diff --git a/src/arrow/r/tests/testthat/test-json.R b/src/arrow/r/tests/testthat/test-json.R new file mode 100644 index 000000000..825511b97 --- /dev/null +++ b/src/arrow/r/tests/testthat/test-json.R @@ -0,0 +1,255 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +skip_if_not_available("json") + +test_that("Can read json file with scalars columns (ARROW-5503)", { + tf <- tempfile() + on.exit(unlink(tf)) + writeLines(' + { "hello": 3.5, "world": false, "yo": "thing" } + { "hello": 3.25, "world": null } + { "hello": 3.125, "world": null, "yo": "\u5fcd" } + { "hello": 0.0, "world": true, "yo": null } + ', tf, useBytes = TRUE) + + tab1 <- read_json_arrow(tf, as_data_frame = FALSE) + tab2 <- read_json_arrow(mmap_open(tf), as_data_frame = FALSE) + tab3 <- read_json_arrow(ReadableFile$create(tf), as_data_frame = FALSE) + + expect_equal(tab1, tab2) + expect_equal(tab1, tab3) + + expect_equal( + tab1$schema, + schema(hello = float64(), world = boolean(), yo = utf8()) + ) + tib <- as.data.frame(tab1) + expect_equal(tib$hello, c(3.5, 3.25, 3.125, 0)) + expect_equal(tib$world, c(FALSE, NA, NA, TRUE)) + expect_equal(tib$yo, c("thing", NA, "\u5fcd", NA)) +}) + +test_that("read_json_arrow() converts to tibble", { + tf <- tempfile() + on.exit(unlink(tf)) + writeLines(' + { "hello": 3.5, "world": false, "yo": "thing" } + { "hello": 3.25, "world": null } + { "hello": 3.125, "world": null, "yo": "\u5fcd" } + { "hello": 0.0, "world": true, "yo": null } + ', tf, useBytes = TRUE) + + tab1 <- read_json_arrow(tf) + tab2 <- read_json_arrow(mmap_open(tf)) + tab3 <- read_json_arrow(ReadableFile$create(tf)) + + expect_s3_class(tab1, "tbl_df") + expect_s3_class(tab2, "tbl_df") + expect_s3_class(tab3, "tbl_df") + + expect_equal(tab1, tab2) + expect_equal(tab1, tab3) + + expect_equal(tab1$hello, c(3.5, 3.25, 3.125, 0)) + expect_equal(tab1$world, c(FALSE, NA, NA, TRUE)) + expect_equal(tab1$yo, c("thing", NA, "\u5fcd", NA)) +}) + +test_that("read_json_arrow() supports col_select=", { + tf <- tempfile() + writeLines(' + { "hello": 3.5, "world": false, "yo": "thing" } + { "hello": 3.25, "world": null } + { "hello": 3.125, "world": null, "yo": "\u5fcd" } + { "hello": 0.0, "world": true, "yo": null } + ', tf) + + tab1 <- read_json_arrow(tf, col_select = c(hello, world)) + expect_equal(names(tab1), c("hello", "world")) + + tab2 <- read_json_arrow(tf, col_select = 1:2) + expect_equal(names(tab2), c("hello", "world")) +}) + +test_that("read_json_arrow(schema=) with empty schema", { + tf <- tempfile() + writeLines(' + { "hello": 3.5, "world": 2, "third_col": 99} + { "hello": 3.25, "world": 5, "third_col": 98} + { "hello": 3.125, "world": 8, "third_col": 97 } + { "hello": 0.0, "world": 10, "third_col": 96} + ', tf) + + tab1 <- read_json_arrow(tf, schema = schema()) + + expect_identical( + tab1, + tibble::tibble( + hello = c(3.5, 3.25, 3.125, 0), + world = c(2L, 5L, 8L, 10L), + third_col = c(99L, 98L, 97L, 96L) + ) + ) +}) + +test_that("read_json_arrow(schema=) with partial schema", { + tf <- tempfile() + writeLines(' + { "hello": 3.5, "world": 2, "third_col": 99} + { "hello": 3.25, "world": 5, "third_col": 98} + { "hello": 3.125, "world": 8, "third_col": 97 } + { "hello": 0.0, "world": 10, "third_col": 96} + ', tf) + + tab1 <- read_json_arrow(tf, schema = schema(third_col = float64(), world = float64())) + + expect_identical( + tab1, + tibble::tibble( + third_col = c(99, 98, 97, 96), + world = c(2, 5, 8, 10), + hello = c(3.5, 3.25, 3.125, 0) + ) + ) + + tf2 <- tempfile() + writeLines(' + { "hello": 3.5, "world": 2, "third_col": "99"} + { "hello": 3.25, "world": 5, "third_col": "98"} + { "hello": 3.125, "world": 8, "third_col": "97"} + ', tf2) + + tab2 <- read_json_arrow(tf2, schema = schema(third_col = string(), world = float64())) + + expect_identical( + tab2, + tibble::tibble( + third_col = c("99", "98", "97"), + world = c(2, 5, 8), + hello = c(3.5, 3.25, 3.125) + ) + ) +}) + +test_that("read_json_arrow(schema=) with full schema", { + tf <- tempfile() + writeLines(' + { "hello": 3.5, "world": 2, "third_col": 99} + { "hello": 3.25, "world": 5, "third_col": 98} + { "hello": 3.125, "world": 8, "third_col": 97} + { "hello": 0.0, "world": 10, "third_col": 96} + ', tf) + + tab1 <- read_json_arrow( + tf, + schema = schema( + hello = float64(), + third_col = float64(), + world = float64() + ) + ) + + expect_identical( + tab1, + tibble::tibble( + hello = c(3.5, 3.25, 3.125, 0), + third_col = c(99, 98, 97, 96), + world = c(2, 5, 8, 10) + ) + ) +}) + +test_that("Can read json file with nested columns (ARROW-5503)", { + tf <- tempfile() + on.exit(unlink(tf)) + writeLines(' + { "arr": [1.0, 2.0, 3.0], "nuf": {} } + { "arr": [2.0], "nuf": null } + { "arr": [], "nuf": { "ps": 78.0, "hello": "hi" } } + { "arr": null, "nuf": { "ps": 90.0, "hello": "bonjour" } } + { "arr": [5.0], "nuf": { "hello": "ciao" } } + { "arr": [5.0, 6.0], "nuf": { "ps": 19 } } + ', tf) + + tab1 <- read_json_arrow(tf, as_data_frame = FALSE) + tab2 <- read_json_arrow(mmap_open(tf), as_data_frame = FALSE) + tab3 <- read_json_arrow(ReadableFile$create(tf), as_data_frame = FALSE) + + expect_equal(tab1, tab2) + expect_equal(tab1, tab3) + + expect_equal( + tab1$schema, + schema( + arr = list_of(float64()), + nuf = struct(ps = float64(), hello = utf8()) + ) + ) + + struct_array <- tab1$column(1)$chunk(0) + ps <- Array$create(c(NA, NA, 78, 90, NA, 19)) + hello <- Array$create(c(NA, NA, "hi", "bonjour", "ciao", NA)) + expect_equal(struct_array$field(0L), ps) + expect_equal(struct_array$GetFieldByName("ps"), ps) + struct_cols <- struct_array$Flatten() + expect_identical(length(struct_cols), 2L) + expect_equal(struct_cols[[1]], ps) + expect_equal(struct_cols[[2]], hello) + expect_equal( + as.vector(struct_array), + tibble::tibble(ps = ps$as_vector(), hello = hello$as_vector()) + ) + + list_array_r <- list( + c(1, 2, 3), + c(2), + numeric(), + NULL, + 5, + c(5, 6) + ) + list_array <- tab1$column(0) + expect_equal( + list_array$as_vector(), + list_array_r, + ignore_attr = TRUE + ) + + tib <- as.data.frame(tab1) + expect_equal( + tib, + tibble::tibble( + arr = list_array_r, + nuf = tibble::tibble(ps = ps$as_vector(), hello = hello$as_vector()) + ), + ignore_attr = TRUE + ) +}) + +test_that("Can read json file with list<struct<T...>> nested columns (ARROW-7740)", { + tf <- tempfile() + on.exit(unlink(tf)) + writeLines(' + {"a":[{"b":1.0},{"b":2.0}]} + {"a":[{"b":1.0},{"b":2.0}]} + ', tf) + + one <- tibble::tibble(b = c(1, 2)) + expected <- tibble::tibble(a = c(list(one), list(one))) + expect_equal(read_json_arrow(tf), expected, ignore_attr = TRUE) +}) |