1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
|
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
test_that("table round trip", {
  # Build an Arrow Table from a tibble with integer, double and raw columns,
  # inspect every column through the ChunkedArray/Array accessors, then
  # round-trip the same data through a Feather file on disk.
  tbl <- tibble::tibble(
    int = 1:10,
    dbl = as.numeric(1:10),
    raw = as.raw(1:10)
  )

  tab <- Table$create(!!!tbl)
  expect_equal(tab$num_columns, 3L)
  expect_equal(tab$num_rows, 10L)

  # Checks shared by every column of `tab`.
  #   chunked_array: the ChunkedArray returned by `tab$column()`.
  #   expected:      the R vector that `$as_vector()` should give back.
  expect_column_round_trip <- function(chunked_array, expected) {
    # ChunkedArray
    expect_equal(chunked_array$length(), 10L)
    expect_equal(chunked_array$null_count, 0L)
    expect_equal(chunked_array$as_vector(), expected)

    # Array
    chunks <- chunked_array$chunks
    expect_equal(length(chunks), chunked_array$num_chunks)
    for (i in seq_along(chunks)) {
      # `$chunk(i - 1L)` is expected to match the i-th element of `$chunks`
      expect_equal(chunked_array$chunk(i - 1L), chunks[[i]])
    }
  }

  # Columns are fetched by position, starting at 0 for `int`
  expect_column_round_trip(tab$column(0), tbl$int)
  expect_column_round_trip(tab$column(1), tbl$dbl)
  # The raw column is expected to come back as integers
  expect_column_round_trip(tab$column(2), as.integer(tbl$raw))

  tf <- tempfile()
  # Register the cleanup up front so the file is removed even when writing
  # or reading errors out before the end of the test.
  on.exit(unlink(tf), add = TRUE)
  write_feather(tbl, tf)

  res <- read_feather(tf)
  expect_identical(tbl$int, res$int)
  expect_identical(tbl$dbl, res$dbl)
  expect_identical(as.integer(tbl$raw), res$raw)
})
test_that("table round trip handles NA in integer and numeric", {
  # Same shape of data as the plain round-trip test, but with one NA in the
  # integer column and two in the double column.
  tbl <- tibble::tibble(
    int = c(NA, 2:10),
    dbl = as.numeric(c(1:5, NA, 7:9, NA)),
    raw = as.raw(1:10)
  )

  tab <- Table$create(!!!tbl)
  expect_equal(tab$num_columns, 3L)
  expect_equal(tab$num_rows, 10L)

  # NAs still count towards the column length ...
  expect_equal(tab$column(0)$length(), 10L)
  expect_equal(tab$column(1)$length(), 10L)
  expect_equal(tab$column(2)$length(), 10L)

  # ... and are reported as nulls
  expect_equal(tab$column(0)$null_count, 1L)
  expect_equal(tab$column(1)$null_count, 2L)
  expect_equal(tab$column(2)$null_count, 0L)

  # Arrow types expected for integer, double and raw vectors
  expect_equal(tab$column(0)$type, int32())
  expect_equal(tab$column(1)$type, float64())
  expect_equal(tab$column(2)$type, uint8())

  tf <- tempfile()
  # Register the cleanup up front so the file is removed even when writing
  # or reading errors out before the end of the test.
  on.exit(unlink(tf), add = TRUE)
  write_feather(tbl, tf)

  res <- read_feather(tf)
  expect_identical(tbl$int, res$int)
  expect_identical(tbl$dbl, res$dbl)
  # The raw column is expected to come back as integers
  expect_identical(as.integer(tbl$raw), res$raw)

  # The missing values must sit at the same positions as in the input
  expect_true(is.na(res$int[1]))
  expect_true(is.na(res$dbl[6]))
  expect_true(is.na(res$dbl[10]))
})
test_that("reading/writing a raw vector (sparklyr integration)", {
  # Mirrors the calls sparklyr effectively makes to move data to and from
  # Spark: open a stream reader on the raw bytes, pull the first record
  # batch, and convert it to a data.frame.
  first_batch_as_df <- function(raw_bytes) {
    reader <- RecordBatchStreamReader$create(raw_bytes)
    batch <- reader$read_next_batch()
    as.data.frame(batch)
  }

  serialized <- write_to_raw(example_data)
  expect_type(serialized, "raw")
  expect_identical(first_batch_as_df(serialized), example_data)

  # this could just be `read_ipc_stream(x)`; propose that
  expect_identical(read_ipc_stream(serialized), example_data)
})
|