# path: root/src/arrow/r/tests/testthat/latin1.R
# blob: 150192d3147ccb6890c83949bbc031729abbfa8d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

# Fixtures: a latin1-encoded string, a tibble whose character column, factor
# levels, and column names all derive from it, and a nested copy of that tibble.
# The non-ASCII letter is written as a \u escape and converted with an explicit
# `from`, so the resulting bytes do not depend on how this file is decoded or
# on the session's native encoding.
x <- iconv("Veitingasta\u00f0ir", from = "UTF-8", to = "latin1")
df <- tibble::tibble(
  chr = x,
  fct = as.factor(x)
)
# Prefix each column name with the latin1 string and convert the pasted result
# to latin1 again (the setup checks later in this file require latin1 names)
names(df) <- iconv(paste(x, names(df), sep = "_"), to = "latin1")
# Same data nested one level down, as a single data-frame column named `a`
df_struct <- tibble::tibble(a = df)

# One type per column of `df`, named with the same latin1 column names
raw_schema <- list(utf8(), dictionary(int8(), utf8()))
names(raw_schema) <- names(df)

# Sanity-check the fixtures before using them: the string itself, both column
# names, the character column, and the factor levels must be marked latin1
expect_identical(Encoding(x), "latin1")
expect_identical(Encoding(names(df)), rep("latin1", 2L))
expect_identical(Encoding(df[[1L]]), "latin1")
expect_identical(Encoding(levels(df[[2L]])), "latin1")

# Array: character vector, then the tibble (converted as a struct)
expect_identical(
  as.vector(Array$create(x)),
  x
)
expect_identical(
  as.vector(Array$create(df)),
  df
)

# ChunkedArray: character vector, then the tibble (converted as a struct)
expect_identical(
  as.vector(ChunkedArray$create(x)),
  x
)
expect_identical(
  as.vector(ChunkedArray$create(df)),
  df
)

# Table: flat and nested data, which also exercises the field names
expect_identical(
  as.data.frame(Table$create(df)),
  df
)
expect_identical(
  as.data.frame(Table$create(df_struct)),
  df_struct
)

# RecordBatch: flat and nested data
expect_identical(
  as.data.frame(record_batch(df)),
  df
)
expect_identical(
  as.data.frame(record_batch(df_struct)),
  df_struct
)

# Schema built from the named type list: field names must match the columns
df_schema <- do.call(schema, raw_schema)
expect_identical(names(df_schema), names(df))

# Schema with a single struct field `a` wrapping the same named types
df_struct_schema <- schema(a = do.call(struct, raw_schema))
# StructType doesn't expose names (in C++)
# expect_identical(names(df_struct_schema$a), names(df))

# Tables and record batches created with an explicit schema
expect_identical(
  as.data.frame(Table$create(df, schema = df_schema)),
  df
)
expect_identical(
  as.data.frame(Table$create(df_struct, schema = df_struct_schema)),
  df_struct
)
expect_identical(
  as.data.frame(record_batch(df, schema = df_schema)),
  df
)
expect_identical(
  as.data.frame(record_batch(df_struct, schema = df_struct_schema)),
  df_struct
)

# Serialization: write the fixtures to a temporary file, read them back, and
# remove the file once the comparison has been made
feather_file <- tempfile()
write_feather(df_struct, feather_file)
expect_identical(read_feather(feather_file), df_struct)
unlink(feather_file)

# Only exercised when arrow_with_parquet() reports Parquet support
if (arrow_with_parquet()) {
  parquet_file <- tempfile()
  # Parquet doesn't yet support nested types, so the flat `df` is written
  write_parquet(df, parquet_file)
  expect_identical(read_parquet(parquet_file), df)
  unlink(parquet_file)
}