1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
|
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# The following S3 methods are registered on load if dplyr is present
tbl_vars.arrow_dplyr_query <- function(x) names(x$selected_columns)
select.arrow_dplyr_query <- function(.data, ...) {
check_select_helpers(enexprs(...))
column_select(as_adq(.data), !!!enquos(...))
}
select.Dataset <- select.ArrowTabular <- select.arrow_dplyr_query
rename.arrow_dplyr_query <- function(.data, ...) {
check_select_helpers(enexprs(...))
column_select(as_adq(.data), !!!enquos(...), .FUN = vars_rename)
}
rename.Dataset <- rename.ArrowTabular <- rename.arrow_dplyr_query
column_select <- function(.data, ..., .FUN = vars_select) {
# .FUN is either tidyselect::vars_select or tidyselect::vars_rename
# It operates on the names() of selected_columns, i.e. the column names
# factoring in any renaming that may already have happened
out <- .FUN(names(.data), !!!enquos(...))
# Make sure that the resulting selected columns map back to the original data,
# as in when there are multiple renaming steps
.data$selected_columns <- set_names(.data$selected_columns[out], names(out))
# If we've renamed columns, we need to project that renaming into other
# query parameters we've collected
renamed <- out[names(out) != out]
if (length(renamed)) {
# Massage group_by
gbv <- .data$group_by_vars
renamed_groups <- gbv %in% renamed
gbv[renamed_groups] <- names(renamed)[match(gbv[renamed_groups], renamed)]
.data$group_by_vars <- gbv
# No need to massage filters because those contain references to Arrow objects
}
.data
}
relocate.arrow_dplyr_query <- function(.data, ..., .before = NULL, .after = NULL) {
# The code in this function is adapted from the code in dplyr::relocate.data.frame
# at https://github.com/tidyverse/dplyr/blob/master/R/relocate.R
# TODO: revisit this after https://github.com/tidyverse/dplyr/issues/5829
.data <- as_adq(.data)
# Assign the schema to the expressions
map(.data$selected_columns, ~ (.$schema <- .data$.data$schema))
# Create a mask for evaluating expressions in tidyselect helpers
mask <- new_environment(.cache$functions, parent = caller_env())
to_move <- eval_select(substitute(c(...)), .data$selected_columns, mask)
.before <- enquo(.before)
.after <- enquo(.after)
has_before <- !quo_is_null(.before)
has_after <- !quo_is_null(.after)
if (has_before && has_after) {
abort("Must supply only one of `.before` and `.after`.")
} else if (has_before) {
where <- min(unname(eval_select(quo_get_expr(.before), .data$selected_columns, mask)))
if (!where %in% to_move) {
to_move <- c(to_move, where)
}
} else if (has_after) {
where <- max(unname(eval_select(quo_get_expr(.after), .data$selected_columns, mask)))
if (!where %in% to_move) {
to_move <- c(where, to_move)
}
} else {
where <- 1L
if (!where %in% to_move) {
to_move <- c(to_move, where)
}
}
lhs <- setdiff(seq2(1, where - 1), to_move)
rhs <- setdiff(seq2(where + 1, length(.data$selected_columns)), to_move)
pos <- vec_unique(c(lhs, to_move, rhs))
new_names <- names(pos)
.data$selected_columns <- .data$selected_columns[pos]
if (!is.null(new_names)) {
names(.data$selected_columns)[new_names != ""] <- new_names[new_names != ""]
}
.data
}
relocate.Dataset <- relocate.ArrowTabular <- relocate.arrow_dplyr_query
check_select_helpers <- function(exprs) {
# Throw an error if unsupported tidyselect selection helpers in `exprs`
exprs <- lapply(exprs, function(x) if (is_quosure(x)) quo_get_expr(x) else x)
unsup_select_helpers <- "where"
funs_in_exprs <- unlist(lapply(exprs, all_funs))
unsup_funs <- funs_in_exprs[funs_in_exprs %in% unsup_select_helpers]
if (length(unsup_funs)) {
stop(
"Unsupported selection ",
ngettext(length(unsup_funs), "helper: ", "helpers: "),
oxford_paste(paste0(unsup_funs, "()"), quote = FALSE),
call. = FALSE
)
}
}
|