diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000 |
commit | e6918187568dbd01842d8d1d2c808ce16a894239 (patch) | |
tree | 64f88b554b444a49f656b6c656111a145cbbaa28 /src/arrow/r/R/dplyr-group-by.R | |
parent | Initial commit. (diff) | |
download | ceph-b26c4052f3542036551aa9dec9caa4226e456195.tar.xz ceph-b26c4052f3542036551aa9dec9caa4226e456195.zip |
Adding upstream version 18.2.2.upstream/18.2.2
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/arrow/r/R/dplyr-group-by.R')
-rw-r--r-- | src/arrow/r/R/dplyr-group-by.R | 86 |
1 files changed, 86 insertions, 0 deletions
diff --git a/src/arrow/r/R/dplyr-group-by.R b/src/arrow/r/R/dplyr-group-by.R new file mode 100644 index 000000000..66b867210 --- /dev/null +++ b/src/arrow/r/R/dplyr-group-by.R @@ -0,0 +1,86 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + + +# The following S3 methods are registered on load if dplyr is present + +group_by.arrow_dplyr_query <- function(.data, + ..., + .add = FALSE, + add = .add, + .drop = dplyr::group_by_drop_default(.data)) { + .data <- as_adq(.data) + new_groups <- enquos(...) + # ... can contain expressions (i.e. can add (or rename?) columns) and so we + # need to identify those and add them on to the query with mutate. Specifically, + # we want to mark as new: + # * expressions (named or otherwise) + # * variables that have new names + # All others (i.e. simple references to variables) should not be (re)-added + + # Identify any groups with names which aren't in names of .data + new_group_ind <- map_lgl(new_groups, ~ !(quo_name(.x) %in% names(.data))) + # Identify any groups which don't have names + named_group_ind <- map_lgl(names(new_groups), nzchar) + # Retain any new groups identified above + new_groups <- new_groups[new_group_ind | named_group_ind] + if (length(new_groups)) { + # now either use the name that was given in ... or if that is "" then use the expr + names(new_groups) <- imap_chr(new_groups, ~ ifelse(.y == "", quo_name(.x), .y)) + + # Add them to the data + .data <- dplyr::mutate(.data, !!!new_groups) + } + if (".add" %in% names(formals(dplyr::group_by))) { + # For compatibility with dplyr >= 1.0 + gv <- dplyr::group_by_prepare(.data, ..., .add = .add)$group_names + } else { + gv <- dplyr::group_by_prepare(.data, ..., add = add)$group_names + } + .data$group_by_vars <- gv + .data$drop_empty_groups <- ifelse(length(gv), .drop, dplyr::group_by_drop_default(.data)) + .data +} +group_by.Dataset <- group_by.ArrowTabular <- group_by.arrow_dplyr_query + +groups.arrow_dplyr_query <- function(x) syms(dplyr::group_vars(x)) +groups.Dataset <- groups.ArrowTabular <- function(x) NULL + +group_vars.arrow_dplyr_query <- function(x) x$group_by_vars +group_vars.Dataset <- function(x) NULL +group_vars.RecordBatchReader <- function(x) NULL +group_vars.ArrowTabular <- function(x) { + x$r_metadata$attributes$.group_vars +} + +# the logical literal in the two functions below controls the default value of +# the .drop argument to group_by() +group_by_drop_default.arrow_dplyr_query <- + function(.tbl) .tbl$drop_empty_groups %||% TRUE +group_by_drop_default.Dataset <- group_by_drop_default.ArrowTabular <- + function(.tbl) TRUE + +ungroup.arrow_dplyr_query <- function(x, ...) { + x$group_by_vars <- character() + x$drop_empty_groups <- NULL + x +} +ungroup.Dataset <- force +ungroup.ArrowTabular <- function(x) { + x$r_metadata$attributes$.group_vars <- NULL + x +} |