From e6918187568dbd01842d8d1d2c808ce16a894239 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sun, 21 Apr 2024 13:54:28 +0200 Subject: Adding upstream version 18.2.2. Signed-off-by: Daniel Baumann --- src/arrow/r/R/dplyr-count.R | 60 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) create mode 100644 src/arrow/r/R/dplyr-count.R (limited to 'src/arrow/r/R/dplyr-count.R') diff --git a/src/arrow/r/R/dplyr-count.R b/src/arrow/r/R/dplyr-count.R new file mode 100644 index 000000000..c567c285f --- /dev/null +++ b/src/arrow/r/R/dplyr-count.R @@ -0,0 +1,60 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
# The following S3 methods are registered on load if dplyr is present

# Count rows (or sum a weight column) for an Arrow-backed dplyr query.
#
# Arguments:
#   x     The query/Dataset/tabular object to count.
#   ...   Optional grouping expressions; when supplied they are added to any
#         grouping already present on `x` (`.add = TRUE`).
#   wt    Optional weight column, forwarded unevaluated to `dplyr::tally()`.
#   sort  Forwarded to `dplyr::tally()`.
#   name  Forwarded to `dplyr::tally()`.
#
# Returns the result of `dplyr::tally()`, with `group_by_vars` reset to the
# grouping variables that `x` had on entry (when it had any).
count.arrow_dplyr_query <- function(x, ..., wt = NULL, sort = FALSE, name = NULL) {
  if (!missing(...)) {
    out <- dplyr::group_by(x, ..., .add = TRUE)
  } else {
    out <- x
  }
  out <- dplyr::tally(out, wt = {{ wt }}, sort = sort, name = name)

  # Restore original group vars
  gv <- dplyr::group_vars(x)
  if (length(gv) > 0) {
    out$group_by_vars <- gv
  }

  out
}

count.Dataset <- count.ArrowTabular <- count.arrow_dplyr_query

# Summarize an Arrow-backed dplyr query down to one count (or weighted sum)
# per existing group.
#
# Arguments:
#   x     The query/Dataset/tabular object to tally.
#   wt    Optional weight column. When it is NULL the output column is `n()`;
#         otherwise it is `sum(wt, na.rm = TRUE)`.
#   sort  If TRUE, the result is arranged by the output column, descending.
#   name  Requested output column name; it is passed through dplyr's internal
#         name-checking helper together with the group variables of `x`.
#
# Returns the summarized query, arranged when `sort` is TRUE.
#' @importFrom rlang sym :=
tally.arrow_dplyr_query <- function(x, wt = NULL, sort = FALSE, name = NULL) {
  # The helper is a dplyr internal, so its name is not stable across releases:
  # newer dplyr calls it `check_n_name`, older dplyr `check_name`. Prefer the
  # newer name when it exists and fall back to the older one otherwise.
  dplyr_ns <- asNamespace("dplyr")
  helper <- if (exists("check_n_name", envir = dplyr_ns, inherits = FALSE)) {
    "check_n_name"
  } else {
    "check_name"
  }
  check_name <- utils::getFromNamespace(helper, "dplyr")
  name <- check_name(name, dplyr::group_vars(x))

  if (quo_is_null(enquo(wt))) {
    out <- dplyr::summarize(x, !!name := n())
  } else {
    out <- dplyr::summarize(x, !!name := sum({{ wt }}, na.rm = TRUE))
  }

  if (sort) {
    dplyr::arrange(out, desc(!!sym(name)))
  } else {
    out
  }
}

tally.Dataset <- tally.ArrowTabular <- tally.arrow_dplyr_query

# we don't want to depend on dplyr, but we reference these above
utils::globalVariables(c("n", "desc"))