diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000 |
commit | e6918187568dbd01842d8d1d2c808ce16a894239 (patch) | |
tree | 64f88b554b444a49f656b6c656111a145cbbaa28 /src/arrow/r/R/dplyr-count.R | |
parent | Initial commit. (diff) | |
download | ceph-e6918187568dbd01842d8d1d2c808ce16a894239.tar.xz ceph-e6918187568dbd01842d8d1d2c808ce16a894239.zip |
Adding upstream version 18.2.2. (tag: upstream/18.2.2)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/arrow/r/R/dplyr-count.R')
-rw-r--r-- | src/arrow/r/R/dplyr-count.R | 60 |
1 files changed, 60 insertions, 0 deletions
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

# The following S3 methods are registered on load if dplyr is present

# count() method for Arrow queries.
#
# Any expressions passed in `...` are added to the existing grouping of `x`,
# the grouped query is handed to tally(), and the grouping that `x` had
# before the call is then put back on the result.
#
# Arguments:
#   x     the query to count.
#   ...   optional grouping expressions, forwarded to dplyr::group_by().
#   wt    optional weighting column, forwarded (unevaluated) to tally().
#   sort  forwarded to tally().
#   name  forwarded to tally().
# Returns the object produced by tally(), with `group_by_vars` reset to the
# group variables of `x` when `x` had any.
count.arrow_dplyr_query <- function(x, ..., wt = NULL, sort = FALSE, name = NULL) {
  if (!missing(...)) {
    # .add = TRUE keeps the groups already present on `x`
    out <- dplyr::group_by(x, ..., .add = TRUE)
  } else {
    out <- x
  }
  out <- dplyr::tally(out, wt = {{ wt }}, sort = sort, name = name)

  # Restore original group vars
  gv <- dplyr::group_vars(x)
  if (length(gv) > 0) {
    out$group_by_vars <- gv
  }

  out
}

count.Dataset <- count.ArrowTabular <- count.arrow_dplyr_query

# tally() method for Arrow queries.
#
# Summarizes `x` into a single column called `name`: the row count `n()`
# when `wt` is NULL, otherwise `sum(wt, na.rm = TRUE)`. When `sort` is true
# the result is arranged by that column in descending order.
#
# `name` is validated by a helper that lives inside dplyr's namespace and is
# not exported. That helper is believed to be called `check_n_name` from
# dplyr 1.1.0 onwards and `check_name` before that (TODO confirm against the
# dplyr sources), so both names are tried, newest first, instead of
# hard-coding one of them.
#' @importFrom rlang sym :=
tally.arrow_dplyr_query <- function(x, wt = NULL, sort = FALSE, name = NULL) {
  dplyr_ns <- asNamespace("dplyr")
  check_name <- NULL
  for (candidate in c("check_n_name", "check_name")) {
    if (exists(candidate, envir = dplyr_ns, mode = "function", inherits = FALSE)) {
      check_name <- get(candidate, envir = dplyr_ns, mode = "function", inherits = FALSE)
      break
    }
  }
  if (is.null(check_name)) {
    stop(
      "Could not find dplyr's internal count-name check ",
      "(`check_n_name` or `check_name`)",
      call. = FALSE
    )
  }
  name <- check_name(name, dplyr::group_vars(x))

  if (quo_is_null(enquo(wt))) {
    # No weights supplied: count rows
    out <- dplyr::summarize(x, !!name := n())
  } else {
    # Weighted tally: add up the weights, ignoring missing values
    out <- dplyr::summarize(x, !!name := sum({{ wt }}, na.rm = TRUE))
  }

  if (sort) {
    dplyr::arrange(out, desc(!!sym(name)))
  } else {
    out
  }
}

tally.Dataset <- tally.ArrowTabular <- tally.arrow_dplyr_query

# we don't want to depend on dplyr, but we reference these above
utils::globalVariables(c("n", "desc"))