summaryrefslogtreecommitdiffstats
path: root/src/arrow/r/R/dplyr-count.R
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-21 11:54:28 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-21 11:54:28 +0000
commite6918187568dbd01842d8d1d2c808ce16a894239 (patch)
tree64f88b554b444a49f656b6c656111a145cbbaa28 /src/arrow/r/R/dplyr-count.R
parentInitial commit. (diff)
downloadceph-e6918187568dbd01842d8d1d2c808ce16a894239.tar.xz
ceph-e6918187568dbd01842d8d1d2c808ce16a894239.zip
Adding upstream version 18.2.2.upstream/18.2.2
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/arrow/r/R/dplyr-count.R')
-rw-r--r--src/arrow/r/R/dplyr-count.R60
1 files changed, 60 insertions, 0 deletions
diff --git a/src/arrow/r/R/dplyr-count.R b/src/arrow/r/R/dplyr-count.R
new file mode 100644
index 000000000..c567c285f
--- /dev/null
+++ b/src/arrow/r/R/dplyr-count.R
@@ -0,0 +1,60 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# The following S3 methods are registered on load if dplyr is present
+
+# count() method for Arrow queries.
+#
+# Any expressions passed in `...` are added (via group_by(.add = TRUE)) to the
+# grouping of `x`, the rows are tallied with dplyr::tally(), and the group
+# variables that `x` had on entry are written back onto the result.
+#
+# x:    the object to count
+# ...:  optional grouping expressions, forwarded to dplyr::group_by()
+# wt:   optional weighting expression, forwarded unevaluated to dplyr::tally()
+# sort: forwarded to dplyr::tally()
+# name: forwarded to dplyr::tally()
+count.arrow_dplyr_query <- function(x, ..., wt = NULL, sort = FALSE, name = NULL) {
+  grouped <- if (missing(...)) {
+    x
+  } else {
+    dplyr::group_by(x, ..., .add = TRUE)
+  }
+  counted <- dplyr::tally(grouped, wt = {{ wt }}, sort = sort, name = name)
+
+  # Nothing to restore when the input carried no group variables
+  original_groups <- dplyr::group_vars(x)
+  if (length(original_groups) == 0) {
+    return(counted)
+  }
+
+  # Put back the grouping the caller originally had on `x`
+  counted$group_by_vars <- original_groups
+  counted
+}
+
+# Dataset and ArrowTabular objects reuse the arrow_dplyr_query implementation
+count.ArrowTabular <- count.arrow_dplyr_query
+count.Dataset <- count.arrow_dplyr_query
+
+# tally() method for Arrow queries: summarizes `x` into a single count column.
+#
+# x:    the object to summarize
+# wt:   optional (unquoted) weighting expression. When it is NULL the column
+#       is computed as n(); otherwise as sum(wt, na.rm = TRUE).
+# sort: if TRUE, the result is arranged by the count column in descending
+#       order; otherwise the summarized result is returned as is.
+# name: requested name of the count column. It is passed, together with the
+#       group variables of `x`, through dplyr's internal name-checking helper
+#       and the helper's return value is used as the column name.
+#' @importFrom rlang sym :=
+tally.arrow_dplyr_query <- function(x, wt = NULL, sort = FALSE, name = NULL) {
+  # The name checker is a private dplyr function, so its name is not stable
+  # across releases: older dplyr calls it check_name(), newer dplyr calls it
+  # check_n_name() (NOTE(review): rename believed to be in dplyr 1.1.0 --
+  # confirm). Look up whichever one the installed dplyr provides instead of
+  # failing in getFromNamespace() on a missing object.
+  dplyr_ns <- asNamespace("dplyr")
+  checker <- if (exists("check_n_name", envir = dplyr_ns, inherits = FALSE)) {
+    "check_n_name"
+  } else {
+    "check_name"
+  }
+  check_name <- utils::getFromNamespace(checker, "dplyr")
+  name <- check_name(name, dplyr::group_vars(x))
+
+  if (quo_is_null(enquo(wt))) {
+    # Unweighted: plain row count
+    out <- dplyr::summarize(x, !!name := n())
+  } else {
+    # Weighted: sum of the weights, ignoring missing values
+    out <- dplyr::summarize(x, !!name := sum({{ wt }}, na.rm = TRUE))
+  }
+
+  if (sort) {
+    dplyr::arrange(out, desc(!!sym(name)))
+  } else {
+    out
+  }
+}
+
+# Dataset and ArrowTabular objects reuse the arrow_dplyr_query implementation
+tally.ArrowTabular <- tally.arrow_dplyr_query
+tally.Dataset <- tally.arrow_dplyr_query
+
+# We don't want to depend on dplyr, but the methods above reference these
+# names unqualified, so declare them as known global variables.
+utils::globalVariables(
+  names = c("n", "desc")
+)