summaryrefslogtreecommitdiffstats
path: root/src/arrow/r/R/dplyr-arrange.R
blob: 4c8c687a3cbc701504b52aaa658e086b367a7206 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.


# The following S3 methods are registered on load if dplyr is present

arrange.arrow_dplyr_query <- function(.data, ..., .by_group = FALSE) {
  call <- match.call()
  exprs <- quos(...)
  if (.by_group) {
    # when the data is is grouped and .by_group is TRUE, order the result by
    # the grouping columns first
    exprs <- c(quos(!!!dplyr::groups(.data)), exprs)
  }
  if (length(exprs) == 0) {
    # Nothing to do
    return(.data)
  }
  .data <- as_adq(.data)
  # find and remove any dplyr::desc() and tidy-eval
  # the arrange expressions inside an Arrow data_mask
  sorts <- vector("list", length(exprs))
  descs <- logical(0)
  mask <- arrow_mask(.data)
  for (i in seq_along(exprs)) {
    x <- find_and_remove_desc(exprs[[i]])
    exprs[[i]] <- x[["quos"]]
    sorts[[i]] <- arrow_eval(exprs[[i]], mask)
    names(sorts)[i] <- format_expr(exprs[[i]])
    if (inherits(sorts[[i]], "try-error")) {
      msg <- paste("Expression", names(sorts)[i], "not supported in Arrow")
      return(abandon_ship(call, .data, msg))
    }
    descs[i] <- x[["desc"]]
  }
  .data$arrange_vars <- c(sorts, .data$arrange_vars)
  .data$arrange_desc <- c(descs, .data$arrange_desc)
  .data
}
arrange.Dataset <- arrange.ArrowTabular <- arrange.arrow_dplyr_query

# Helper to handle desc() in arrange()
# * Takes a quosure as input
# * Returns a list with two elements:
#   1. The quosure with any wrapping parentheses and desc() removed
#   2. A logical value indicating whether desc() was found
# * Performs some other validation
find_and_remove_desc <- function(quosure) {
  expr <- quo_get_expr(quosure)
  descending <- FALSE
  if (length(all.vars(expr)) < 1L) {
    stop(
      "Expression in arrange() does not contain any field names: ",
      deparse(expr),
      call. = FALSE
    )
  }
  # Use a while loop to remove any number of nested pairs of enclosing
  # parentheses and any number of nested desc() calls. In the case of multiple
  # nested desc() calls, each one toggles the sort order.
  while (identical(typeof(expr), "language") && is.call(expr)) {
    if (identical(expr[[1]], quote(`(`))) {
      # remove enclosing parentheses
      expr <- expr[[2]]
    } else if (identical(expr[[1]], quote(desc))) {
      # ensure desc() has only one argument (when an R expression is a function
      # call, length == 2 means it has exactly one argument)
      if (length(expr) > 2) {
        stop("desc() expects only one argument", call. = FALSE)
      }
      # remove desc() and toggle descending
      expr <- expr[[2]]
      descending <- !descending
    } else {
      break
    }
  }
  return(
    list(
      quos = quo_set_expr(quosure, expr),
      desc = descending
    )
  )
}