summaryrefslogtreecommitdiffstats
path: root/src/arrow/r/R/array.R
diff options
context:
space:
mode:
Diffstat (limited to 'src/arrow/r/R/array.R')
-rw-r--r--src/arrow/r/R/array.R329
1 files changed, 329 insertions, 0 deletions
diff --git a/src/arrow/r/R/array.R b/src/arrow/r/R/array.R
new file mode 100644
index 000000000..46acc14ff
--- /dev/null
+++ b/src/arrow/r/R/array.R
@@ -0,0 +1,329 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+#' @include arrow-datum.R
+
+#' @title Arrow Arrays
+#' @description An `Array` is an immutable data array with some logical type
+#' and some length. Most logical types are contained in the base
+#' `Array` class; there are also subclasses for `DictionaryArray`, `ListArray`,
+#' and `StructArray`.
+#' @usage NULL
+#' @format NULL
+#' @docType class
+#'
+#' @section Factory:
+#' The `Array$create()` factory method instantiates an `Array` and
+#' takes the following arguments:
+#' * `x`: an R vector, list, or `data.frame`
+#' * `type`: an optional [data type][data-type] for `x`. If omitted, the type
+#' will be inferred from the data.
+#'
+#' `Array$create()` will return the appropriate subclass of `Array`, such as
+#' `DictionaryArray` when given an R factor.
+#'
+#' To compose a `DictionaryArray` directly, call `DictionaryArray$create()`,
+#' which takes two arguments:
+#' * `x`: an R vector or `Array` of integers for the dictionary indices
+#' * `dict`: an R vector or `Array` of dictionary values (like R factor levels
+#' but not limited to strings only)
+#' @section Usage:
+#'
+#' ```
+#' a <- Array$create(x)
+#' length(a)
+#'
+#' print(a)
+#' a == a
+#' ```
+#'
+#' @section Methods:
+#'
+#' - `$IsNull(i)`: Return true if value at index is null. Does not boundscheck
+#' - `$IsValid(i)`: Return true if value at index is valid. Does not boundscheck
+#' - `$length()`: Size in the number of elements this array contains
+#' - `$offset`: A relative position into another array's data, to enable zero-copy slicing
+#' - `$null_count`: The number of null entries in the array
+#' - `$type`: logical type of data
+#' - `$type_id()`: type id
+#' - `$Equals(other)` : is this array equal to `other`
+#' - `$ApproxEquals(other)` :
+#' - `$Diff(other)` : return a string expressing the difference between two arrays
+#' - `$data()`: return the underlying [ArrayData][ArrayData]
+#' - `$as_vector()`: convert to an R vector
+#' - `$ToString()`: string representation of the array
+#' - `$Slice(offset, length = NULL)`: Construct a zero-copy slice of the array
+#' with the indicated offset and length. If length is `NULL`, the slice goes
+#' until the end of the array.
+#' - `$Take(i)`: return an `Array` with values at positions given by integers
+#' (R vector or Array Array) `i`.
+#' - `$Filter(i, keep_na = TRUE)`: return an `Array` with values at positions where logical
+#' vector (or Arrow boolean Array) `i` is `TRUE`.
+#' - `$SortIndices(descending = FALSE)`: return an `Array` of integer positions that can be
+#' used to rearrange the `Array` in ascending or descending order
+#' - `$RangeEquals(other, start_idx, end_idx, other_start_idx)` :
+#' - `$cast(target_type, safe = TRUE, options = cast_options(safe))`: Alter the
+#' data in the array to change its type.
+#' - `$View(type)`: Construct a zero-copy view of this array with the given type.
+#' - `$Validate()` : Perform any validation checks to determine obvious inconsistencies
+#' within the array's internal data. This can be an expensive check, potentially `O(length)`
+#'
+#' @rdname array
+#' @name array
+#' @examplesIf arrow_available()
+#' my_array <- Array$create(1:10)
+#' my_array$type
+#' my_array$cast(int8())
+#'
+#' # Check if value is null; zero-indexed
+#' na_array <- Array$create(c(1:5, NA))
+#' na_array$IsNull(0)
+#' na_array$IsNull(5)
+#' na_array$IsValid(5)
+#' na_array$null_count
+#'
+#' # zero-copy slicing; the offset of the new Array will be the same as the index passed to $Slice
+#' new_array <- na_array$Slice(5)
+#' new_array$offset
+#'
+#' # Compare 2 arrays
+#' na_array2 <- na_array
+#' na_array2 == na_array # element-wise comparison
+#' na_array2$Equals(na_array) # overall comparison
+#' @export
+Array <- R6Class("Array",
+ inherit = ArrowDatum,
+ public = list(
+ IsNull = function(i) Array__IsNull(self, i),
+ IsValid = function(i) Array__IsValid(self, i),
+ length = function() Array__length(self),
+ type_id = function() Array__type_id(self),
+ Equals = function(other, ...) {
+ inherits(other, "Array") && Array__Equals(self, other)
+ },
+ ApproxEquals = function(other) {
+ inherits(other, "Array") && Array__ApproxEquals(self, other)
+ },
+ Diff = function(other) {
+ if (!inherits(other, "Array")) {
+ other <- Array$create(other)
+ }
+ Array__Diff(self, other)
+ },
+ data = function() Array__data(self),
+ as_vector = function() Array__as_vector(self),
+ ToString = function() {
+ typ <- paste0("<", self$type$ToString(), ">")
+ paste(typ, Array__ToString(self), sep = "\n")
+ },
+ Slice = function(offset, length = NULL) {
+ if (is.null(length)) {
+ Array__Slice1(self, offset)
+ } else {
+ Array__Slice2(self, offset, length)
+ }
+ },
+ Take = function(i) {
+ if (is.numeric(i)) {
+ i <- as.integer(i)
+ }
+ if (is.integer(i)) {
+ i <- Array$create(i)
+ }
+ call_function("take", self, i)
+ },
+ Filter = function(i, keep_na = TRUE) {
+ if (is.logical(i)) {
+ i <- Array$create(i)
+ }
+ assert_is(i, "Array")
+ call_function("filter", self, i, options = list(keep_na = keep_na))
+ },
+ SortIndices = function(descending = FALSE) {
+ assert_that(is.logical(descending))
+ assert_that(length(descending) == 1L)
+ assert_that(!is.na(descending))
+ call_function("array_sort_indices", self, options = list(order = descending))
+ },
+ RangeEquals = function(other, start_idx, end_idx, other_start_idx = 0L) {
+ assert_is(other, "Array")
+ Array__RangeEquals(self, other, start_idx, end_idx, other_start_idx)
+ },
+ View = function(type) {
+ Array$create(Array__View(self, as_type(type)))
+ },
+ Validate = function() Array__Validate(self),
+ export_to_c = function(array_ptr, schema_ptr) ExportArray(self, array_ptr, schema_ptr)
+ ),
+ active = list(
+ null_count = function() Array__null_count(self),
+ offset = function() Array__offset(self),
+ type = function() Array__type(self)
+ )
+)
+Array$create <- function(x, type = NULL) {
+ if (!is.null(type)) {
+ type <- as_type(type)
+ }
+ if (inherits(x, "Scalar")) {
+ out <- x$as_array()
+ if (!is.null(type)) {
+ out <- out$cast(type)
+ }
+ return(out)
+ }
+ vec_to_arrow(x, type)
+}
+#' @include arrowExports.R
+Array$import_from_c <- ImportArray
+
+#' @rdname array
+#' @usage NULL
+#' @format NULL
+#' @export
+DictionaryArray <- R6Class("DictionaryArray",
+ inherit = Array,
+ public = list(
+ indices = function() DictionaryArray__indices(self),
+ dictionary = function() DictionaryArray__dictionary(self)
+ ),
+ active = list(
+ ordered = function() self$type$ordered
+ )
+)
+DictionaryArray$create <- function(x, dict = NULL) {
+ if (is.factor(x)) {
+ # The simple case: converting a factor.
+ # Ignoring `dict`; should probably error if dict is not NULL
+ return(Array$create(x))
+ }
+
+ assert_that(!is.null(dict))
+ if (!is.Array(x)) {
+ x <- Array$create(x)
+ }
+ if (!is.Array(dict)) {
+ dict <- Array$create(dict)
+ }
+ type <- DictionaryType$create(x$type, dict$type)
+ DictionaryArray__FromArrays(type, x, dict)
+}
+
+#' @rdname array
+#' @usage NULL
+#' @format NULL
+#' @export
+StructArray <- R6Class("StructArray",
+ inherit = Array,
+ public = list(
+ field = function(i) StructArray__field(self, i),
+ GetFieldByName = function(name) StructArray__GetFieldByName(self, name),
+ Flatten = function() StructArray__Flatten(self)
+ )
+)
+
+
+#' @export
+`[[.StructArray` <- function(x, i, ...) {
+ if (is.character(i)) {
+ x$GetFieldByName(i)
+ } else if (is.numeric(i)) {
+ x$field(i - 1)
+ } else {
+ stop("'i' must be character or numeric, not ", class(i), call. = FALSE)
+ }
+}
+
+#' @export
+`$.StructArray` <- function(x, name, ...) {
+ assert_that(is.string(name))
+ if (name %in% ls(x)) {
+ get(name, x)
+ } else {
+ x$GetFieldByName(name)
+ }
+}
+
+#' @export
+names.StructArray <- function(x, ...) StructType__field_names(x$type)
+
+#' @export
+dim.StructArray <- function(x, ...) c(length(x), x$type$num_fields)
+
+#' @export
+as.data.frame.StructArray <- function(x, row.names = NULL, optional = FALSE, ...) {
+ as.vector(x)
+}
+
+#' @rdname array
+#' @usage NULL
+#' @format NULL
+#' @export
+ListArray <- R6Class("ListArray",
+ inherit = Array,
+ public = list(
+ values = function() ListArray__values(self),
+ value_length = function(i) ListArray__value_length(self, i),
+ value_offset = function(i) ListArray__value_offset(self, i),
+ raw_value_offsets = function() ListArray__raw_value_offsets(self)
+ ),
+ active = list(
+ value_type = function() ListArray__value_type(self)
+ )
+)
+
+#' @rdname array
+#' @usage NULL
+#' @format NULL
+#' @export
+LargeListArray <- R6Class("LargeListArray",
+ inherit = Array,
+ public = list(
+ values = function() LargeListArray__values(self),
+ value_length = function(i) LargeListArray__value_length(self, i),
+ value_offset = function(i) LargeListArray__value_offset(self, i),
+ raw_value_offsets = function() LargeListArray__raw_value_offsets(self)
+ ),
+ active = list(
+ value_type = function() LargeListArray__value_type(self)
+ )
+)
+
+#' @rdname array
+#' @usage NULL
+#' @format NULL
+#' @export
+FixedSizeListArray <- R6Class("FixedSizeListArray",
+ inherit = Array,
+ public = list(
+ values = function() FixedSizeListArray__values(self),
+ value_length = function(i) FixedSizeListArray__value_length(self, i),
+ value_offset = function(i) FixedSizeListArray__value_offset(self, i)
+ ),
+ active = list(
+ value_type = function() FixedSizeListArray__value_type(self),
+ list_size = function() self$type$list_size
+ )
+)
+
+is.Array <- function(x, type = NULL) { # nolint
+ is_it <- inherits(x, c("Array", "ChunkedArray"))
+ if (is_it && !is.null(type)) {
+ is_it <- x$type$ToString() %in% type
+ }
+ is_it
+}