# Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information # regarding copyright ownership. The ASF licenses this file # to you under the Apache License, Version 2.0 (the # "License"); you may not use this file except in compliance # with the License. You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, # software distributed under the License is distributed on an # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. #' @include arrow-package.R #' @title class arrow::DataType #' #' @usage NULL #' @format NULL #' @docType class #' #' @section Methods: #' #' TODO #' #' @rdname DataType #' @name DataType DataType <- R6Class("DataType", inherit = ArrowObject, public = list( ToString = function() { DataType__ToString(self) }, Equals = function(other, ...) { inherits(other, "DataType") && DataType__Equals(self, other) }, fields = function() { DataType__fields(self) }, export_to_c = function(ptr) ExportType(self, ptr) ), active = list( id = function() DataType__id(self), name = function() DataType__name(self), num_fields = function() DataType__num_fields(self) ) ) #' @include arrowExports.R DataType$import_from_c <- ImportType INTEGER_TYPES <- as.character(outer(c("uint", "int"), c(8, 16, 32, 64), paste0)) FLOAT_TYPES <- c("float16", "float32", "float64", "halffloat", "float", "double") #' infer the arrow Array type from an R vector #' #' @param x an R vector #' #' @return an arrow logical type #' @examplesIf arrow_available() #' type(1:10) #' type(1L:10L) #' type(c(1, 1.5, 2)) #' type(c("A", "B", "C")) #' type(mtcars) #' type(Sys.Date()) #' @export type <- function(x) UseMethod("type") #' @export type.default <- function(x) Array__infer_type(x) #' @export type.ArrowDatum <- function(x) x$type #----- metadata #' @title class arrow::FixedWidthType #' #' @usage NULL #' @format NULL #' @docType class #' #' @section Methods: #' #' TODO #' #' @rdname FixedWidthType #' @name FixedWidthType FixedWidthType <- R6Class("FixedWidthType", inherit = DataType, active = list( bit_width = function() FixedWidthType__bit_width(self) ) ) Int8 <- R6Class("Int8", inherit = FixedWidthType) Int16 <- R6Class("Int16", inherit = FixedWidthType) Int32 <- R6Class("Int32", inherit = FixedWidthType) Int64 <- R6Class("Int64", inherit = FixedWidthType) UInt8 <- R6Class("UInt8", inherit = FixedWidthType) UInt16 <- R6Class("UInt16", inherit = FixedWidthType) UInt32 <- R6Class("UInt32", inherit = FixedWidthType) UInt64 <- R6Class("UInt64", inherit = FixedWidthType) Float16 <- R6Class("Float16", inherit = FixedWidthType) Float32 <- R6Class("Float32", inherit = FixedWidthType) Float64 <- R6Class("Float64", inherit = FixedWidthType) Boolean <- R6Class("Boolean", inherit = FixedWidthType) Utf8 <- R6Class("Utf8", inherit = DataType) LargeUtf8 <- R6Class("LargeUtf8", inherit = DataType) Binary <- R6Class("Binary", inherit = DataType) FixedSizeBinary <- R6Class("FixedSizeBinary", inherit = FixedWidthType) LargeBinary <- R6Class("LargeBinary", inherit = DataType) DateType <- R6Class("DateType", inherit = FixedWidthType, public = list( unit = function() DateType__unit(self) ) ) Date32 <- R6Class("Date32", inherit = DateType) Date64 <- R6Class("Date64", inherit = DateType) TimeType <- R6Class("TimeType", inherit = FixedWidthType, public = list( unit = function() TimeType__unit(self) ) ) Time32 <- R6Class("Time32", inherit = TimeType) Time64 <- R6Class("Time64", inherit = TimeType) Null <- R6Class("Null", inherit = DataType) Timestamp <- R6Class("Timestamp", inherit = FixedWidthType, public = list( timezone = function() TimestampType__timezone(self), unit = function() TimestampType__unit(self) ) ) DecimalType <- R6Class("DecimalType", inherit = FixedWidthType, public = list( precision = function() DecimalType__precision(self), scale = function() DecimalType__scale(self) ) ) Decimal128Type <- R6Class("Decimal128Type", inherit = DecimalType) NestedType <- R6Class("NestedType", inherit = DataType) #' Apache Arrow data types #' #' These functions create type objects corresponding to Arrow types. Use them #' when defining a [schema()] or as inputs to other types, like `struct`. Most #' of these functions don't take arguments, but a few do. #' #' A few functions have aliases: #' #' * `utf8()` and `string()` #' * `float16()` and `halffloat()` #' * `float32()` and `float()` #' * `bool()` and `boolean()` #' * When called inside an `arrow` function, such as `schema()` or `cast()`, #' `double()` also is supported as a way of creating a `float64()` #' #' `date32()` creates a datetime type with a "day" unit, like the R `Date` #' class. `date64()` has a "ms" unit. #' #' `uint32` (32 bit unsigned integer), `uint64` (64 bit unsigned integer), and #' `int64` (64-bit signed integer) types may contain values that exceed the #' range of R's `integer` type (32-bit signed integer). When these arrow objects #' are translated to R objects, `uint32` and `uint64` are converted to `double` #' ("numeric") and `int64` is converted to `bit64::integer64`. For `int64` #' types, this conversion can be disabled (so that `int64` always yields a #' `bit64::integer64` object) by setting `options(arrow.int64_downcast = #' FALSE)`. #' #' @param unit For time/timestamp types, the time unit. `time32()` can take #' either "s" or "ms", while `time64()` can be "us" or "ns". `timestamp()` can #' take any of those four values. #' @param timezone For `timestamp()`, an optional time zone string. #' @param byte_width byte width for `FixedSizeBinary` type. #' @param list_size list size for `FixedSizeList` type. #' @param precision For `decimal()`, precision #' @param scale For `decimal()`, scale #' @param type For `list_of()`, a data type to make a list-of-type #' @param ... For `struct()`, a named list of types to define the struct columns #' #' @name data-type #' @return An Arrow type object inheriting from DataType. #' @export #' @seealso [dictionary()] for creating a dictionary (factor-like) type. #' @examplesIf arrow_available() #' bool() #' struct(a = int32(), b = double()) #' timestamp("ms", timezone = "CEST") #' time64("ns") int8 <- function() Int8__initialize() #' @rdname data-type #' @export int16 <- function() Int16__initialize() #' @rdname data-type #' @export int32 <- function() Int32__initialize() #' @rdname data-type #' @export int64 <- function() Int64__initialize() #' @rdname data-type #' @export uint8 <- function() UInt8__initialize() #' @rdname data-type #' @export uint16 <- function() UInt16__initialize() #' @rdname data-type #' @export uint32 <- function() UInt32__initialize() #' @rdname data-type #' @export uint64 <- function() UInt64__initialize() #' @rdname data-type #' @export float16 <- function() Float16__initialize() #' @rdname data-type #' @export halffloat <- float16 #' @rdname data-type #' @export float32 <- function() Float32__initialize() #' @rdname data-type #' @export float <- float32 #' @rdname data-type #' @export float64 <- function() Float64__initialize() #' @rdname data-type #' @export boolean <- function() Boolean__initialize() #' @rdname data-type #' @export bool <- boolean #' @rdname data-type #' @export utf8 <- function() Utf8__initialize() #' @rdname data-type #' @export large_utf8 <- function() LargeUtf8__initialize() #' @rdname data-type #' @export binary <- function() Binary__initialize() #' @rdname data-type #' @export large_binary <- function() LargeBinary__initialize() #' @rdname data-type #' @export fixed_size_binary <- function(byte_width) FixedSizeBinary__initialize(byte_width) #' @rdname data-type #' @export string <- utf8 #' @rdname data-type #' @export date32 <- function() Date32__initialize() #' @rdname data-type #' @export date64 <- function() Date64__initialize() #' @rdname data-type #' @export time32 <- function(unit = c("ms", "s")) { if (is.character(unit)) { unit <- match.arg(unit) } unit <- make_valid_time_unit(unit, valid_time32_units) Time32__initialize(unit) } valid_time32_units <- c( "ms" = TimeUnit$MILLI, "s" = TimeUnit$SECOND ) valid_time64_units <- c( "ns" = TimeUnit$NANO, "us" = TimeUnit$MICRO ) make_valid_time_unit <- function(unit, valid_units) { if (is.character(unit)) { unit <- valid_units[match.arg(unit, choices = names(valid_units))] } if (is.numeric(unit)) { # Allow non-integer input for convenience unit <- as.integer(unit) } else { stop('"unit" should be one of ', oxford_paste(names(valid_units), "or"), call. = FALSE) } if (!(unit %in% valid_units)) { stop('"unit" should be one of ', oxford_paste(valid_units, "or"), call. = FALSE) } unit } #' @rdname data-type #' @export time64 <- function(unit = c("ns", "us")) { if (is.character(unit)) { unit <- match.arg(unit) } unit <- make_valid_time_unit(unit, valid_time64_units) Time64__initialize(unit) } #' @rdname data-type #' @export null <- function() Null__initialize() #' @rdname data-type #' @export timestamp <- function(unit = c("s", "ms", "us", "ns"), timezone = "") { if (is.character(unit)) { unit <- match.arg(unit) } unit <- make_valid_time_unit(unit, c(valid_time64_units, valid_time32_units)) assert_that(is.string(timezone)) Timestamp__initialize(unit, timezone) } #' @rdname data-type #' @export decimal <- function(precision, scale) { if (is.numeric(precision)) { precision <- as.integer(precision) } else { stop('"precision" must be an integer', call. = FALSE) } if (is.numeric(scale)) { scale <- as.integer(scale) } else { stop('"scale" must be an integer', call. = FALSE) } Decimal128Type__initialize(precision, scale) } StructType <- R6Class("StructType", inherit = NestedType, public = list( GetFieldByName = function(name) StructType__GetFieldByName(self, name), GetFieldIndex = function(name) StructType__GetFieldIndex(self, name) ) ) StructType$create <- function(...) struct__(.fields(list(...))) #' @rdname data-type #' @export struct <- StructType$create ListType <- R6Class("ListType", inherit = NestedType, active = list( value_field = function() ListType__value_field(self), value_type = function() ListType__value_type(self) ) ) #' @rdname data-type #' @export list_of <- function(type) list__(type) LargeListType <- R6Class("LargeListType", inherit = NestedType, active = list( value_field = function() LargeListType__value_field(self), value_type = function() LargeListType__value_type(self) ) ) #' @rdname data-type #' @export large_list_of <- function(type) large_list__(type) #' @rdname data-type #' @export FixedSizeListType <- R6Class("FixedSizeListType", inherit = NestedType, active = list( value_field = function() FixedSizeListType__value_field(self), value_type = function() FixedSizeListType__value_type(self), list_size = function() FixedSizeListType__list_size(self) ) ) #' @rdname data-type #' @export fixed_size_list_of <- function(type, list_size) fixed_size_list__(type, list_size) as_type <- function(type, name = "type") { # magic so we don't have to mask base::double() if (identical(type, double())) { type <- float64() } if (!inherits(type, "DataType")) { stop(name, " must be a DataType, not ", class(type), call. = FALSE) } type } canonical_type_str <- function(type_str) { # canonicalizes data type strings, converting data type function names and # aliases to match the strings returned by DataType$ToString() assert_that(is.string(type_str)) if (grepl("[([<]", type_str)) { stop("Cannot interpret string representations of data types that have parameters", call. = FALSE) } switch(type_str, int8 = "int8", int16 = "int16", int32 = "int32", int64 = "int64", uint8 = "uint8", uint16 = "uint16", uint32 = "uint32", uint64 = "uint64", float16 = "halffloat", halffloat = "halffloat", float32 = "float", float = "float", float64 = "double", double = "double", boolean = "bool", bool = "bool", utf8 = "string", large_utf8 = "large_string", large_string = "large_string", binary = "binary", large_binary = "large_binary", fixed_size_binary = "fixed_size_binary", string = "string", date32 = "date32", date64 = "date64", time32 = "time32", time64 = "time64", null = "null", timestamp = "timestamp", decimal = "decimal128", struct = "struct", list_of = "list", list = "list", large_list_of = "large_list", large_list = "large_list", fixed_size_list_of = "fixed_size_list", fixed_size_list = "fixed_size_list", stop("Unrecognized string representation of data type", call. = FALSE) ) } # vctrs support ----------------------------------------------------------- str_dup <- function(x, times) { paste0(rep(x, times = times), collapse = "") } indent <- function(x, n) { pad <- str_dup(" ", n) sapply(x, gsub, pattern = "(\n+)", replacement = paste0("\\1", pad)) } #' @importFrom vctrs vec_ptype_full vec_ptype_abbr #' @export vec_ptype_full.arrow_fixed_size_binary <- function(x, ...) { paste0("fixed_size_binary<", attr(x, "byte_width"), ">") } #' @export vec_ptype_full.arrow_list <- function(x, ...) { param <- vec_ptype_full(attr(x, "ptype")) if (grepl("\n", param)) { param <- paste0(indent(paste0("\n", param), 2), "\n") } paste0("list<", param, ">") } #' @export vec_ptype_full.arrow_large_list <- function(x, ...) { param <- vec_ptype_full(attr(x, "ptype")) if (grepl("\n", param)) { param <- paste0(indent(paste0("\n", param), 2), "\n") } paste0("large_list<", param, ">") } #' @export vec_ptype_full.arrow_fixed_size_list <- function(x, ...) { param <- vec_ptype_full(attr(x, "ptype")) if (grepl("\n", param)) { param <- paste0(indent(paste0("\n", param), 2), "\n") } paste0("fixed_size_list<", param, ", ", attr(x, "list_size"), ">") } #' @export vec_ptype_abbr.arrow_fixed_size_binary <- function(x, ...) { vec_ptype_full(x, ...) } #' @export vec_ptype_abbr.arrow_list <- function(x, ...) { vec_ptype_full(x, ...) } #' @export vec_ptype_abbr.arrow_large_list <- function(x, ...) { vec_ptype_full(x, ...) } #' @export vec_ptype_abbr.arrow_fixed_size_list <- function(x, ...) { vec_ptype_full(x, ...) }