Diffstat:
-rw-r--r--  src/arrow/r/R/table.R  170
1 file changed, 170 insertions, 0 deletions
diff --git a/src/arrow/r/R/table.R b/src/arrow/r/R/table.R
new file mode 100644
index 000000000..5ae87f7e3
--- /dev/null
+++ b/src/arrow/r/R/table.R
@@ -0,0 +1,170 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+#' @include record-batch.R
+#' @title Table class
+#' @description A Table is a sequence of [chunked arrays][ChunkedArray]. It has
+#' a similar interface to [record batches][RecordBatch], but it can be
+#' composed from multiple record batches or chunked arrays.
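+#'
+#' For illustration, a minimal sketch of that composition (using the
+#' `record_batch()` and `arrow_table()` constructors from this package; the
+#' commented values are indicative only):
+#'
+#' ```r
+#' batch <- record_batch(x = 1:3, y = c("a", "b", "c"))
+#' tab <- arrow_table(batch, batch)  # one Table backed by two record batches
+#' tab$num_rows                      # 6
+#' tab[["x"]]$num_chunks             # 2: each batch contributes one chunk
+#' ```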
+#' @usage NULL
+#' @format NULL
+#' @docType class
+#'
+#' @section S3 Methods and Usage:
+#' Tables are data-frame-like, and many methods you expect to work on
+#' a `data.frame` are implemented for `Table`. This includes `[`, `[[`,
+#' `$`, `names`, `dim`, `nrow`, `ncol`, `head`, and `tail`. You can also pull
+#' the data from an Arrow table into R with `as.data.frame()`. See the
+#' examples.
+#'
+#' A caveat about the `$` method: because `Table` is an `R6` object,
+#' `$` is also used to access the object's methods (see below). Methods take
+#' precedence over the table's columns. So, `tab$Slice` would return the
+#' "Slice" method function even if there were a column in the table called
+#' "Slice".
+#'
+#' @section R6 Methods:
+#' In addition to the more R-friendly S3 methods, a `Table` object has
+#' the following R6 methods that map onto the underlying C++ methods:
+#'
+#' - `$column(i)`: Extract a `ChunkedArray` by integer position from the table
+#' - `$ColumnNames()`: Get all column names (called by `names(tab)`)
+#' - `$RenameColumns(value)`: Set all column names (called by `names(tab) <- value`)
+#' - `$GetColumnByName(name)`: Extract a `ChunkedArray` by string name
+#' - `$field(i)`: Extract a `Field` from the table schema by integer position
+#' - `$SelectColumns(indices)`: Return a new `Table` with the specified columns, expressed as 0-based integers.
+#' - `$Slice(offset, length = NULL)`: Create a zero-copy view starting at the
+#' indicated integer offset and going for the given length, or to the end
+#' of the table if `NULL`, the default.
+#' - `$Take(i)`: Return a `Table` with rows at positions given by
+#' integers `i`. If `i` is an Arrow `Array` or `ChunkedArray`, it will be
+#' coerced to an R vector before taking.
+#' - `$Filter(i, keep_na = TRUE)`: Return a `Table` with rows at positions where the logical
+#' vector or Arrow boolean-type `(Chunked)Array` `i` is `TRUE`.
+#' - `$SortIndices(names, descending = FALSE)`: Return an `Array` of integer row
+#' positions that can be used to rearrange the `Table` in ascending or descending
+#' order by the first named column, breaking ties with further named columns.
+#' `descending` can be a logical vector of length one or of the same length as
+#' `names`.
+#' - `$serialize(output_stream, ...)`: Write the table to the given
+#' [OutputStream]
+#' - `$cast(target_schema, safe = TRUE, options = cast_options(safe))`: Alter
+#' the schema of the table.
+#'
+#' There are also some active bindings:
+#' - `$num_columns`
+#' - `$num_rows`
+#' - `$schema`
+#' - `$metadata`: Returns the key-value metadata of the `Schema` as a named list.
+#' Modify or replace by assigning in (`tab$metadata <- new_metadata`).
+#' All list elements are coerced to string. See `schema()` for more information.
+#' - `$columns`: Returns a list of `ChunkedArray`s
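+#'
+#' A brief sketch of a few of these methods, using the table built in the
+#' Examples below (the commented values are indicative, not checked):
+#'
+#' ```r
+#' tbl <- arrow_table(name = rownames(mtcars), mtcars)
+#' tbl$Slice(0, 5)$num_rows       # 5: zero-copy view of the first five rows
+#' tbl$Take(c(0, 2, 4))$num_rows  # 3: rows at the given positions
+#' idx <- tbl$SortIndices("mpg", descending = TRUE)
+#' tbl$Take(idx)[["mpg"]]         # mpg as a ChunkedArray, in descending order
+#' ```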
+#' @rdname Table
+#' @name Table
+#' @export
+Table <- R6Class("Table",
+ inherit = ArrowTabular,
+ public = list(
+ column = function(i) Table__column(self, i),
+ ColumnNames = function() Table__ColumnNames(self),
+ RenameColumns = function(value) Table__RenameColumns(self, value),
+ GetColumnByName = function(name) {
+ assert_is(name, "character")
+ assert_that(length(name) == 1)
+ Table__GetColumnByName(self, name)
+ },
+ RemoveColumn = function(i) Table__RemoveColumn(self, i),
+ AddColumn = function(i, new_field, value) Table__AddColumn(self, i, new_field, value),
+ SetColumn = function(i, new_field, value) Table__SetColumn(self, i, new_field, value),
+ ReplaceSchemaMetadata = function(new) {
+ Table__ReplaceSchemaMetadata(self, new)
+ },
+ field = function(i) Table__field(self, i),
+ serialize = function(output_stream, ...) write_table(self, output_stream, ...),
+ to_data_frame = function() {
+ Table__to_dataframe(self, use_threads = option_use_threads())
+ },
+ cast = function(target_schema, safe = TRUE, ..., options = cast_options(safe, ...)) {
+ assert_is(target_schema, "Schema")
+ assert_that(identical(self$schema$names, target_schema$names), msg = "incompatible schemas")
+ Table__cast(self, target_schema, options)
+ },
+ SelectColumns = function(indices) Table__SelectColumns(self, indices),
+ Slice = function(offset, length = NULL) {
+ if (is.null(length)) {
+ Table__Slice1(self, offset)
+ } else {
+ Table__Slice2(self, offset, length)
+ }
+ },
+ # Take, Filter, and SortIndices are methods on ArrowTabular
+ Equals = function(other, check_metadata = FALSE, ...) {
+ inherits(other, "Table") && Table__Equals(self, other, isTRUE(check_metadata))
+ },
+ Validate = function() Table__Validate(self),
+ ValidateFull = function() Table__ValidateFull(self),
+ invalidate = function() {
+ .Call(`_arrow_Table__Reset`, self)
+ super$invalidate()
+ }
+ ),
+ active = list(
+ num_columns = function() Table__num_columns(self),
+ num_rows = function() Table__num_rows(self),
+ schema = function() Table__schema(self),
+ columns = function() Table__columns(self)
+ )
+)
+
+Table$create <- function(..., schema = NULL) {
+ dots <- list2(...)
+ # making sure there are always names
+ if (is.null(names(dots))) {
+ names(dots) <- rep_len("", length(dots))
+ }
+ stopifnot(length(dots) > 0)
+
+ if (all_record_batches(dots)) {
+ return(Table__from_record_batches(dots, schema))
+ }
+
+ # If any arrays are length 1, recycle them
+ dots <- recycle_scalars(dots)
+
+ Table__from_dots(dots, schema, option_use_threads())
+}
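+
+# A minimal usage sketch for Table$create() (comments only, not run; the
+# expected values below are illustrative assumptions, not tests):
+#
+#   Table$create(x = 1:4, y = 2)$num_rows   # 4: the length-1 `y` is recycled
+#   Table$create(record_batch(a = 1:3))     # a Table from a single RecordBatch
+#   Table$create(a = 1:3, schema = schema(a = int64()))   # with an explicit schema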
+
+#' @export
+names.Table <- function(x) x$ColumnNames()
+
+#' @param ... A `data.frame` or a named set of Arrays or vectors. If given a
+#' mixture of data.frames and named vectors, the inputs will be autospliced together
+#' (see examples). Alternatively, you can provide a single Arrow IPC
+#' `InputStream`, `Message`, `Buffer`, or R `raw` object containing a `Buffer`.
+#' @param schema a [Schema], or `NULL` (the default) to infer the schema from
+#' the data in `...`. When providing an Arrow IPC buffer, `schema` is required.
+#' @rdname Table
+#' @examplesIf arrow_available()
+#' tbl <- arrow_table(name = rownames(mtcars), mtcars)
+#' dim(tbl)
+#' dim(head(tbl))
+#' names(tbl)
+#' tbl$mpg
+#' tbl[["cyl"]]
+#' as.data.frame(tbl[4:8, c("gear", "hp", "wt")])
+#' @export
+arrow_table <- Table$create