From e6918187568dbd01842d8d1d2c808ce16a894239 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sun, 21 Apr 2024 13:54:28 +0200 Subject: Adding upstream version 18.2.2. Signed-off-by: Daniel Baumann --- src/arrow/r/man/RecordBatchReader.Rd | 86 ++++++++++++++++++++++++++++++++++++ 1 file changed, 86 insertions(+) create mode 100644 src/arrow/r/man/RecordBatchReader.Rd (limited to 'src/arrow/r/man/RecordBatchReader.Rd') diff --git a/src/arrow/r/man/RecordBatchReader.Rd b/src/arrow/r/man/RecordBatchReader.Rd new file mode 100644 index 000000000..90c796a66 --- /dev/null +++ b/src/arrow/r/man/RecordBatchReader.Rd @@ -0,0 +1,86 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/record-batch-reader.R +\docType{class} +\name{RecordBatchReader} +\alias{RecordBatchReader} +\alias{RecordBatchStreamReader} +\alias{RecordBatchFileReader} +\title{RecordBatchReader classes} +\description{ +Apache Arrow defines two formats for \href{https://arrow.apache.org/docs/format/Columnar.html#serialization-and-interprocess-communication-ipc}{serializing data for interprocess communication (IPC)}: +a "stream" format and a "file" format, known as Feather. +\code{RecordBatchStreamReader} and \code{RecordBatchFileReader} are +interfaces for accessing record batches from input sources in those formats, +respectively. + +For guidance on how to use these classes, see the examples section. +} +\section{Factory}{ + + +The \code{RecordBatchFileReader$create()} and \code{RecordBatchStreamReader$create()} +factory methods instantiate the object and +take a single argument, named according to the class: +\itemize{ +\item \code{file} A character file name, raw vector, or Arrow file connection object +(e.g. \link{RandomAccessFile}). +\item \code{stream} A raw vector, \link{Buffer}, or \link{InputStream}. +} +} + +\section{Methods}{ + +\itemize{ +\item \verb{$read_next_batch()}: Returns a \code{RecordBatch}, iterating through the +Reader. 
If there are no further batches in the Reader, it returns \code{NULL}. +\item \verb{$schema}: Returns a \link{Schema} (active binding) +\item \verb{$batches()}: Returns a list of \code{RecordBatch}es +\item \verb{$read_table()}: Collects the reader's \code{RecordBatch}es into a \link{Table} +\item \verb{$get_batch(i)}: For \code{RecordBatchFileReader}, return a particular batch +by an integer index. +\item \verb{$num_record_batches()}: For \code{RecordBatchFileReader}, see how many batches +are in the file. +} +} + +\examples{ +\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +tf <- tempfile() +on.exit(unlink(tf)) + +batch <- record_batch(chickwts) + +# This opens a connection to the file in Arrow +file_obj <- FileOutputStream$create(tf) +# Pass that to a RecordBatchWriter to write data conforming to a schema +writer <- RecordBatchFileWriter$create(file_obj, batch$schema) +writer$write(batch) +# You may write additional batches to the stream, provided that they have +# the same schema. +# Call "close" on the writer to indicate end-of-file/stream +writer$close() +# Then, close the connection--closing the IPC message does not close the file +file_obj$close() + +# Now, we have a file we can read from. 
Same pattern: open file connection, +# then pass it to a RecordBatchReader +read_file_obj <- ReadableFile$create(tf) +reader <- RecordBatchFileReader$create(read_file_obj) +# RecordBatchFileReader knows how many batches it has (StreamReader does not) +reader$num_record_batches +# We could consume the Reader by calling $read_next_batch() until all are +# consumed, or we can call $read_table() to pull them all into a Table +tab <- reader$read_table() +# Call as.data.frame to turn that Table into an R data.frame +df <- as.data.frame(tab) +# This should be the same data we sent +all.equal(df, chickwts, check.attributes = FALSE) +# Unlike the Writers, we don't have to close RecordBatchReaders, +# but we do still need to close the file connection +read_file_obj$close() +\dontshow{\}) # examplesIf} +} +\seealso{ +\code{\link[=read_ipc_stream]{read_ipc_stream()}} and \code{\link[=read_feather]{read_feather()}} provide a much simpler interface +for reading data from these formats and are sufficient for many use cases. +} -- cgit v1.2.3