summaryrefslogtreecommitdiffstats
path: root/src/arrow/r/man/RecordBatchReader.Rd
diff options
context:
space:
mode:
Diffstat (limited to 'src/arrow/r/man/RecordBatchReader.Rd')
-rw-r--r--src/arrow/r/man/RecordBatchReader.Rd86
1 files changed, 86 insertions, 0 deletions
diff --git a/src/arrow/r/man/RecordBatchReader.Rd b/src/arrow/r/man/RecordBatchReader.Rd
new file mode 100644
index 000000000..90c796a66
--- /dev/null
+++ b/src/arrow/r/man/RecordBatchReader.Rd
@@ -0,0 +1,86 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/record-batch-reader.R
+\docType{class}
+\name{RecordBatchReader}
+\alias{RecordBatchReader}
+\alias{RecordBatchStreamReader}
+\alias{RecordBatchFileReader}
+\title{RecordBatchReader classes}
+\description{
+Apache Arrow defines two formats for \href{https://arrow.apache.org/docs/format/Columnar.html#serialization-and-interprocess-communication-ipc}{serializing data for interprocess communication (IPC)}:
+a "stream" format and a "file" format, known as Feather.
+\code{RecordBatchStreamReader} and \code{RecordBatchFileReader} are
+interfaces for accessing record batches from input sources in those formats,
+respectively.
+
+For guidance on how to use these classes, see the examples section.
+}
+\section{Factory}{
+
+
+The \code{RecordBatchFileReader$create()} and \code{RecordBatchStreamReader$create()}
+factory methods instantiate the object and
+take a single argument, named according to the class:
+\itemize{
+\item \code{file} A character file name, raw vector, or Arrow file connection object
+(e.g. \link{RandomAccessFile}).
+\item \code{stream} A raw vector, \link{Buffer}, or \link{InputStream}.
+}
+}
+
+\section{Methods}{
+
+\itemize{
+\item \verb{$read_next_batch()}: Returns a \code{RecordBatch}, iterating through the
+Reader. If there are no further batches in the Reader, it returns \code{NULL}.
+\item \verb{$schema}: Returns a \link{Schema} (active binding)
+\item \verb{$batches()}: Returns a list of \code{RecordBatch}es
+\item \verb{$read_table()}: Collects the reader's \code{RecordBatch}es into a \link{Table}
+\item \verb{$get_batch(i)}: For \code{RecordBatchFileReader}, returns a particular batch
+by an integer index.
+\item \verb{$num_record_batches}: For \code{RecordBatchFileReader}, returns the number of
+batches in the file (active binding)
+}
+}
+
+\examples{
+\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+tf <- tempfile()
+on.exit(unlink(tf))
+
+batch <- record_batch(chickwts)
+
+# This opens a connection to the file in Arrow
+file_obj <- FileOutputStream$create(tf)
+# Pass that to a RecordBatchWriter to write data conforming to a schema
+writer <- RecordBatchFileWriter$create(file_obj, batch$schema)
+writer$write(batch)
+# You may write additional batches to the stream, provided that they have
+# the same schema.
+# Call "close" on the writer to indicate end-of-file/stream
+writer$close()
+# Then, close the connection--closing the writer does not close the file
+file_obj$close()
+
+# Now, we have a file we can read from. Same pattern: open file connection,
+# then pass it to a RecordBatchReader
+read_file_obj <- ReadableFile$create(tf)
+reader <- RecordBatchFileReader$create(read_file_obj)
+# RecordBatchFileReader knows how many batches it has (StreamReader does not)
+reader$num_record_batches
+# We could consume the Reader by calling $read_next_batch() until all are
+# consumed, or we can call $read_table() to pull them all into a Table
+tab <- reader$read_table()
+# Call as.data.frame to turn that Table into an R data.frame
+df <- as.data.frame(tab)
+# This should be the same data we sent
+all.equal(df, chickwts, check.attributes = FALSE)
+# Unlike the Writers, we don't have to close RecordBatchReaders,
+# but we do still need to close the file connection
+read_file_obj$close()
+\dontshow{\}) # examplesIf}
+}
+\seealso{
+\code{\link[=read_ipc_stream]{read_ipc_stream()}} and \code{\link[=read_feather]{read_feather()}} provide a much simpler interface
+for reading data from these formats and are sufficient for many use cases.
+}