summaryrefslogtreecommitdiffstats
path: root/src/arrow/r/man/RecordBatchWriter.Rd
diff options
context:
space:
mode:
Diffstat (limited to 'src/arrow/r/man/RecordBatchWriter.Rd')
-rw-r--r--src/arrow/r/man/RecordBatchWriter.Rd89
1 files changed, 89 insertions, 0 deletions
diff --git a/src/arrow/r/man/RecordBatchWriter.Rd b/src/arrow/r/man/RecordBatchWriter.Rd
new file mode 100644
index 000000000..219c150e6
--- /dev/null
+++ b/src/arrow/r/man/RecordBatchWriter.Rd
@@ -0,0 +1,89 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/record-batch-writer.R
+\docType{class}
+\name{RecordBatchWriter}
+\alias{RecordBatchWriter}
+\alias{RecordBatchStreamWriter}
+\alias{RecordBatchFileWriter}
+\title{RecordBatchWriter classes}
+\description{
+Apache Arrow defines two formats for \href{https://arrow.apache.org/docs/format/Columnar.html#serialization-and-interprocess-communication-ipc}{serializing data for interprocess communication (IPC)}:
+a "stream" format and a "file" format, known as Feather.
+\code{RecordBatchStreamWriter} and \code{RecordBatchFileWriter} are
+interfaces for writing record batches to those formats, respectively.
+
+For guidance on how to use these classes, see the examples section.
+}
+\section{Factory}{
+
+
+The \code{RecordBatchFileWriter$create()} and \code{RecordBatchStreamWriter$create()}
+factory methods instantiate the object and take the following arguments:
+\itemize{
+\item \code{sink} An \code{OutputStream}
+\item \code{schema} A \link{Schema} for the data to be written
+\item \code{use_legacy_format} logical: write data formatted so that Arrow library
+versions 0.14 and lower can read it. Default is \code{FALSE}. You can also
+enable this by setting the environment variable \code{ARROW_PRE_0_15_IPC_FORMAT=1}.
+\item \code{metadata_version} A string like "V5" or the equivalent integer indicating
+the Arrow IPC MetadataVersion. Default (\code{NULL}) will use the latest version,
+unless the environment variable \code{ARROW_PRE_1_0_METADATA_VERSION=1} is set, in
+which case it will be V4.
+}
+}
+
+\section{Methods}{
+
+\itemize{
+\item \verb{$write(x)}: Write a \link{RecordBatch}, \link{Table}, or \code{data.frame}, dispatching
+to the methods below appropriately
+\item \verb{$write_batch(batch)}: Write a \code{RecordBatch} to stream
+\item \verb{$write_table(table)}: Write a \code{Table} to stream
+\item \verb{$close()}: close stream. Note that this indicates end-of-file or
+end-of-stream--it does not close the connection to the \code{sink}. That needs
+to be closed separately.
+}
+}
+
+\examples{
+\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+tf <- tempfile()
+on.exit(unlink(tf))
+
+batch <- record_batch(chickwts)
+
+# This opens a connection to the file in Arrow
+file_obj <- FileOutputStream$create(tf)
+# Pass that to a RecordBatchWriter to write data conforming to a schema
+writer <- RecordBatchFileWriter$create(file_obj, batch$schema)
+writer$write(batch)
+# You may write additional batches to the stream, provided that they have
+# the same schema.
+# Call "close" on the writer to indicate end-of-file/stream
+writer$close()
+# Then, close the connection--closing the IPC message does not close the file
+file_obj$close()
+
+# Now, we have a file we can read from. Same pattern: open file connection,
+# then pass it to a RecordBatchReader
+read_file_obj <- ReadableFile$create(tf)
+reader <- RecordBatchFileReader$create(read_file_obj)
+# RecordBatchFileReader knows how many batches it has (StreamReader does not)
+reader$num_record_batches
+# We could consume the Reader by calling $read_next_batch() until all are
+# consumed, or we can call $read_table() to pull them all into a Table
+tab <- reader$read_table()
+# Call as.data.frame to turn that Table into an R data.frame
+df <- as.data.frame(tab)
+# This should be the same data we sent
+all.equal(df, chickwts, check.attributes = FALSE)
+# Unlike the Writers, we don't have to close RecordBatchReaders,
+# but we do still need to close the file connection
+read_file_obj$close()
+\dontshow{\}) # examplesIf}
+}
+\seealso{
+\code{\link[=write_ipc_stream]{write_ipc_stream()}} and \code{\link[=write_feather]{write_feather()}} provide a much simpler
+interface for writing data to these formats and are sufficient for many use
+cases. \code{\link[=write_to_raw]{write_to_raw()}} is a version that serializes data to a buffer.
+}