diff options
Diffstat (limited to '')
-rw-r--r-- | src/arrow/r/man/RecordBatchWriter.Rd | 89 |
1 files changed, 89 insertions, 0 deletions
diff --git a/src/arrow/r/man/RecordBatchWriter.Rd b/src/arrow/r/man/RecordBatchWriter.Rd new file mode 100644 index 000000000..219c150e6 --- /dev/null +++ b/src/arrow/r/man/RecordBatchWriter.Rd @@ -0,0 +1,89 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/record-batch-writer.R +\docType{class} +\name{RecordBatchWriter} +\alias{RecordBatchWriter} +\alias{RecordBatchStreamWriter} +\alias{RecordBatchFileWriter} +\title{RecordBatchWriter classes} +\description{ +Apache Arrow defines two formats for \href{https://arrow.apache.org/docs/format/Columnar.html#serialization-and-interprocess-communication-ipc}{serializing data for interprocess communication (IPC)}: +a "stream" format and a "file" format, known as Feather. +\code{RecordBatchStreamWriter} and \code{RecordBatchFileWriter} are +interfaces for writing record batches to those formats, respectively. + +For guidance on how to use these classes, see the examples section. +} +\section{Factory}{ + + +The \code{RecordBatchFileWriter$create()} and \code{RecordBatchStreamWriter$create()} +factory methods instantiate the object and take the following arguments: +\itemize{ +\item \code{sink} An \code{OutputStream} +\item \code{schema} A \link{Schema} for the data to be written +\item \code{use_legacy_format} logical: write data formatted so that Arrow libraries +versions 0.14 and lower can read it. Default is \code{FALSE}. You can also +enable this by setting the environment variable \code{ARROW_PRE_0_15_IPC_FORMAT=1}. +\item \code{metadata_version}: A string like "V5" or the equivalent integer indicating +the Arrow IPC MetadataVersion. Default (NULL) will use the latest version, +unless the environment variable \code{ARROW_PRE_1_0_METADATA_VERSION=1}, in +which case it will be V4. 
+} +} + +\section{Methods}{ + +\itemize{ +\item \verb{$write(x)}: Write a \link{RecordBatch}, \link{Table}, or \code{data.frame}, dispatching +to the methods below appropriately +\item \verb{$write_batch(batch)}: Write a \code{RecordBatch} to stream +\item \verb{$write_table(table)}: Write a \code{Table} to stream +\item \verb{$close()}: close stream. Note that this indicates end-of-file or +end-of-stream--it does not close the connection to the \code{sink}. That needs +to be closed separately. +} +} + +\examples{ +\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +tf <- tempfile() +on.exit(unlink(tf)) + +batch <- record_batch(chickwts) + +# This opens a connection to the file in Arrow +file_obj <- FileOutputStream$create(tf) +# Pass that to a RecordBatchWriter to write data conforming to a schema +writer <- RecordBatchFileWriter$create(file_obj, batch$schema) +writer$write(batch) +# You may write additional batches to the stream, provided that they have +# the same schema. +# Call "close" on the writer to indicate end-of-file/stream +writer$close() +# Then, close the connection--closing the IPC message does not close the file +file_obj$close() + +# Now, we have a file we can read from. 
Same pattern: open file connection, +# then pass it to a RecordBatchReader +read_file_obj <- ReadableFile$create(tf) +reader <- RecordBatchFileReader$create(read_file_obj) +# RecordBatchFileReader knows how many batches it has (StreamReader does not) +reader$num_record_batches +# We could consume the Reader by calling $read_next_batch() until all are +# consumed, or we can call $read_table() to pull them all into a Table +tab <- reader$read_table() +# Call as.data.frame to turn that Table into an R data.frame +df <- as.data.frame(tab) +# This should be the same data we sent +all.equal(df, chickwts, check.attributes = FALSE) +# Unlike the Writers, we don't have to close RecordBatchReaders, +# but we do still need to close the file connection +read_file_obj$close() +\dontshow{\}) # examplesIf} +} +\seealso{ +\code{\link[=write_ipc_stream]{write_ipc_stream()}} and \code{\link[=write_feather]{write_feather()}} provide a much simpler +interface for writing data to these formats and are sufficient for many use +cases. \code{\link[=write_to_raw]{write_to_raw()}} is a version that serializes data to a buffer. +} |