From e6918187568dbd01842d8d1d2c808ce16a894239 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sun, 21 Apr 2024 13:54:28 +0200 Subject: Adding upstream version 18.2.2. Signed-off-by: Daniel Baumann --- src/arrow/r/man/write_parquet.Rd | 108 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 108 insertions(+) create mode 100644 src/arrow/r/man/write_parquet.Rd (limited to 'src/arrow/r/man/write_parquet.Rd') diff --git a/src/arrow/r/man/write_parquet.Rd b/src/arrow/r/man/write_parquet.Rd new file mode 100644 index 000000000..d7147f7e8 --- /dev/null +++ b/src/arrow/r/man/write_parquet.Rd @@ -0,0 +1,108 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/parquet.R +\name{write_parquet} +\alias{write_parquet} +\title{Write Parquet file to disk} +\usage{ +write_parquet( + x, + sink, + chunk_size = NULL, + version = NULL, + compression = default_parquet_compression(), + compression_level = NULL, + use_dictionary = NULL, + write_statistics = NULL, + data_page_size = NULL, + use_deprecated_int96_timestamps = FALSE, + coerce_timestamps = NULL, + allow_truncated_timestamps = FALSE, + properties = NULL, + arrow_properties = NULL +) +} +\arguments{ +\item{x}{\code{data.frame}, \link{RecordBatch}, or \link{Table}} + +\item{sink}{A string file path, URI, or \link{OutputStream}, or path in a file +system (\code{SubTreeFileSystem})} + +\item{chunk_size}{chunk size in number of rows. If NULL, the total number of rows is used.} + +\item{version}{parquet version, "1.0" or "2.0". Default "1.0". Numeric values +are coerced to character.} + +\item{compression}{compression algorithm. Default "snappy". See details.} + +\item{compression_level}{compression level. Meaning depends on compression algorithm} + +\item{use_dictionary}{Specify if we should use dictionary encoding. Default \code{TRUE}} + +\item{write_statistics}{Specify if we should write statistics. 
Default \code{TRUE}} + +\item{data_page_size}{Set a target threshold for the approximate encoded +size of data pages within a column chunk (in bytes). Default 1 MiB.} + +\item{use_deprecated_int96_timestamps}{Write timestamps to INT96 Parquet format. Default \code{FALSE}.} + +\item{coerce_timestamps}{Cast timestamps to a particular resolution. Can be +\code{NULL}, "ms" or "us". Default \code{NULL} (no casting)} + +\item{allow_truncated_timestamps}{Allow loss of data when coercing timestamps to a +particular resolution. E.g. if microsecond or nanosecond data is lost when coercing +to "ms", do not raise an exception.} + +\item{properties}{A \code{ParquetWriterProperties} object, used instead of the options +enumerated in this function's signature. Providing \code{properties} as an argument +is deprecated; if you need to assemble \code{ParquetWriterProperties} outside +of \code{write_parquet()}, use \code{ParquetFileWriter} instead.} + +\item{arrow_properties}{A \code{ParquetArrowWriterProperties} object. Like +\code{properties}, this argument is deprecated.} +} +\value{ +the input \code{x} invisibly. +} +\description{ +\href{https://parquet.apache.org/}{Parquet} is a columnar storage file format. +This function enables you to write Parquet files from R. +} +\details{ +Due to features of the format, Parquet files cannot be appended to. +If you want to use the Parquet format but also want the ability to extend +your dataset, you can write to additional Parquet files and then treat +the whole directory of files as a \link{Dataset} you can query. +See \code{vignette("dataset", package = "arrow")} for examples of this. + +The parameters \code{compression}, \code{compression_level}, \code{use_dictionary} and +\code{write_statistics} support various patterns: +\itemize{ +\item The default \code{NULL} leaves the parameter unspecified, and the C++ library +uses an appropriate default for each column (defaults listed above) +\item A single, unnamed value (e.g. 
a single string for \code{compression}) applies to all columns +\item An unnamed vector, of the same size as the number of columns, to specify a +value for each column, in positional order +\item A named vector, to specify the value for the named columns; the default +value for the setting is used for columns not supplied +} + +The \code{compression} argument can be any of the following (case insensitive): +"uncompressed", "snappy", "gzip", "brotli", "zstd", "lz4", "lzo" or "bz2". +Only "uncompressed" is guaranteed to be available, but "snappy" and "gzip" +are almost always included. See \code{\link[=codec_is_available]{codec_is_available()}}. +The default "snappy" is used if available, otherwise "uncompressed". To +disable compression, set \code{compression = "uncompressed"}. +Note that "uncompressed" columns may still have dictionary encoding. +} +\examples{ +\dontshow{if (arrow_with_parquet()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +tf1 <- tempfile(fileext = ".parquet") +write_parquet(data.frame(x = 1:5), tf1) + +# using compression +if (codec_is_available("gzip")) { + tf2 <- tempfile(fileext = ".gz.parquet") + write_parquet(data.frame(x = 1:5), tf2, compression = "gzip", compression_level = 5) +} +\dontshow{\}) # examplesIf} +} -- cgit v1.2.3