diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000 |
commit | e6918187568dbd01842d8d1d2c808ce16a894239 (patch) | |
tree | 64f88b554b444a49f656b6c656111a145cbbaa28 /src/arrow/r/man/FileFormat.Rd | |
parent | Initial commit. (diff) | |
download | ceph-b26c4052f3542036551aa9dec9caa4226e456195.tar.xz ceph-b26c4052f3542036551aa9dec9caa4226e456195.zip |
Adding upstream version 18.2.2.upstream/18.2.2
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/arrow/r/man/FileFormat.Rd')
-rw-r--r-- | src/arrow/r/man/FileFormat.Rd | 68 |
1 files changed, 68 insertions, 0 deletions
diff --git a/src/arrow/r/man/FileFormat.Rd b/src/arrow/r/man/FileFormat.Rd new file mode 100644 index 000000000..cabacc937 --- /dev/null +++ b/src/arrow/r/man/FileFormat.Rd @@ -0,0 +1,68 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dataset-format.R +\name{FileFormat} +\alias{FileFormat} +\alias{ParquetFileFormat} +\alias{IpcFileFormat} +\alias{CsvFileFormat} +\title{Dataset file formats} +\description{ +A \code{FileFormat} holds information about how to read and parse the files +included in a \code{Dataset}. There are subclasses corresponding to the supported +file formats (\code{ParquetFileFormat}, \code{IpcFileFormat}, and \code{CsvFileFormat}). +} +\section{Factory}{ + +\code{FileFormat$create()} takes the following arguments: +\itemize{ +\item \code{format}: A string identifier of the file format. Currently supported values: +\itemize{ +\item "parquet" +\item "ipc"/"arrow"/"feather", all aliases for each other; for Feather, note that +only version 2 files are supported +\item "csv"/"text", aliases for the same thing (because comma is the default +delimiter for text files) +\item "tsv", equivalent to passing \verb{format = "text", delimiter = "\\t"} +} +\item \code{...}: Additional format-specific options + +\code{format = "parquet"}: +\itemize{ +\item \code{dict_columns}: Names of columns which should be read as dictionaries. +\item Any Parquet options from \link{FragmentScanOptions}. +} + +\code{format = "text"}: see \link{CsvParseOptions}. Note that you can specify them either +with the Arrow C++ library naming ("delimiter", "quoting", etc.) or the +\code{readr}-style naming used in \code{\link[=read_csv_arrow]{read_csv_arrow()}} ("delim", "quote", etc.). +Not all \code{readr} options are currently supported; please file an issue if +you encounter one that \code{arrow} should support. Also, the following options are +supported. 
From \link{CsvReadOptions}: +\itemize{ +\item \code{skip_rows} +\item \code{column_names} +\item \code{autogenerate_column_names} } +From \link{CsvFragmentScanOptions} (these values can be overridden at scan time): \itemize{ +\item \code{convert_options}: a \link{CsvConvertOptions} +\item \code{block_size} +} +} + +It returns the appropriate subclass of \code{FileFormat} (e.g. \code{ParquetFileFormat}). +} + +\examples{ +\dontshow{if (arrow_with_dataset() && tolower(Sys.info()[["sysname"]]) != "windows") (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +## Semi-colon delimited files +# Set up directory for examples +tf <- tempfile() +dir.create(tf) +on.exit(unlink(tf)) +write.table(mtcars, file.path(tf, "file1.txt"), sep = ";", row.names = FALSE) + +# Create FileFormat object +format <- FileFormat$create(format = "text", delimiter = ";") + +open_dataset(tf, format = format) +\dontshow{\}) # examplesIf} +} |