From e6918187568dbd01842d8d1d2c808ce16a894239 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sun, 21 Apr 2024 13:54:28 +0200 Subject: Adding upstream version 18.2.2. Signed-off-by: Daniel Baumann --- src/arrow/r/man/FileFormat.Rd | 68 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) create mode 100644 src/arrow/r/man/FileFormat.Rd (limited to 'src/arrow/r/man/FileFormat.Rd') diff --git a/src/arrow/r/man/FileFormat.Rd b/src/arrow/r/man/FileFormat.Rd new file mode 100644 index 000000000..cabacc937 --- /dev/null +++ b/src/arrow/r/man/FileFormat.Rd @@ -0,0 +1,68 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dataset-format.R +\name{FileFormat} +\alias{FileFormat} +\alias{ParquetFileFormat} +\alias{IpcFileFormat} +\alias{CsvFileFormat} +\title{Dataset file formats} +\description{ +A \code{FileFormat} holds information about how to read and parse the files +included in a \code{Dataset}. There are subclasses corresponding to the supported +file formats (\code{ParquetFileFormat}, \code{IpcFileFormat}, and \code{CsvFileFormat}). +} +\section{Factory}{ + +\code{FileFormat$create()} takes the following arguments: +\itemize{ +\item \code{format}: A string identifier of the file format. Currently supported values: +\itemize{ +\item "parquet" +\item "ipc"/"arrow"/"feather", all aliases for each other; for Feather, note that +only version 2 files are supported +\item "csv"/"text", aliases for the same thing (because comma is the default +delimiter for text files) +\item "tsv", equivalent to passing \verb{format = "text", delimiter = "\\t"} +} +\item \code{...}: Additional format-specific options + +\code{format = "parquet"}: +\itemize{ +\item \code{dict_columns}: Names of columns which should be read as dictionaries. +\item Any Parquet options from \link{FragmentScanOptions}. +} + +\code{format = "text"}: see \link{CsvParseOptions}. Note that you can specify them either +with the Arrow C++ library naming ("delimiter", "quoting", etc.) 
or the +\code{readr}-style naming used in \code{\link[=read_csv_arrow]{read_csv_arrow()}} ("delim", "quote", etc.). +Not all \code{readr} options are currently supported; please file an issue if +you encounter one that \code{arrow} should support. Also, the following options are +supported. From \link{CsvReadOptions}: +\itemize{ +\item \code{skip_rows} +\item \code{column_names} +\item \code{autogenerate_column_names} +} From \link{CsvFragmentScanOptions} (these values can be overridden at scan time): \itemize{ +\item \code{convert_options}: a \link{CsvConvertOptions} +\item \code{block_size} +} +} + +It returns the appropriate subclass of \code{FileFormat} (e.g. \code{ParquetFileFormat}). +} + +\examples{ +\dontshow{if (arrow_with_dataset() && tolower(Sys.info()[["sysname"]]) != "windows") (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +## Semi-colon delimited files +# Set up directory for examples +tf <- tempfile() +dir.create(tf) +on.exit(unlink(tf)) +write.table(mtcars, file.path(tf, "file1.txt"), sep = ";", row.names = FALSE) + +# Create FileFormat object +format <- FileFormat$create(format = "text", delimiter = ";") + +open_dataset(tf, format = format) +\dontshow{\}) # examplesIf} +} -- cgit v1.2.3