Diffstat (limited to 'src/arrow/r/man')
-rw-r--r--  src/arrow/r/man/ArrayData.Rd | 27
-rw-r--r--  src/arrow/r/man/ChunkedArray.Rd | 80
-rw-r--r--  src/arrow/r/man/Codec.Rd | 24
-rw-r--r--  src/arrow/r/man/CsvReadOptions.Rd | 107
-rw-r--r--  src/arrow/r/man/CsvTableReader.Rd | 32
-rw-r--r--  src/arrow/r/man/DataType.Rd | 15
-rw-r--r--  src/arrow/r/man/Dataset.Rd | 81
-rw-r--r--  src/arrow/r/man/DictionaryType.Rd | 15
-rw-r--r--  src/arrow/r/man/Expression.Rd | 18
-rw-r--r--  src/arrow/r/man/FeatherReader.Rd | 33
-rw-r--r--  src/arrow/r/man/Field.Rd | 37
-rw-r--r--  src/arrow/r/man/FileFormat.Rd | 68
-rw-r--r--  src/arrow/r/man/FileInfo.Rd | 28
-rw-r--r--  src/arrow/r/man/FileSelector.Rd | 27
-rw-r--r--  src/arrow/r/man/FileSystem.Rd | 99
-rw-r--r--  src/arrow/r/man/FileWriteOptions.Rd | 8
-rw-r--r--  src/arrow/r/man/FixedWidthType.Rd | 15
-rw-r--r--  src/arrow/r/man/FragmentScanOptions.Rd | 40
-rw-r--r--  src/arrow/r/man/InputStream.Rd | 45
-rw-r--r--  src/arrow/r/man/MemoryPool.Rd | 24
-rw-r--r--  src/arrow/r/man/Message.Rd | 15
-rw-r--r--  src/arrow/r/man/MessageReader.Rd | 15
-rw-r--r--  src/arrow/r/man/OutputStream.Rd | 38
-rw-r--r--  src/arrow/r/man/ParquetArrowReaderProperties.Rd | 29
-rw-r--r--  src/arrow/r/man/ParquetFileReader.Rd | 59
-rw-r--r--  src/arrow/r/man/ParquetFileWriter.Rd | 31
-rw-r--r--  src/arrow/r/man/ParquetWriterProperties.Rd | 49
-rw-r--r--  src/arrow/r/man/Partitioning.Rd | 51
-rw-r--r--  src/arrow/r/man/RecordBatch.Rd | 92
-rw-r--r--  src/arrow/r/man/RecordBatchReader.Rd | 86
-rw-r--r--  src/arrow/r/man/RecordBatchWriter.Rd | 89
-rw-r--r--  src/arrow/r/man/Scalar.Rd | 38
-rw-r--r--  src/arrow/r/man/Scanner.Rd | 51
-rw-r--r--  src/arrow/r/man/Schema.Rd | 86
-rw-r--r--  src/arrow/r/man/Table.Rd | 92
-rw-r--r--  src/arrow/r/man/array.Rd | 107
-rw-r--r--  src/arrow/r/man/arrow-package.Rd | 45
-rw-r--r--  src/arrow/r/man/arrow_available.Rd | 47
-rw-r--r--  src/arrow/r/man/arrow_info.Rd | 17
-rw-r--r--  src/arrow/r/man/buffer.Rd | 44
-rw-r--r--  src/arrow/r/man/call_function.Rd | 51
-rw-r--r--  src/arrow/r/man/cast_options.Rd | 22
-rw-r--r--  src/arrow/r/man/codec_is_available.Rd | 25
-rw-r--r--  src/arrow/r/man/compression.Rd | 31
-rw-r--r--  src/arrow/r/man/contains_regex.Rd | 18
-rw-r--r--  src/arrow/r/man/copy_files.Rd | 35
-rw-r--r--  src/arrow/r/man/cpu_count.Rd | 17
-rw-r--r--  src/arrow/r/man/create_package_with_all_dependencies.Rd | 70
-rw-r--r--  src/arrow/r/man/data-type.Rd | 163
-rw-r--r--  src/arrow/r/man/dataset_factory.Rd | 76
-rw-r--r--  src/arrow/r/man/default_memory_pool.Rd | 15
-rw-r--r--  src/arrow/r/man/dictionary.Rd | 24
-rw-r--r--  src/arrow/r/man/enums.Rd | 88
-rw-r--r--  src/arrow/r/man/flight_connect.Rd | 21
-rw-r--r--  src/arrow/r/man/flight_get.Rd | 19
-rw-r--r--  src/arrow/r/man/flight_put.Rd | 25
-rw-r--r--  src/arrow/r/man/get_stringr_pattern_options.Rd | 22
-rw-r--r--  src/arrow/r/man/hive_partition.Rd | 35
-rw-r--r--  src/arrow/r/man/install_arrow.Rd | 61
-rw-r--r--  src/arrow/r/man/install_pyarrow.Rd | 22
-rw-r--r--  src/arrow/r/man/io_thread_count.Rd | 17
-rw-r--r--  src/arrow/r/man/list_compute_functions.Rd | 45
-rw-r--r--  src/arrow/r/man/list_flights.Rd | 23
-rw-r--r--  src/arrow/r/man/load_flight_server.Rd | 22
-rw-r--r--  src/arrow/r/man/make_readable_file.Rd | 29
-rw-r--r--  src/arrow/r/man/map_batches.Rd | 30
-rw-r--r--  src/arrow/r/man/match_arrow.Rd | 53
-rw-r--r--  src/arrow/r/man/mmap_create.Rd | 19
-rw-r--r--  src/arrow/r/man/mmap_open.Rd | 16
-rw-r--r--  src/arrow/r/man/open_dataset.Rd | 146
-rw-r--r--  src/arrow/r/man/read_delim_arrow.Rd | 218
-rw-r--r--  src/arrow/r/man/read_feather.Rd | 50
-rw-r--r--  src/arrow/r/man/read_ipc_stream.Rd | 42
-rw-r--r--  src/arrow/r/man/read_json_arrow.Rd | 52
-rw-r--r--  src/arrow/r/man/read_message.Rd | 14
-rw-r--r--  src/arrow/r/man/read_parquet.Rd | 50
-rw-r--r--  src/arrow/r/man/read_schema.Rd | 19
-rw-r--r--  src/arrow/r/man/recycle_scalars.Rd | 18
-rw-r--r--  src/arrow/r/man/reexports.Rd | 29
-rw-r--r--  src/arrow/r/man/repeat_value_as_array.Rd | 20
-rw-r--r--  src/arrow/r/man/s3_bucket.Rd | 28
-rw-r--r--  src/arrow/r/man/to_arrow.Rd | 33
-rw-r--r--  src/arrow/r/man/to_duckdb.Rd | 56
-rw-r--r--  src/arrow/r/man/type.Rd | 27
-rw-r--r--  src/arrow/r/man/unify_schemas.Rd | 27
-rw-r--r--  src/arrow/r/man/value_counts.Rd | 24
-rw-r--r--  src/arrow/r/man/write_csv_arrow.Rd | 32
-rw-r--r--  src/arrow/r/man/write_dataset.Rd | 115
-rw-r--r--  src/arrow/r/man/write_feather.Rd | 61
-rw-r--r--  src/arrow/r/man/write_ipc_stream.Rd | 45
-rw-r--r--  src/arrow/r/man/write_parquet.Rd | 108
-rw-r--r--  src/arrow/r/man/write_to_raw.Rd | 28
92 files changed, 4240 insertions(+), 0 deletions(-)
diff --git a/src/arrow/r/man/ArrayData.Rd b/src/arrow/r/man/ArrayData.Rd
new file mode 100644
index 000000000..383ab317d
--- /dev/null
+++ b/src/arrow/r/man/ArrayData.Rd
@@ -0,0 +1,27 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/array-data.R
+\docType{class}
+\name{ArrayData}
+\alias{ArrayData}
+\title{ArrayData class}
+\description{
+The \code{ArrayData} class allows you to get and inspect the data
+inside an \code{arrow::Array}.
+}
+\section{Usage}{
+\preformatted{data <- Array$create(x)$data()
+
+data$type
+data$length
+data$null_count
+data$offset
+data$buffers
+}
+}
+
+\section{Methods}{
+
+
+...
+}
+
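A minimal sketch of how the accessors above fit together (untested; uses only the Usage section above):

library(arrow)

a <- Array$create(c(1.5, 2.5, NA))
d <- a$data()
d$type        # the array's DataType (here, double())
d$length      # 3
d$null_count  # 1, for the NA
d$offset      # 0 for a freshly created array
d$buffers     # list of Buffers: validity bitmap and values
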
diff --git a/src/arrow/r/man/ChunkedArray.Rd b/src/arrow/r/man/ChunkedArray.Rd
new file mode 100644
index 000000000..3a504f014
--- /dev/null
+++ b/src/arrow/r/man/ChunkedArray.Rd
@@ -0,0 +1,80 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/chunked-array.R
+\docType{class}
+\name{ChunkedArray}
+\alias{ChunkedArray}
+\alias{chunked_array}
+\title{ChunkedArray class}
+\usage{
+chunked_array(..., type = NULL)
+}
+\arguments{
+\item{\dots}{Vectors to coerce}
+
+\item{type}{currently ignored}
+}
+\description{
+A \code{ChunkedArray} is a data structure managing a list of
+primitive Arrow \link[=Array]{Arrays} logically as one large array. Chunked arrays
+may be grouped together in a \link{Table}.
+}
+\section{Factory}{
+
+The \code{ChunkedArray$create()} factory method instantiates the object from
+various Arrays or R vectors. \code{chunked_array()} is an alias for it.
+}
+
+\section{Methods}{
+
+\itemize{
+\item \verb{$length()}: Number of elements the array contains
+\item \verb{$chunk(i)}: Extract an \code{Array} chunk by integer position
+\item \verb{$as_vector()}: convert to an R vector
+\item \verb{$Slice(offset, length = NULL)}: Construct a zero-copy slice of the array
+with the indicated offset and length. If length is \code{NULL}, the slice goes
+until the end of the array.
+\item \verb{$Take(i)}: return a \code{ChunkedArray} with values at positions given by
+integers \code{i}. If \code{i} is an Arrow \code{Array} or \code{ChunkedArray}, it will be
+coerced to an R vector before taking.
+\item \verb{$Filter(i, keep_na = TRUE)}: return a \code{ChunkedArray} with values at positions where
+logical vector or Arrow boolean-type \verb{(Chunked)Array} \code{i} is \code{TRUE}.
+\item \verb{$SortIndices(descending = FALSE)}: return an \code{Array} of integer positions that can be
+used to rearrange the \code{ChunkedArray} in ascending or descending order
+\item \verb{$cast(target_type, safe = TRUE, options = cast_options(safe))}: Alter the
+data in the array to change its type.
+\item \verb{$null_count}: The number of null entries in the array
+\item \verb{$chunks}: return a list of \code{Array}s
+\item \verb{$num_chunks}: integer number of chunks in the \code{ChunkedArray}
+\item \verb{$type}: logical type of data
+\item \verb{$View(type)}: Construct a zero-copy view of this \code{ChunkedArray} with the
+given type.
+\item \verb{$Validate()}: Perform any validation checks to determine obvious inconsistencies
+within the array's internal data. This can be an expensive check, potentially \code{O(length)}
+}
+}
+
+\examples{
+\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+# Pass items into chunked_array as separate objects to create chunks
+class_scores <- chunked_array(c(87, 88, 89), c(94, 93, 92), c(71, 72, 73))
+class_scores$num_chunks
+
+# When taking a Slice from a chunked_array, chunks are preserved
+class_scores$Slice(2, length = 5)
+
+# You can combine Take and SortIndices to return a ChunkedArray with 1 chunk
+# containing all values, ordered.
+class_scores$Take(class_scores$SortIndices(descending = TRUE))
+
+# If you pass a list into chunked_array, you get a list of length 1
+list_scores <- chunked_array(list(c(9.9, 9.6, 9.5), c(8.2, 8.3, 8.4), c(10.0, 9.9, 9.8)))
+list_scores$num_chunks
+
+# When constructing a ChunkedArray, the first chunk is used to infer type.
+doubles <- chunked_array(c(1, 2, 3), c(5L, 6L, 7L))
+doubles$type
+\dontshow{\}) # examplesIf}
+}
+\seealso{
+\link{Array}
+}
diff --git a/src/arrow/r/man/Codec.Rd b/src/arrow/r/man/Codec.Rd
new file mode 100644
index 000000000..86723aed5
--- /dev/null
+++ b/src/arrow/r/man/Codec.Rd
@@ -0,0 +1,24 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/compression.R
+\docType{class}
+\name{Codec}
+\alias{Codec}
+\title{Compression Codec class}
+\description{
+Codecs allow you to create \link[=compression]{compressed input and output streams}.
+}
+\section{Factory}{
+
+The \code{Codec$create()} factory method takes the following arguments:
+\itemize{
+\item \code{type}: string name of the compression method. Possible values are
+"uncompressed", "snappy", "gzip", "brotli", "zstd", "lz4", "lzo", or
+"bz2". \code{type} may be upper- or lower-cased. Not all methods may be
+available; support depends on build-time flags for the C++ library.
+See \code{\link[=codec_is_available]{codec_is_available()}}. Most builds support at least "snappy" and
+"gzip". All support "uncompressed".
+\item \code{compression_level}: compression level, the default value (\code{NA}) uses the
+default compression level for the selected compression \code{type}.
+}
+}
+
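A short sketch of the factory in use. Because codec support depends on build-time flags, the call is guarded with codec_is_available(), as the docs above suggest:

library(arrow)

if (codec_is_available("gzip")) {
  codec <- Codec$create("gzip", compression_level = 9)
  # The codec can then back compressed input/output streams;
  # see the "compression" help topic.
}
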
diff --git a/src/arrow/r/man/CsvReadOptions.Rd b/src/arrow/r/man/CsvReadOptions.Rd
new file mode 100644
index 000000000..d08869270
--- /dev/null
+++ b/src/arrow/r/man/CsvReadOptions.Rd
@@ -0,0 +1,107 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/csv.R, R/json.R
+\docType{class}
+\name{CsvReadOptions}
+\alias{CsvReadOptions}
+\alias{CsvWriteOptions}
+\alias{CsvParseOptions}
+\alias{TimestampParser}
+\alias{CsvConvertOptions}
+\alias{JsonReadOptions}
+\alias{JsonParseOptions}
+\title{File reader options}
+\description{
+\code{CsvReadOptions}, \code{CsvParseOptions}, \code{CsvConvertOptions},
+\code{JsonReadOptions}, \code{JsonParseOptions}, and \code{TimestampParser} are containers for various
+file reading options. See their usage in \code{\link[=read_csv_arrow]{read_csv_arrow()}} and
+\code{\link[=read_json_arrow]{read_json_arrow()}}, respectively.
+}
+\section{Factory}{
+
+
+The \code{CsvReadOptions$create()} and \code{JsonReadOptions$create()} factory methods
+take the following arguments:
+\itemize{
+\item \code{use_threads} Whether to use the global CPU thread pool
+\item \code{block_size} Block size we request from the IO layer; also determines
+the size of chunks when use_threads is \code{TRUE}. NB: if \code{FALSE}, JSON input
+must end with an empty line.
+}
+
+\code{CsvReadOptions$create()} further accepts these additional arguments:
+\itemize{
+\item \code{skip_rows} Number of lines to skip before reading data (default 0)
+\item \code{column_names} Character vector to supply column names. If length-0
+(the default), the first non-skipped row will be parsed to generate column
+names, unless \code{autogenerate_column_names} is \code{TRUE}.
+\item \code{autogenerate_column_names} Logical: generate column names instead of
+using the first non-skipped row (the default)? If \code{TRUE}, column names will
+be "f0", "f1", ..., "fN".
+}
+
+\code{CsvParseOptions$create()} takes the following arguments:
+\itemize{
+\item \code{delimiter} Field delimiting character (default \code{","})
+\item \code{quoting} Logical: are strings quoted? (default \code{TRUE})
+\item \code{quote_char} Quoting character, if \code{quoting} is \code{TRUE}
+\item \code{double_quote} Logical: are quotes inside values double-quoted? (default \code{TRUE})
+\item \code{escaping} Logical: whether escaping is used (default \code{FALSE})
+\item \code{escape_char} Escaping character, if \code{escaping} is \code{TRUE}
+\item \code{newlines_in_values} Logical: are values allowed to contain CR (\code{0x0d})
+and LF (\code{0x0a}) characters? (default \code{FALSE})
+\item \code{ignore_empty_lines} Logical: should empty lines be ignored (default) or
+generate a row of missing values (if \code{FALSE})?
+}
+
+\code{JsonParseOptions$create()} accepts only the \code{newlines_in_values} argument.
+
+\code{CsvConvertOptions$create()} takes the following arguments:
+\itemize{
+\item \code{check_utf8} Logical: check UTF8 validity of string columns? (default \code{TRUE})
+\item \code{null_values} character vector of recognized spellings for null values.
+Analogous to the \code{na.strings} argument to
+\code{\link[utils:read.table]{read.csv()}} or \code{na} in \code{readr::read_csv()}.
+\item \code{strings_can_be_null} Logical: can string / binary columns have
+null values? Similar to the \code{quoted_na} argument to \code{readr::read_csv()}.
+(default \code{FALSE})
+\item \code{true_values} character vector of recognized spellings for \code{TRUE} values
+\item \code{false_values} character vector of recognized spellings for \code{FALSE} values
+\item \code{col_types} A \code{Schema} or \code{NULL} to infer types
+\item \code{auto_dict_encode} Logical: Whether to try to automatically
+dictionary-encode string / binary data (think \code{stringsAsFactors}). Default \code{FALSE}.
+This setting is ignored for non-inferred columns (those in \code{col_types}).
+\item \code{auto_dict_max_cardinality} If \code{auto_dict_encode}, string/binary columns
+are dictionary-encoded up to this number of unique values (default 50),
+after which it switches to regular encoding.
+\item \code{include_columns} If non-empty, indicates the names of columns from the
+CSV file that should actually be read and converted (in the vector's order).
+\item \code{include_missing_columns} Logical: if \code{include_columns} is provided, should
+columns named in it but not found in the data be included as a column of
+type \code{null()}? The default (\code{FALSE}) means that the reader will instead
+raise an error.
+\item \code{timestamp_parsers} User-defined timestamp parsers. If more than one
+parser is specified, the CSV conversion logic will try parsing values
+starting from the beginning of this vector. Possible values are
+(a) \code{NULL}, the default, which uses the ISO-8601 parser;
+(b) a character vector of \link[base:strptime]{strptime} parse strings; or
+(c) a list of \link{TimestampParser} objects.
+}
+
+\code{TimestampParser$create()} takes an optional \code{format} string argument.
+See \code{\link[base:strptime]{strptime()}} for example syntax.
+The default is to use an ISO-8601 format parser.
+
+The \code{CsvWriteOptions$create()} factory method takes the following arguments:
+\itemize{
+\item \code{include_header} Whether to write an initial header line with column names
+\item \code{batch_size} Maximum number of rows processed at a time. Default is 1024.
+}
+}
+
+\section{Active bindings}{
+
+\itemize{
+\item \code{column_names}: from \code{CsvReadOptions}
+}
+}
+
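A sketch of building these option objects and handing them to read_csv_arrow(), which accepts them through its read_options, parse_options, and convert_options arguments (untested; the sample file is made up for illustration):

library(arrow)

tf <- tempfile(fileext = ".csv")
writeLines(c("x|y", "1|a", "2|NULL"), tf)

read_opts    <- CsvReadOptions$create(skip_rows = 0)
parse_opts   <- CsvParseOptions$create(delimiter = "|")
convert_opts <- CsvConvertOptions$create(
  null_values = c("", "NULL"),
  strings_can_be_null = TRUE
)

df <- read_csv_arrow(
  tf,
  read_options    = read_opts,
  parse_options   = parse_opts,
  convert_options = convert_opts
)
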
diff --git a/src/arrow/r/man/CsvTableReader.Rd b/src/arrow/r/man/CsvTableReader.Rd
new file mode 100644
index 000000000..1afa9d020
--- /dev/null
+++ b/src/arrow/r/man/CsvTableReader.Rd
@@ -0,0 +1,32 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/csv.R, R/json.R
+\docType{class}
+\name{CsvTableReader}
+\alias{CsvTableReader}
+\alias{JsonTableReader}
+\title{Arrow CSV and JSON table reader classes}
+\description{
+\code{CsvTableReader} and \code{JsonTableReader} wrap the Arrow C++ CSV
+and JSON table readers. See their usage in \code{\link[=read_csv_arrow]{read_csv_arrow()}} and
+\code{\link[=read_json_arrow]{read_json_arrow()}}, respectively.
+}
+\section{Factory}{
+
+
+The \code{CsvTableReader$create()} and \code{JsonTableReader$create()} factory methods
+take the following arguments:
+\itemize{
+\item \code{file} An Arrow \link{InputStream}
+\item \code{convert_options} (CSV only), \code{parse_options}, \code{read_options}: see
+\link{CsvReadOptions}
+\item \code{...} additional parameters.
+}
+}
+
+\section{Methods}{
+
+\itemize{
+\item \verb{$Read()}: returns an Arrow Table.
+}
+}
+
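A lower-level sketch of the reader itself (untested; read_csv_arrow() wraps this class, so most users will not call it directly):

library(arrow)

tf <- tempfile(fileext = ".csv")
write.csv(mtcars, tf, row.names = FALSE)

f <- ReadableFile$create(tf)        # an InputStream
reader <- CsvTableReader$create(f)
tab <- reader$Read()                # an Arrow Table
f$close()
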
diff --git a/src/arrow/r/man/DataType.Rd b/src/arrow/r/man/DataType.Rd
new file mode 100644
index 000000000..8c96141be
--- /dev/null
+++ b/src/arrow/r/man/DataType.Rd
@@ -0,0 +1,15 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/type.R
+\docType{class}
+\name{DataType}
+\alias{DataType}
+\title{class arrow::DataType}
+\description{
+class arrow::DataType
+}
+\section{Methods}{
+
+
+TODO
+}
+
diff --git a/src/arrow/r/man/Dataset.Rd b/src/arrow/r/man/Dataset.Rd
new file mode 100644
index 000000000..c19a0df6c
--- /dev/null
+++ b/src/arrow/r/man/Dataset.Rd
@@ -0,0 +1,81 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/dataset.R, R/dataset-factory.R
+\name{Dataset}
+\alias{Dataset}
+\alias{FileSystemDataset}
+\alias{UnionDataset}
+\alias{InMemoryDataset}
+\alias{DatasetFactory}
+\alias{FileSystemDatasetFactory}
+\title{Multi-file datasets}
+\description{
+Arrow Datasets allow you to query against data that has been split across
+multiple files. This sharding of data may indicate partitioning, which
+can accelerate queries that only touch some partitions (files).
+
+A \code{Dataset} contains one or more \code{Fragments}, such as files, of potentially
+differing type and partitioning.
+
+For \code{Dataset$create()}, see \code{\link[=open_dataset]{open_dataset()}}, which is an alias for it.
+
+\code{DatasetFactory} is used to provide finer control over the creation of \code{Dataset}s.
+}
+\section{Factory}{
+
+\code{DatasetFactory} is used to create a \code{Dataset}, inspect the \link{Schema} of the
+fragments contained in it, and declare a partitioning.
+\code{FileSystemDatasetFactory} is a subclass of \code{DatasetFactory} for
+discovering files in the local file system, the only currently supported
+file system.
+
+For the \code{DatasetFactory$create()} factory method, see \code{\link[=dataset_factory]{dataset_factory()}}, an
+alias for it. A \code{DatasetFactory} has:
+\itemize{
+\item \verb{$Inspect(unify_schemas)}: If \code{unify_schemas} is \code{TRUE}, all fragments
+will be scanned and a unified \link{Schema} will be created from them; if \code{FALSE}
+(default), only the first fragment will be inspected for its schema. Use this
+fast path when you know and trust that all fragments have an identical schema.
+\item \verb{$Finish(schema, unify_schemas)}: Returns a \code{Dataset}. If \code{schema} is provided,
+it will be used for the \code{Dataset}; if omitted, a \code{Schema} will be created from
+inspecting the fragments (files) in the dataset, following \code{unify_schemas}
+as described above.
+}
+
+\code{FileSystemDatasetFactory$create()} is a lower-level factory method and
+takes the following arguments:
+\itemize{
+\item \code{filesystem}: A \link{FileSystem}
+\item \code{selector}: Either a \link{FileSelector} or \code{NULL}
+\item \code{paths}: Either a character vector of file paths or \code{NULL}
+\item \code{format}: A \link{FileFormat}
+\item \code{partitioning}: Either \code{Partitioning}, \code{PartitioningFactory}, or \code{NULL}
+}
+}
+
+\section{Methods}{
+
+
+A \code{Dataset} has the following methods:
+\itemize{
+\item \verb{$NewScan()}: Returns a \link{ScannerBuilder} for building a query
+\item \verb{$schema}: Active binding that returns the \link{Schema} of the Dataset; you
+may also replace the dataset's schema by using \code{ds$schema <- new_schema}.
+This method currently supports only adding, removing, or reordering
+fields in the schema: you cannot alter or cast the field types.
+}
+
+\code{FileSystemDataset} has the following methods:
+\itemize{
+\item \verb{$files}: Active binding, returns the files of the \code{FileSystemDataset}
+\item \verb{$format}: Active binding, returns the \link{FileFormat} of the \code{FileSystemDataset}
+}
+
+\code{UnionDataset} has the following methods:
+\itemize{
+\item \verb{$children}: Active binding, returns all child \code{Dataset}s.
+}
+}
+
+\seealso{
+\code{\link[=open_dataset]{open_dataset()}} for a simple interface to creating a \code{Dataset}
+}
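A sketch connecting the factory to open_dataset() (untested; requires a build with the dataset component enabled):

library(arrow)

td <- tempfile()
dir.create(td)
write_parquet(mtcars[1:16, ], file.path(td, "part-0.parquet"))
write_parquet(mtcars[17:32, ], file.path(td, "part-1.parquet"))

ds <- open_dataset(td)   # the alias for Dataset$create()
ds$schema

# Finer control through the factory:
factory <- dataset_factory(td, format = "parquet")
factory$Inspect(unify_schemas = FALSE)  # fast path: first fragment only
ds2 <- factory$Finish()
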
diff --git a/src/arrow/r/man/DictionaryType.Rd b/src/arrow/r/man/DictionaryType.Rd
new file mode 100644
index 000000000..8c9087f1a
--- /dev/null
+++ b/src/arrow/r/man/DictionaryType.Rd
@@ -0,0 +1,15 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/dictionary.R
+\docType{class}
+\name{DictionaryType}
+\alias{DictionaryType}
+\title{class DictionaryType}
+\description{
+class DictionaryType
+}
+\section{Methods}{
+
+
+TODO
+}
+
diff --git a/src/arrow/r/man/Expression.Rd b/src/arrow/r/man/Expression.Rd
new file mode 100644
index 000000000..58a6a44c0
--- /dev/null
+++ b/src/arrow/r/man/Expression.Rd
@@ -0,0 +1,18 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/expression.R
+\name{Expression}
+\alias{Expression}
+\title{Arrow expressions}
+\description{
+\code{Expression}s are used to define filter logic for passing to a \link{Dataset}
+\link{Scanner}.
+
+\code{Expression$scalar(x)} constructs an \code{Expression} which always evaluates to
+the provided scalar (length-1) R value.
+
+\code{Expression$field_ref(name)} is used to construct an \code{Expression} which
+evaluates to the named column in the \code{Dataset} against which it is evaluated.
+
+\code{Expression$create(function_name, ..., options)} builds a function-call
+\code{Expression} containing one or more \code{Expression}s.
+}
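A sketch composing the three constructors (hedged: "greater" names the underlying Arrow C++ compute function, which is an assumption not stated above):

library(arrow)

# Roughly the filter expression cyl > 6
expr <- Expression$create(
  "greater",                    # assumed compute function name
  Expression$field_ref("cyl"),
  Expression$scalar(6)
)
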
diff --git a/src/arrow/r/man/FeatherReader.Rd b/src/arrow/r/man/FeatherReader.Rd
new file mode 100644
index 000000000..64a307fcf
--- /dev/null
+++ b/src/arrow/r/man/FeatherReader.Rd
@@ -0,0 +1,33 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/feather.R
+\docType{class}
+\name{FeatherReader}
+\alias{FeatherReader}
+\title{FeatherReader class}
+\description{
+This class enables you to interact with Feather files. Create
+one to connect to a file or other InputStream, and call \code{Read()} on it to
+make an \code{arrow::Table}. See its usage in \code{\link[=read_feather]{read_feather()}}.
+}
+\section{Factory}{
+
+
+The \code{FeatherReader$create()} factory method instantiates the object and
+takes the following argument:
+\itemize{
+\item \code{file} an Arrow file connection object inheriting from \code{RandomAccessFile}.
+}
+}
+
+\section{Methods}{
+
+\itemize{
+\item \verb{$Read(columns)}: Returns a \code{Table} of the selected columns, a vector of
+integer indices
+\item \verb{$column_names}: Active binding, returns the column names in the Feather file
+\item \verb{$schema}: Active binding, returns the schema of the Feather file
+\item \verb{$version}: Active binding, returns \code{1} or \code{2}, according to the Feather
+file version
+}
+}
+
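A minimal round trip through the reader (untested; passing columns = NULL to $Read() to select all columns mirrors what read_feather() does and is an assumption here):

library(arrow)

tf <- tempfile(fileext = ".feather")
write_feather(mtcars, tf)

f <- ReadableFile$create(tf)   # a RandomAccessFile
reader <- FeatherReader$create(f)
reader$version                 # 2 for files written by this package
tab <- reader$Read(NULL)       # NULL: read all columns (assumed)
f$close()
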
diff --git a/src/arrow/r/man/Field.Rd b/src/arrow/r/man/Field.Rd
new file mode 100644
index 000000000..3b709e879
--- /dev/null
+++ b/src/arrow/r/man/Field.Rd
@@ -0,0 +1,37 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/field.R
+\docType{class}
+\name{Field}
+\alias{Field}
+\alias{field}
+\title{Field class}
+\usage{
+field(name, type, metadata, nullable = TRUE)
+}
+\arguments{
+\item{name}{field name}
+
+\item{type}{logical type, instance of \link{DataType}}
+
+\item{metadata}{currently ignored}
+
+\item{nullable}{TRUE if field is nullable}
+}
+\description{
+\code{field()} lets you create an \code{arrow::Field} that maps a
+\link[=data-type]{DataType} to a column name. Fields are contained in
+\link[=Schema]{Schemas}.
+}
+\section{Methods}{
+
+\itemize{
+\item \code{f$ToString()}: convert to a string
+\item \code{f$Equals(other)}: test for equality. More naturally called as \code{f == other}
+}
+}
+
+\examples{
+\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+field("x", int32())
+\dontshow{\}) # examplesIf}
+}
diff --git a/src/arrow/r/man/FileFormat.Rd b/src/arrow/r/man/FileFormat.Rd
new file mode 100644
index 000000000..cabacc937
--- /dev/null
+++ b/src/arrow/r/man/FileFormat.Rd
@@ -0,0 +1,68 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/dataset-format.R
+\name{FileFormat}
+\alias{FileFormat}
+\alias{ParquetFileFormat}
+\alias{IpcFileFormat}
+\alias{CsvFileFormat}
+\title{Dataset file formats}
+\description{
+A \code{FileFormat} holds information about how to read and parse the files
+included in a \code{Dataset}. There are subclasses corresponding to the supported
+file formats (\code{ParquetFileFormat} and \code{IpcFileFormat}).
+}
+\section{Factory}{
+
+\code{FileFormat$create()} takes the following arguments:
+\itemize{
+\item \code{format}: A string identifier of the file format. Currently supported values:
+\itemize{
+\item "parquet"
+\item "ipc"/"arrow"/"feather", all aliases for each other; for Feather, note that
+only version 2 files are supported
+\item "csv"/"text", aliases for the same thing (because comma is the default
+delimiter for text files)
+\item "tsv", equivalent to passing \verb{format = "text", delimiter = "\\t"}
+}
+\item \code{...}: Additional format-specific options
+
+\code{format = "parquet"}:
+\itemize{
+\item \code{dict_columns}: Names of columns which should be read as dictionaries.
+\item Any Parquet options from \link{FragmentScanOptions}.
+}
+
+\code{format = "text"}: see \link{CsvParseOptions}. Note that you can specify them either
+with the Arrow C++ library naming ("delimiter", "quoting", etc.) or the
+\code{readr}-style naming used in \code{\link[=read_csv_arrow]{read_csv_arrow()}} ("delim", "quote", etc.).
+Not all \code{readr} options are currently supported; please file an issue if
+you encounter one that \code{arrow} should support. Also, the following options are
+supported. From \link{CsvReadOptions}:
+\itemize{
+\item \code{skip_rows}
+\item \code{column_names}
+\item \code{autogenerate_column_names}
+}
+
+From \link{CsvFragmentScanOptions} (these values can be overridden at scan time):
+\itemize{
+\item \code{convert_options}: a \link{CsvConvertOptions}
+\item \code{block_size}
+}
+}
+
+It returns the appropriate subclass of \code{FileFormat} (e.g. \code{ParquetFileFormat}).
+}
+
+\examples{
+\dontshow{if (arrow_with_dataset() && tolower(Sys.info()[["sysname"]]) != "windows") (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+## Semi-colon delimited files
+# Set up directory for examples
+tf <- tempfile()
+dir.create(tf)
+on.exit(unlink(tf))
+write.table(mtcars, file.path(tf, "file1.txt"), sep = ";", row.names = FALSE)
+
+# Create FileFormat object
+format <- FileFormat$create(format = "text", delimiter = ";")
+
+open_dataset(tf, format = format)
+\dontshow{\}) # examplesIf}
+}
diff --git a/src/arrow/r/man/FileInfo.Rd b/src/arrow/r/man/FileInfo.Rd
new file mode 100644
index 000000000..ef6182e4e
--- /dev/null
+++ b/src/arrow/r/man/FileInfo.Rd
@@ -0,0 +1,28 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/filesystem.R
+\name{FileInfo}
+\alias{FileInfo}
+\title{FileSystem entry info}
+\description{
+FileSystem entry info
+}
+\section{Methods}{
+
+\itemize{
+\item \code{base_name()} : The file base name (component after the last directory
+separator).
+\item \code{extension()} : The file extension
+}
+}
+
+\section{Active bindings}{
+
+\itemize{
+\item \verb{$type}: The file type
+\item \verb{$path}: The full file path in the filesystem
+\item \verb{$size}: The size in bytes, if available. Only regular files are
+guaranteed to have a size.
+\item \verb{$mtime}: The time of last modification, if available.
+}
+}
+
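A sketch of inspecting a single entry (untested; uses LocalFileSystem$GetFileInfo(), which returns a list of FileInfo):

library(arrow)

tf <- tempfile(fileext = ".txt")
writeLines("hello", tf)

fs <- LocalFileSystem$create()
info <- fs$GetFileInfo(tf)[[1]]
info$path         # full path in the filesystem
info$size         # size in bytes
info$mtime        # last modification time
info$extension()  # "txt"
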
diff --git a/src/arrow/r/man/FileSelector.Rd b/src/arrow/r/man/FileSelector.Rd
new file mode 100644
index 000000000..a3c6deefc
--- /dev/null
+++ b/src/arrow/r/man/FileSelector.Rd
@@ -0,0 +1,27 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/filesystem.R
+\name{FileSelector}
+\alias{FileSelector}
+\title{file selector}
+\description{
+file selector
+}
+\section{Factory}{
+
+
+The \verb{$create()} factory method instantiates a \code{FileSelector} given the 3 fields
+described below.
+}
+
+\section{Fields}{
+
+\itemize{
+\item \code{base_dir}: The directory in which to select files. If the path exists but
+doesn't point to a directory, this should be an error.
+\item \code{allow_not_found}: The behavior if \code{base_dir} doesn't exist in the
+filesystem. If \code{FALSE}, an error is returned. If \code{TRUE}, an empty
+selection is returned
+\item \code{recursive}: Whether to recurse into subdirectories.
+}
+}
+
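A sketch of a recursive listing (untested; the argument names to $create() are assumed to match the field names above):

library(arrow)

td <- tempfile()
dir.create(file.path(td, "sub"), recursive = TRUE)
writeLines("a", file.path(td, "one.txt"))
writeLines("b", file.path(td, "sub", "two.txt"))

sel <- FileSelector$create(td, allow_not_found = FALSE, recursive = TRUE)
fs <- LocalFileSystem$create()
infos <- fs$GetFileInfo(sel)  # one FileInfo per entry under td
length(infos)
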
diff --git a/src/arrow/r/man/FileSystem.Rd b/src/arrow/r/man/FileSystem.Rd
new file mode 100644
index 000000000..2f3dcff67
--- /dev/null
+++ b/src/arrow/r/man/FileSystem.Rd
@@ -0,0 +1,99 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/filesystem.R
+\docType{class}
+\name{FileSystem}
+\alias{FileSystem}
+\alias{LocalFileSystem}
+\alias{S3FileSystem}
+\alias{SubTreeFileSystem}
+\title{FileSystem classes}
+\description{
+\code{FileSystem} is an abstract file system API,
+\code{LocalFileSystem} is an implementation accessing files
+on the local machine. \code{SubTreeFileSystem} is an implementation that delegates
+to another implementation after prepending a fixed base path.
+}
+\section{Factory}{
+
+
+\code{LocalFileSystem$create()} returns the object and takes no arguments.
+
+\code{SubTreeFileSystem$create()} takes the following arguments:
+\itemize{
+\item \code{base_path}, a string path
+\item \code{base_fs}, a \code{FileSystem} object
+}
+
+\code{S3FileSystem$create()} optionally takes arguments:
+\itemize{
+\item \code{anonymous}: logical, default \code{FALSE}. If true, will not attempt to look up
+credentials using standard AWS configuration methods.
+\item \code{access_key}, \code{secret_key}: authentication credentials. If one is provided,
+the other must be as well. If both are provided, they will override any
+AWS configuration set at the environment level.
+\item \code{session_token}: optional string for authentication along with
+\code{access_key} and \code{secret_key}
+\item \code{role_arn}: string AWS ARN of an AccessRole. If provided instead of \code{access_key} and
+\code{secret_key}, temporary credentials will be fetched by assuming this role.
+\item \code{session_name}: optional string identifier for the assumed role session.
+\item \code{external_id}: optional unique string identifier that might be required
+when you assume a role in another account.
+\item \code{load_frequency}: integer, frequency (in seconds) with which temporary
+credentials from an assumed role session will be refreshed. Default is
+900 (i.e. 15 minutes)
+\item \code{region}: AWS region to connect to. If omitted, the AWS library will
+provide a sensible default based on client configuration, falling back
+to "us-east-1" if no other alternatives are found.
+\item \code{endpoint_override}: If non-empty, override region with a connect string
+such as "localhost:9000". This is useful for connecting to file systems
+that emulate S3.
+\item \code{scheme}: S3 connection transport (default "https")
+\item \code{background_writes}: logical, whether \code{OutputStream} writes will be issued
+in the background, without blocking (default \code{TRUE})
+}
+}
+
+\section{Methods}{
+
+\itemize{
+\item \verb{$GetFileInfo(x)}: \code{x} may be a \link{FileSelector} or a character
+vector of paths. Returns a list of \link{FileInfo}
+\item \verb{$CreateDir(path, recursive = TRUE)}: Create a directory and subdirectories.
+\item \verb{$DeleteDir(path)}: Delete a directory and its contents, recursively.
+\item \verb{$DeleteDirContents(path)}: Delete a directory's contents, recursively.
+Like \verb{$DeleteDir()},
+but doesn't delete the directory itself. Passing an empty path (\code{""}) will
+wipe the entire filesystem tree.
+\item \verb{$DeleteFile(path)} : Delete a file.
+\item \verb{$DeleteFiles(paths)} : Delete many files. The default implementation
+issues individual delete operations in sequence.
+\item \verb{$Move(src, dest)}: Move / rename a file or directory. If the destination
+exists:
+\itemize{
+\item if it is a non-empty directory, an error is returned
+\item otherwise, if it has the same type as the source, it is replaced
+\item otherwise, behavior is unspecified (implementation-dependent).
+}
+\item \verb{$CopyFile(src, dest)}: Copy a file. If the destination exists and is a
+directory, an error is returned. Otherwise, it is replaced.
+\item \verb{$OpenInputStream(path)}: Open an \link[=InputStream]{input stream} for
+sequential reading.
+\item \verb{$OpenInputFile(path)}: Open an \link[=RandomAccessFile]{input file} for random
+access reading.
+\item \verb{$OpenOutputStream(path)}: Open an \link[=OutputStream]{output stream} for
+sequential writing.
+\item \verb{$OpenAppendStream(path)}: Open an \link[=OutputStream]{output stream} for
+appending.
+}
+}
+
+\section{Active bindings}{
+
+\itemize{
+\item \verb{$type_name}: string filesystem type name, such as "local", "s3", etc.
+\item \verb{$region}: string AWS region, for \code{S3FileSystem} and \code{SubTreeFileSystem}
+containing a \code{S3FileSystem}
+\item \verb{$base_fs}: for \code{SubTreeFileSystem}, the \code{FileSystem} it contains
+\item \verb{$base_path}: for \code{SubTreeFileSystem}, the path in \verb{$base_fs} which is considered
+root in this \code{SubTreeFileSystem}.
+}
+}
+
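A local-filesystem sketch exercising a few of the methods above (untested):

library(arrow)

fs <- LocalFileSystem$create()
fs$type_name   # "local"

td <- file.path(tempdir(), "fs-demo")
fs$CreateDir(td)

sink <- fs$OpenOutputStream(file.path(td, "mtcars.feather"))
write_feather(mtcars, sink)
sink$close()

fs$GetFileInfo(file.path(td, "mtcars.feather"))[[1]]$size
fs$DeleteDir(td)   # removes the directory and its contents
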
diff --git a/src/arrow/r/man/FileWriteOptions.Rd b/src/arrow/r/man/FileWriteOptions.Rd
new file mode 100644
index 000000000..661393c8e
--- /dev/null
+++ b/src/arrow/r/man/FileWriteOptions.Rd
@@ -0,0 +1,8 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/dataset-format.R
+\name{FileWriteOptions}
+\alias{FileWriteOptions}
+\title{Format-specific write options}
+\description{
+A \code{FileWriteOptions} holds write options specific to a \code{FileFormat}.
+}
diff --git a/src/arrow/r/man/FixedWidthType.Rd b/src/arrow/r/man/FixedWidthType.Rd
new file mode 100644
index 000000000..28578268d
--- /dev/null
+++ b/src/arrow/r/man/FixedWidthType.Rd
@@ -0,0 +1,15 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/type.R
+\docType{class}
+\name{FixedWidthType}
+\alias{FixedWidthType}
+\title{class arrow::FixedWidthType}
+\description{
+class arrow::FixedWidthType
+}
+\section{Methods}{
+
+
+TODO
+}
+
diff --git a/src/arrow/r/man/FragmentScanOptions.Rd b/src/arrow/r/man/FragmentScanOptions.Rd
new file mode 100644
index 000000000..103d05895
--- /dev/null
+++ b/src/arrow/r/man/FragmentScanOptions.Rd
@@ -0,0 +1,40 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/dataset-format.R
+\name{FragmentScanOptions}
+\alias{FragmentScanOptions}
+\alias{CsvFragmentScanOptions}
+\alias{ParquetFragmentScanOptions}
+\title{Format-specific scan options}
+\description{
+A \code{FragmentScanOptions} holds options specific to a \code{FileFormat} and a scan
+operation.
+}
+\section{Factory}{
+
+\code{FragmentScanOptions$create()} takes the following arguments:
+\itemize{
+\item \code{format}: A string identifier of the file format. Currently supported values:
+\itemize{
+\item "parquet"
+\item "csv"/"text", aliases for the same format.
+}
+\item \code{...}: Additional format-specific options
+
+\code{format = "parquet"}:
+\itemize{
+\item \code{use_buffered_stream}: Read files through buffered input streams rather than
+loading entire row groups at once. This may be enabled
+to reduce memory overhead. Disabled by default.
+\item \code{buffer_size}: Size of buffered stream, if enabled. Default is 8KB.
+\item \code{pre_buffer}: Pre-buffer the raw Parquet data. This can improve performance
+on high-latency filesystems. Disabled by default.
+}
+
+\code{format = "text"}: see \link{CsvConvertOptions}. Note that options can only be
+specified with the Arrow C++ library naming. Also, \code{block_size} from
+\link{CsvReadOptions} may be given.
+}
+
+It returns the appropriate subclass of \code{FragmentScanOptions}
+(e.g. \code{CsvFragmentScanOptions}).
+}
+
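A sketch of the Parquet variant (untested; how the resulting object is attached to a scan, e.g. through open_dataset() or a Scanner, is left out here):

library(arrow)

fso <- FragmentScanOptions$create(
  "parquet",
  use_buffered_stream = TRUE,  # stream rather than load whole row groups
  buffer_size = 8192
)
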
diff --git a/src/arrow/r/man/InputStream.Rd b/src/arrow/r/man/InputStream.Rd
new file mode 100644
index 000000000..b909a77a1
--- /dev/null
+++ b/src/arrow/r/man/InputStream.Rd
@@ -0,0 +1,45 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/io.R
+\docType{class}
+\name{InputStream}
+\alias{InputStream}
+\alias{RandomAccessFile}
+\alias{MemoryMappedFile}
+\alias{ReadableFile}
+\alias{BufferReader}
+\title{InputStream classes}
+\description{
+\code{RandomAccessFile} inherits from \code{InputStream} and is a base
+class for: \code{ReadableFile} for reading from a file; \code{MemoryMappedFile} for
+the same but with memory mapping; and \code{BufferReader} for reading from a
+buffer. Use these with the various table readers.
+}
+\section{Factory}{
+
+
+The \verb{$create()} factory methods instantiate the \code{InputStream} object and
+take the following arguments, depending on the subclass:
+\itemize{
+\item \code{path} For \code{ReadableFile}, a character file name
+\item \code{x} For \code{BufferReader}, a \link{Buffer} or an object that can be
+made into a buffer via \code{buffer()}.
+}
+
+To instantiate a \code{MemoryMappedFile}, call \code{\link[=mmap_open]{mmap_open()}}.
+}
+
+\section{Methods}{
+
+\itemize{
+\item \verb{$GetSize()}: return the size of the file or stream, in bytes
+\item \verb{$supports_zero_copy()}: Logical
+\item \verb{$seek(position)}: go to that position in the stream
+\item \verb{$tell()}: return the position in the stream
+\item \verb{$close()}: close the stream
+\item \verb{$Read(nbytes)}: read data from the stream, either a specified \code{nbytes} or
+all, if \code{nbytes} is not provided
+\item \verb{$ReadAt(position, nbytes)}: similar to \verb{$seek(position)$Read(nbytes)}
+\item \verb{$Resize(size)}: for a \code{MemoryMappedFile} that is writeable
+}
+}
+
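A sketch of file- and buffer-backed streams (untested):

library(arrow)

tf <- tempfile()
writeBin(as.raw(1:64), tf)

f <- ReadableFile$create(tf)
f$GetSize()              # 64
buf <- f$ReadAt(8, 16)   # 16 bytes starting at byte offset 8
f$close()

# BufferReader reads from memory rather than from disk
r <- BufferReader$create(as.raw(1:10))
r$Read(4)                # a Buffer holding the first 4 bytes
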
diff --git a/src/arrow/r/man/MemoryPool.Rd b/src/arrow/r/man/MemoryPool.Rd
new file mode 100644
index 000000000..75f1882d2
--- /dev/null
+++ b/src/arrow/r/man/MemoryPool.Rd
@@ -0,0 +1,24 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/memory-pool.R
+\docType{class}
+\name{MemoryPool}
+\alias{MemoryPool}
+\title{class arrow::MemoryPool}
+\description{
+class arrow::MemoryPool
+}
+\section{Methods}{
+
+\itemize{
+\item \code{backend_name}: one of "jemalloc", "mimalloc", or "system". Alternative
+memory allocators are optionally enabled at build time. Windows builds
+generally have \code{mimalloc}, and most others have both \code{jemalloc} (used by
+default) and \code{mimalloc}. To change memory allocators at runtime, set the
+environment variable \code{ARROW_DEFAULT_MEMORY_POOL} to one of those strings
+prior to loading the \code{arrow} library.
+\item \code{bytes_allocated}
+\item \code{max_memory}
+}
+}
+
+\keyword{internal}
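A minimal sketch of the bindings above, reached via default_memory_pool() (untested):

library(arrow)

pool <- default_memory_pool()
pool$backend_name     # e.g. "jemalloc"
pool$bytes_allocated  # bytes currently allocated by Arrow
pool$max_memory       # peak allocation, where tracked
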
diff --git a/src/arrow/r/man/Message.Rd b/src/arrow/r/man/Message.Rd
new file mode 100644
index 000000000..84dd90a64
--- /dev/null
+++ b/src/arrow/r/man/Message.Rd
@@ -0,0 +1,15 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/message.R
+\docType{class}
+\name{Message}
+\alias{Message}
+\title{class arrow::Message}
+\description{
+class arrow::Message
+}
+\section{Methods}{
+
+
+TODO
+}
+
diff --git a/src/arrow/r/man/MessageReader.Rd b/src/arrow/r/man/MessageReader.Rd
new file mode 100644
index 000000000..d198c185e
--- /dev/null
+++ b/src/arrow/r/man/MessageReader.Rd
@@ -0,0 +1,15 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/message.R
+\docType{class}
+\name{MessageReader}
+\alias{MessageReader}
+\title{class arrow::MessageReader}
+\description{
+class arrow::MessageReader
+}
+\section{Methods}{
+
+
+TODO
+}
+
diff --git a/src/arrow/r/man/OutputStream.Rd b/src/arrow/r/man/OutputStream.Rd
new file mode 100644
index 000000000..f7c71b192
--- /dev/null
+++ b/src/arrow/r/man/OutputStream.Rd
@@ -0,0 +1,38 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/io.R
+\docType{class}
+\name{OutputStream}
+\alias{OutputStream}
+\alias{FileOutputStream}
+\alias{BufferOutputStream}
+\title{OutputStream classes}
+\description{
+\code{FileOutputStream} is for writing to a file;
+\code{BufferOutputStream} writes to a buffer.
+You can create either one and pass it to any of the table writers, for example.
+}
+\section{Factory}{
+
+
+The \verb{$create()} factory methods instantiate the \code{OutputStream} object and
+take the following arguments, depending on the subclass:
+\itemize{
+\item \code{path} For \code{FileOutputStream}, a character file name
+\item \code{initial_capacity} For \code{BufferOutputStream}, the size in bytes of the
+buffer.
+}
+}
+
+\section{Methods}{
+
+\itemize{
+\item \verb{$tell()}: return the position in the stream
+\item \verb{$close()}: close the stream
+\item \verb{$write(x)}: send \code{x} to the stream
+\item \verb{$capacity()}: for \code{BufferOutputStream}
+\item \verb{$finish()}: for \code{BufferOutputStream}
+\item \verb{$GetExtentBytesWritten()}: for \code{MockOutputStream}, report how many bytes
+were sent.
+}
+}
+
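A sketch writing an IPC stream into memory through a BufferOutputStream (untested; assumes write_ipc_stream() accepts an open OutputStream as its sink, per its documentation):

library(arrow)

sink <- BufferOutputStream$create()
write_ipc_stream(mtcars, sink)
buf <- sink$finish()   # returns the accumulated Buffer
buf$size
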
diff --git a/src/arrow/r/man/ParquetArrowReaderProperties.Rd b/src/arrow/r/man/ParquetArrowReaderProperties.Rd
new file mode 100644
index 000000000..33a50f712
--- /dev/null
+++ b/src/arrow/r/man/ParquetArrowReaderProperties.Rd
@@ -0,0 +1,29 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/parquet.R
+\docType{class}
+\name{ParquetArrowReaderProperties}
+\alias{ParquetArrowReaderProperties}
+\title{ParquetArrowReaderProperties class}
+\description{
+This class holds settings to control how a Parquet file is read
+by \link{ParquetFileReader}.
+}
+\section{Factory}{
+
+
+The \code{ParquetArrowReaderProperties$create()} factory method instantiates the object
+and takes the following arguments:
+\itemize{
+\item \code{use_threads} Logical: whether to use multithreading (default \code{TRUE})
+}
+}
+
+\section{Methods}{
+
+\itemize{
+\item \verb{$read_dictionary(column_index)}
+\item \verb{$set_read_dictionary(column_index, read_dict)}
+\item \verb{$use_threads(use_threads)}
+}
+}
+
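A sketch passing the properties to ParquetFileReader$create() (untested; the bundled example file is the one used in the ParquetFileReader docs):

library(arrow)

props <- ParquetArrowReaderProperties$create(use_threads = FALSE)
f <- system.file("v0.7.1.parquet", package = "arrow")
reader <- ParquetFileReader$create(f, props = props)
reader$num_row_groups
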
diff --git a/src/arrow/r/man/ParquetFileReader.Rd b/src/arrow/r/man/ParquetFileReader.Rd
new file mode 100644
index 000000000..30d0725a4
--- /dev/null
+++ b/src/arrow/r/man/ParquetFileReader.Rd
@@ -0,0 +1,59 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/parquet.R
+\docType{class}
+\name{ParquetFileReader}
+\alias{ParquetFileReader}
+\title{ParquetFileReader class}
+\description{
+This class enables you to interact with Parquet files.
+}
+\section{Factory}{
+
+
+The \code{ParquetFileReader$create()} factory method instantiates the object and
+takes the following arguments:
+\itemize{
+\item \code{file} A character file name, raw vector, or Arrow file connection object
+(e.g. \code{RandomAccessFile}).
+\item \code{props} Optional \link{ParquetArrowReaderProperties}
+\item \code{mmap} Logical: whether to memory-map the file (default \code{TRUE})
+\item \code{...} Additional arguments, currently ignored
+}
+}
+
+\section{Methods}{
+
+\itemize{
+\item \verb{$ReadTable(column_indices)}: get an \code{arrow::Table} from the file. The optional
+\verb{column_indices=} argument is a 0-based integer vector indicating which columns to retain.
+\item \verb{$ReadRowGroup(i, column_indices)}: get an \code{arrow::Table} by reading the \code{i}th row group (0-based).
+The optional \verb{column_indices=} argument is a 0-based integer vector indicating which columns to retain.
+\item \verb{$ReadRowGroups(row_groups, column_indices)}: get an \code{arrow::Table} by reading several row
+groups (0-based integers).
+The optional \verb{column_indices=} argument is a 0-based integer vector indicating which columns to retain.
+\item \verb{$GetSchema()}: get the \code{arrow::Schema} of the data in the file
+\item \verb{$ReadColumn(i)}: read the \code{i}th column (0-based) as a \link{ChunkedArray}.
+}
+}
+
+\section{Active bindings}{
+
+\itemize{
+\item \verb{$num_rows}: number of rows.
+\item \verb{$num_columns}: number of columns.
+\item \verb{$num_row_groups}: number of row groups.
+}
+}
+
+\examples{
+\dontshow{if (arrow_with_parquet()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+f <- system.file("v0.7.1.parquet", package = "arrow")
+pq <- ParquetFileReader$create(f)
+pq$GetSchema()
+if (codec_is_available("snappy")) {
+ # This file has compressed data columns
+ tab <- pq$ReadTable()
+ tab$schema
+}
+\dontshow{\}) # examplesIf}
+}
diff --git a/src/arrow/r/man/ParquetFileWriter.Rd b/src/arrow/r/man/ParquetFileWriter.Rd
new file mode 100644
index 000000000..f36e85ab6
--- /dev/null
+++ b/src/arrow/r/man/ParquetFileWriter.Rd
@@ -0,0 +1,31 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/parquet.R
+\docType{class}
+\name{ParquetFileWriter}
+\alias{ParquetFileWriter}
+\title{ParquetFileWriter class}
+\description{
+This class enables you to interact with Parquet files.
+}
+\section{Factory}{
+
+
+The \code{ParquetFileWriter$create()} factory method instantiates the object and
+takes the following arguments:
+\itemize{
+\item \code{schema} A \link{Schema}
+\item \code{sink} An \link[=OutputStream]{arrow::io::OutputStream}
+\item \code{properties} An instance of \link{ParquetWriterProperties}
+\item \code{arrow_properties} An instance of \code{ParquetArrowWriterProperties}
+}
+}
+
+\section{Methods}{
+
+\itemize{
+\item \code{WriteTable} Write a \link{Table} to \code{sink}
+\item \code{Close} Close the writer. Note: does not close the \code{sink}.
+\link[=OutputStream]{arrow::io::OutputStream} has its own \code{close()} method.
+}
+}
+
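A sketch of the writer (untested; the chunk_size argument to $WriteTable() is an assumption based on how write_parquet() drives this class):

library(arrow)

tab <- Table$create(mtcars)
tf <- tempfile(fileext = ".parquet")

sink <- FileOutputStream$create(tf)
props <- ParquetWriterProperties$create(tab)
writer <- ParquetFileWriter$create(tab$schema, sink, properties = props)
writer$WriteTable(tab, chunk_size = nrow(tab))  # chunk_size is assumed
writer$Close()   # does not close the sink
sink$close()
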
diff --git a/src/arrow/r/man/ParquetWriterProperties.Rd b/src/arrow/r/man/ParquetWriterProperties.Rd
new file mode 100644
index 000000000..7beb8a82a
--- /dev/null
+++ b/src/arrow/r/man/ParquetWriterProperties.Rd
@@ -0,0 +1,49 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/parquet.R
+\docType{class}
+\name{ParquetWriterProperties}
+\alias{ParquetWriterProperties}
+\title{ParquetWriterProperties class}
+\description{
+This class holds settings to control how a Parquet file is written
+by \link{ParquetFileWriter}.
+}
+\details{
+The parameters \code{compression}, \code{compression_level}, \code{use_dictionary}
+and \code{write_statistics} support various patterns:
+\itemize{
+\item The default \code{NULL} leaves the parameter unspecified, and the C++ library
+uses an appropriate default for each column (defaults listed in the Factory
+section below)
+\item A single unnamed value (e.g. a single string for \code{compression}) applies to all columns
+\item An unnamed vector, of the same size as the number of columns, to specify a
+value for each column, in positional order
+\item A named vector, to specify the value for the named columns; the default
+value for the setting is used when not supplied
+}
+
+Unlike the high-level \link{write_parquet}, \code{ParquetWriterProperties} arguments
+use the C++ defaults. Currently this means "uncompressed" rather than
+"snappy" for the \code{compression} argument.
+}
+\section{Factory}{
+
+
+The \code{ParquetWriterProperties$create()} factory method instantiates the object
+and takes the following arguments:
+\itemize{
+\item \code{table}: table to write (required)
+\item \code{version}: Parquet version, "1.0" or "2.0". Default "1.0"
+\item \code{compression}: Compression type. Default \code{"uncompressed"}
+\item \code{compression_level}: Compression level; meaning depends on compression algorithm
+\item \code{use_dictionary}: Specify if we should use dictionary encoding. Default \code{TRUE}
+\item \code{write_statistics}: Specify if we should write statistics. Default \code{TRUE}
+\item \code{data_page_size}: Set a target threshold for the approximate encoded
+size of data pages within a column chunk (in bytes). Default 1 MiB.
+}
+}
+
+\seealso{
+\link{write_parquet}
+
+\link{Schema} for information about schemas and metadata handling.
+}
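A sketch of the per-column patterns described in the Details section (untested):

library(arrow)

tab <- Table$create(mtcars)
props <- ParquetWriterProperties$create(
  tab,
  compression = "uncompressed",    # one unnamed value: applies to all columns
  use_dictionary = c(mpg = FALSE)  # named: per-column, C++ default elsewhere
)
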
diff --git a/src/arrow/r/man/Partitioning.Rd b/src/arrow/r/man/Partitioning.Rd
new file mode 100644
index 000000000..cfe374155
--- /dev/null
+++ b/src/arrow/r/man/Partitioning.Rd
@@ -0,0 +1,51 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/dataset-partition.R
+\name{Partitioning}
+\alias{Partitioning}
+\alias{DirectoryPartitioning}
+\alias{HivePartitioning}
+\alias{DirectoryPartitioningFactory}
+\alias{HivePartitioningFactory}
+\title{Define Partitioning for a Dataset}
+\description{
+Pass a \code{Partitioning} object to a \link{FileSystemDatasetFactory}'s \verb{$create()}
+method to indicate how the file's paths should be interpreted to define
+partitioning.
+
+\code{DirectoryPartitioning} describes how to interpret raw path segments, in
+order. For example, \code{schema(year = int16(), month = int8())} would define
+partitions for file paths like "2019/01/file.parquet",
+"2019/02/file.parquet", etc. In this scheme \code{NULL} values will be skipped. In
+the previous example: when writing a dataset if the month was \code{NA} (or
+\code{NULL}), the files would be placed in "2019/file.parquet". When reading, the
+rows in "2019/file.parquet" would return an \code{NA} for the month column. An
+error will be raised if an outer directory is \code{NULL} and an inner directory
+is not.
+
+\code{HivePartitioning} is for Hive-style partitioning, which embeds field
+names and values in path segments, such as
+"/year=2019/month=2/data.parquet". Because fields are named in the path
+segments, order does not matter. This partitioning scheme allows \code{NULL}
+values. They will be replaced by a configurable \code{null_fallback} which
+defaults to the string \code{"__HIVE_DEFAULT_PARTITION__"} when writing. When
+reading, the \code{null_fallback} string will be replaced with \code{NA}s as
+appropriate.
+
+\code{PartitioningFactory} subclasses instruct the \code{DatasetFactory} to detect
+partition features from the file paths.
+}
+\section{Factory}{
+
+Both \code{DirectoryPartitioning$create()} and \code{HivePartitioning$create()}
+methods take a \link{Schema} as a single input argument. The helper
+function \code{\link[=hive_partition]{hive_partition(...)}} is shorthand for
+\code{HivePartitioning$create(schema(...))}.
+
+With \code{DirectoryPartitioningFactory$create()}, you can provide just the
+names of the path segments (in our example, \code{c("year", "month")}), and
+the \code{DatasetFactory} will infer the data types for those partition variables.
+\code{HivePartitioningFactory$create()} takes no arguments: both variable names
+and their types can be inferred from the file paths. \code{hive_partition()} with
+no arguments returns a \code{HivePartitioningFactory}.
+}
+
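A sketch of the constructors described above (untested; passing the segment names as a character vector to DirectoryPartitioningFactory$create() follows the wording above and is otherwise an assumption):

library(arrow)

# Hive-style: field names are embedded in the path segments
part_hive <- hive_partition(year = int16(), month = int8())

# Directory partitioning: raw segments are interpreted positionally
part_dir <- DirectoryPartitioning$create(schema(year = int16(), month = int8()))

# Or let the factory infer the types of named segments
part_factory <- DirectoryPartitioningFactory$create(c("year", "month"))
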
diff --git a/src/arrow/r/man/RecordBatch.Rd b/src/arrow/r/man/RecordBatch.Rd
new file mode 100644
index 000000000..ff08c2158
--- /dev/null
+++ b/src/arrow/r/man/RecordBatch.Rd
@@ -0,0 +1,92 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/record-batch.R
+\docType{class}
+\name{RecordBatch}
+\alias{RecordBatch}
+\alias{record_batch}
+\title{RecordBatch class}
+\usage{
+record_batch(..., schema = NULL)
+}
+\arguments{
+\item{...}{A \code{data.frame} or a named set of Arrays or vectors. If given a
+mixture of data.frames and vectors, the inputs will be autospliced together
+(see examples). Alternatively, you can provide a single Arrow IPC
+\code{InputStream}, \code{Message}, \code{Buffer}, or R \code{raw} object containing a \code{Buffer}.}
+
+\item{schema}{a \link{Schema}, or \code{NULL} (the default) to infer the schema from
+the data in \code{...}. When providing an Arrow IPC buffer, \code{schema} is required.}
+}
+\description{
+A record batch is a collection of equal-length arrays matching
+a particular \link{Schema}. It is a table-like data structure that is semantically
+a sequence of \link[=Field]{fields}, each a contiguous Arrow \link{Array}.
+}
+\section{S3 Methods and Usage}{
+
+Record batches are data-frame-like, and many methods you expect to work on
+a \code{data.frame} are implemented for \code{RecordBatch}. This includes \code{[}, \code{[[},
+\code{$}, \code{names}, \code{dim}, \code{nrow}, \code{ncol}, \code{head}, and \code{tail}. You can also pull
+the data from an Arrow record batch into R with \code{as.data.frame()}. See the
+examples.
+
+A caveat about the \code{$} method: because \code{RecordBatch} is an \code{R6} object,
+\code{$} is also used to access the object's methods (see below). Methods take
+precedence over the batch's columns. So, \code{batch$Slice} would return the
+"Slice" method function even if there were a column in the batch called
+"Slice".
+}
+
+\section{R6 Methods}{
+
+In addition to the more R-friendly S3 methods, a \code{RecordBatch} object has
+the following R6 methods that map onto the underlying C++ methods:
+\itemize{
+\item \verb{$Equals(other)}: Returns \code{TRUE} if the \code{other} record batch is equal
+\item \verb{$column(i)}: Extract an \code{Array} by integer position from the batch
+\item \verb{$column_name(i)}: Get a column's name by integer position
+\item \verb{$names()}: Get all column names (called by \code{names(batch)})
+\item \verb{$RenameColumns(value)}: Set all column names (called by \code{names(batch) <- value})
+\item \verb{$GetColumnByName(name)}: Extract an \code{Array} by string name
+\item \verb{$RemoveColumn(i)}: Drops a column from the batch by integer position
+\item \verb{$SelectColumns(indices)}: Return a new record batch with a selection of columns, expressed as 0-based integers.
+\item \verb{$Slice(offset, length = NULL)}: Create a zero-copy view starting at the
+indicated integer offset and going for the given length, or to the end
+of the batch if \code{NULL}, the default.
+\item \verb{$Take(i)}: return a \code{RecordBatch} with rows at positions given by
+integers (R vector or Arrow Array) \code{i}.
+\item \verb{$Filter(i, keep_na = TRUE)}: return a \code{RecordBatch} with rows at positions where logical
+vector (or Arrow boolean Array) \code{i} is \code{TRUE}.
+\item \verb{$SortIndices(names, descending = FALSE)}: return an \code{Array} of integer row
+positions that can be used to rearrange the \code{RecordBatch} in ascending or
+descending order by the first named column, breaking ties with further named
+columns. \code{descending} can be a logical vector of length one or of the same
+length as \code{names}.
+\item \verb{$serialize()}: Returns a raw vector suitable for interprocess communication
+\item \verb{$cast(target_schema, safe = TRUE, options = cast_options(safe))}: Alter
+the schema of the record batch.
+}
+
+There are also some active bindings
+\itemize{
+\item \verb{$num_columns}
+\item \verb{$num_rows}
+\item \verb{$schema}
+\item \verb{$metadata}: Returns the key-value metadata of the \code{Schema} as a named list.
+Modify or replace by assigning in (\code{batch$metadata <- new_metadata}).
+All list elements are coerced to string. See \code{schema()} for more information.
+\item \verb{$columns}: Returns a list of \code{Array}s
+}
+}
+
+\examples{
+\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+batch <- record_batch(name = rownames(mtcars), mtcars)
+dim(batch)
+dim(head(batch))
+names(batch)
+batch$mpg
+batch[["cyl"]]
+as.data.frame(batch[4:8, c("gear", "hp", "wt")])
+\dontshow{\}) # examplesIf}
+}
diff --git a/src/arrow/r/man/RecordBatchReader.Rd b/src/arrow/r/man/RecordBatchReader.Rd
new file mode 100644
index 000000000..90c796a66
--- /dev/null
+++ b/src/arrow/r/man/RecordBatchReader.Rd
@@ -0,0 +1,86 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/record-batch-reader.R
+\docType{class}
+\name{RecordBatchReader}
+\alias{RecordBatchReader}
+\alias{RecordBatchStreamReader}
+\alias{RecordBatchFileReader}
+\title{RecordBatchReader classes}
+\description{
+Apache Arrow defines two formats for \href{https://arrow.apache.org/docs/format/Columnar.html#serialization-and-interprocess-communication-ipc}{serializing data for interprocess communication (IPC)}:
+a "stream" format and a "file" format, known as Feather.
+\code{RecordBatchStreamReader} and \code{RecordBatchFileReader} are
+interfaces for accessing record batches from input sources in those formats,
+respectively.
+
+For guidance on how to use these classes, see the examples section.
+}
+\section{Factory}{
+
+
+The \code{RecordBatchFileReader$create()} and \code{RecordBatchStreamReader$create()}
+factory methods instantiate the object and
+take a single argument, named according to the class:
+\itemize{
+\item \code{file} A character file name, raw vector, or Arrow file connection object
+(e.g. \link{RandomAccessFile}).
+\item \code{stream} A raw vector, \link{Buffer}, or \link{InputStream}.
+}
+}
+
+\section{Methods}{
+
+\itemize{
+\item \verb{$read_next_batch()}: Returns a \code{RecordBatch}, iterating through the
+Reader. If there are no further batches in the Reader, it returns \code{NULL}.
+\item \verb{$schema}: Returns a \link{Schema} (active binding)
+\item \verb{$batches()}: Returns a list of \code{RecordBatch}es
+\item \verb{$read_table()}: Collects the reader's \code{RecordBatch}es into a \link{Table}
+\item \verb{$get_batch(i)}: For \code{RecordBatchFileReader}, return a particular batch
+by an integer index.
+\item \verb{$num_record_batches()}: For \code{RecordBatchFileReader}, see how many batches
+are in the file.
+}
+}
+
+\examples{
+\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+tf <- tempfile()
+on.exit(unlink(tf))
+
+batch <- record_batch(chickwts)
+
+# This opens a connection to the file in Arrow
+file_obj <- FileOutputStream$create(tf)
+# Pass that to a RecordBatchWriter to write data conforming to a schema
+writer <- RecordBatchFileWriter$create(file_obj, batch$schema)
+writer$write(batch)
+# You may write additional batches to the stream, provided that they have
+# the same schema.
+# Call "close" on the writer to indicate end-of-file/stream
+writer$close()
+# Then, close the connection--closing the IPC message does not close the file
+file_obj$close()
+
+# Now, we have a file we can read from. Same pattern: open file connection,
+# then pass it to a RecordBatchReader
+read_file_obj <- ReadableFile$create(tf)
+reader <- RecordBatchFileReader$create(read_file_obj)
+# RecordBatchFileReader knows how many batches it has (StreamReader does not)
+reader$num_record_batches
+# We could consume the Reader by calling $read_next_batch() until all batches
+# are consumed, or we can call $read_table() to pull them all into a Table
+tab <- reader$read_table()
+# Call as.data.frame to turn that Table into an R data.frame
+df <- as.data.frame(tab)
+# This should be the same data we sent
+all.equal(df, chickwts, check.attributes = FALSE)
+# Unlike the Writers, we don't have to close RecordBatchReaders,
+# but we do still need to close the file connection
+read_file_obj$close()
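+
+# The stream format follows the same pattern; a minimal sketch, reusing
+# `batch` from above: write_to_raw() serializes to a stream-format raw
+# vector, which RecordBatchStreamReader can read directly
+buf <- write_to_raw(batch)
+stream_reader <- RecordBatchStreamReader$create(buf)
+stream_tab <- stream_reader$read_table()
+all.equal(as.data.frame(stream_tab), chickwts, check.attributes = FALSE)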
+\dontshow{\}) # examplesIf}
+}
+\seealso{
+\code{\link[=read_ipc_stream]{read_ipc_stream()}} and \code{\link[=read_feather]{read_feather()}} provide a much simpler interface
+for reading data from these formats and are sufficient for many use cases.
+}
diff --git a/src/arrow/r/man/RecordBatchWriter.Rd b/src/arrow/r/man/RecordBatchWriter.Rd
new file mode 100644
index 000000000..219c150e6
--- /dev/null
+++ b/src/arrow/r/man/RecordBatchWriter.Rd
@@ -0,0 +1,89 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/record-batch-writer.R
+\docType{class}
+\name{RecordBatchWriter}
+\alias{RecordBatchWriter}
+\alias{RecordBatchStreamWriter}
+\alias{RecordBatchFileWriter}
+\title{RecordBatchWriter classes}
+\description{
+Apache Arrow defines two formats for \href{https://arrow.apache.org/docs/format/Columnar.html#serialization-and-interprocess-communication-ipc}{serializing data for interprocess communication (IPC)}:
+a "stream" format and a "file" format, known as Feather.
+\code{RecordBatchStreamWriter} and \code{RecordBatchFileWriter} are
+interfaces for writing record batches to those formats, respectively.
+
+For guidance on how to use these classes, see the examples section.
+}
+\section{Factory}{
+
+
+The \code{RecordBatchFileWriter$create()} and \code{RecordBatchStreamWriter$create()}
+factory methods instantiate the object and take the following arguments:
+\itemize{
+\item \code{sink} An \code{OutputStream}
+\item \code{schema} A \link{Schema} for the data to be written
+\item \code{use_legacy_format} logical: write data formatted so that Arrow libraries
+versions 0.14 and lower can read it. Default is \code{FALSE}. You can also
+enable this by setting the environment variable \code{ARROW_PRE_0_15_IPC_FORMAT=1}.
+\item \code{metadata_version}: A string like "V5" or the equivalent integer indicating
+the Arrow IPC MetadataVersion. Default (NULL) will use the latest version,
+unless the environment variable \code{ARROW_PRE_1_0_METADATA_VERSION=1}, in
+which case it will be V4.
+}
+}
+
+\section{Methods}{
+
+\itemize{
+\item \verb{$write(x)}: Write a \link{RecordBatch}, \link{Table}, or \code{data.frame}, dispatching
+to the methods below appropriately
+\item \verb{$write_batch(batch)}: Write a \code{RecordBatch} to stream
+\item \verb{$write_table(table)}: Write a \code{Table} to stream
+\item \verb{$close()}: close stream. Note that this indicates end-of-file or
+end-of-stream--it does not close the connection to the \code{sink}. That needs
+to be closed separately.
+}
+}
+
+\examples{
+\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+tf <- tempfile()
+on.exit(unlink(tf))
+
+batch <- record_batch(chickwts)
+
+# This opens a connection to the file in Arrow
+file_obj <- FileOutputStream$create(tf)
+# Pass that to a RecordBatchWriter to write data conforming to a schema
+writer <- RecordBatchFileWriter$create(file_obj, batch$schema)
+writer$write(batch)
+# You may write additional batches to the stream, provided that they have
+# the same schema.
+# Call "close" on the writer to indicate end-of-file/stream
+writer$close()
+# Then, close the connection--closing the IPC message does not close the file
+file_obj$close()
+
+# Now, we have a file we can read from. Same pattern: open file connection,
+# then pass it to a RecordBatchReader
+read_file_obj <- ReadableFile$create(tf)
+reader <- RecordBatchFileReader$create(read_file_obj)
+# RecordBatchFileReader knows how many batches it has (StreamReader does not)
+reader$num_record_batches
+# We could consume the Reader by calling $read_next_batch() until all batches
+# are consumed, or we can call $read_table() to pull them all into a Table
+tab <- reader$read_table()
+# Call as.data.frame to turn that Table into an R data.frame
+df <- as.data.frame(tab)
+# This should be the same data we sent
+all.equal(df, chickwts, check.attributes = FALSE)
+# Unlike the Writers, we don't have to close RecordBatchReaders,
+# but we do still need to close the file connection
+read_file_obj$close()
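+
+# A minimal sketch of the stream-format counterpart: the same workflow,
+# only with RecordBatchStreamWriter in place of RecordBatchFileWriter
+tf2 <- tempfile()
+stream_obj <- FileOutputStream$create(tf2)
+stream_writer <- RecordBatchStreamWriter$create(stream_obj, batch$schema)
+stream_writer$write(batch)
+stream_writer$close()
+stream_obj$close()
+unlink(tf2)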
+\dontshow{\}) # examplesIf}
+}
+\seealso{
+\code{\link[=write_ipc_stream]{write_ipc_stream()}} and \code{\link[=write_feather]{write_feather()}} provide a much simpler
+interface for writing data to these formats and are sufficient for many use
+cases. \code{\link[=write_to_raw]{write_to_raw()}} is a version that serializes data to a buffer.
+}
diff --git a/src/arrow/r/man/Scalar.Rd b/src/arrow/r/man/Scalar.Rd
new file mode 100644
index 000000000..21e04c12e
--- /dev/null
+++ b/src/arrow/r/man/Scalar.Rd
@@ -0,0 +1,38 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/scalar.R
+\docType{class}
+\name{Scalar}
+\alias{Scalar}
+\title{Arrow scalars}
+\description{
+A \code{Scalar} holds a single value of an Arrow type.
+}
+\section{Methods}{
+
+\itemize{
+\item \verb{$ToString()}: convert to a string
+\item \verb{$as_vector()}: convert to an R vector
+\item \verb{$as_array()}: convert to an Arrow \code{Array}
+\item \verb{$Equals(other)}: is this Scalar equal to \code{other}
+\item \verb{$ApproxEquals(other)}: is this Scalar approximately equal to \code{other}
+\item \verb{$is_valid}: is this Scalar valid
+\item \verb{$null_count}: number of invalid values, either 1 or 0
+\item \verb{$type}: Scalar type
+}
+}
+
+\examples{
+\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+Scalar$create(pi)
+Scalar$create(404)
+# If you pass a vector into Scalar$create, you get a list containing your items
+Scalar$create(c(1, 2, 3))
+
+# Comparisons
+my_scalar <- Scalar$create(99)
+my_scalar$ApproxEquals(Scalar$create(99.00001)) # FALSE
+my_scalar$ApproxEquals(Scalar$create(99.000009)) # TRUE
+my_scalar$Equals(Scalar$create(99.000009)) # FALSE
+my_scalar$Equals(Scalar$create(99L)) # FALSE (types don't match)
+
+my_scalar$ToString()
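+
+# Convert a Scalar to an Arrow Array or to an R vector
+my_scalar$as_array()
+my_scalar$as_vector()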
+\dontshow{\}) # examplesIf}
+}
diff --git a/src/arrow/r/man/Scanner.Rd b/src/arrow/r/man/Scanner.Rd
new file mode 100644
index 000000000..db6488f50
--- /dev/null
+++ b/src/arrow/r/man/Scanner.Rd
@@ -0,0 +1,51 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/dataset-scan.R
+\name{Scanner}
+\alias{Scanner}
+\alias{ScannerBuilder}
+\title{Scan the contents of a dataset}
+\description{
+A \code{Scanner} iterates over a \link{Dataset}'s fragments and returns data
+according to given row filtering and column projection. A \code{ScannerBuilder}
+can help create one.
+}
+\section{Factory}{
+
+\code{Scanner$create()} wraps the \code{ScannerBuilder} interface to make a \code{Scanner}.
+It takes the following arguments:
+\itemize{
+\item \code{dataset}: A \code{Dataset} or \code{arrow_dplyr_query} object, as returned by the
+\code{dplyr} methods on \code{Dataset}.
+\item \code{projection}: A character vector of column names to select columns or a
+named list of expressions
+\item \code{filter}: An \code{Expression} to filter the scanned rows by, or \code{TRUE} (default)
+to keep all rows.
+\item \code{use_threads}: logical: should scanning use multithreading? Default \code{TRUE}
+\item \code{use_async}: logical: should the async scanner (performs better on
+high-latency/highly parallel filesystems like S3) be used? Default \code{FALSE}
+\item \code{...}: Additional arguments, currently ignored
+}
+}
+
+\section{Methods}{
+
+\code{ScannerBuilder} has the following methods:
+\itemize{
+\item \verb{$Project(cols)}: Indicate that the scan should only return columns given
+by \code{cols}, a character vector of column names
+\item \verb{$Filter(expr)}: Filter rows by an \link{Expression}.
+\item \verb{$UseThreads(threads)}: logical: should the scan use multithreading?
+The method's default input is \code{TRUE}, but you must call the method to enable
+multithreading because the scanner default is \code{FALSE}.
+\item \verb{$UseAsync(use_async)}: logical: should the async scanner be used?
+\item \verb{$BatchSize(batch_size)}: integer: Maximum row count of scanned record
+batches, default is 32K. If scanned record batches are overflowing memory
+then this method can be called to reduce their size.
+\item \verb{$schema}: Active binding, returns the \link{Schema} of the Dataset
+\item \verb{$Finish()}: Returns a \code{Scanner}
+}
+
+\code{Scanner} currently has a single method, \verb{$ToTable()}, which evaluates the
+query and returns an Arrow \link{Table}.
+}
+
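+\examples{
+\dontrun{
+# A minimal sketch, assuming `ds` is a Dataset (e.g. from open_dataset())
+# with hypothetical columns col1 and col2:
+scanner <- Scanner$create(ds, projection = c("col1", "col2"))
+tab <- scanner$ToTable()
+}
+}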
diff --git a/src/arrow/r/man/Schema.Rd b/src/arrow/r/man/Schema.Rd
new file mode 100644
index 000000000..7322c70f2
--- /dev/null
+++ b/src/arrow/r/man/Schema.Rd
@@ -0,0 +1,86 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/schema.R
+\docType{class}
+\name{Schema}
+\alias{Schema}
+\alias{schema}
+\title{Schema class}
+\usage{
+schema(...)
+}
+\arguments{
+\item{...}{named list containing \link[=data-type]{data types} or
+a list of \link[=field]{fields} containing the fields for the schema}
+}
+\description{
+A \code{Schema} is a list of \link{Field}s, which map names to
+Arrow \link[=data-type]{data types}. Create a \code{Schema} when you
+want to convert an R \code{data.frame} to Arrow but don't want to rely on the
+default mapping of R types to Arrow types, such as when you want to choose a
+specific numeric precision, or when creating a \link{Dataset} and you want to
+ensure a specific schema rather than inferring it from the various files.
+
+Many Arrow objects, including \link{Table} and \link{Dataset}, have a \verb{$schema} method
+(active binding) that lets you access their schema.
+}
+\section{Methods}{
+
+\itemize{
+\item \verb{$ToString()}: convert to a string
+\item \verb{$field(i)}: returns the field at index \code{i} (0-based)
+\item \verb{$GetFieldByName(x)}: returns the field with name \code{x}
+\item \verb{$WithMetadata(metadata)}: returns a new \code{Schema} with the key-value
+\code{metadata} set. Note that all list elements in \code{metadata} will be coerced
+to \code{character}.
+}
+}
+
+\section{Active bindings}{
+
+\itemize{
+\item \verb{$names}: returns the field names (called by \code{names(Schema)})
+\item \verb{$num_fields}: returns the number of fields (called by \code{length(Schema)})
+\item \verb{$fields}: returns the list of \code{Field}s in the \code{Schema}, suitable for
+iterating over
+\item \verb{$HasMetadata}: logical: does this \code{Schema} have extra metadata?
+\item \verb{$metadata}: returns the key-value metadata as a named list.
+Modify or replace by assigning in (\code{sch$metadata <- new_metadata}).
+All list elements are coerced to string.
+}
+}
+
+\section{R Metadata}{
+
+
+When converting a data.frame to an Arrow Table or RecordBatch, attributes
+from the \code{data.frame} are saved alongside the data so that the object can be
+reconstructed faithfully in R (e.g. with \code{as.data.frame()}). This metadata
+can be at the top level of the \code{data.frame} (e.g. \code{attributes(df)}), at the
+column level (e.g. \code{attributes(df$col_a)}), or, for list columns only, at the
+element level (e.g. \code{attributes(df[1, "col_a"])}). For example, this allows
+for storing \code{haven} columns in a table and being able to faithfully
+re-create them when pulled back into R. This metadata is separate from the
+schema (column names and types) which is compatible with other Arrow
+clients. The R metadata is only read by R and is ignored by other clients
+(e.g. Pandas has its own custom metadata). This metadata is stored in
+\verb{$metadata$r}.
+
+Since Schema metadata keys and values must be strings, this metadata is
+saved by serializing R's attribute list structure to a string. If the
+serialized metadata exceeds 100Kb in size, it is compressed by default
+(starting in version 3.0.0). To disable this compression (e.g. for tables
+that are compatible with Arrow versions before 3.0.0 and include large
+amounts of metadata), set the option \code{arrow.compress_metadata} to \code{FALSE}.
+Files with compressed metadata are readable by older versions of arrow, but
+the metadata is dropped.
+}
+
+\examples{
+\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+df <- data.frame(col1 = 2:4, col2 = c(0.1, 0.3, 0.5))
+tab1 <- arrow_table(df)
+tab1$schema
+tab2 <- arrow_table(df, schema = schema(col1 = int8(), col2 = float32()))
+tab2$schema
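+
+# Schema-level metadata: values are coerced to string on assignment
+sch <- schema(col1 = int8(), col2 = float32())
+sch$metadata <- list(source = "example")
+sch$metadata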
+\dontshow{\}) # examplesIf}
+}
diff --git a/src/arrow/r/man/Table.Rd b/src/arrow/r/man/Table.Rd
new file mode 100644
index 000000000..d5654bf93
--- /dev/null
+++ b/src/arrow/r/man/Table.Rd
@@ -0,0 +1,92 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/table.R
+\docType{class}
+\name{Table}
+\alias{Table}
+\alias{arrow_table}
+\title{Table class}
+\usage{
+arrow_table(..., schema = NULL)
+}
+\arguments{
+\item{...}{A \code{data.frame} or a named set of Arrays or vectors. If given a
+mixture of data.frames and named vectors, the inputs will be autospliced together
+(see examples). Alternatively, you can provide a single Arrow IPC
+\code{InputStream}, \code{Message}, \code{Buffer}, or R \code{raw} object containing a \code{Buffer}.}
+
+\item{schema}{a \link{Schema}, or \code{NULL} (the default) to infer the schema from
+the data in \code{...}. When providing an Arrow IPC buffer, \code{schema} is required.}
+}
+\description{
+A Table is a sequence of \link[=ChunkedArray]{chunked arrays}. They
+have a similar interface to \link[=RecordBatch]{record batches}, but they can be
+composed from multiple record batches or chunked arrays.
+}
+\section{S3 Methods and Usage}{
+
+Tables are data-frame-like, and many methods you expect to work on
+a \code{data.frame} are implemented for \code{Table}. This includes \code{[}, \code{[[},
+\code{$}, \code{names}, \code{dim}, \code{nrow}, \code{ncol}, \code{head}, and \code{tail}. You can also pull
+the data from an Arrow table into R with \code{as.data.frame()}. See the
+examples.
+
+A caveat about the \code{$} method: because \code{Table} is an \code{R6} object,
+\code{$} is also used to access the object's methods (see below). Methods take
+precedence over the table's columns. So, \code{tab$Slice} would return the
+"Slice" method function even if there were a column in the table called
+"Slice".
+}
+
+\section{R6 Methods}{
+
+In addition to the more R-friendly S3 methods, a \code{Table} object has
+the following R6 methods that map onto the underlying C++ methods:
+\itemize{
+\item \verb{$column(i)}: Extract a \code{ChunkedArray} by integer position from the table
+\item \verb{$ColumnNames()}: Get all column names (called by \code{names(tab)})
+\item \verb{$RenameColumns(value)}: Set all column names (called by \code{names(tab) <- value})
+\item \verb{$GetColumnByName(name)}: Extract a \code{ChunkedArray} by string name
+\item \verb{$field(i)}: Extract a \code{Field} from the table schema by integer position
+\item \verb{$SelectColumns(indices)}: Return new \code{Table} with specified columns, expressed as 0-based integers.
+\item \verb{$Slice(offset, length = NULL)}: Create a zero-copy view starting at the
+indicated integer offset and going for the given length, or to the end
+of the table if \code{NULL}, the default.
+\item \verb{$Take(i)}: return a \code{Table} with rows at positions given by
+integers \code{i}. If \code{i} is an Arrow \code{Array} or \code{ChunkedArray}, it will be
+coerced to an R vector before taking.
+\item \verb{$Filter(i, keep_na = TRUE)}: return a \code{Table} with rows at positions where logical
+vector or Arrow boolean-type \verb{(Chunked)Array} \code{i} is \code{TRUE}.
+\item \verb{$SortIndices(names, descending = FALSE)}: return an \code{Array} of integer row
+positions that can be used to rearrange the \code{Table} in ascending or descending
+order by the first named column, breaking ties with further named columns.
+\code{descending} can be a logical vector of length one or of the same length as
+\code{names}.
+\item \verb{$serialize(output_stream, ...)}: Write the table to the given
+\link{OutputStream}
+\item \verb{$cast(target_schema, safe = TRUE, options = cast_options(safe))}: Alter
+the schema of the table.
+}
+
+There are also some active bindings:
+\itemize{
+\item \verb{$num_columns}
+\item \verb{$num_rows}
+\item \verb{$schema}
+\item \verb{$metadata}: Returns the key-value metadata of the \code{Schema} as a named list.
+Modify or replace by assigning in (\code{tab$metadata <- new_metadata}).
+All list elements are coerced to string. See \code{schema()} for more information.
+\item \verb{$columns}: Returns a list of \code{ChunkedArray}s
+}
+}
+
+\examples{
+\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+tbl <- arrow_table(name = rownames(mtcars), mtcars)
+dim(tbl)
+dim(head(tbl))
+names(tbl)
+tbl$mpg
+tbl[["cyl"]]
+as.data.frame(tbl[4:8, c("gear", "hp", "wt")])
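+
+# Sorting via SortIndices and Take, as described in the R6 Methods section
+sorted <- tbl$Take(tbl$SortIndices("mpg"))
+head(as.data.frame(sorted))
+# Zero-copy slicing with a 0-based offset
+tbl$Slice(2, 2)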
+\dontshow{\}) # examplesIf}
+}
diff --git a/src/arrow/r/man/array.Rd b/src/arrow/r/man/array.Rd
new file mode 100644
index 000000000..78d3eaff6
--- /dev/null
+++ b/src/arrow/r/man/array.Rd
@@ -0,0 +1,107 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/array.R, R/scalar.R
+\docType{class}
+\name{array}
+\alias{array}
+\alias{Array}
+\alias{DictionaryArray}
+\alias{StructArray}
+\alias{ListArray}
+\alias{LargeListArray}
+\alias{FixedSizeListArray}
+\alias{StructScalar}
+\title{Arrow Arrays}
+\description{
+An \code{Array} is an immutable data array with some logical type
+and some length. Most logical types are contained in the base
+\code{Array} class; there are also subclasses for \code{DictionaryArray}, \code{ListArray},
+and \code{StructArray}.
+}
+\section{Factory}{
+
+The \code{Array$create()} factory method instantiates an \code{Array} and
+takes the following arguments:
+\itemize{
+\item \code{x}: an R vector, list, or \code{data.frame}
+\item \code{type}: an optional \link[=data-type]{data type} for \code{x}. If omitted, the type
+will be inferred from the data.
+}
+
+\code{Array$create()} will return the appropriate subclass of \code{Array}, such as
+\code{DictionaryArray} when given an R factor.
+
+To compose a \code{DictionaryArray} directly, call \code{DictionaryArray$create()},
+which takes two arguments:
+\itemize{
+\item \code{x}: an R vector or \code{Array} of integers for the dictionary indices
+\item \code{dict}: an R vector or \code{Array} of dictionary values (like R factor levels
+but not limited to strings only)
+}
+}
+
+\section{Usage}{
+\preformatted{a <- Array$create(x)
+length(a)
+
+print(a)
+a == a
+}
+}
+
+\section{Methods}{
+
+\itemize{
+\item \verb{$IsNull(i)}: Return true if value at index is null. Does not boundscheck
+\item \verb{$IsValid(i)}: Return true if value at index is valid. Does not boundscheck
+\item \verb{$length()}: the number of elements this array contains
+\item \verb{$offset}: A relative position into another array's data, to enable zero-copy slicing
+\item \verb{$null_count}: The number of null entries in the array
+\item \verb{$type}: logical type of data
+\item \verb{$type_id()}: type id
+\item \verb{$Equals(other)}: is this array equal to \code{other}
+\item \verb{$ApproxEquals(other)}: is this array approximately equal to \code{other}
+\item \verb{$Diff(other)}: return a string expressing the difference between two arrays
+\item \verb{$data()}: return the underlying \link{ArrayData}
+\item \verb{$as_vector()}: convert to an R vector
+\item \verb{$ToString()}: string representation of the array
+\item \verb{$Slice(offset, length = NULL)}: Construct a zero-copy slice of the array
+with the indicated offset and length. If length is \code{NULL}, the slice goes
+until the end of the array.
+\item \verb{$Take(i)}: return an \code{Array} with values at positions given by integers
+(R vector or Arrow Array) \code{i}.
+\item \verb{$Filter(i, keep_na = TRUE)}: return an \code{Array} with values at positions where logical
+vector (or Arrow boolean Array) \code{i} is \code{TRUE}.
+\item \verb{$SortIndices(descending = FALSE)}: return an \code{Array} of integer positions that can be
+used to rearrange the \code{Array} in ascending or descending order
+\item \verb{$RangeEquals(other, start_idx, end_idx, other_start_idx)}: are the
+values in the range \verb{[start_idx, end_idx)} equal to the values of \code{other}
+beginning at \code{other_start_idx}
+\item \verb{$cast(target_type, safe = TRUE, options = cast_options(safe))}: Alter the
+data in the array to change its type.
+\item \verb{$View(type)}: Construct a zero-copy view of this array with the given type.
+\item \verb{$Validate()}: Perform any validation checks to determine obvious inconsistencies
+within the array's internal data. This can be an expensive check, potentially \code{O(length)}
+}
+}
+
+\examples{
+\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+my_array <- Array$create(1:10)
+my_array$type
+my_array$cast(int8())
+
+# Check if value is null; zero-indexed
+na_array <- Array$create(c(1:5, NA))
+na_array$IsNull(0)
+na_array$IsNull(5)
+na_array$IsValid(5)
+na_array$null_count
+
+# zero-copy slicing; the offset of the new Array will be the same as the index passed to $Slice
+new_array <- na_array$Slice(5)
+new_array$offset
+
+# Compare 2 arrays
+na_array2 <- na_array
+na_array2 == na_array # element-wise comparison
+na_array2$Equals(na_array) # overall comparison
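+
+# Row selection and sorting; $Take() positions are assumed 0-based,
+# matching the C++ convention
+my_array$Take(c(0L, 3L))
+my_array$Filter(rep(c(TRUE, FALSE), 5))
+my_array$SortIndices(descending = TRUE)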
+\dontshow{\}) # examplesIf}
+}
diff --git a/src/arrow/r/man/arrow-package.Rd b/src/arrow/r/man/arrow-package.Rd
new file mode 100644
index 000000000..021762162
--- /dev/null
+++ b/src/arrow/r/man/arrow-package.Rd
@@ -0,0 +1,45 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/arrow-package.R
+\docType{package}
+\name{arrow-package}
+\alias{arrow}
+\alias{arrow-package}
+\title{arrow: Integration to 'Apache' 'Arrow'}
+\description{
+'Apache' 'Arrow' <https://arrow.apache.org/> is a cross-language
+ development platform for in-memory data. It specifies a standardized
+ language-independent columnar memory format for flat and hierarchical data,
+ organized for efficient analytic operations on modern hardware. This
+ package provides an interface to the 'Arrow C++' library.
+}
+\seealso{
+Useful links:
+\itemize{
+ \item \url{https://github.com/apache/arrow/}
+ \item \url{https://arrow.apache.org/docs/r/}
+ \item Report bugs at \url{https://issues.apache.org/jira/projects/ARROW/issues}
+}
+
+}
+\author{
+\strong{Maintainer}: Neal Richardson \email{neal@ursalabs.org}
+
+Authors:
+\itemize{
+ \item Ian Cook \email{ianmcook@gmail.com}
+ \item Nic Crane \email{thisisnic@gmail.com}
+ \item Jonathan Keane \email{jkeane@gmail.com}
+ \item Romain François \email{romain@rstudio.com} (\href{https://orcid.org/0000-0002-2444-4226}{ORCID})
+ \item Jeroen Ooms \email{jeroen@berkeley.edu}
+ \item Apache Arrow \email{dev@arrow.apache.org} [copyright holder]
+}
+
+Other contributors:
+\itemize{
+ \item Javier Luraschi \email{javier@rstudio.com} [contributor]
+ \item Karl Dunkle Werner \email{karldw@users.noreply.github.com} (\href{https://orcid.org/0000-0003-0523-7309}{ORCID}) [contributor]
+ \item Jeffrey Wong \email{jeffreyw@netflix.com} [contributor]
+}
+
+}
+\keyword{internal}
diff --git a/src/arrow/r/man/arrow_available.Rd b/src/arrow/r/man/arrow_available.Rd
new file mode 100644
index 000000000..3061d10dc
--- /dev/null
+++ b/src/arrow/r/man/arrow_available.Rd
@@ -0,0 +1,47 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/arrow-package.R
+\name{arrow_available}
+\alias{arrow_available}
+\alias{arrow_with_dataset}
+\alias{arrow_with_parquet}
+\alias{arrow_with_s3}
+\alias{arrow_with_json}
+\title{Is the C++ Arrow library available?}
+\usage{
+arrow_available()
+
+arrow_with_dataset()
+
+arrow_with_parquet()
+
+arrow_with_s3()
+
+arrow_with_json()
+}
+\value{
+\code{TRUE} or \code{FALSE} depending on whether the package was installed
+with:
+\itemize{
+\item The Arrow C++ library (check with \code{arrow_available()})
+\item Arrow Dataset support enabled (check with \code{arrow_with_dataset()})
+\item Parquet support enabled (check with \code{arrow_with_parquet()})
+\item JSON support enabled (check with \code{arrow_with_json()})
+\item Amazon S3 support enabled (check with \code{arrow_with_s3()})
+}
+}
+\description{
+You won't generally need to call these functions, but they're made available
+for diagnostic purposes.
+}
+\examples{
+arrow_available()
+arrow_with_dataset()
+arrow_with_parquet()
+arrow_with_json()
+arrow_with_s3()
+}
+\seealso{
+If any of these are \code{FALSE}, see
+\code{vignette("install", package = "arrow")} for guidance on reinstalling the
+package.
+}
diff --git a/src/arrow/r/man/arrow_info.Rd b/src/arrow/r/man/arrow_info.Rd
new file mode 100644
index 000000000..95444a8bb
--- /dev/null
+++ b/src/arrow/r/man/arrow_info.Rd
@@ -0,0 +1,17 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/arrow-package.R
+\name{arrow_info}
+\alias{arrow_info}
+\title{Report information on the package's capabilities}
+\usage{
+arrow_info()
+}
+\value{
+A list including version information, boolean "capabilities", statistics
+from Arrow's memory allocator, and Arrow's run-time
+information.
+}
+\description{
+This function summarizes a number of build-time configurations and run-time
+settings for the Arrow package. It may be useful for diagnostics.
+}
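+\examples{
+arrow_info()
+}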
diff --git a/src/arrow/r/man/buffer.Rd b/src/arrow/r/man/buffer.Rd
new file mode 100644
index 000000000..a3ca1fc2f
--- /dev/null
+++ b/src/arrow/r/man/buffer.Rd
@@ -0,0 +1,44 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/buffer.R
+\docType{class}
+\name{buffer}
+\alias{buffer}
+\alias{Buffer}
+\title{Buffer class}
+\usage{
+buffer(x)
+}
+\arguments{
+\item{x}{R object. Only raw, numeric and integer vectors are currently supported}
+}
+\value{
+an instance of \code{Buffer} that borrows memory from \code{x}
+}
+\description{
+A Buffer is an object containing a pointer to a piece of
+contiguous memory with a particular size.
+}
+\section{Factory}{
+
+\code{buffer()} lets you create an \code{arrow::Buffer} from an R object
+}
+
+\section{Methods}{
+
+\itemize{
+\item \verb{$is_mutable} : is this buffer mutable?
+\item \verb{$ZeroPadding()} : zero bytes in padding, i.e. bytes between size and capacity
+\item \verb{$size} : size in memory, in bytes
+\item \verb{$capacity}: possible capacity, in bytes
+}
+}
+
+\examples{
+\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+my_buffer <- buffer(c(1, 2, 3, 4))
+my_buffer$is_mutable
+my_buffer$ZeroPadding()
+my_buffer$size
+my_buffer$capacity
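+
+# buffer() also accepts raw vectors
+raw_buffer <- buffer(as.raw(1:3))
+raw_buffer$size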
+\dontshow{\}) # examplesIf}
+}
diff --git a/src/arrow/r/man/call_function.Rd b/src/arrow/r/man/call_function.Rd
new file mode 100644
index 000000000..c216af06f
--- /dev/null
+++ b/src/arrow/r/man/call_function.Rd
@@ -0,0 +1,51 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/compute.R
+\name{call_function}
+\alias{call_function}
+\title{Call an Arrow compute function}
+\usage{
+call_function(
+ function_name,
+ ...,
+ args = list(...),
+ options = empty_named_list()
+)
+}
+\arguments{
+\item{function_name}{string Arrow compute function name}
+
+\item{...}{Function arguments, which may include \code{Array}, \code{ChunkedArray}, \code{Scalar},
+\code{RecordBatch}, or \code{Table}.}
+
+\item{args}{list arguments as an alternative to specifying in \code{...}}
+
+\item{options}{named list of C++ function options.}
+}
+\value{
+An \code{Array}, \code{ChunkedArray}, \code{Scalar}, \code{RecordBatch}, or \code{Table}, whatever the compute function results in.
+}
+\description{
+This function provides a lower-level API for calling Arrow functions by their
+string function name. You won't use it directly for most applications.
+Many Arrow compute functions are mapped to R methods,
+and in a \code{dplyr} evaluation context, \link[=list_compute_functions]{all Arrow functions}
+are callable with an \code{arrow_} prefix.
+}
+\details{
+When passing indices in \code{...}, \code{args}, or \code{options}, express them as
+0-based integers (consistent with C++).
+}
+\examples{
+\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+a <- Array$create(c(1L, 2L, 3L, NA, 5L))
+s <- Scalar$create(4L)
+call_function("coalesce", a, s)
+
+a <- Array$create(rnorm(10000))
+call_function("quantile", a, options = list(q = seq(0, 1, 0.25)))
+\dontshow{\}) # examplesIf}
+}
+\seealso{
+\href{https://arrow.apache.org/docs/cpp/compute.html}{Arrow C++ documentation} for
+the functions and their respective options.
+}
diff --git a/src/arrow/r/man/cast_options.Rd b/src/arrow/r/man/cast_options.Rd
new file mode 100644
index 000000000..40d78052c
--- /dev/null
+++ b/src/arrow/r/man/cast_options.Rd
@@ -0,0 +1,22 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/compute.R
+\name{cast_options}
+\alias{cast_options}
+\title{Cast options}
+\usage{
+cast_options(safe = TRUE, ...)
+}
+\arguments{
+\item{safe}{logical: enforce safe conversion? Default \code{TRUE}}
+
+\item{...}{additional cast options, such as \code{allow_int_overflow},
+\code{allow_time_truncate}, and \code{allow_float_truncate}, which are set to \code{!safe}
+by default}
+}
+\value{
+A list
+}
+\description{
+Cast options
+}
+\keyword{internal}
diff --git a/src/arrow/r/man/codec_is_available.Rd b/src/arrow/r/man/codec_is_available.Rd
new file mode 100644
index 000000000..b3238ff1d
--- /dev/null
+++ b/src/arrow/r/man/codec_is_available.Rd
@@ -0,0 +1,25 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/compression.R
+\name{codec_is_available}
+\alias{codec_is_available}
+\title{Check whether a compression codec is available}
+\usage{
+codec_is_available(type)
+}
+\arguments{
+\item{type}{A string, one of "uncompressed", "snappy", "gzip", "brotli",
+"zstd", "lz4", "lzo", or "bz2", case insensitive.}
+}
+\value{
+Logical: is \code{type} available?
+}
+\description{
+Support for compression libraries depends on the build-time settings of
+the Arrow C++ library. This function lets you know which are available for
+use.
+}
+\examples{
+\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+codec_is_available("gzip")
+\dontshow{\}) # examplesIf}
+}
diff --git a/src/arrow/r/man/compression.Rd b/src/arrow/r/man/compression.Rd
new file mode 100644
index 000000000..7cdb320d6
--- /dev/null
+++ b/src/arrow/r/man/compression.Rd
@@ -0,0 +1,31 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/compression.R
+\docType{class}
+\name{compression}
+\alias{compression}
+\alias{CompressedOutputStream}
+\alias{CompressedInputStream}
+\title{Compressed stream classes}
+\description{
+\code{CompressedInputStream} and \code{CompressedOutputStream}
+allow you to apply a compression \link{Codec} to an
+input or output stream.
+}
+\section{Factory}{
+
+
+The \code{CompressedInputStream$create()} and \code{CompressedOutputStream$create()}
+factory methods instantiate the object and take the following arguments:
+\itemize{
+\item \code{stream} An \link{InputStream} or \link{OutputStream}, respectively
+\item \code{codec} A \code{Codec}, either a \link{Codec} instance or a string
+\item \code{compression_level} compression level for when the \code{codec} argument is given as a string
+}
+}
+
+\section{Methods}{
+
+
+Methods are inherited from \link{InputStream} and \link{OutputStream}, respectively
+}
+
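+\examples{
+\dontrun{
+# A minimal sketch: wrap a file OutputStream in gzip compression and
+# write a CSV through it (path and codec choice are illustrative)
+tf <- tempfile(fileext = ".csv.gz")
+sink <- CompressedOutputStream$create(FileOutputStream$create(tf), codec = "gzip")
+write_csv_arrow(mtcars, sink)
+sink$close()
+}
+}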
diff --git a/src/arrow/r/man/contains_regex.Rd b/src/arrow/r/man/contains_regex.Rd
new file mode 100644
index 000000000..f05f11d02
--- /dev/null
+++ b/src/arrow/r/man/contains_regex.Rd
@@ -0,0 +1,18 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/dplyr-functions.R
+\name{contains_regex}
+\alias{contains_regex}
+\title{Does this string contain regex metacharacters?}
+\usage{
+contains_regex(string)
+}
+\arguments{
+\item{string}{String to be tested}
+}
+\value{
+Logical: does \code{string} contain regex metacharacters?
+}
+\description{
+Does this string contain regex metacharacters?
+}
+\keyword{internal}
diff --git a/src/arrow/r/man/copy_files.Rd b/src/arrow/r/man/copy_files.Rd
new file mode 100644
index 000000000..1b83703f1
--- /dev/null
+++ b/src/arrow/r/man/copy_files.Rd
@@ -0,0 +1,35 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/filesystem.R
+\name{copy_files}
+\alias{copy_files}
+\title{Copy files between FileSystems}
+\usage{
+copy_files(from, to, chunk_size = 1024L * 1024L)
+}
+\arguments{
+\item{from}{A string path to a local directory or file, a URI, or a
+\code{SubTreeFileSystem}. Files will be copied recursively from this path.}
+
+\item{to}{A string path to a local directory or file, a URI, or a
+\code{SubTreeFileSystem}. Directories will be created as necessary}
+
+\item{chunk_size}{The maximum size of block to read before flushing
+to the destination file. A larger chunk_size will use more memory while
+copying but may help accommodate high latency FileSystems.}
+}
+\value{
+Nothing: called for side effects in the file system
+}
+\description{
+Copy files between FileSystems
+}
+\examples{
+\dontshow{if (FALSE) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+# Copy an S3 bucket's files to a local directory:
+copy_files("s3://your-bucket-name", "local-directory")
+# Using a FileSystem object
+copy_files(s3_bucket("your-bucket-name"), "local-directory")
+# Or go the other way, from local to S3
+copy_files("local-directory", s3_bucket("your-bucket-name"))
+\dontshow{\}) # examplesIf}
+}
diff --git a/src/arrow/r/man/cpu_count.Rd b/src/arrow/r/man/cpu_count.Rd
new file mode 100644
index 000000000..f2abfc197
--- /dev/null
+++ b/src/arrow/r/man/cpu_count.Rd
@@ -0,0 +1,17 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/config.R
+\name{cpu_count}
+\alias{cpu_count}
+\alias{set_cpu_count}
+\title{Manage the global CPU thread pool in libarrow}
+\usage{
+cpu_count()
+
+set_cpu_count(num_threads)
+}
+\arguments{
+\item{num_threads}{integer: New number of threads for thread pool}
+}
+\description{
+Manage the global CPU thread pool in libarrow
+}
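+\examples{
+\dontrun{
+# Check the current pool size, then change it
+cpu_count()
+set_cpu_count(2)
+}
+}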
diff --git a/src/arrow/r/man/create_package_with_all_dependencies.Rd b/src/arrow/r/man/create_package_with_all_dependencies.Rd
new file mode 100644
index 000000000..b2da8c249
--- /dev/null
+++ b/src/arrow/r/man/create_package_with_all_dependencies.Rd
@@ -0,0 +1,70 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/install-arrow.R
+\name{create_package_with_all_dependencies}
+\alias{create_package_with_all_dependencies}
+\title{Create a source bundle that includes all thirdparty dependencies}
+\usage{
+create_package_with_all_dependencies(dest_file = NULL, source_file = NULL)
+}
+\arguments{
+\item{dest_file}{File path for the new tar.gz package. Defaults to
+\code{arrow_V.V.V_with_deps.tar.gz} in the current directory (\code{V.V.V} is the version)}
+
+\item{source_file}{File path for the input tar.gz package. Defaults to
+downloading the package from CRAN (or whatever you have set as the first in
+\code{getOption("repos")})}
+}
+\value{
+The full path to \code{dest_file}, invisibly
+
+This function is used for setting up an offline build. If it's possible to
+download at build time, don't use this function. Instead, let \code{cmake}
+download the required dependencies for you.
+These downloaded dependencies are only used in the build if
+\code{ARROW_DEPENDENCY_SOURCE} is unset, \code{BUNDLED}, or \code{AUTO}.
+\url{https://arrow.apache.org/docs/developers/cpp/building.html#offline-builds}
+
+If you're using binary packages you shouldn't need to use this function. You
+should download the appropriate binary from your package repository, transfer
+that to the offline computer, and install that. Any OS can create the source
+bundle, but it cannot be installed on Windows. (Instead, use a standard
+Windows binary package.)
+
+Note if you're using RStudio Package Manager on Linux: If you still want to
+make a source bundle with this function, make sure to set the first repo in
+\code{options("repos")} to be a mirror that contains source packages (that is:
+something other than the RSPM binary mirror URLs).
+\subsection{Steps for an offline install with optional dependencies:}{
+\subsection{Using a computer with internet access, pre-download the dependencies:}{
+\itemize{
+\item Install the \code{arrow} package \emph{or} run
+\code{source("https://raw.githubusercontent.com/apache/arrow/master/r/R/install-arrow.R")}
+\item Run \code{create_package_with_all_dependencies("my_arrow_pkg.tar.gz")}
+\item Copy the newly created \code{my_arrow_pkg.tar.gz} to the computer without internet access
+}
+}
+
+\subsection{On the computer without internet access, install the prepared package:}{
+\itemize{
+\item Install the \code{arrow} package from the copied file
+\itemize{
+\item \code{install.packages("my_arrow_pkg.tar.gz", dependencies = c("Depends", "Imports", "LinkingTo"))}
+\item This installation will build from source, so \code{cmake} must be available
+}
+\item Run \code{\link[=arrow_info]{arrow_info()}} to check installed capabilities
+}
+}
+
+}
+}
+\description{
+Create a source bundle that includes all thirdparty dependencies
+}
+\examples{
+\dontrun{
+new_pkg <- create_package_with_all_dependencies()
+# Note: this works when run in the same R session, but it's meant to be
+# copied to a different computer.
+install.packages(new_pkg, dependencies = c("Depends", "Imports", "LinkingTo"))
+}
+}
diff --git a/src/arrow/r/man/data-type.Rd b/src/arrow/r/man/data-type.Rd
new file mode 100644
index 000000000..a06318975
--- /dev/null
+++ b/src/arrow/r/man/data-type.Rd
@@ -0,0 +1,163 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/type.R
+\name{data-type}
+\alias{data-type}
+\alias{int8}
+\alias{int16}
+\alias{int32}
+\alias{int64}
+\alias{uint8}
+\alias{uint16}
+\alias{uint32}
+\alias{uint64}
+\alias{float16}
+\alias{halffloat}
+\alias{float32}
+\alias{float}
+\alias{float64}
+\alias{boolean}
+\alias{bool}
+\alias{utf8}
+\alias{large_utf8}
+\alias{binary}
+\alias{large_binary}
+\alias{fixed_size_binary}
+\alias{string}
+\alias{date32}
+\alias{date64}
+\alias{time32}
+\alias{time64}
+\alias{null}
+\alias{timestamp}
+\alias{decimal}
+\alias{struct}
+\alias{list_of}
+\alias{large_list_of}
+\alias{FixedSizeListType}
+\alias{fixed_size_list_of}
+\title{Apache Arrow data types}
+\usage{
+int8()
+
+int16()
+
+int32()
+
+int64()
+
+uint8()
+
+uint16()
+
+uint32()
+
+uint64()
+
+float16()
+
+halffloat()
+
+float32()
+
+float()
+
+float64()
+
+boolean()
+
+bool()
+
+utf8()
+
+large_utf8()
+
+binary()
+
+large_binary()
+
+fixed_size_binary(byte_width)
+
+string()
+
+date32()
+
+date64()
+
+time32(unit = c("ms", "s"))
+
+time64(unit = c("ns", "us"))
+
+null()
+
+timestamp(unit = c("s", "ms", "us", "ns"), timezone = "")
+
+decimal(precision, scale)
+
+struct(...)
+
+list_of(type)
+
+large_list_of(type)
+
+fixed_size_list_of(type, list_size)
+}
+\arguments{
+\item{byte_width}{byte width for \code{FixedSizeBinary} type.}
+
+\item{unit}{For time/timestamp types, the time unit. \code{time32()} can take
+either "s" or "ms", while \code{time64()} can be "us" or "ns". \code{timestamp()} can
+take any of those four values.}
+
+\item{timezone}{For \code{timestamp()}, an optional time zone string.}
+
+\item{precision}{For \code{decimal()}, precision}
+
+\item{scale}{For \code{decimal()}, scale}
+
+\item{...}{For \code{struct()}, a named list of types to define the struct columns}
+
+\item{type}{For \code{list_of()}, a data type to make a list-of-type}
+
+\item{list_size}{list size for \code{FixedSizeList} type.}
+}
+\value{
+An Arrow type object inheriting from DataType.
+}
+\description{
+These functions create type objects corresponding to Arrow types. Use them
+when defining a \code{\link[=schema]{schema()}} or as inputs to other types, like \code{struct}. Most
+of these functions don't take arguments, but a few do.
+}
+\details{
+A few functions have aliases:
+\itemize{
+\item \code{utf8()} and \code{string()}
+\item \code{float16()} and \code{halffloat()}
+\item \code{float32()} and \code{float()}
+\item \code{bool()} and \code{boolean()}
+\item When called inside an \code{arrow} function, such as \code{schema()} or \code{cast()},
+\code{double()} is also supported as a way of creating a \code{float64()}
+}
+
+\code{date32()} creates a date type with a "day" unit, like the R \code{Date}
+class. \code{date64()} has a "ms" unit.
+
+\code{uint32} (32 bit unsigned integer), \code{uint64} (64 bit unsigned integer), and
+\code{int64} (64-bit signed integer) types may contain values that exceed the
+range of R's \code{integer} type (32-bit signed integer). When these arrow objects
+are translated to R objects, \code{uint32} and \code{uint64} are converted to \code{double}
+("numeric") and \code{int64} is converted to \code{bit64::integer64}. For \code{int64}
+types, this conversion can be disabled (so that \code{int64} always yields a
+\code{bit64::integer64} object) by setting \code{options(arrow.int64_downcast = FALSE)}.
+}
+\examples{
+\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+bool()
+struct(a = int32(), b = double())
+timestamp("ms", timezone = "CEST")
+time64("ns")
+\dontshow{\}) # examplesIf}
+}
+\seealso{
+\code{\link[=dictionary]{dictionary()}} for creating a dictionary (factor-like) type.
+}
diff --git a/src/arrow/r/man/dataset_factory.Rd b/src/arrow/r/man/dataset_factory.Rd
new file mode 100644
index 000000000..d119c150b
--- /dev/null
+++ b/src/arrow/r/man/dataset_factory.Rd
@@ -0,0 +1,76 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/dataset-factory.R
+\name{dataset_factory}
+\alias{dataset_factory}
+\title{Create a DatasetFactory}
+\usage{
+dataset_factory(
+ x,
+ filesystem = NULL,
+ format = c("parquet", "arrow", "ipc", "feather", "csv", "tsv", "text"),
+ partitioning = NULL,
+ ...
+)
+}
+\arguments{
+\item{x}{A string path to a directory containing data files, a vector of one
+or more string paths to data files, or a list of \code{DatasetFactory} objects
+whose datasets should be combined. If this argument is specified it will be
+used to construct a \code{UnionDatasetFactory} and other arguments will be
+ignored.}
+
+\item{filesystem}{A \link{FileSystem} object; if omitted, the \code{FileSystem} will
+be detected from \code{x}}
+
+\item{format}{A \link{FileFormat} object, or a string identifier of the format of
+the files in \code{x}. Currently supported values:
+\itemize{
+\item "parquet"
+\item "ipc"/"arrow"/"feather", all aliases for each other; for Feather, note that
+only version 2 files are supported
+\item "csv"/"text", aliases for the same thing (because comma is the default
+delimiter for text files)
+\item "tsv", equivalent to passing \verb{format = "text", delimiter = "\\t"}
+}
+
+Default is "parquet", unless a \code{delimiter} is also specified, in which case
+it is assumed to be "text".}
+
+\item{partitioning}{One of
+\itemize{
+\item A \code{Schema}, in which case the file paths relative to \code{sources} will be
+parsed, and path segments will be matched with the schema fields. For
+example, \code{schema(year = int16(), month = int8())} would create partitions
+for file paths like "2019/01/file.parquet", "2019/02/file.parquet", etc.
+\item A character vector that defines the field names corresponding to those
+path segments (that is, you're providing the names that would correspond
+to a \code{Schema} but the types will be autodetected)
+\item A \code{HivePartitioning} or \code{HivePartitioningFactory}, as returned
+by \code{\link[=hive_partition]{hive_partition()}} which parses explicit or autodetected fields from
+Hive-style path segments
+\item \code{NULL} for no partitioning
+}}
+
+\item{...}{Additional format-specific options, passed to
+\code{FileFormat$create()}. For CSV options, note that you can specify them either
+with the Arrow C++ library naming ("delimiter", "quoting", etc.) or the
+\code{readr}-style naming used in \code{\link[=read_csv_arrow]{read_csv_arrow()}} ("delim", "quote", etc.).
+Not all \code{readr} options are currently supported; please file an issue if you
+encounter one that \code{arrow} should support.}
+}
+\value{
+A \code{DatasetFactory} object. Pass this to \code{\link[=open_dataset]{open_dataset()}},
+in a list potentially with other \code{DatasetFactory} objects, to create
+a \code{Dataset}.
+}
+\description{
+A \link{Dataset} can be constructed using one or more \link{DatasetFactory}s.
+This function helps you construct a \code{DatasetFactory} that you can pass to
+\code{\link[=open_dataset]{open_dataset()}}.
+}
+\details{
+If you have only a single \code{DatasetFactory} (for example, you have a
+single directory containing Parquet files), you can call \code{open_dataset()}
+directly. Use \code{dataset_factory()} when you
+want to combine different directories, file systems, or file formats.
+}
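+\examples{
+\dontrun{
+# A minimal sketch with hypothetical paths: combine a directory of CSV
+# files and a directory of Parquet files into a single Dataset
+csv_factory <- dataset_factory("data/csv-part", format = "csv")
+parquet_factory <- dataset_factory("data/parquet-part", format = "parquet")
+ds <- open_dataset(list(csv_factory, parquet_factory))
+}
+}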
diff --git a/src/arrow/r/man/default_memory_pool.Rd b/src/arrow/r/man/default_memory_pool.Rd
new file mode 100644
index 000000000..232a89e6a
--- /dev/null
+++ b/src/arrow/r/man/default_memory_pool.Rd
@@ -0,0 +1,15 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/memory-pool.R
+\name{default_memory_pool}
+\alias{default_memory_pool}
+\title{Arrow's default \link{MemoryPool}}
+\usage{
+default_memory_pool()
+}
+\value{
+the default \link{MemoryPool}
+}
+\description{
+Arrow's default \link{MemoryPool}
+}
+\keyword{internal}
diff --git a/src/arrow/r/man/dictionary.Rd b/src/arrow/r/man/dictionary.Rd
new file mode 100644
index 000000000..d4b934954
--- /dev/null
+++ b/src/arrow/r/man/dictionary.Rd
@@ -0,0 +1,24 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/dictionary.R
+\name{dictionary}
+\alias{dictionary}
+\title{Create a dictionary type}
+\usage{
+dictionary(index_type = int32(), value_type = utf8(), ordered = FALSE)
+}
+\arguments{
+\item{index_type}{A DataType for the indices (default \code{\link[=int32]{int32()}})}
+
+\item{value_type}{A DataType for the values (default \code{\link[=utf8]{utf8()}})}
+
+\item{ordered}{Is this an ordered dictionary (default \code{FALSE})?}
+}
+\value{
+A \link{DictionaryType}
+}
+\description{
+Create a dictionary type
+}
+\seealso{
+\link[=data-type]{Other Arrow data types}
+}
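+\examples{
+\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+# An ordered dictionary of strings, analogous to an ordered R factor
+dictionary(int8(), utf8(), ordered = TRUE)
+\dontshow{\}) # examplesIf}
+}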
diff --git a/src/arrow/r/man/enums.Rd b/src/arrow/r/man/enums.Rd
new file mode 100644
index 000000000..7ec126a01
--- /dev/null
+++ b/src/arrow/r/man/enums.Rd
@@ -0,0 +1,88 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/enums.R
+\docType{data}
+\name{enums}
+\alias{enums}
+\alias{TimeUnit}
+\alias{DateUnit}
+\alias{Type}
+\alias{StatusCode}
+\alias{FileMode}
+\alias{MessageType}
+\alias{CompressionType}
+\alias{FileType}
+\alias{ParquetVersionType}
+\alias{MetadataVersion}
+\alias{QuantileInterpolation}
+\alias{NullEncodingBehavior}
+\alias{NullHandlingBehavior}
+\alias{RoundMode}
+\alias{JoinType}
+\title{Arrow enums}
+\format{
+An object of class \code{TimeUnit::type} (inherits from \code{arrow-enum}) of length 4.
+
+An object of class \code{DateUnit} (inherits from \code{arrow-enum}) of length 2.
+
+An object of class \code{Type::type} (inherits from \code{arrow-enum}) of length 37.
+
+An object of class \code{StatusCode} (inherits from \code{arrow-enum}) of length 17.
+
+An object of class \code{FileMode} (inherits from \code{arrow-enum}) of length 3.
+
+An object of class \code{MessageType} (inherits from \code{arrow-enum}) of length 5.
+
+An object of class \code{Compression::type} (inherits from \code{arrow-enum}) of length 9.
+
+An object of class \code{FileType} (inherits from \code{arrow-enum}) of length 4.
+
+An object of class \code{ParquetVersionType} (inherits from \code{arrow-enum}) of length 2.
+
+An object of class \code{MetadataVersion} (inherits from \code{arrow-enum}) of length 5.
+
+An object of class \code{QuantileInterpolation} (inherits from \code{arrow-enum}) of length 5.
+
+An object of class \code{NullEncodingBehavior} (inherits from \code{arrow-enum}) of length 2.
+
+An object of class \code{NullHandlingBehavior} (inherits from \code{arrow-enum}) of length 3.
+
+An object of class \code{RoundMode} (inherits from \code{arrow-enum}) of length 10.
+
+An object of class \code{JoinType} (inherits from \code{arrow-enum}) of length 8.
+}
+\usage{
+TimeUnit
+
+DateUnit
+
+Type
+
+StatusCode
+
+FileMode
+
+MessageType
+
+CompressionType
+
+FileType
+
+ParquetVersionType
+
+MetadataVersion
+
+QuantileInterpolation
+
+NullEncodingBehavior
+
+NullHandlingBehavior
+
+RoundMode
+
+JoinType
+}
+\description{
+Arrow enums
+}
+\keyword{datasets}
+\keyword{internal}
diff --git a/src/arrow/r/man/flight_connect.Rd b/src/arrow/r/man/flight_connect.Rd
new file mode 100644
index 000000000..9da7fad75
--- /dev/null
+++ b/src/arrow/r/man/flight_connect.Rd
@@ -0,0 +1,21 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/flight.R
+\name{flight_connect}
+\alias{flight_connect}
+\title{Connect to a Flight server}
+\usage{
+flight_connect(host = "localhost", port, scheme = "grpc+tcp")
+}
+\arguments{
+\item{host}{string hostname to connect to}
+
+\item{port}{integer port to connect on}
+
+\item{scheme}{URL scheme, default is "grpc+tcp"}
+}
+\value{
+A \code{pyarrow.flight.FlightClient}.
+}
+\description{
+Connect to a Flight server
+}
diff --git a/src/arrow/r/man/flight_get.Rd b/src/arrow/r/man/flight_get.Rd
new file mode 100644
index 000000000..a79c4d727
--- /dev/null
+++ b/src/arrow/r/man/flight_get.Rd
@@ -0,0 +1,19 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/flight.R
+\name{flight_get}
+\alias{flight_get}
+\title{Get data from a Flight server}
+\usage{
+flight_get(client, path)
+}
+\arguments{
+\item{client}{\code{pyarrow.flight.FlightClient}, as returned by \code{\link[=flight_connect]{flight_connect()}}}
+
+\item{path}{string identifier under which data is stored}
+}
+\value{
+A \link{Table}
+}
+\description{
+Get data from a Flight server
+}
diff --git a/src/arrow/r/man/flight_put.Rd b/src/arrow/r/man/flight_put.Rd
new file mode 100644
index 000000000..13a8da16f
--- /dev/null
+++ b/src/arrow/r/man/flight_put.Rd
@@ -0,0 +1,25 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/flight.R
+\name{flight_put}
+\alias{flight_put}
+\title{Send data to a Flight server}
+\usage{
+flight_put(client, data, path, overwrite = TRUE)
+}
+\arguments{
+\item{client}{\code{pyarrow.flight.FlightClient}, as returned by \code{\link[=flight_connect]{flight_connect()}}}
+
+\item{data}{\code{data.frame}, \link{RecordBatch}, or \link{Table} to upload}
+
+\item{path}{string identifier to store the data under}
+
+\item{overwrite}{logical: if \code{path} exists on \code{client} already, should we
+replace it with the contents of \code{data}? Default is \code{TRUE}; if \code{FALSE} and
+\code{path} exists, the function will error.}
+}
+\value{
+\code{client}, invisibly.
+}
+\description{
+Send data to a Flight server
+}
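+\examples{
+\dontrun{
+# A minimal sketch, assuming a Flight server is listening on a
+# hypothetical local port 8089:
+client <- flight_connect(port = 8089)
+flight_put(client, iris, path = "iris-demo")
+flight_get(client, "iris-demo")
+}
+}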
diff --git a/src/arrow/r/man/get_stringr_pattern_options.Rd b/src/arrow/r/man/get_stringr_pattern_options.Rd
new file mode 100644
index 000000000..7107b9060
--- /dev/null
+++ b/src/arrow/r/man/get_stringr_pattern_options.Rd
@@ -0,0 +1,22 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/dplyr-functions.R
+\name{get_stringr_pattern_options}
+\alias{get_stringr_pattern_options}
+\title{Get \code{stringr} pattern options}
+\usage{
+get_stringr_pattern_options(pattern)
+}
+\arguments{
+\item{pattern}{Unevaluated expression containing a call to a \code{stringr}
+pattern modifier function}
+}
+\value{
+List containing elements \code{pattern}, \code{fixed}, and \code{ignore_case}
+}
+\description{
+This function assigns definitions for the \code{stringr} pattern modifier
+functions (\code{fixed()}, \code{regex()}, etc.) inside itself, and uses them to
+evaluate the quoted expression \code{pattern}, returning a list that is used
+to control pattern matching behavior in internal \code{arrow} functions.
+}
+\keyword{internal}
diff --git a/src/arrow/r/man/hive_partition.Rd b/src/arrow/r/man/hive_partition.Rd
new file mode 100644
index 000000000..eef9f9157
--- /dev/null
+++ b/src/arrow/r/man/hive_partition.Rd
@@ -0,0 +1,35 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/dataset-partition.R
+\name{hive_partition}
+\alias{hive_partition}
+\title{Construct Hive partitioning}
+\usage{
+hive_partition(..., null_fallback = NULL, segment_encoding = "uri")
+}
+\arguments{
+\item{...}{named list of \link[=data-type]{data types}, passed to \code{\link[=schema]{schema()}}}
+
+\item{null_fallback}{character to be used in place of missing values (\code{NA} or \code{NULL})
+in partition columns. Default is \code{"__HIVE_DEFAULT_PARTITION__"},
+which is what Hive uses.}
+
+\item{segment_encoding}{Decode partition segments after splitting paths.
+Default is \code{"uri"} (URI-decode segments). May also be \code{"none"} (leave as-is).}
+}
+\value{
+A \link[=Partitioning]{HivePartitioning}, or a \code{HivePartitioningFactory} if
+calling \code{hive_partition()} with no arguments.
+}
+\description{
+Hive partitioning embeds field names and values in path segments, such as
+"/year=2019/month=2/data.parquet".
+}
+\details{
+Because fields are named in the path segments, order of fields passed to
+\code{hive_partition()} does not matter.
+}
+\examples{
+\dontshow{if (arrow_with_dataset()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+hive_partition(year = int16(), month = int8())
+\dontshow{\}) # examplesIf}
+}
diff --git a/src/arrow/r/man/install_arrow.Rd b/src/arrow/r/man/install_arrow.Rd
new file mode 100644
index 000000000..bf94650b3
--- /dev/null
+++ b/src/arrow/r/man/install_arrow.Rd
@@ -0,0 +1,61 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/install-arrow.R
+\name{install_arrow}
+\alias{install_arrow}
+\title{Install or upgrade the Arrow library}
+\usage{
+install_arrow(
+ nightly = FALSE,
+ binary = Sys.getenv("LIBARROW_BINARY", TRUE),
+ use_system = Sys.getenv("ARROW_USE_PKG_CONFIG", FALSE),
+ minimal = Sys.getenv("LIBARROW_MINIMAL", FALSE),
+ verbose = Sys.getenv("ARROW_R_DEV", FALSE),
+ repos = getOption("repos"),
+ ...
+)
+}
+\arguments{
+\item{nightly}{logical: Should we install a development version of the
+package, or should we install from CRAN (the default)?
+
+\item{binary}{On Linux, value to set for the environment variable
+\code{LIBARROW_BINARY}, which governs how C++ binaries are used, if at all.
+The default value, \code{TRUE}, tells the installation script to detect the
+Linux distribution and version and find an appropriate C++ library. \code{FALSE}
+would tell the script not to retrieve a binary and instead build Arrow C++
+from source. Other valid values are strings corresponding to a Linux
+distribution-version, to override the value that would be detected.
+See \code{vignette("install", package = "arrow")} for further details.}
+
+\item{use_system}{logical: Should we use \code{pkg-config} to look for Arrow
+system packages? Default is \code{FALSE}. If \code{TRUE}, source installation may be
+faster, but there is a risk of version mismatch. This sets the
+\code{ARROW_USE_PKG_CONFIG} environment variable.}
+
+\item{minimal}{logical: If building from source, should we build without
+optional dependencies (compression libraries, for example)? Default is
+\code{FALSE}. This sets the \code{LIBARROW_MINIMAL} environment variable.}
+
+\item{verbose}{logical: Print more debugging output when installing? Default
+is \code{FALSE}. This sets the \code{ARROW_R_DEV} environment variable.}
+
+\item{repos}{character vector of base URLs of the repositories to install
+from (passed to \code{install.packages()})}
+
+\item{...}{Additional arguments passed to \code{install.packages()}}
+}
+\description{
+Use this function to install the latest release of \code{arrow}, to switch to or
+from a nightly development version, or on Linux to try reinstalling with
+all necessary C++ dependencies.
+}
+\details{
+Note that, unlike packages like \code{tensorflow}, \code{blogdown}, and others that
+require external dependencies, you do not need to run \code{install_arrow()}
+after a successful \code{arrow} installation.
+}
+\seealso{
+\code{\link[=arrow_available]{arrow_available()}} to see if the package was configured with
+necessary C++ dependencies. \code{vignette("install", package = "arrow")} for
+more ways to tune installation on Linux.
+}
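+\examples{
+\dontrun{
+# A minimal sketch (not run): reinstall the latest release from CRAN
+install_arrow()
+
+# Or switch to a nightly development build
+install_arrow(nightly = TRUE)
+}
+}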
diff --git a/src/arrow/r/man/install_pyarrow.Rd b/src/arrow/r/man/install_pyarrow.Rd
new file mode 100644
index 000000000..223a26754
--- /dev/null
+++ b/src/arrow/r/man/install_pyarrow.Rd
@@ -0,0 +1,22 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/python.R
+\name{install_pyarrow}
+\alias{install_pyarrow}
+\title{Install pyarrow for use with reticulate}
+\usage{
+install_pyarrow(envname = NULL, nightly = FALSE, ...)
+}
+\arguments{
+\item{envname}{The name or full path of the Python environment to install
+into. This can be a virtualenv or conda environment created by \code{reticulate}.
+See \code{reticulate::py_install()}.}
+
+\item{nightly}{logical: Should we install a development version of the
+package? Default is to use the official release version.}
+
+\item{...}{additional arguments passed to \code{reticulate::py_install()}.}
+}
+\description{
+\code{pyarrow} is the Python package for Apache Arrow. This function helps with
+installing it for use with \code{reticulate}.
+}
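+\examples{
+\dontrun{
+# A sketch (not run): install pyarrow into a Python environment;
+# the environment name "r-arrow" here is illustrative
+install_pyarrow("r-arrow")
+}
+}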
diff --git a/src/arrow/r/man/io_thread_count.Rd b/src/arrow/r/man/io_thread_count.Rd
new file mode 100644
index 000000000..b1dfa0ba7
--- /dev/null
+++ b/src/arrow/r/man/io_thread_count.Rd
@@ -0,0 +1,17 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/config.R
+\name{io_thread_count}
+\alias{io_thread_count}
+\alias{set_io_thread_count}
+\title{Manage the global I/O thread pool in libarrow}
+\usage{
+io_thread_count()
+
+set_io_thread_count(num_threads)
+}
+\arguments{
+\item{num_threads}{integer: New number of threads for thread pool}
+}
+\description{
+Manage the global I/O thread pool in libarrow
+}
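+\examples{
+\dontrun{
+# A minimal sketch (not run): inspect, then double, the I/O thread count
+io_thread_count()
+set_io_thread_count(2 * io_thread_count())
+}
+}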
diff --git a/src/arrow/r/man/list_compute_functions.Rd b/src/arrow/r/man/list_compute_functions.Rd
new file mode 100644
index 000000000..45e033836
--- /dev/null
+++ b/src/arrow/r/man/list_compute_functions.Rd
@@ -0,0 +1,45 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/compute.R
+\name{list_compute_functions}
+\alias{list_compute_functions}
+\title{List available Arrow C++ compute functions}
+\usage{
+list_compute_functions(pattern = NULL, ...)
+}
+\arguments{
+\item{pattern}{Optional regular expression to filter the function list}
+
+\item{...}{Additional parameters passed to \code{grep()}}
+}
+\value{
+A character vector of available Arrow C++ function names
+}
+\description{
+This function lists the names of all available Arrow C++ library compute functions.
+These can be called by passing their name to \code{\link[=call_function]{call_function()}}, or they can be
+called by name with an \code{arrow_} prefix inside a \code{dplyr} verb.
+}
+\details{
+The resulting list describes the capabilities of your \code{arrow} build.
+Some functions, such as string and regular expression functions,
+require optional build-time C++ dependencies. If your \code{arrow} package
+was not compiled with those features enabled, those functions will
+not appear in this list.
+
+Some functions take options that need to be passed when calling them
+(in a list called \code{options}). These options require custom handling
+in C++; many functions already have that handling set up but not all do.
+If you encounter one that needs special handling for options, please
+report an issue.
+
+Note that this list does \emph{not} enumerate all of the R bindings for these functions.
+The package includes Arrow methods for many base R functions that can
+be called directly on Arrow objects, as well as some tidyverse-flavored versions
+available inside \code{dplyr} verbs.
+}
+\examples{
+\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+available_funcs <- list_compute_functions()
+utf8_funcs <- list_compute_functions(pattern = "^UTF8", ignore.case = TRUE)
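+
+# A sketch: any listed function can be invoked via call_function()
+call_function("sum", Array$create(c(1, 2, 3)))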
+\dontshow{\}) # examplesIf}
+}
diff --git a/src/arrow/r/man/list_flights.Rd b/src/arrow/r/man/list_flights.Rd
new file mode 100644
index 000000000..d8ebb0d02
--- /dev/null
+++ b/src/arrow/r/man/list_flights.Rd
@@ -0,0 +1,23 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/flight.R
+\name{list_flights}
+\alias{list_flights}
+\alias{flight_path_exists}
+\title{See available resources on a Flight server}
+\usage{
+list_flights(client)
+
+flight_path_exists(client, path)
+}
+\arguments{
+\item{client}{\code{pyarrow.flight.FlightClient}, as returned by \code{\link[=flight_connect]{flight_connect()}}}
+
+\item{path}{string identifier under which data is stored}
+}
+\value{
+\code{list_flights()} returns a character vector of paths.
+\code{flight_path_exists()} returns a logical value, the equivalent of \code{path \%in\% list_flights()}.
+}
+\description{
+See available resources on a Flight server
+}
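+\examples{
+\dontrun{
+# A sketch (not run), assuming a Flight server is listening on port 8089;
+# the path "flights-example" is illustrative
+client <- flight_connect(port = 8089)
+list_flights(client)
+flight_path_exists(client, "flights-example")
+}
+}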
diff --git a/src/arrow/r/man/load_flight_server.Rd b/src/arrow/r/man/load_flight_server.Rd
new file mode 100644
index 000000000..66d30f391
--- /dev/null
+++ b/src/arrow/r/man/load_flight_server.Rd
@@ -0,0 +1,22 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/flight.R
+\name{load_flight_server}
+\alias{load_flight_server}
+\title{Load a Python Flight server}
+\usage{
+load_flight_server(name, path = system.file(package = "arrow"))
+}
+\arguments{
+\item{name}{string Python module name}
+
+\item{path}{file system path where the Python module is found. Default is
+to look in the \verb{inst/} directory for included modules.}
+}
+\description{
+Load a Python Flight server
+}
+\examples{
+\dontshow{if (FALSE) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+load_flight_server("demo_flight_server")
+\dontshow{\}) # examplesIf}
+}
diff --git a/src/arrow/r/man/make_readable_file.Rd b/src/arrow/r/man/make_readable_file.Rd
new file mode 100644
index 000000000..fe2e29826
--- /dev/null
+++ b/src/arrow/r/man/make_readable_file.Rd
@@ -0,0 +1,29 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/io.R
+\name{make_readable_file}
+\alias{make_readable_file}
+\title{Handle a range of possible input sources}
+\usage{
+make_readable_file(file, mmap = TRUE, compression = NULL, filesystem = NULL)
+}
+\arguments{
+\item{file}{A character file name, \code{raw} vector, or an Arrow input stream}
+
+\item{mmap}{Logical: whether to memory-map the file (default \code{TRUE})}
+
+\item{compression}{If the file is compressed, create a \link{CompressedInputStream}
+with this compression codec, either a \link{Codec} or the string name of one.
+If \code{NULL} (default) and \code{file} is a string file name, the function will try
+to infer compression from the file extension.}
+
+\item{filesystem}{If not \code{NULL}, \code{file} will be opened via the
+\code{filesystem$OpenInputFile()} filesystem method, rather than the \code{io} module's
+\code{MemoryMappedFile} or \code{ReadableFile} constructors.}
+}
+\value{
+An \code{InputStream} or a subclass of one.
+}
+\description{
+Handle a range of possible input sources
+}
+\keyword{internal}
diff --git a/src/arrow/r/man/map_batches.Rd b/src/arrow/r/man/map_batches.Rd
new file mode 100644
index 000000000..08e7b86c0
--- /dev/null
+++ b/src/arrow/r/man/map_batches.Rd
@@ -0,0 +1,30 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/dataset-scan.R
+\name{map_batches}
+\alias{map_batches}
+\title{Apply a function to a stream of RecordBatches}
+\usage{
+map_batches(X, FUN, ..., .data.frame = TRUE)
+}
+\arguments{
+\item{X}{A \code{Dataset} or \code{arrow_dplyr_query} object, as returned by the
+\code{dplyr} methods on \code{Dataset}.}
+
+\item{FUN}{A function or \code{purrr}-style lambda expression to apply to each
+batch}
+
+\item{...}{Additional arguments passed to \code{FUN}}
+
+\item{.data.frame}{logical: collect the resulting chunks into a single
+\code{data.frame}? Default \code{TRUE}}
+}
+\description{
+As an alternative to calling \code{collect()} on a \code{Dataset} query, you can
+use this function to access the stream of \code{RecordBatch}es in the \code{Dataset}.
+This lets you aggregate on each chunk and pull the intermediate results into
+a \code{data.frame} for further aggregation, even if you couldn't fit the whole
+\code{Dataset} result in memory.
+}
+\details{
+This is experimental and not recommended for production use.
+}
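+\examples{
+\dontrun{
+# A minimal sketch (not run), assuming `ds` is a Dataset:
+# count rows per batch, collected into a single data.frame
+map_batches(ds, function(batch) data.frame(rows = nrow(batch)))
+}
+}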
diff --git a/src/arrow/r/man/match_arrow.Rd b/src/arrow/r/man/match_arrow.Rd
new file mode 100644
index 000000000..877a41926
--- /dev/null
+++ b/src/arrow/r/man/match_arrow.Rd
@@ -0,0 +1,53 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/compute.R
+\name{match_arrow}
+\alias{match_arrow}
+\alias{is_in}
+\title{\code{match} and \code{\%in\%} for Arrow objects}
+\usage{
+match_arrow(x, table, ...)
+
+is_in(x, table, ...)
+}
+\arguments{
+\item{x}{\code{Scalar}, \code{Array} or \code{ChunkedArray}}
+
+\item{table}{\code{Scalar}, \code{Array}, \code{ChunkedArray}, or R vector lookup table.}
+
+\item{...}{additional arguments, ignored}
+}
+\value{
+\code{match_arrow()} returns an \code{int32}-type Arrow object of the same length
+and type as \code{x} with the (0-based) indexes into \code{table}. \code{is_in()} returns a
+\code{boolean}-type Arrow object of the same length and type as \code{x} with values
+indicating, for each element of \code{x}, whether it is present in \code{table}.
+}
+\description{
+\code{base::match()} is not a generic, so we can't just define Arrow methods for
+it. This function exposes the analogous functions in the Arrow C++ library.
+}
+\examples{
+\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+# note that the returned value is 0-indexed
+cars_tbl <- arrow_table(name = rownames(mtcars), mtcars)
+match_arrow(Scalar$create("Mazda RX4 Wag"), cars_tbl$name)
+
+is_in(Array$create("Mazda RX4 Wag"), cars_tbl$name)
+
+# Although there are multiple matches, you are returned the index of the first
+# match, as with the base R equivalent
+match(4, mtcars$cyl) # 1-indexed
+match_arrow(Scalar$create(4), cars_tbl$cyl) # 0-indexed
+
+# If `x` contains multiple values, you are returned the indices of the first
+# match for each value.
+match(c(4, 6, 8), mtcars$cyl)
+match_arrow(Array$create(c(4, 6, 8)), cars_tbl$cyl)
+
+# Return type matches type of `x`
+is_in(c(4, 6, 8), mtcars$cyl) # returns vector
+is_in(Scalar$create(4), mtcars$cyl) # returns Scalar
+is_in(Array$create(c(4, 6, 8)), cars_tbl$cyl) # returns Array
+is_in(ChunkedArray$create(c(4, 6), 8), cars_tbl$cyl) # returns ChunkedArray
+\dontshow{\}) # examplesIf}
+}
diff --git a/src/arrow/r/man/mmap_create.Rd b/src/arrow/r/man/mmap_create.Rd
new file mode 100644
index 000000000..b85519348
--- /dev/null
+++ b/src/arrow/r/man/mmap_create.Rd
@@ -0,0 +1,19 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/io.R
+\name{mmap_create}
+\alias{mmap_create}
+\title{Create a new read/write memory mapped file of a given size}
+\usage{
+mmap_create(path, size)
+}
+\arguments{
+\item{path}{file path}
+
+\item{size}{size in bytes}
+}
+\value{
+an \link[=MemoryMappedFile]{arrow::io::MemoryMappedFile}
+}
+\description{
+Create a new read/write memory mapped file of a given size
+}
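+\examples{
+\dontrun{
+# A sketch (not run): create a 1 KiB memory-mapped file, close it,
+# then reopen it read-only with mmap_open()
+tf <- tempfile()
+mm <- mmap_create(tf, 1024)
+mm$close()
+mmap_open(tf, mode = "read")
+}
+}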
diff --git a/src/arrow/r/man/mmap_open.Rd b/src/arrow/r/man/mmap_open.Rd
new file mode 100644
index 000000000..d0047a72c
--- /dev/null
+++ b/src/arrow/r/man/mmap_open.Rd
@@ -0,0 +1,16 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/io.R
+\name{mmap_open}
+\alias{mmap_open}
+\title{Open a memory mapped file}
+\usage{
+mmap_open(path, mode = c("read", "write", "readwrite"))
+}
+\arguments{
+\item{path}{file path}
+
+\item{mode}{file mode (read/write/readwrite)}
+}
+\description{
+Open a memory mapped file
+}
diff --git a/src/arrow/r/man/open_dataset.Rd b/src/arrow/r/man/open_dataset.Rd
new file mode 100644
index 000000000..4d6b492e3
--- /dev/null
+++ b/src/arrow/r/man/open_dataset.Rd
@@ -0,0 +1,146 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/dataset.R
+\name{open_dataset}
+\alias{open_dataset}
+\title{Open a multi-file dataset}
+\usage{
+open_dataset(
+ sources,
+ schema = NULL,
+ partitioning = hive_partition(),
+ unify_schemas = NULL,
+ format = c("parquet", "arrow", "ipc", "feather", "csv", "tsv", "text"),
+ ...
+)
+}
+\arguments{
+\item{sources}{One of:
+\itemize{
+\item a string path or URI to a directory containing data files
+\item a string path or URI to a single file
+\item a character vector of paths or URIs to individual data files
+\item a list of \code{Dataset} objects as created by this function
+\item a list of \code{DatasetFactory} objects as created by \code{\link[=dataset_factory]{dataset_factory()}}.
+}
+
+When \code{sources} is a vector of file URIs, they must all use the same protocol
+and point to files located in the same file system and have the same
+format.}
+
+\item{schema}{\link{Schema} for the \code{Dataset}. If \code{NULL} (the default), the schema
+will be inferred from the data sources.}
+
+\item{partitioning}{When \code{sources} is a directory path/URI, one of:
+\itemize{
+\item a \code{Schema}, in which case the file paths relative to \code{sources} will be
+parsed, and path segments will be matched with the schema fields. For
+example, \code{schema(year = int16(), month = int8())} would create partitions
+for file paths like \code{"2019/01/file.parquet"}, \code{"2019/02/file.parquet"},
+etc.
+\item a character vector that defines the field names corresponding to those
+path segments (that is, you're providing the names that would correspond
+to a \code{Schema} but the types will be autodetected)
+\item a \code{HivePartitioning} or \code{HivePartitioningFactory}, as returned
+by \code{\link[=hive_partition]{hive_partition()}} which parses explicit or autodetected fields from
+Hive-style path segments
+\item \code{NULL} for no partitioning
+}
+
+The default is to autodetect Hive-style partitions. When \code{sources} is not a
+directory path/URI, \code{partitioning} is ignored.}
+
+\item{unify_schemas}{logical: should all data fragments (files, \code{Dataset}s)
+be scanned in order to create a unified schema from them? If \code{FALSE}, only
+the first fragment will be inspected for its schema. Use this fast path
+when you know and trust that all fragments have an identical schema.
+The default is \code{FALSE} when creating a dataset from a directory path/URI or
+vector of file paths/URIs (because there may be many files and scanning may
+be slow) but \code{TRUE} when \code{sources} is a list of \code{Dataset}s (because there
+should be few \code{Dataset}s in the list and their \code{Schema}s are already in
+memory).}
+
+\item{format}{A \link{FileFormat} object, or a string identifier of the format of
+the files in \code{sources}. This argument is ignored when \code{sources} is a list of \code{Dataset} objects.
+Currently supported values:
+\itemize{
+\item "parquet"
+\item "ipc"/"arrow"/"feather", all aliases for each other; for Feather, note that
+only version 2 files are supported
+\item "csv"/"text", aliases for the same thing (because comma is the default
+delimiter for text files)
+\item "tsv", equivalent to passing \verb{format = "text", delimiter = "\\t"}
+}
+
+Default is "parquet", unless a \code{delimiter} is also specified, in which case
+it is assumed to be "text".}
+
+\item{...}{additional arguments passed to \code{dataset_factory()} when \code{sources}
+is a directory path/URI or vector of file paths/URIs, otherwise ignored.
+These may include \code{format} to indicate the file format, or other
+format-specific options.}
+}
+\value{
+A \link{Dataset} R6 object. Use \code{dplyr} methods on it to query the data,
+or call \code{\link[=Scanner]{$NewScan()}} to construct a query directly.
+}
+\description{
+Arrow Datasets allow you to query against data that has been split across
+multiple files. This sharding of data may indicate partitioning, which
+can accelerate queries that only touch some partitions (files). Call
+\code{open_dataset()} to point to a directory of data files and return a
+\code{Dataset}, then use \code{dplyr} methods to query it.
+}
+\examples{
+\dontshow{if (arrow_with_dataset() & arrow_with_parquet()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+# Set up directory for examples
+tf <- tempfile()
+dir.create(tf)
+on.exit(unlink(tf))
+
+data <- dplyr::group_by(mtcars, cyl)
+write_dataset(data, tf)
+
+# You can specify a directory containing the files for your dataset and
+# open_dataset will scan all files in your directory.
+open_dataset(tf)
+
+# You can also supply a vector of paths
+open_dataset(c(file.path(tf, "cyl=4/part-0.parquet"), file.path(tf, "cyl=8/part-0.parquet")))
+
+## You must specify the file format if using a format other than parquet.
+tf2 <- tempfile()
+dir.create(tf2)
+on.exit(unlink(tf2))
+write_dataset(data, tf2, format = "ipc")
+# This line will result in errors when you try to work with the data
+\dontrun{
+open_dataset(tf2)
+}
+# This line will work
+open_dataset(tf2, format = "ipc")
+
+## You can specify file partitioning to include it as a field in your dataset
+# Create a temporary directory and write example dataset
+tf3 <- tempfile()
+dir.create(tf3)
+on.exit(unlink(tf3))
+write_dataset(airquality, tf3, partitioning = c("Month", "Day"), hive_style = FALSE)
+
+# View files - you can see the partitioning means that files have been written
+# to folders based on Month/Day values
+tf3_files <- list.files(tf3, recursive = TRUE)
+
+# With no partitioning specified, dataset contains all files but doesn't include
+# directory names as field names
+open_dataset(tf3)
+
+# Now that partitioning has been specified, your dataset contains columns for Month and Day
+open_dataset(tf3, partitioning = c("Month", "Day"))
+
+# If you want to specify the data types for your fields, you can pass in a Schema
+open_dataset(tf3, partitioning = schema(Month = int8(), Day = int8()))
+\dontshow{\}) # examplesIf}
+}
+\seealso{
+\code{vignette("dataset", package = "arrow")}
+}
diff --git a/src/arrow/r/man/read_delim_arrow.Rd b/src/arrow/r/man/read_delim_arrow.Rd
new file mode 100644
index 000000000..7bfda29b8
--- /dev/null
+++ b/src/arrow/r/man/read_delim_arrow.Rd
@@ -0,0 +1,218 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/csv.R
+\name{read_delim_arrow}
+\alias{read_delim_arrow}
+\alias{read_csv_arrow}
+\alias{read_tsv_arrow}
+\title{Read a CSV or other delimited file with Arrow}
+\usage{
+read_delim_arrow(
+ file,
+ delim = ",",
+ quote = "\\"",
+ escape_double = TRUE,
+ escape_backslash = FALSE,
+ schema = NULL,
+ col_names = TRUE,
+ col_types = NULL,
+ col_select = NULL,
+ na = c("", "NA"),
+ quoted_na = TRUE,
+ skip_empty_rows = TRUE,
+ skip = 0L,
+ parse_options = NULL,
+ convert_options = NULL,
+ read_options = NULL,
+ as_data_frame = TRUE,
+ timestamp_parsers = NULL
+)
+
+read_csv_arrow(
+ file,
+ quote = "\\"",
+ escape_double = TRUE,
+ escape_backslash = FALSE,
+ schema = NULL,
+ col_names = TRUE,
+ col_types = NULL,
+ col_select = NULL,
+ na = c("", "NA"),
+ quoted_na = TRUE,
+ skip_empty_rows = TRUE,
+ skip = 0L,
+ parse_options = NULL,
+ convert_options = NULL,
+ read_options = NULL,
+ as_data_frame = TRUE,
+ timestamp_parsers = NULL
+)
+
+read_tsv_arrow(
+ file,
+ quote = "\\"",
+ escape_double = TRUE,
+ escape_backslash = FALSE,
+ schema = NULL,
+ col_names = TRUE,
+ col_types = NULL,
+ col_select = NULL,
+ na = c("", "NA"),
+ quoted_na = TRUE,
+ skip_empty_rows = TRUE,
+ skip = 0L,
+ parse_options = NULL,
+ convert_options = NULL,
+ read_options = NULL,
+ as_data_frame = TRUE,
+ timestamp_parsers = NULL
+)
+}
+\arguments{
+\item{file}{A character file name or URI, \code{raw} vector, an Arrow input stream,
+or a \code{FileSystem} with path (\code{SubTreeFileSystem}).
+If a file name, a memory-mapped Arrow \link{InputStream} will be opened and
+closed when finished; compression will be detected from the file extension
+and handled automatically. If an input stream is provided, it will be left
+open.}
+
+\item{delim}{Single character used to separate fields within a record.}
+
+\item{quote}{Single character used to quote strings.}
+
+\item{escape_double}{Does the file escape quotes by doubling them?
+i.e. If this option is \code{TRUE}, the value \verb{""""} represents
+a single quote, \verb{\\"}.}
+
+\item{escape_backslash}{Does the file use backslashes to escape special
+characters? This is more general than \code{escape_double} as backslashes
+can be used to escape the delimiter character, the quote character, or
+to add special characters like \verb{\\\\n}.}
+
+\item{schema}{\link{Schema} that describes the table. If provided, it will be
+used to satisfy both \code{col_names} and \code{col_types}.}
+
+\item{col_names}{If \code{TRUE}, the first row of the input will be used as the
+column names and will not be included in the data frame. If \code{FALSE}, column
+names will be generated by Arrow, starting with "f0", "f1", ..., "fN".
+Alternatively, you can specify a character vector of column names.}
+
+\item{col_types}{A compact string representation of the column types, or
+\code{NULL} (the default) to infer types from the data.}
+
+\item{col_select}{A character vector of column names to keep, as in the
+"select" argument to \code{data.table::fread()}, or a
+\link[tidyselect:vars_select]{tidy selection specification}
+of columns, as used in \code{dplyr::select()}.}
+
+\item{na}{A character vector of strings to interpret as missing values.}
+
+\item{quoted_na}{Should missing values inside quotes be treated as missing
+values (the default) or as strings? (Note that this is different from the
+Arrow C++ default for the corresponding convert option,
+\code{strings_can_be_null}.)}
+
+\item{skip_empty_rows}{Should blank rows be ignored altogether? If
+\code{TRUE}, blank rows will not be represented at all. If \code{FALSE}, they will be
+filled with missing values.}
+
+\item{skip}{Number of lines to skip before reading data.}
+
+\item{parse_options}{see \link[=CsvReadOptions]{file reader options}.
+If given, this overrides any
+parsing options provided in other arguments (e.g. \code{delim}, \code{quote}, etc.).}
+
+\item{convert_options}{see \link[=CsvReadOptions]{file reader options}}
+
+\item{read_options}{see \link[=CsvReadOptions]{file reader options}}
+
+\item{as_data_frame}{Should the function return a \code{data.frame} (default) or
+an Arrow \link{Table}?}
+
+\item{timestamp_parsers}{User-defined timestamp parsers. If more than one
+parser is specified, the CSV conversion logic will try parsing values
+starting from the beginning of this vector. Possible values are:
+\itemize{
+\item \code{NULL}: the default, which uses the ISO-8601 parser
+\item a character vector of \link[base:strptime]{strptime} parse strings
+\item a list of \link{TimestampParser} objects
+}}
+}
+\value{
+A \code{data.frame}, or a Table if \code{as_data_frame = FALSE}.
+}
+\description{
+These functions use the Arrow C++ CSV reader to read data into a \code{data.frame}.
+Arrow C++ options have been mapped to argument names that follow those of
+\code{readr::read_delim()}, and \code{col_select} was inspired by \code{vroom::vroom()}.
+}
+\details{
+\code{read_csv_arrow()} and \code{read_tsv_arrow()} are wrappers around
+\code{read_delim_arrow()} that specify a delimiter.
+
+Note that not all \code{readr} options are currently implemented here. Please file
+an issue if you encounter one that \code{arrow} should support.
+
+If you need to control Arrow-specific reader parameters that don't have an
+equivalent in \code{readr::read_csv()}, you can either provide them in the
+\code{parse_options}, \code{convert_options}, or \code{read_options} arguments, or you can
+use \link{CsvTableReader} directly for lower-level access.
+}
+\section{Specifying column types and names}{
+
+
+By default, the CSV reader will infer the column names and data types from the file, but there
+are a few ways you can specify them directly.
+
+One way is to provide an Arrow \link{Schema} in the \code{schema} argument,
+which is an ordered map of column name to type.
+When provided, it satisfies both the \code{col_names} and \code{col_types} arguments.
+This is good if you know all of this information up front.
+
+You can also pass a \code{Schema} to the \code{col_types} argument. If you do this,
+column names will still be inferred from the file unless you also specify
+\code{col_names}. In either case, the column names in the \code{Schema} must match the
+data's column names, whether they are explicitly provided or inferred. That
+said, this \code{Schema} does not have to reference all columns: those omitted
+will have their types inferred.
+
+Alternatively, you can declare column types by providing the compact string representation
+that \code{readr} uses to the \code{col_types} argument. This means you provide a
+single string, one character per column, where the characters map to Arrow
+types analogously to the \code{readr} type mapping:
+\itemize{
+\item "c": \code{utf8()}
+\item "i": \code{int32()}
+\item "n": \code{float64()}
+\item "d": \code{float64()}
+\item "l": \code{bool()}
+\item "f": \code{dictionary()}
+\item "D": \code{date32()}
+\item "T": \code{timestamp()}
+\item "t": \code{time32()}
+\item "_": \code{null()}
+\item "-": \code{null()}
+\item "?": infer the type from the data
+}
+
+If you use the compact string representation for \code{col_types}, you must also
+specify \code{col_names}.
+
+Regardless of how types are specified, all columns with a \code{null()} type will
+be dropped.
+
+Note that if you are specifying column names, whether by \code{schema} or
+\code{col_names}, and the CSV file has a header row that would otherwise be used
+to identify column names, you'll need to add \code{skip = 1} to skip that row.
+}
+
+\examples{
+\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+tf <- tempfile()
+on.exit(unlink(tf))
+write.csv(mtcars, file = tf)
+df <- read_csv_arrow(tf)
+dim(df)
+# Can select columns
+df <- read_csv_arrow(tf, col_select = starts_with("d"))
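+
+# A sketch of the compact col_types string: one character per column,
+# with col_names supplied and skip = 1 to skip the header row
+df <- read_csv_arrow(tf,
+  col_names = c("model", names(mtcars)),
+  col_types = paste0("c", strrep("n", ncol(mtcars))),
+  skip = 1
+)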
+\dontshow{\}) # examplesIf}
+}
diff --git a/src/arrow/r/man/read_feather.Rd b/src/arrow/r/man/read_feather.Rd
new file mode 100644
index 000000000..95f4d1d12
--- /dev/null
+++ b/src/arrow/r/man/read_feather.Rd
@@ -0,0 +1,50 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/feather.R
+\name{read_feather}
+\alias{read_feather}
+\title{Read a Feather file}
+\usage{
+read_feather(file, col_select = NULL, as_data_frame = TRUE, ...)
+}
+\arguments{
+\item{file}{A character file name or URI, \code{raw} vector, an Arrow input stream,
+or a \code{FileSystem} with path (\code{SubTreeFileSystem}).
+If a file name or URI, an Arrow \link{InputStream} will be opened and
+closed when finished. If an input stream is provided, it will be left
+open.}
+
+\item{col_select}{A character vector of column names to keep, as in the
+"select" argument to \code{data.table::fread()}, or a
+\link[tidyselect:vars_select]{tidy selection specification}
+of columns, as used in \code{dplyr::select()}.}
+
+\item{as_data_frame}{Should the function return a \code{data.frame} (default) or
+an Arrow \link{Table}?}
+
+\item{...}{additional parameters, passed to \code{\link[=make_readable_file]{make_readable_file()}}.}
+}
+\value{
+A \code{data.frame} if \code{as_data_frame} is \code{TRUE} (the default), or an
+Arrow \link{Table} otherwise
+}
+\description{
+Feather provides binary columnar serialization for data frames.
+It is designed to make reading and writing data frames efficient,
+and to make sharing data across data analysis languages easy.
+This function reads both the original, limited specification of the format
+and the version 2 specification, which is the Apache Arrow IPC file format.
+}
+\examples{
+\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+tf <- tempfile()
+on.exit(unlink(tf))
+write_feather(mtcars, tf)
+df <- read_feather(tf)
+dim(df)
+# Can select columns
+df <- read_feather(tf, col_select = starts_with("d"))
+\dontshow{\}) # examplesIf}
+}
+\seealso{
+\link{FeatherReader} and \link{RecordBatchReader} for lower-level access to reading Arrow IPC data.
+}
diff --git a/src/arrow/r/man/read_ipc_stream.Rd b/src/arrow/r/man/read_ipc_stream.Rd
new file mode 100644
index 000000000..d4dd78314
--- /dev/null
+++ b/src/arrow/r/man/read_ipc_stream.Rd
@@ -0,0 +1,42 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/deprecated.R, R/ipc_stream.R
+\name{read_arrow}
+\alias{read_arrow}
+\alias{read_ipc_stream}
+\title{Read Arrow IPC stream format}
+\usage{
+read_arrow(file, ...)
+
+read_ipc_stream(file, as_data_frame = TRUE, ...)
+}
+\arguments{
+\item{file}{A character file name or URI, \code{raw} vector, an Arrow input stream,
+or a \code{FileSystem} with path (\code{SubTreeFileSystem}).
+If a file name or URI, an Arrow \link{InputStream} will be opened and
+closed when finished. If an input stream is provided, it will be left
+open.}
+
+\item{...}{extra parameters passed to \code{read_feather()}.}
+
+\item{as_data_frame}{Should the function return a \code{data.frame} (default) or
+an Arrow \link{Table}?}
+}
+\value{
+A \code{data.frame} if \code{as_data_frame} is \code{TRUE} (the default), or an
+Arrow \link{Table} otherwise
+}
+\description{
+Apache Arrow defines two formats for \href{https://arrow.apache.org/docs/format/Columnar.html#serialization-and-interprocess-communication-ipc}{serializing data for interprocess communication (IPC)}:
+a "stream" format and a "file" format, known as Feather. \code{read_ipc_stream()}
+and \code{\link[=read_feather]{read_feather()}} read those formats, respectively.
+}
+\details{
+\code{read_arrow()}, a wrapper around \code{read_ipc_stream()} and \code{read_feather()},
+is deprecated. You should explicitly choose
+the function that will read the desired IPC format (stream or file) since
+a file or \code{InputStream} may contain either.
+}
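+\examples{
+\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+# A minimal round-trip sketch: write a stream, then read it back
+tf <- tempfile()
+on.exit(unlink(tf))
+write_ipc_stream(mtcars, tf)
+df <- read_ipc_stream(tf)
+\dontshow{\}) # examplesIf}
+}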
+\seealso{
+\code{\link[=read_feather]{read_feather()}} for reading IPC files. \link{RecordBatchReader} for a
+lower-level interface.
+}
diff --git a/src/arrow/r/man/read_json_arrow.Rd b/src/arrow/r/man/read_json_arrow.Rd
new file mode 100644
index 000000000..610867ca4
--- /dev/null
+++ b/src/arrow/r/man/read_json_arrow.Rd
@@ -0,0 +1,52 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/json.R
+\name{read_json_arrow}
+\alias{read_json_arrow}
+\title{Read a JSON file}
+\usage{
+read_json_arrow(
+ file,
+ col_select = NULL,
+ as_data_frame = TRUE,
+ schema = NULL,
+ ...
+)
+}
+\arguments{
+\item{file}{A character file name or URI, \code{raw} vector, an Arrow input stream,
+or a \code{FileSystem} with path (\code{SubTreeFileSystem}).
+If a file name, a memory-mapped Arrow \link{InputStream} will be opened and
+closed when finished; compression will be detected from the file extension
+and handled automatically. If an input stream is provided, it will be left
+open.}
+
+\item{col_select}{A character vector of column names to keep, as in the
+"select" argument to \code{data.table::fread()}, or a
+\link[tidyselect:vars_select]{tidy selection specification}
+of columns, as used in \code{dplyr::select()}.}
+
+\item{as_data_frame}{Should the function return a \code{data.frame} (default) or
+an Arrow \link{Table}?}
+
+\item{schema}{\link{Schema} that describes the table.}
+
+\item{...}{Additional options passed to \code{JsonTableReader$create()}}
+}
+\value{
+A \code{data.frame}, or a Table if \code{as_data_frame = FALSE}.
+}
+\description{
+Read a JSON file into a \code{data.frame} or Arrow \link{Table}, using
+\link{JsonTableReader}. The reader expects newline-delimited JSON records.
+}
+\examples{
+\dontshow{if (arrow_with_json()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+tf <- tempfile()
+on.exit(unlink(tf))
+writeLines('
+ { "hello": 3.5, "world": false, "yo": "thing" }
+ { "hello": 3.25, "world": null }
+ { "hello": 0.0, "world": true, "yo": null }
+ ', tf, useBytes = TRUE)
+df <- read_json_arrow(tf)
+\dontshow{\}) # examplesIf}
+}
diff --git a/src/arrow/r/man/read_message.Rd b/src/arrow/r/man/read_message.Rd
new file mode 100644
index 000000000..444c76c86
--- /dev/null
+++ b/src/arrow/r/man/read_message.Rd
@@ -0,0 +1,14 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/message.R
+\name{read_message}
+\alias{read_message}
+\title{Read a Message from a stream}
+\usage{
+read_message(stream)
+}
+\arguments{
+\item{stream}{an InputStream}
+}
+\description{
+Read a Message from a stream
+}
diff --git a/src/arrow/r/man/read_parquet.Rd b/src/arrow/r/man/read_parquet.Rd
new file mode 100644
index 000000000..056e86447
--- /dev/null
+++ b/src/arrow/r/man/read_parquet.Rd
@@ -0,0 +1,50 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/parquet.R
+\name{read_parquet}
+\alias{read_parquet}
+\title{Read a Parquet file}
+\usage{
+read_parquet(
+ file,
+ col_select = NULL,
+ as_data_frame = TRUE,
+ props = ParquetArrowReaderProperties$create(),
+ ...
+)
+}
+\arguments{
+\item{file}{A character file name or URI, \code{raw} vector, an Arrow input stream,
+or a \code{FileSystem} with path (\code{SubTreeFileSystem}).
+If a file name or URI, an Arrow \link{InputStream} will be opened and
+closed when finished. If an input stream is provided, it will be left
+open.}
+
+\item{col_select}{A character vector of column names to keep, as in the
+"select" argument to \code{data.table::fread()}, or a
+\link[tidyselect:vars_select]{tidy selection specification}
+of columns, as used in \code{dplyr::select()}.}
+
+\item{as_data_frame}{Should the function return a \code{data.frame} (default) or
+an Arrow \link{Table}?}
+
+\item{props}{\link{ParquetArrowReaderProperties}}
+
+\item{...}{Additional arguments passed to \code{ParquetFileReader$create()}}
+}
+\value{
+A \link[=Table]{arrow::Table}, or a \code{data.frame} if \code{as_data_frame} is
+\code{TRUE} (the default).
+}
+\description{
+'\href{https://parquet.apache.org/}{Parquet}' is a columnar storage file format.
+This function enables you to read Parquet files into R.
+}
+\examples{
+\dontshow{if (arrow_with_parquet()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+tf <- tempfile()
+on.exit(unlink(tf))
+write_parquet(mtcars, tf)
+df <- read_parquet(tf, col_select = starts_with("d"))
+head(df)
+\dontshow{\}) # examplesIf}
+}
diff --git a/src/arrow/r/man/read_schema.Rd b/src/arrow/r/man/read_schema.Rd
new file mode 100644
index 000000000..8738b8aeb
--- /dev/null
+++ b/src/arrow/r/man/read_schema.Rd
@@ -0,0 +1,19 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/schema.R
+\name{read_schema}
+\alias{read_schema}
+\title{Read a Schema from a stream}
+\usage{
+read_schema(stream, ...)
+}
+\arguments{
+\item{stream}{a \code{Message}, \code{InputStream}, or \code{Buffer}}
+
+\item{...}{currently ignored}
+}
+\value{
+A \link{Schema}
+}
+\description{
+Read a Schema from a stream
+}
diff --git a/src/arrow/r/man/recycle_scalars.Rd b/src/arrow/r/man/recycle_scalars.Rd
new file mode 100644
index 000000000..3d97ecfd7
--- /dev/null
+++ b/src/arrow/r/man/recycle_scalars.Rd
@@ -0,0 +1,18 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/util.R
+\name{recycle_scalars}
+\alias{recycle_scalars}
+\title{Recycle scalar values in a list of arrays}
+\usage{
+recycle_scalars(arrays)
+}
+\arguments{
+\item{arrays}{List of arrays}
+}
+\value{
+List of arrays with any vector/Scalar/Array/ChunkedArray values of length 1 recycled
+}
+\description{
+Recycle scalar values in a list of arrays
+}
+\keyword{internal}
diff --git a/src/arrow/r/man/reexports.Rd b/src/arrow/r/man/reexports.Rd
new file mode 100644
index 000000000..591158c72
--- /dev/null
+++ b/src/arrow/r/man/reexports.Rd
@@ -0,0 +1,29 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/reexports-bit64.R, R/reexports-tidyselect.R
+\docType{import}
+\name{reexports}
+\alias{reexports}
+\alias{print.integer64}
+\alias{str.integer64}
+\alias{contains}
+\alias{select_helpers}
+\alias{ends_with}
+\alias{everything}
+\alias{matches}
+\alias{num_range}
+\alias{one_of}
+\alias{starts_with}
+\alias{last_col}
+\alias{all_of}
+\title{Objects exported from other packages}
+\keyword{internal}
+\description{
+These objects are imported from other packages. Follow the links
+below to see their documentation.
+
+\describe{
+ \item{bit64}{\code{\link[bit64:bit64-package]{print.integer64}}, \code{\link[bit64:bit64-package]{str.integer64}}}
+
+ \item{tidyselect}{\code{\link[tidyselect]{all_of}}, \code{\link[tidyselect:starts_with]{contains}}, \code{\link[tidyselect:starts_with]{ends_with}}, \code{\link[tidyselect]{everything}}, \code{\link[tidyselect:everything]{last_col}}, \code{\link[tidyselect:starts_with]{matches}}, \code{\link[tidyselect:starts_with]{num_range}}, \code{\link[tidyselect]{one_of}}, \code{\link[tidyselect]{starts_with}}}
+}}
+
diff --git a/src/arrow/r/man/repeat_value_as_array.Rd b/src/arrow/r/man/repeat_value_as_array.Rd
new file mode 100644
index 000000000..a4937326e
--- /dev/null
+++ b/src/arrow/r/man/repeat_value_as_array.Rd
@@ -0,0 +1,20 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/util.R
+\name{repeat_value_as_array}
+\alias{repeat_value_as_array}
+\title{Take an object of length 1 and repeat it}
+\usage{
+repeat_value_as_array(object, n)
+}
+\arguments{
+\item{object}{Object of length 1 to be repeated - vector, \code{Scalar}, \code{Array}, or \code{ChunkedArray}}
+
+\item{n}{Number of repetitions}
+}
+\value{
+\code{Array} of length \code{n}
+}
+\description{
+Take an object of length 1 and repeat it.
+}
+\keyword{internal}
diff --git a/src/arrow/r/man/s3_bucket.Rd b/src/arrow/r/man/s3_bucket.Rd
new file mode 100644
index 000000000..95a086dea
--- /dev/null
+++ b/src/arrow/r/man/s3_bucket.Rd
@@ -0,0 +1,28 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/filesystem.R
+\name{s3_bucket}
+\alias{s3_bucket}
+\title{Connect to an AWS S3 bucket}
+\usage{
+s3_bucket(bucket, ...)
+}
+\arguments{
+\item{bucket}{string S3 bucket name or path}
+
+\item{...}{Additional connection options, passed to \code{S3FileSystem$create()}}
+}
+\value{
+A \code{SubTreeFileSystem} containing an \code{S3FileSystem} and the bucket's
+relative path. Note that this function's success does not guarantee that you
+are authorized to access the bucket's contents.
+}
+\description{
+\code{s3_bucket()} is a convenience function to create an \code{S3FileSystem} object
+that automatically detects the bucket's AWS region and holds onto its
+relative path.
+}
+\examples{
+\dontshow{if (arrow_with_s3()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+bucket <- s3_bucket("ursa-labs-taxi-data")
+\dontshow{\}) # examplesIf}
+}
diff --git a/src/arrow/r/man/to_arrow.Rd b/src/arrow/r/man/to_arrow.Rd
new file mode 100644
index 000000000..e0c31b8dc
--- /dev/null
+++ b/src/arrow/r/man/to_arrow.Rd
@@ -0,0 +1,33 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/duckdb.R
+\name{to_arrow}
+\alias{to_arrow}
+\title{Create an Arrow object from others}
+\usage{
+to_arrow(.data)
+}
+\arguments{
+\item{.data}{the object to be converted}
+}
+\value{
+an \code{arrow_dplyr_query} object, to be used in dplyr pipelines.
+}
+\description{
+This can be used in pipelines that pass data back and forth between Arrow and
+other processes (like DuckDB).
+}
+\examples{
+\dontshow{if (getFromNamespace("run_duckdb_examples", "arrow")()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+library(dplyr)
+
+ds <- InMemoryDataset$create(mtcars)
+
+ds \%>\%
+ filter(mpg < 30) \%>\%
+ to_duckdb() \%>\%
+ group_by(cyl) \%>\%
+ summarize(mean_mpg = mean(mpg, na.rm = TRUE)) \%>\%
+ to_arrow() \%>\%
+ collect()
+\dontshow{\}) # examplesIf}
+}
diff --git a/src/arrow/r/man/to_duckdb.Rd b/src/arrow/r/man/to_duckdb.Rd
new file mode 100644
index 000000000..12186d432
--- /dev/null
+++ b/src/arrow/r/man/to_duckdb.Rd
@@ -0,0 +1,56 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/duckdb.R
+\name{to_duckdb}
+\alias{to_duckdb}
+\title{Create a (virtual) DuckDB table from an Arrow object}
+\usage{
+to_duckdb(
+ .data,
+ con = arrow_duck_connection(),
+ table_name = unique_arrow_tablename(),
+ auto_disconnect = FALSE
+)
+}
+\arguments{
+\item{.data}{the Arrow object (e.g. Dataset, Table) to use for the DuckDB table}
+
+\item{con}{a DuckDB connection to use (default will create one and store it
+in \code{options("arrow_duck_con")})}
+
+\item{table_name}{a name to use in DuckDB for this object. The default is a
+unique string \code{"arrow_"} followed by numbers.}
+
+\item{auto_disconnect}{should the table be automatically cleaned up when the
+resulting object is removed (and garbage collected)? Default: \code{FALSE}}
+}
+\value{
+A \code{tbl} of the new table in DuckDB
+}
+\description{
+This will do the necessary configuration to create a (virtual) table in DuckDB
+that is backed by the Arrow object given. No data is copied or modified until
+\code{collect()} or \code{compute()} are called or a query is run against the table.
+}
+\details{
+The result is a dbplyr-compatible object that can be used in d(b)plyr pipelines.
+
+If \code{auto_disconnect = TRUE}, the DuckDB table that is created will be configured
+to be unregistered when the \code{tbl} object is garbage collected. This is helpful
+if you don't want to have extra table objects in DuckDB after you've finished
+using them. Currently, this cleanup can, however, sometimes lead to hangs if
+tables are created and deleted in quick succession, hence the default value
+of \code{FALSE}.
+}
+\examples{
+\dontshow{if (getFromNamespace("run_duckdb_examples", "arrow")()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+library(dplyr)
+
+ds <- InMemoryDataset$create(mtcars)
+
+ds \%>\%
+ filter(mpg < 30) \%>\%
+ to_duckdb() \%>\%
+ group_by(cyl) \%>\%
+ summarize(mean_mpg = mean(mpg, na.rm = TRUE))
+\dontshow{\}) # examplesIf}
+}
diff --git a/src/arrow/r/man/type.Rd b/src/arrow/r/man/type.Rd
new file mode 100644
index 000000000..d55bbe24b
--- /dev/null
+++ b/src/arrow/r/man/type.Rd
@@ -0,0 +1,27 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/type.R
+\name{type}
+\alias{type}
+\title{Infer the Arrow Array type from an R vector}
+\usage{
+type(x)
+}
+\arguments{
+\item{x}{an R vector}
+}
+\value{
+An Arrow logical type
+}
+\description{
+Infer the Arrow Array type from an R vector
+}
+\examples{
+\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+type(1:10)
+type(1L:10L)
+type(c(1, 1.5, 2))
+type(c("A", "B", "C"))
+type(mtcars)
+type(Sys.Date())
+\dontshow{\}) # examplesIf}
+}
diff --git a/src/arrow/r/man/unify_schemas.Rd b/src/arrow/r/man/unify_schemas.Rd
new file mode 100644
index 000000000..50c80c2dd
--- /dev/null
+++ b/src/arrow/r/man/unify_schemas.Rd
@@ -0,0 +1,27 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/schema.R
+\name{unify_schemas}
+\alias{unify_schemas}
+\title{Combine and harmonize schemas}
+\usage{
+unify_schemas(..., schemas = list(...))
+}
+\arguments{
+\item{...}{\link{Schema}s to unify}
+
+\item{schemas}{Alternatively, a list of schemas}
+}
+\value{
+A \code{Schema} with the union of fields contained in the inputs, or
+\code{NULL} if any of \code{schemas} is \code{NULL}
+}
+\description{
+Combine and harmonize schemas
+}
+\examples{
+\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+a <- schema(b = double(), c = bool())
+z <- schema(b = double(), k = utf8())
+unify_schemas(a, z)
+\dontshow{\}) # examplesIf}
+}
diff --git a/src/arrow/r/man/value_counts.Rd b/src/arrow/r/man/value_counts.Rd
new file mode 100644
index 000000000..7e64d1550
--- /dev/null
+++ b/src/arrow/r/man/value_counts.Rd
@@ -0,0 +1,24 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/compute.R
+\name{value_counts}
+\alias{value_counts}
+\title{\code{table} for Arrow objects}
+\usage{
+value_counts(x)
+}
+\arguments{
+\item{x}{\code{Array} or \code{ChunkedArray}}
+}
+\value{
+A \code{StructArray} containing "values" (same type as \code{x}) and "counts"
+\code{Int64}.
+}
+\description{
+This function tabulates the values in the array and returns a table of counts.
+}
+\examples{
+\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+cyl_vals <- Array$create(mtcars$cyl)
+counts <- value_counts(cyl_vals)
+\dontshow{\}) # examplesIf}
+}
diff --git a/src/arrow/r/man/write_csv_arrow.Rd b/src/arrow/r/man/write_csv_arrow.Rd
new file mode 100644
index 000000000..55a239ca9
--- /dev/null
+++ b/src/arrow/r/man/write_csv_arrow.Rd
@@ -0,0 +1,32 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/csv.R
+\name{write_csv_arrow}
+\alias{write_csv_arrow}
+\title{Write CSV file to disk}
+\usage{
+write_csv_arrow(x, sink, include_header = TRUE, batch_size = 1024L)
+}
+\arguments{
+\item{x}{\code{data.frame}, \link{RecordBatch}, or \link{Table}}
+
+\item{sink}{A string file path, URI, or \link{OutputStream}, or path in a file
+system (\code{SubTreeFileSystem})}
+
+\item{include_header}{Whether to write an initial header line with column names}
+
+\item{batch_size}{Maximum number of rows processed at a time. Default is 1024.}
+}
+\value{
+The input \code{x}, invisibly. Note that if \code{sink} is an \link{OutputStream},
+the stream will be left open.
+}
+\description{
+Write CSV file to disk
+}
+\examples{
+\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+tf <- tempfile()
+on.exit(unlink(tf))
+write_csv_arrow(mtcars, tf)
+\dontshow{\}) # examplesIf}
+}
diff --git a/src/arrow/r/man/write_dataset.Rd b/src/arrow/r/man/write_dataset.Rd
new file mode 100644
index 000000000..76bbaf7c7
--- /dev/null
+++ b/src/arrow/r/man/write_dataset.Rd
@@ -0,0 +1,115 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/dataset-write.R
+\name{write_dataset}
+\alias{write_dataset}
+\title{Write a dataset}
+\usage{
+write_dataset(
+ dataset,
+ path,
+ format = c("parquet", "feather", "arrow", "ipc", "csv"),
+ partitioning = dplyr::group_vars(dataset),
+ basename_template = paste0("part-{i}.", as.character(format)),
+ hive_style = TRUE,
+ existing_data_behavior = c("overwrite", "error", "delete_matching"),
+ ...
+)
+}
+\arguments{
+\item{dataset}{\link{Dataset}, \link{RecordBatch}, \link{Table}, \code{arrow_dplyr_query}, or
+\code{data.frame}. If an \code{arrow_dplyr_query}, the query will be evaluated and
+the result will be written. This means that you can \code{select()}, \code{filter()}, \code{mutate()},
+etc. to transform the data before it is written if you need to.}
+
+\item{path}{string path, URI, or \code{SubTreeFileSystem} referencing a directory
+to write to (directory will be created if it does not exist)}
+
+\item{format}{a string identifier of the file format. Default is to use
+"parquet" (see \link{FileFormat})}
+
+\item{partitioning}{\code{Partitioning} or a character vector of columns to
+use as partition keys (to be written as path segments). Default is to
+use the current \code{group_by()} columns.}
+
+\item{basename_template}{string template for the names of files to be written.
+Must contain \code{"{i}"}, which will be replaced with an autoincremented
+integer to generate basenames of datafiles. For example, \code{"part-{i}.feather"}
+will yield \verb{"part-0.feather", ...}.}
+
+\item{hive_style}{logical: write partition segments as Hive-style
+(\code{key1=value1/key2=value2/file.ext}) or as just bare values. Default is \code{TRUE}.}
+
+\item{existing_data_behavior}{The behavior to use when there is already data
+in the destination directory. Must be one of "overwrite", "error", or
+"delete_matching". When this is set to "overwrite" (the default), any
+new files created will overwrite existing files. When this is set to
+"error", the operation will fail if the destination directory is not
+empty. When this is set to "delete_matching", the writer will delete
+any existing partitions that will receive new data and will leave alone
+partitions to which no data is written.}
+
+\item{...}{additional format-specific arguments. For available Parquet
+options, see \code{\link[=write_parquet]{write_parquet()}}. The available Feather options are
+\itemize{
+\item \code{use_legacy_format} logical: write data formatted so that Arrow libraries
+versions 0.14 and lower can read it. Default is \code{FALSE}. You can also
+enable this by setting the environment variable \code{ARROW_PRE_0_15_IPC_FORMAT=1}.
+\item \code{metadata_version}: A string like "V5" or the equivalent integer indicating
+the Arrow IPC MetadataVersion. Default (NULL) will use the latest version,
+unless the environment variable \code{ARROW_PRE_1_0_METADATA_VERSION=1}, in
+which case it will be V4.
+\item \code{codec}: A \link{Codec} which will be used to compress body buffers of written
+files. Default (NULL) will not compress body buffers.
+\item \code{null_fallback}: character to be used in place of missing values (\code{NA} or
+\code{NULL}) when using Hive-style partitioning. See \code{\link[=hive_partition]{hive_partition()}}.
+}}
+}
+\value{
+The input \code{dataset}, invisibly
+}
+\description{
+This function allows you to write a dataset. By writing to more efficient
+binary storage formats, and by specifying relevant partitioning, you can
+make it much faster to read and query.
+}
+\examples{
+\dontshow{if (arrow_with_dataset() & arrow_with_parquet() & requireNamespace("dplyr", quietly = TRUE)) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+# You can write datasets partitioned by the values in a column (here: "cyl").
+# This creates a structure of the form cyl=X/part-Z.parquet.
+one_level_tree <- tempfile()
+write_dataset(mtcars, one_level_tree, partitioning = "cyl")
+list.files(one_level_tree, recursive = TRUE)
+
+# You can also partition by the values in multiple columns
+# (here: "cyl" and "gear").
+# This creates a structure of the form cyl=X/gear=Y/part-Z.parquet.
+two_levels_tree <- tempfile()
+write_dataset(mtcars, two_levels_tree, partitioning = c("cyl", "gear"))
+list.files(two_levels_tree, recursive = TRUE)
+
+# In the two previous examples we would have:
+# X = {4,6,8}, the number of cylinders.
+# Y = {3,4,5}, the number of forward gears.
+# Z = {0,1,2}, the number of saved parts, starting from 0.
+
+# You can obtain the same result as the previous examples using arrow with
+# a dplyr pipeline. This will be the same as two_levels_tree above, but the
+# output directory will be different.
+library(dplyr)
+two_levels_tree_2 <- tempfile()
+mtcars \%>\%
+ group_by(cyl, gear) \%>\%
+ write_dataset(two_levels_tree_2)
+list.files(two_levels_tree_2, recursive = TRUE)
+
+# And you can also turn off the Hive-style directory naming where the column
+# name is included with the values by using `hive_style = FALSE`.
+
+# Write a structure X/Y/part-Z.parquet.
+two_levels_tree_no_hive <- tempfile()
+mtcars \%>\%
+ group_by(cyl, gear) \%>\%
+ write_dataset(two_levels_tree_no_hive, hive_style = FALSE)
+list.files(two_levels_tree_no_hive, recursive = TRUE)
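+
+# A sketch of existing_data_behavior: "delete_matching" replaces only the
+# partitions that receive new data, leaving other partitions untouched
+mtcars \%>\%
+  group_by(cyl, gear) \%>\%
+  write_dataset(two_levels_tree_2, existing_data_behavior = "delete_matching")
+list.files(two_levels_tree_2, recursive = TRUE)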
+\dontshow{\}) # examplesIf}
+}
diff --git a/src/arrow/r/man/write_feather.Rd b/src/arrow/r/man/write_feather.Rd
new file mode 100644
index 000000000..c6273b61b
--- /dev/null
+++ b/src/arrow/r/man/write_feather.Rd
@@ -0,0 +1,61 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/feather.R
+\name{write_feather}
+\alias{write_feather}
+\title{Write data in the Feather format}
+\usage{
+write_feather(
+ x,
+ sink,
+ version = 2,
+ chunk_size = 65536L,
+ compression = c("default", "lz4", "uncompressed", "zstd"),
+ compression_level = NULL
+)
+}
+\arguments{
+\item{x}{\code{data.frame}, \link{RecordBatch}, or \link{Table}}
+
+\item{sink}{A string file path, URI, or \link{OutputStream}, or path in a file
+system (\code{SubTreeFileSystem})}
+
+\item{version}{integer Feather file version. Version 2 is the current version;
+version 1 is the more limited legacy format.}
+
+\item{chunk_size}{For V2 files, the number of rows that each chunk of data
+should have in the file. Use a smaller \code{chunk_size} when you need faster
+random row access. Default is 64K. This option is not supported for V1.}
+
+\item{compression}{Name of compression codec to use, if any. Default is
+"lz4" if LZ4 is available in your build of the Arrow C++ library, otherwise
+"uncompressed". "zstd" is the other available codec and generally has better
+compression ratios in exchange for slower read and write performance.
+See \code{\link[=codec_is_available]{codec_is_available()}}. This option is not supported for V1.}
+
+\item{compression_level}{If \code{compression} is "zstd", you may
+specify an integer compression level. If omitted, the compression codec's
+default compression level is used.}
+}
+\value{
+The input \code{x}, invisibly. Note that if \code{sink} is an \link{OutputStream},
+the stream will be left open.
+}
+\description{
+Feather provides binary columnar serialization for data frames.
+It is designed to make reading and writing data frames efficient,
+and to make sharing data across data analysis languages easy.
+This function writes both the original, limited specification of the format
+and the version 2 specification, which is the Apache Arrow IPC file format.
+}
+\examples{
+\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+tf <- tempfile()
+on.exit(unlink(tf))
+write_feather(mtcars, tf)
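+
+# A sketch: write with zstd compression if your Arrow build supports it
+if (codec_is_available("zstd")) {
+  write_feather(mtcars, tf, compression = "zstd")
+}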
+\dontshow{\}) # examplesIf}
+}
+\seealso{
+\link{RecordBatchWriter} for lower-level access to writing Arrow IPC data.
+
+\link{Schema} for information about schemas and metadata handling.
+}
diff --git a/src/arrow/r/man/write_ipc_stream.Rd b/src/arrow/r/man/write_ipc_stream.Rd
new file mode 100644
index 000000000..2f215f25f
--- /dev/null
+++ b/src/arrow/r/man/write_ipc_stream.Rd
@@ -0,0 +1,45 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/deprecated.R, R/ipc_stream.R
+\name{write_arrow}
+\alias{write_arrow}
+\alias{write_ipc_stream}
+\title{Write Arrow IPC stream format}
+\usage{
+write_arrow(x, sink, ...)
+
+write_ipc_stream(x, sink, ...)
+}
+\arguments{
+\item{x}{\code{data.frame}, \link{RecordBatch}, or \link{Table}}
+
+\item{sink}{A string file path, URI, or \link{OutputStream}, or path in a file
+system (\code{SubTreeFileSystem})}
+
+\item{...}{extra parameters passed to \code{write_feather()}.}
+}
+\value{
+\code{x}, invisibly.
+}
+\description{
+Apache Arrow defines two formats for \href{https://arrow.apache.org/docs/format/Columnar.html#serialization-and-interprocess-communication-ipc}{serializing data for interprocess communication (IPC)}:
+a "stream" format and a "file" format, known as Feather. \code{write_ipc_stream()}
+and \code{\link[=write_feather]{write_feather()}} write those formats, respectively.
+}
+\details{
+\code{write_arrow()}, a wrapper around \code{write_ipc_stream()} and \code{write_feather()}
+with some nonstandard behavior, is deprecated. You should explicitly choose
+the function that will write the desired IPC format (stream or file) since
+either can be written to a file or \code{OutputStream}.
+}
+\examples{
+\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+tf <- tempfile()
+on.exit(unlink(tf))
+write_ipc_stream(mtcars, tf)
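+
+# A sketch of reading the stream back as a data frame
+# (read_ipc_stream() is the matching reader in this package):
+head(read_ipc_stream(tf))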
+\dontshow{\}) # examplesIf}
+}
+\seealso{
+\code{\link[=write_feather]{write_feather()}} for writing IPC files. \code{\link[=write_to_raw]{write_to_raw()}} to
+serialize data to a buffer.
+\link{RecordBatchWriter} for a lower-level interface.
+}
diff --git a/src/arrow/r/man/write_parquet.Rd b/src/arrow/r/man/write_parquet.Rd
new file mode 100644
index 000000000..d7147f7e8
--- /dev/null
+++ b/src/arrow/r/man/write_parquet.Rd
@@ -0,0 +1,108 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/parquet.R
+\name{write_parquet}
+\alias{write_parquet}
+\title{Write Parquet file to disk}
+\usage{
+write_parquet(
+ x,
+ sink,
+ chunk_size = NULL,
+ version = NULL,
+ compression = default_parquet_compression(),
+ compression_level = NULL,
+ use_dictionary = NULL,
+ write_statistics = NULL,
+ data_page_size = NULL,
+ use_deprecated_int96_timestamps = FALSE,
+ coerce_timestamps = NULL,
+ allow_truncated_timestamps = FALSE,
+ properties = NULL,
+ arrow_properties = NULL
+)
+}
+\arguments{
+\item{x}{\code{data.frame}, \link{RecordBatch}, or \link{Table}}
+
+\item{sink}{A string file path, URI, or \link{OutputStream}, or path in a file
+system (\code{SubTreeFileSystem})}
+
+\item{chunk_size}{chunk size in number of rows. If NULL, the total number of rows is used.}
+
+\item{version}{Parquet version, "1.0" or "2.0". Default "1.0". Numeric values
+are coerced to character.}
+
+\item{compression}{compression algorithm. Default "snappy". See details.}
+
+\item{compression_level}{compression level. Meaning depends on the compression algorithm.}
+
+\item{use_dictionary}{Specify if we should use dictionary encoding. Default \code{TRUE}.}
+
+\item{write_statistics}{Specify if we should write statistics. Default \code{TRUE}.}
+
+\item{data_page_size}{Set a target threshold for the approximate encoded
+size of data pages within a column chunk (in bytes). Default 1 MiB.}
+
+\item{use_deprecated_int96_timestamps}{Write timestamps to INT96 Parquet format. Default \code{FALSE}.}
+
+\item{coerce_timestamps}{Cast timestamps to a particular resolution. Can be
+\code{NULL}, "ms" or "us". Default \code{NULL} (no casting).}
+
+\item{allow_truncated_timestamps}{Allow loss of data when coercing timestamps to a
+particular resolution. For example, if microsecond or nanosecond data is lost when
+coercing to "ms", do not raise an exception.}
+
+\item{properties}{A \code{ParquetWriterProperties} object, used instead of the options
+enumerated in this function's signature. Providing \code{properties} as an argument
+is deprecated; if you need to assemble \code{ParquetWriterProperties} outside
+of \code{write_parquet()}, use \code{ParquetFileWriter} instead.}
+
+\item{arrow_properties}{A \code{ParquetArrowWriterProperties} object. Like
+\code{properties}, this argument is deprecated.}
+}
+\value{
+The input \code{x}, invisibly.
+}
+\description{
+\href{https://parquet.apache.org/}{Parquet} is a columnar storage file format.
+This function enables you to write Parquet files from R.
+}
+\details{
+Due to features of the format, Parquet files cannot be appended to.
+If you want to use the Parquet format but also want the ability to extend
+your dataset, you can write to additional Parquet files and then treat
+the whole directory of files as a \link{Dataset} you can query.
+See \code{vignette("dataset", package = "arrow")} for examples of this.
+
+The parameters \code{compression}, \code{compression_level}, \code{use_dictionary} and
+\code{write_statistics} support various patterns:
+\itemize{
+\item The default \code{NULL} leaves the parameter unspecified, and the C++ library
+uses an appropriate default for each column (the defaults listed above)
+\item A single unnamed value (e.g. a single string for \code{compression}) applies to all columns
+\item An unnamed vector of the same size as the number of columns specifies a
+value for each column, in positional order
+\item A named vector specifies the value for the named columns; the default
+value for the setting is used for columns not named (see the sketch in the examples)
+}
+
+The \code{compression} argument can be any of the following (case insensitive):
+"uncompressed", "snappy", "gzip", "brotli", "zstd", "lz4", "lzo" or "bz2".
+Only "uncompressed" is guaranteed to be available, but "snappy" and "gzip"
+are almost always included. See \code{\link[=codec_is_available]{codec_is_available()}}.
+The default "snappy" is used if available, otherwise "uncompressed". To
+disable compression, set \code{compression = "uncompressed"}.
+Note that "uncompressed" columns may still have dictionary encoding.
+}
+\examples{
+\dontshow{if (arrow_with_parquet()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+tf1 <- tempfile(fileext = ".parquet")
+write_parquet(data.frame(x = 1:5), tf1)
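+
+# Parquet files cannot be appended to. A sketch of the workaround from
+# Details: write additional files into one directory, then treat the
+# directory as a Dataset (file names here are illustrative):
+ds_dir <- tempfile()
+dir.create(ds_dir)
+write_parquet(data.frame(x = 1:5), file.path(ds_dir, "part-1.parquet"))
+write_parquet(data.frame(x = 6:10), file.path(ds_dir, "part-2.parquet"))
+# open_dataset(ds_dir) would then expose both files as one queryable dataset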
+
+# using compression
+if (codec_is_available("gzip")) {
+ tf2 <- tempfile(fileext = ".gz.parquet")
+ write_parquet(data.frame(x = 1:5), tf2, compression = "gzip", compression_level = 5)
+}
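+
+# A sketch of the named-vector pattern described in Details: disable
+# dictionary encoding for column y only, leaving the default for x
+# (the column names here are illustrative):
+tf3 <- tempfile(fileext = ".parquet")
+write_parquet(data.frame(x = 1:5, y = letters[1:5]), tf3, use_dictionary = c(y = FALSE))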
+\dontshow{\}) # examplesIf}
+}
diff --git a/src/arrow/r/man/write_to_raw.Rd b/src/arrow/r/man/write_to_raw.Rd
new file mode 100644
index 000000000..a3c6e324b
--- /dev/null
+++ b/src/arrow/r/man/write_to_raw.Rd
@@ -0,0 +1,28 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/ipc_stream.R
+\name{write_to_raw}
+\alias{write_to_raw}
+\title{Write Arrow data to a raw vector}
+\usage{
+write_to_raw(x, format = c("stream", "file"))
+}
+\arguments{
+\item{x}{\code{data.frame}, \link{RecordBatch}, or \link{Table}}
+
+\item{format}{one of \code{c("stream", "file")}, indicating the IPC format to use}
+}
+\value{
+A \code{raw} vector containing the bytes of the IPC serialized data.
+}
+\description{
+\code{\link[=write_ipc_stream]{write_ipc_stream()}} and \code{\link[=write_feather]{write_feather()}} write data to a sink and return
+the data (\code{data.frame}, \code{RecordBatch}, or \code{Table}) they were given.
+This function wraps those so that you can serialize data to a buffer and
+access that buffer as a \code{raw} vector in R.
+}
+\examples{
+\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+# The default format is "stream"
+mtcars_raw <- write_to_raw(mtcars)
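+
+# A sketch of the "file" format; the result is plain bytes, so base R
+# functions such as length() apply:
+mtcars_raw_file <- write_to_raw(mtcars, format = "file")
+length(mtcars_raw_file)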
+\dontshow{\}) # examplesIf}
+}